1 | /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_CORE_KERNELS_META_SUPPORT_H_ |
17 | #define TENSORFLOW_CORE_KERNELS_META_SUPPORT_H_ |
18 | |
19 | #include "meta/multi_thread_gemm.h" |
20 | #include "meta/multi_thread_transform.h" |
21 | #include "meta/quantized_mul_kernels.h" |
22 | #include "meta/streams.h" |
23 | #include "meta/transform_kernels.h" |
24 | |
25 | #include "tensorflow/core/framework/numeric_types.h" |
26 | |
27 | namespace tensorflow { |
28 | |
// Forward declaration only: the functions declared in this header take
// OpKernelContext by pointer, so the complete class definition is not needed.
29 | class OpKernelContext; |
30 | |
31 | namespace meta { |
32 | |
33 | // Gemmlowp/meta is a small library of optimized Arm32/64 kernels for quantized |
34 | // matrix multiplication and other quantized computations. |
35 | |
36 | // Sets the maximum number of computation threads that the internal worker |
37 | // pool may use. If num_threads is 0, intra_op_parallelism_threads is used. |
38 | void SetNumThreads(int num_threads); |
39 | |
// Presumably returns the thread limit configured through SetNumThreads.
// NOTE(review): confirm against the implementation, which is not visible here.
40 | int GetNumThreads(); |
41 | |
42 | // Selects which worker pool the computations run on. When use_local_context |
43 | // is false, they use the worker pool passed in with each OpKernelContext. |
44 | // When it is true, the OpKernelContext is ignored and the internal optimized |
45 | // worker pool is used instead. |
46 | // |
47 | // The internal worker pool is disabled by default (false). |
48 | void SetUseLocalContext(bool use_local_context); |
49 | |
// Presumably returns the flag last set through SetUseLocalContext.
// NOTE(review): confirm against the implementation, which is not visible here.
50 | bool GetUseLocalContext(); |
51 | |
52 | // Switches the codepath on or off. On by default (true) on supported platforms. |
53 | void SetEnabled(bool enabled); |
54 | |
55 | // Returns true if the codepath is both supported and enabled. Call this before |
56 | // calling the compute functions: if the codepath is not supported and any of |
57 | // the compute functions is called, the library will log a FATAL error. |
58 | bool IsSupportedAndEnabled(); |
59 | |
60 | // Computes the quantized matrix multiplication: |
61 | // |
62 | // for (i, j) in [0, m) x [0, n) do |
63 | // c_data[i, j] := |
64 | // sum((a_data[i, l] + offset_a) * (b_data[l, j] + offset_b)) : l in [0, k) |
65 | // |
66 | // If transpose_a is false the lhs operand (a_data) has row-major layout, |
67 | // otherwise column-major. Similarly, transpose_b describes the layout of the |
68 | // rhs operand (b_data). lda, ldb, and ldc are the strides of the lhs operand, |
69 | // the rhs operand, and the result array (c_data), respectively. |
// Per the SetUseLocalContext notes, context supplies the worker pool unless
// the internal worker pool has been enabled.
70 | void QuantizedGemm(OpKernelContext* context, bool transpose_a, bool transpose_b, |
71 | const quint8* a_data, const quint8* b_data, qint32* c_data, |
72 | int m, int n, int k, int offset_a, int offset_b, int lda, |
73 | int ldb, int ldc); |
74 | |
75 | // Takes an array of numbers from the range [input_min, input_max], quantized |
76 | // uniformly to int32 values, recovers their float values, and then quantizes |
77 | // them back uniformly to the range [output_min, output_max] as uint8, |
78 | // saturating the uint8 values. |
// NOTE(review): count is presumably the number of elements in both input and
// output — confirm against the implementation.
79 | void Requantize(OpKernelContext* context, const qint32* input, int count, |
80 | float input_min, float input_max, float output_min, |
81 | float output_max, quint8* output); |
82 | |
83 | // Takes an array of numbers from the range [range_min, range_max], quantized |
84 | // uniformly to uint8 values, and recovers their float values. |
// NOTE(review): count is presumably the number of elements in both input and
// output — confirm against the implementation.
85 | void Dequantize(OpKernelContext* context, const quint8* input, int count, |
86 | float range_min, float range_max, float* output); |
87 | |
88 | // Takes an array of float values and quantizes them uniformly to the range |
89 | // [range_min, range_max] expressed as uint8, saturating the uint8 values. |
// The first parameter is named context to match the sibling declarations in
// this header; naming it does not change the function's signature.
// NOTE(review): count is presumably the number of elements in both input and
// output — confirm against the implementation.
90 | void Quantize(OpKernelContext* context, const float* input, int count, |
91 | float range_min, float range_max, quint8* output); |
92 | |
93 | // Takes two arrays, the inputs and the bias, quantized uniformly as uint8 |
94 | // values in the ranges [input_min, input_max] and [bias_min, bias_max] |
95 | // respectively. Recovers their float values, adds them, and quantizes the |
96 | // sums back uniformly to the range [output_min, output_max] as int32, |
97 | // saturating the int32 values. |
// NOTE(review): how the bias_count bias values are paired with the input_count
// input values is not stated here — check the implementation before relying on it.
98 | void QuantizedBiasAdd(OpKernelContext* context, const quint8* input, |
99 | int input_count, const quint8* bias, int bias_count, |
100 | float input_min, float input_max, float bias_min, |
101 | float bias_max, float output_min, float output_max, |
102 | qint32* output); |
103 | |
104 | // Takes an array of uint8 values and clamps them to the range [clamp_min, |
105 | // clamp_max]. |
// NOTE(review): input_count is presumably the number of elements in both input
// and output — confirm against the implementation.
106 | void Clamp(OpKernelContext* context, const quint8* input, int input_count, |
107 | quint8 clamp_min, quint8 clamp_max, quint8* output); |
108 | |
109 | } // namespace meta |
110 | } // namespace tensorflow |
111 | |
112 | #endif // TENSORFLOW_CORE_KERNELS_META_SUPPORT_H_ |
113 | |