1 | // Copyright 2016 The Gemmlowp Authors. All Rights Reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #ifndef GEMMLOWP_META_TRANSFORM_KERNELS_H_ |
16 | #define GEMMLOWP_META_TRANSFORM_KERNELS_H_ |
17 | |
18 | #include "base.h" |
19 | |
20 | namespace gemmlowp { |
21 | namespace meta { |
22 | |
// Parameter block for the "Quantize" 1D transform. The type doubles as the tag
// that selects the Transform1DKernel / Transform1DUtil specializations
// declared further down in this header.
//
// The struct is a plain aggregate: members are left uninitialized unless the
// caller sets them.
//
// NOTE(review): the meaning of the range_* fields is inferred from their names
// only; confirm against the kernels that actually consume them.
struct Quantize {
  // Presumably the lower bound of the floating-point range -- TODO confirm.
  float range_min;
  // Presumably an additive offset applied during conversion -- TODO confirm.
  float range_offset;
  // Presumably the multiplicative scale of the conversion -- TODO confirm.
  float range_scale;
  // Element count; Transform1DUtil<..., Quantize>::EstimateComputeCost
  // multiplies this value by a fixed per-element weight.
  int count;
};
29 | |
// Parameter block for the "Dequantize" 1D transform. The type doubles as the
// tag that selects the Transform1DKernel / Transform1DUtil specializations
// declared further down in this header.
//
// The struct is a plain aggregate: members are left uninitialized unless the
// caller sets them.
//
// NOTE(review): the meaning of the range_* fields is inferred from their names
// only; confirm against the kernels that actually consume them.
struct Dequantize {
  // Presumably the lower bound of the floating-point range -- TODO confirm.
  float range_min;
  // Presumably an additive offset applied during conversion -- TODO confirm.
  float range_offset;
  // Presumably the multiplicative scale of the conversion -- TODO confirm.
  float range_scale;
  // Element count; Transform1DUtil<..., Dequantize>::EstimateComputeCost
  // multiplies this value by a fixed per-element weight.
  int count;
};
36 | |
// Parameter block for the "Requantize" 1D transform. The type doubles as the
// tag that selects the Transform1DKernel / Transform1DUtil specializations
// declared further down in this header.
//
// The struct is a plain aggregate: members are left uninitialized unless the
// caller sets them.
//
// NOTE(review): the meaning of the range fields is inferred from their names
// only; confirm against the kernels that actually consume them.
struct Requantize {
  // Input-side range description (presumably min / offset / scale of the
  // source representation -- TODO confirm).
  float input_range_min;
  float input_range_offset;
  float input_range_scale;

  // Output-side range description. The last field is, judging by its name,
  // the reciprocal of the output scale -- TODO confirm.
  float output_range_min;
  float output_range_offset;
  float one_over_output_range_scale;

  // Element count; Transform1DUtil<..., Requantize>::EstimateComputeCost
  // multiplies this value by a fixed per-element weight.
  int count;
};
46 | |
// Parameter block for the "MinMax" 1D transform, templated on the type of the
// two bounds. The type doubles as the tag that selects the Transform1DKernel /
// Transform1DUtil specializations declared further down in this header.
//
// The struct is a plain aggregate: members are left uninitialized unless the
// caller sets them.
template <typename Type>
struct MinMax {
  // Bounds of type `Type`. Presumably the limits values are clamped to --
  // TODO confirm against the kernels that consume them.
  Type min;
  Type max;
  // Element count; Transform1DUtil<..., MinMax<...>>::EstimateComputeCost
  // multiplies this value by a fixed per-element weight.
  int count;
};
53 | |
// Parameter block for the "BiasAdd" 1D transform, templated on the element
// type of the bias buffer. The type doubles as the tag that selects the
// Transform1DKernel specialization declared further down in this header.
//
// The struct is a plain aggregate: members are left uninitialized unless the
// caller sets them.
//
// NOTE(review): the meaning of the range fields, `count` and `rows` is
// inferred from their names only; confirm against the kernels that actually
// consume them.
template <typename BiasType>
struct BiasAdd {
  // Range description of the input operand.
  float input_range_min;
  float input_range_offset;
  float input_range_scale;

  // Range description of the bias operand.
  float bias_range_min;
  float bias_range_offset;
  float bias_range_scale;

  // Range description of the result. The last field is, judging by its name,
  // the reciprocal of the output scale -- TODO confirm.
  float output_range_min;
  float output_range_offset;
  float one_over_output_range_scale;

  // Presumably elements per row and number of rows -- TODO confirm.
  int count;
  int rows;

  // Bias values; only readable through this pointer. Ownership and required
  // lifetime are not specified in this header.
  const BiasType* bias;
};
69 | |
70 | template <typename InType, typename OutType, int kernel_size, int leftovers> |
71 | class Transform1DKernel<InType, OutType, Quantize, kernel_size, leftovers> { |
72 | public: |
73 | static void Transform(const InType* in, const Quantize& params, |
74 | OutType* output) { |
75 | #ifdef DEBUG |
76 | #ifdef DEBUG_METAGEMM_VERBOSE |
77 | std::cout << "Quantize::Transform(" << std::string(typeid(InType).name()) |
78 | << ", " << std::string(typeid(OutType).name()) << ") -- " |
79 | << kernel_size << "x" << leftovers << std::endl; |
80 | #endif |
81 | #else |
82 | std::cerr << "FATAL: Quantize::Transform not implemented." << std::endl; |
83 | std::exit(1); |
84 | #endif |
85 | } |
86 | }; |
87 | |
88 | template <typename InType, typename OutType, int kernel_size, int leftovers> |
89 | class Transform1DKernel<InType, OutType, Dequantize, kernel_size, leftovers> { |
90 | public: |
91 | static void Transform(const InType* in, const Dequantize& params, |
92 | OutType* output) { |
93 | #ifdef DEBUG |
94 | #ifdef DEBUG_METAGEMM_VERBOSE |
95 | std::cout << "Dequantize::Transform(" << std::string(typeid(InType).name()) |
96 | << ", " << std::string(typeid(OutType).name()) << ") -- " |
97 | << kernel_size << "x" << leftovers << std::endl; |
98 | #endif |
99 | #else |
100 | std::cerr << "FATAL: Dequantize::Transform not implemented." << std::endl; |
101 | std::exit(1); |
102 | #endif |
103 | } |
104 | }; |
105 | |
106 | template <typename InType, typename OutType, int kernel_size, int leftovers> |
107 | class Transform1DKernel<InType, OutType, Requantize, kernel_size, leftovers> { |
108 | public: |
109 | static void Transform(const InType* in, const Requantize& params, |
110 | OutType* output) { |
111 | #ifdef DEBUG |
112 | #ifdef DEBUG_METAGEMM_VERBOSE |
113 | std::cout << "Requantize::Transform(" << std::string(typeid(InType).name()) |
114 | << ", " << std::string(typeid(OutType).name()) << ") -- " |
115 | << kernel_size << "x" << leftovers << std::endl; |
116 | #endif |
117 | #else |
118 | std::cerr << "FATAL: Requantize::Transform not implemented." << std::endl; |
119 | std::exit(1); |
120 | #endif |
121 | } |
122 | }; |
123 | |
124 | template <typename InType, typename OutType, int kernel_size, int leftovers, |
125 | typename Type> |
126 | class Transform1DKernel<InType, OutType, MinMax<Type>, kernel_size, leftovers> { |
127 | public: |
128 | static void Transform(const InType* in, const MinMax<Type>& params, |
129 | OutType* output) { |
130 | #ifdef DEBUG |
131 | #ifdef DEBUG_METAGEMM_VERBOSE |
132 | std::cout << "MinMax::Transform(" << std::string(typeid(InType).name()) |
133 | << ", " << std::string(typeid(OutType).name()) << ") -- " |
134 | << kernel_size << "x" << leftovers << std::endl; |
135 | #endif |
136 | #else |
137 | std::cerr << "FATAL: MinMax::Transform not implemented." << std::endl; |
138 | std::exit(1); |
139 | #endif |
140 | } |
141 | }; |
142 | |
143 | template <typename InType, typename OutType, int kernel_size, int leftovers, |
144 | typename Type> |
145 | class Transform1DKernel<InType, OutType, BiasAdd<Type>, kernel_size, |
146 | leftovers> { |
147 | public: |
148 | static void Transform(const InType* in, const BiasAdd<Type>& params, |
149 | OutType* output) { |
150 | #ifdef DEBUG |
151 | #ifdef DEBUG_METAGEMM_VERBOSE |
152 | std::cout << "BiasAdd::Transform(" << std::string(typeid(InType).name()) |
153 | << ", " << std::string(typeid(OutType).name()) << ") -- " |
154 | << kernel_size << "x" << leftovers << std::endl; |
155 | #endif |
156 | #else |
157 | std::cerr << "FATAL: BiasAdd::Transform not implemented." << std::endl; |
158 | std::exit(1); |
159 | #endif |
160 | } |
161 | }; |
162 | |
163 | template <typename InType, typename OutType> |
164 | class Transform1DUtil<InType, OutType, Quantize> { |
165 | public: |
166 | static int EstimateComputeCost(const Quantize& params) { |
167 | return params.count * 8; |
168 | } |
169 | |
170 | static const InType* OffsetInput(const Quantize& params, const InType* input, |
171 | int offset) { |
172 | return input + offset; |
173 | } |
174 | |
175 | static OutType* OffsetOutput(const Quantize& params, OutType* output, |
176 | int offset) { |
177 | return output + offset; |
178 | } |
179 | }; |
180 | |
181 | template <typename InType, typename OutType> |
182 | class Transform1DUtil<InType, OutType, Requantize> { |
183 | public: |
184 | static int EstimateComputeCost(const Requantize& params) { |
185 | return params.count * 12; |
186 | } |
187 | |
188 | static const InType* OffsetInput(const Requantize& params, |
189 | const InType* input, int offset) { |
190 | return input + offset; |
191 | } |
192 | |
193 | static OutType* OffsetOutput(const Requantize& params, OutType* output, |
194 | int offset) { |
195 | return output + offset; |
196 | } |
197 | }; |
198 | |
199 | template <typename InType, typename OutType> |
200 | class Transform1DUtil<InType, OutType, Dequantize> { |
201 | public: |
202 | static int EstimateComputeCost(const Dequantize& params) { |
203 | return params.count * 12; |
204 | } |
205 | |
206 | static const InType* OffsetInput(const Dequantize& params, |
207 | const InType* input, int offset) { |
208 | return input + offset; |
209 | } |
210 | |
211 | static OutType* OffsetOutput(const Dequantize& params, OutType* output, |
212 | int offset) { |
213 | return output + offset; |
214 | } |
215 | }; |
216 | |
217 | template <typename InType, typename OutType, typename MinMaxType> |
218 | class Transform1DUtil<InType, OutType, MinMax<MinMaxType>> { |
219 | public: |
220 | static int EstimateComputeCost(const MinMax<MinMaxType>& params) { |
221 | return params.count * 4; |
222 | } |
223 | |
224 | static const InType* OffsetInput(const MinMax<MinMaxType>& params, |
225 | const InType* input, int offset) { |
226 | return input + offset; |
227 | } |
228 | |
229 | static OutType* OffsetOutput(const MinMax<MinMaxType>& params, |
230 | OutType* output, int offset) { |
231 | return output + offset; |
232 | } |
233 | }; |
234 | |
235 | } // namespace meta |
236 | } // namespace gemmlowp |
237 | |
238 | #ifdef GEMMLOWP_NEON_32 |
239 | #include "transform_kernels_arm_32.h" |
240 | #elif defined(GEMMLOWP_NEON_64) |
241 | #include "transform_kernels_arm_64.h" |
242 | #endif |
243 | |
244 | #endif // GEMMLOWP_META_TRANSFORM_KERNELS_H_ |
245 | |