1 | // Copyright 2016 The Gemmlowp Authors. All Rights Reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #ifndef GEMMLOWP_META_STREAMS_H_ |
16 | #define GEMMLOWP_META_STREAMS_H_ |
17 | |
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <typeinfo>
#include "base.h"
21 | |
22 | namespace gemmlowp { |
23 | namespace meta { |
24 | |
// Parameter bundle that selects the RowMajor specializations of StreamUtil
// and Stream below. Plain aggregate; all members are public.
struct RowMajor {
  // Element count of the stream. In this file it is only reported by
  // Stream<..., RowMajor>::Debug().
  int count;
  // Stride of the stream. StreamUtil<InType, RowMajor>::Offset multiplies it
  // by `offset_stride` and applies the product as a byte offset.
  int stride;
};
30 | |
// Parameter bundle that selects the RowMajorWithSum specializations of
// StreamUtil and Stream below. Plain aggregate; all members are public.
struct RowMajorWithSum {
  // Element count of the stream; rounded up to the pack size when the packed
  // stride / scratch size is computed.
  int count;
  // Stride of the stream. StreamUtil<InType, RowMajorWithSum>::Offset
  // multiplies it by `offset_stride` and applies the product as a byte offset.
  int stride;
  // Sum-adjustment terms. The generic code in this file only prints them from
  // Debug(); presumably they are consumed by the architecture-specific Pack
  // implementations -- TODO confirm.
  int multiplicative_sum_offset;
  int additive_sum_offset;
};
38 | |
// Parameter bundle that selects the ColumnMajorWithSum specializations of
// StreamUtil and Stream below. Plain aggregate; all members are public.
struct ColumnMajorWithSum {
  // Element count of the stream; rounded up to the pack size when the packed
  // stride / scratch size is computed.
  int count;
  // Stride of the stream. StreamUtil<InType, ColumnMajorWithSum>::Offset
  // multiplies it by `offset_advance` and applies the product as a byte
  // offset.
  int stride;
  // Sum-adjustment terms. The generic code in this file only prints them from
  // Debug(); presumably they are consumed by the architecture-specific Pack
  // implementations -- TODO confirm.
  int multiplicative_sum_offset;
  int additive_sum_offset;
};
46 | |
47 | template <typename InType> |
48 | class StreamUtil<InType, RowMajor> { |
49 | public: |
50 | static const InType* Offset(const RowMajor& params, const InType* source, |
51 | int offset_stride, int offset_advance) { |
52 | return reinterpret_cast<const InType*>( |
53 | reinterpret_cast<const std::uint8_t*>(source) + |
54 | offset_stride * params.stride + offset_advance * sizeof(InType)); |
55 | } |
56 | |
57 | static InType* Offset(const RowMajor& params, InType* source, |
58 | int offset_stride, int offset_advance) { |
59 | return reinterpret_cast<InType*>(reinterpret_cast<std::uint8_t*>(source) + |
60 | offset_stride * params.stride + |
61 | offset_advance * sizeof(InType)); |
62 | } |
63 | |
64 | static int Scratch(const RowMajor& params, int lanes_count, int pack_size) { |
65 | return AlignTo<64>(lanes_count * AlignTo(pack_size, params.stride)); |
66 | } |
67 | }; |
68 | |
69 | template <typename InType> |
70 | class StreamUtil<InType, RowMajorWithSum> { |
71 | public: |
72 | static const InType* Offset(const RowMajorWithSum& params, |
73 | const InType* source, int offset_stride, |
74 | int offset_advance) { |
75 | return reinterpret_cast<const InType*>( |
76 | reinterpret_cast<const std::uint8_t*>(source) + |
77 | offset_stride * params.stride + offset_advance * sizeof(InType)); |
78 | } |
79 | |
80 | static InType* Offset(const RowMajorWithSum& params, InType* source, |
81 | int offset_stride, int offset_advance) { |
82 | return reinterpret_cast<InType*>(reinterpret_cast<std::uint8_t*>(source) + |
83 | offset_stride * params.stride + |
84 | offset_advance * sizeof(InType)); |
85 | } |
86 | |
87 | static int Scratch(const RowMajorWithSum& params, int lanes_count, |
88 | int pack_size) { |
89 | return 32 + AlignTo<32>(sizeof(InType) * lanes_count * |
90 | AlignTo(pack_size, params.count)); |
91 | } |
92 | }; |
93 | |
94 | template <typename InType> |
95 | class StreamUtil<InType, ColumnMajorWithSum> { |
96 | public: |
97 | static const InType* Offset(const ColumnMajorWithSum& params, |
98 | const InType* source, int offset_stride, |
99 | int offset_advance) { |
100 | return reinterpret_cast<const InType*>( |
101 | reinterpret_cast<const std::uint8_t*>(source) + |
102 | params.stride * offset_advance + offset_stride * sizeof(InType)); |
103 | } |
104 | |
105 | static const InType* Offset(const ColumnMajorWithSum& params, InType* source, |
106 | int offset_stride, int offset_advance) { |
107 | return reinterpret_cast<InType*>(reinterpret_cast<std::uint8_t*>(source) + |
108 | params.stride * offset_advance + |
109 | offset_stride * sizeof(InType)); |
110 | } |
111 | |
112 | static int Scratch(const ColumnMajorWithSum& params, int lanes_count, |
113 | int pack_size) { |
114 | return 32 + AlignTo<32>(sizeof(InType) * lanes_count * |
115 | AlignTo(pack_size, params.count)); |
116 | } |
117 | }; |
118 | |
119 | template <typename InType, int lanes_count, int pack_size, int leftovers> |
120 | class Stream<InType, lanes_count, pack_size, leftovers, RowMajor> { |
121 | public: |
122 | static void Pack(const InType* in, const RowMajor& params, InType* out) { |
123 | #ifdef DEBUG |
124 | #ifdef DEBUG_METAGEMM_VERBOSE |
125 | std::cout << "RowMajor(" << std::string(typeid(InType).name()) |
126 | << ")::Pack() -- " << lanes_count << "x" << pack_size << " + " |
127 | << leftovers << std::endl; |
128 | #endif |
129 | #else |
130 | if (lanes_count != 0) { |
131 | std::cerr << "FATAL: RowMajorWithSum::Pack not implemented." << std::endl; |
132 | std::exit(1); |
133 | } |
134 | #endif |
135 | } |
136 | |
137 | static int UnpackedAdvance(const RowMajor& params) { |
138 | return sizeof(InType) * pack_size; |
139 | } |
140 | |
141 | static int PackedAdvance(const RowMajor& params) { |
142 | return sizeof(InType) * pack_size * lanes_count; |
143 | } |
144 | |
145 | static int UnpackedStride(const RowMajor& params) { |
146 | return lanes_count * params.stride; |
147 | } |
148 | |
149 | static int PackedStride(const RowMajor& params) { |
150 | return AlignTo<32>(lanes_count * AlignTo<pack_size>(params.stride)); |
151 | } |
152 | |
153 | static int Scratch(const RowMajor& params) { return PackedStride(params); } |
154 | |
155 | #ifdef DEBUG |
156 | #ifdef DEBUG_METAGEMM_VERBOSE |
157 | static void Debug(const RowMajor& params) { |
158 | std::cout << "RowMajor(" << typeid(InType).name() << ")" << std::endl; |
159 | std::cout << " dims: " << lanes_count << "x" << pack_size << " + " |
160 | << leftovers << std::endl; |
161 | std::cout << " scratch: " << Scratch(params) << std::endl; |
162 | std::cout << " unpacked advance: " << UnpackedAdvance(params) << std::endl; |
163 | std::cout << " packed advance: " << PackedAdvance(params) << std::endl; |
164 | std::cout << " unpacked stride: " << UnpackedStride(params) << std::endl; |
165 | std::cout << " packed stride: " << PackedStride(params) << std::endl; |
166 | std::cout << " params:" << std::endl; |
167 | std::cout << " count: " << params.count << std::endl; |
168 | std::cout << " stride: " << params.stride << std::endl; |
169 | } |
170 | #endif |
171 | #endif |
172 | }; |
173 | |
174 | template <typename InType, int lanes_count, int pack_size, int leftovers> |
175 | class Stream<InType, lanes_count, pack_size, leftovers, RowMajorWithSum> { |
176 | public: |
177 | static void Pack(const InType* in, const RowMajorWithSum& params, |
178 | InType* out) { |
179 | #ifdef DEBUG |
180 | #ifdef DEBUG_METAGEMM_VERBOSE |
181 | std::cout << "RowMajorWithSum(" << typeid(InType).name() << ")::Pack() -- " |
182 | << lanes_count << "x" << pack_size << " + " << leftovers |
183 | << std::endl; |
184 | #endif |
185 | #else |
186 | if (lanes_count != 0) { |
187 | std::cerr << "FATAL: RowMajorWithSum::Pack not implemented." << std::endl; |
188 | std::exit(1); |
189 | } |
190 | #endif |
191 | } |
192 | |
193 | static int UnpackedAdvance(const RowMajorWithSum& params) { |
194 | return sizeof(InType) * pack_size; |
195 | } |
196 | |
197 | static int PackedAdvance(const RowMajorWithSum& params) { |
198 | return sizeof(InType) * pack_size * lanes_count; |
199 | } |
200 | |
201 | static int UnpackedStride(const RowMajorWithSum& params) { |
202 | return sizeof(InType) * lanes_count * params.stride; |
203 | } |
204 | |
205 | static int PackedStride(const RowMajorWithSum& params) { |
206 | return 32 + AlignTo<32>(sizeof(InType) * lanes_count * |
207 | AlignTo<pack_size>(params.count)); |
208 | } |
209 | |
210 | static int Scratch(const RowMajorWithSum& params) { |
211 | return PackedStride(params); |
212 | } |
213 | |
214 | #ifdef DEBUG |
215 | #ifdef DEBUG_METAGEMM_VERBOSE |
216 | static void Debug(const RowMajorWithSum& params) { |
217 | std::cout << "RowMajorWithSum(" << typeid(InType).name() << ")" |
218 | << std::endl; |
219 | std::cout << " dims: " << lanes_count << "x" << pack_size << " + " |
220 | << leftovers << std::endl; |
221 | std::cout << " scratch: " << Scratch(params) << std::endl; |
222 | std::cout << " unpacked advance: " << UnpackedAdvance(params) << std::endl; |
223 | std::cout << " packed advance: " << PackedAdvance(params) << std::endl; |
224 | std::cout << " unpacked stride: " << UnpackedStride(params) << std::endl; |
225 | std::cout << " packed stride: " << PackedStride(params) << std::endl; |
226 | std::cout << " params:" << std::endl; |
227 | std::cout << " count: " << params.count << std::endl; |
228 | std::cout << " stride: " << params.stride << std::endl; |
229 | std::cout << " multiplicative_sum_offset: " |
230 | << params.multiplicative_sum_offset << std::endl; |
231 | std::cout << " additive_sum_offset: " << params.additive_sum_offset |
232 | << std::endl; |
233 | } |
234 | #endif |
235 | #endif |
236 | }; |
237 | |
238 | template <typename InType, int lanes_count, int pack_size, int leftovers> |
239 | class Stream<InType, lanes_count, pack_size, leftovers, ColumnMajorWithSum> { |
240 | public: |
241 | static void Pack(const InType* in, const ColumnMajorWithSum& params, |
242 | InType* out) { |
243 | #ifdef DEBUG |
244 | #ifdef DEBUG_METAGEMM_VERBOSE |
245 | std::cout << "ColumnMajorWithSum(" << typeid(InType).name() |
246 | << ")::Pack() -- " << lanes_count << "x" << pack_size << " + " |
247 | << leftovers << std::endl; |
248 | #endif |
249 | #else |
250 | if (lanes_count != 0) { |
251 | std::cerr << "FATAL: ColumnMajorWithSum::Pack not implemented." |
252 | << std::endl; |
253 | std::exit(1); |
254 | } |
255 | #endif |
256 | } |
257 | |
258 | static int UnpackedAdvance(const ColumnMajorWithSum& params) { |
259 | return sizeof(InType) * pack_size * params.stride; |
260 | } |
261 | |
262 | static int PackedAdvance(const ColumnMajorWithSum& params) { |
263 | return sizeof(InType) * pack_size * lanes_count; |
264 | } |
265 | |
266 | static int UnpackedStride(const ColumnMajorWithSum& params) { |
267 | return sizeof(InType) * lanes_count; |
268 | } |
269 | |
270 | static int PackedStride(const ColumnMajorWithSum& params) { |
271 | return 32 + AlignTo<32>(sizeof(InType) * lanes_count * |
272 | AlignTo<pack_size>(params.count)); |
273 | } |
274 | |
275 | static int Scratch(const ColumnMajorWithSum& params) { |
276 | return PackedStride(params); |
277 | } |
278 | |
279 | #ifdef DEBUG |
280 | #ifdef DEBUG_METAGEMM_VERBOSE |
281 | static void Debug(const ColumnMajorWithSum& params) { |
282 | std::cout << "ColumnMajorWithSum(" << typeid(InType).name() << ")" |
283 | << std::endl; |
284 | std::cout << " dims: " << lanes_count << "x" << pack_size << " + " |
285 | << leftovers << std::endl; |
286 | std::cout << " scratch: " << Scratch(params) << std::endl; |
287 | std::cout << " unpacked advance: " << UnpackedAdvance(params) << std::endl; |
288 | std::cout << " packed advance: " << PackedAdvance(params) << std::endl; |
289 | std::cout << " unpacked stride: " << UnpackedStride(params) << std::endl; |
290 | std::cout << " packed stride: " << PackedStride(params) << std::endl; |
291 | std::cout << " params:" << std::endl; |
292 | std::cout << " count: " << params.count << std::endl; |
293 | std::cout << " stride: " << params.stride << std::endl; |
294 | std::cout << " multiplicative_sum_offset: " |
295 | << params.multiplicative_sum_offset << std::endl; |
296 | std::cout << " additive_sum_offset: " << params.additive_sum_offset |
297 | << std::endl; |
298 | } |
299 | #endif |
300 | #endif |
301 | }; |
302 | |
303 | } // namespace meta |
304 | } // namespace gemmlowp |
305 | |
306 | #ifdef GEMMLOWP_NEON_32 |
307 | #include "streams_arm_32.h" |
308 | #elif defined(GEMMLOWP_NEON_64) |
309 | #include "streams_arm_64.h" |
310 | #endif |
311 | |
312 | #endif // GEMMLOWP_META_STREAMS_H_ |
313 | |