1// Copyright 2016 The Gemmlowp Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#ifndef GEMMLOWP_META_STREAMS_H_
16#define GEMMLOWP_META_STREAMS_H_
17
18#include <iostream>
19#include <typeinfo>
20#include "base.h"
21
22namespace gemmlowp {
23namespace meta {
24
// Parameter bundle tagging a stream as row-major with no sum bookkeeping.
// Used as the last template argument of the StreamUtil / Stream
// specializations in this file.
struct RowMajor {
  // Reported by Stream<..., RowMajor>::Debug; not otherwise read in this file.
  int count;
  // Multiplied by offset_stride and added to a byte pointer in
  // StreamUtil<InType, RowMajor>::Offset, i.e. used there as a byte distance.
  int stride;
};
30
// Parameter bundle tagging a stream as row-major that additionally carries
// two sum offsets. Used as the last template argument of the StreamUtil /
// Stream specializations in this file.
struct RowMajorWithSum {
  // Rounded up to a multiple of pack_size when sizing the packed buffer
  // (see the Scratch / PackedStride helpers specialized on this type).
  int count;
  // Multiplied by offset_stride and added to a byte pointer in
  // StreamUtil<InType, RowMajorWithSum>::Offset.
  int stride;
  // The two sum offsets are only printed by Debug() in this file; their
  // consumers live elsewhere.
  int multiplicative_sum_offset;
  int additive_sum_offset;
};
38
// Parameter bundle tagging a stream as column-major that additionally
// carries two sum offsets. Used as the last template argument of the
// StreamUtil / Stream specializations in this file.
struct ColumnMajorWithSum {
  // Rounded up to a multiple of pack_size when sizing the packed buffer
  // (see the Scratch / PackedStride helpers specialized on this type).
  int count;
  // Multiplied by offset_advance and added to a byte pointer in
  // StreamUtil<InType, ColumnMajorWithSum>::Offset.
  int stride;
  // The two sum offsets are only printed by Debug() in this file; their
  // consumers live elsewhere.
  int multiplicative_sum_offset;
  int additive_sum_offset;
};
46
47template <typename InType>
48class StreamUtil<InType, RowMajor> {
49 public:
50 static const InType* Offset(const RowMajor& params, const InType* source,
51 int offset_stride, int offset_advance) {
52 return reinterpret_cast<const InType*>(
53 reinterpret_cast<const std::uint8_t*>(source) +
54 offset_stride * params.stride + offset_advance * sizeof(InType));
55 }
56
57 static InType* Offset(const RowMajor& params, InType* source,
58 int offset_stride, int offset_advance) {
59 return reinterpret_cast<InType*>(reinterpret_cast<std::uint8_t*>(source) +
60 offset_stride * params.stride +
61 offset_advance * sizeof(InType));
62 }
63
64 static int Scratch(const RowMajor& params, int lanes_count, int pack_size) {
65 return AlignTo<64>(lanes_count * AlignTo(pack_size, params.stride));
66 }
67};
68
69template <typename InType>
70class StreamUtil<InType, RowMajorWithSum> {
71 public:
72 static const InType* Offset(const RowMajorWithSum& params,
73 const InType* source, int offset_stride,
74 int offset_advance) {
75 return reinterpret_cast<const InType*>(
76 reinterpret_cast<const std::uint8_t*>(source) +
77 offset_stride * params.stride + offset_advance * sizeof(InType));
78 }
79
80 static InType* Offset(const RowMajorWithSum& params, InType* source,
81 int offset_stride, int offset_advance) {
82 return reinterpret_cast<InType*>(reinterpret_cast<std::uint8_t*>(source) +
83 offset_stride * params.stride +
84 offset_advance * sizeof(InType));
85 }
86
87 static int Scratch(const RowMajorWithSum& params, int lanes_count,
88 int pack_size) {
89 return 32 + AlignTo<32>(sizeof(InType) * lanes_count *
90 AlignTo(pack_size, params.count));
91 }
92};
93
94template <typename InType>
95class StreamUtil<InType, ColumnMajorWithSum> {
96 public:
97 static const InType* Offset(const ColumnMajorWithSum& params,
98 const InType* source, int offset_stride,
99 int offset_advance) {
100 return reinterpret_cast<const InType*>(
101 reinterpret_cast<const std::uint8_t*>(source) +
102 params.stride * offset_advance + offset_stride * sizeof(InType));
103 }
104
105 static const InType* Offset(const ColumnMajorWithSum& params, InType* source,
106 int offset_stride, int offset_advance) {
107 return reinterpret_cast<InType*>(reinterpret_cast<std::uint8_t*>(source) +
108 params.stride * offset_advance +
109 offset_stride * sizeof(InType));
110 }
111
112 static int Scratch(const ColumnMajorWithSum& params, int lanes_count,
113 int pack_size) {
114 return 32 + AlignTo<32>(sizeof(InType) * lanes_count *
115 AlignTo(pack_size, params.count));
116 }
117};
118
119template <typename InType, int lanes_count, int pack_size, int leftovers>
120class Stream<InType, lanes_count, pack_size, leftovers, RowMajor> {
121 public:
122 static void Pack(const InType* in, const RowMajor& params, InType* out) {
123#ifdef DEBUG
124#ifdef DEBUG_METAGEMM_VERBOSE
125 std::cout << "RowMajor(" << std::string(typeid(InType).name())
126 << ")::Pack() -- " << lanes_count << "x" << pack_size << " + "
127 << leftovers << std::endl;
128#endif
129#else
130 if (lanes_count != 0) {
131 std::cerr << "FATAL: RowMajorWithSum::Pack not implemented." << std::endl;
132 std::exit(1);
133 }
134#endif
135 }
136
137 static int UnpackedAdvance(const RowMajor& params) {
138 return sizeof(InType) * pack_size;
139 }
140
141 static int PackedAdvance(const RowMajor& params) {
142 return sizeof(InType) * pack_size * lanes_count;
143 }
144
145 static int UnpackedStride(const RowMajor& params) {
146 return lanes_count * params.stride;
147 }
148
149 static int PackedStride(const RowMajor& params) {
150 return AlignTo<32>(lanes_count * AlignTo<pack_size>(params.stride));
151 }
152
153 static int Scratch(const RowMajor& params) { return PackedStride(params); }
154
155#ifdef DEBUG
156#ifdef DEBUG_METAGEMM_VERBOSE
157 static void Debug(const RowMajor& params) {
158 std::cout << "RowMajor(" << typeid(InType).name() << ")" << std::endl;
159 std::cout << " dims: " << lanes_count << "x" << pack_size << " + "
160 << leftovers << std::endl;
161 std::cout << " scratch: " << Scratch(params) << std::endl;
162 std::cout << " unpacked advance: " << UnpackedAdvance(params) << std::endl;
163 std::cout << " packed advance: " << PackedAdvance(params) << std::endl;
164 std::cout << " unpacked stride: " << UnpackedStride(params) << std::endl;
165 std::cout << " packed stride: " << PackedStride(params) << std::endl;
166 std::cout << " params:" << std::endl;
167 std::cout << " count: " << params.count << std::endl;
168 std::cout << " stride: " << params.stride << std::endl;
169 }
170#endif
171#endif
172};
173
174template <typename InType, int lanes_count, int pack_size, int leftovers>
175class Stream<InType, lanes_count, pack_size, leftovers, RowMajorWithSum> {
176 public:
177 static void Pack(const InType* in, const RowMajorWithSum& params,
178 InType* out) {
179#ifdef DEBUG
180#ifdef DEBUG_METAGEMM_VERBOSE
181 std::cout << "RowMajorWithSum(" << typeid(InType).name() << ")::Pack() -- "
182 << lanes_count << "x" << pack_size << " + " << leftovers
183 << std::endl;
184#endif
185#else
186 if (lanes_count != 0) {
187 std::cerr << "FATAL: RowMajorWithSum::Pack not implemented." << std::endl;
188 std::exit(1);
189 }
190#endif
191 }
192
193 static int UnpackedAdvance(const RowMajorWithSum& params) {
194 return sizeof(InType) * pack_size;
195 }
196
197 static int PackedAdvance(const RowMajorWithSum& params) {
198 return sizeof(InType) * pack_size * lanes_count;
199 }
200
201 static int UnpackedStride(const RowMajorWithSum& params) {
202 return sizeof(InType) * lanes_count * params.stride;
203 }
204
205 static int PackedStride(const RowMajorWithSum& params) {
206 return 32 + AlignTo<32>(sizeof(InType) * lanes_count *
207 AlignTo<pack_size>(params.count));
208 }
209
210 static int Scratch(const RowMajorWithSum& params) {
211 return PackedStride(params);
212 }
213
214#ifdef DEBUG
215#ifdef DEBUG_METAGEMM_VERBOSE
216 static void Debug(const RowMajorWithSum& params) {
217 std::cout << "RowMajorWithSum(" << typeid(InType).name() << ")"
218 << std::endl;
219 std::cout << " dims: " << lanes_count << "x" << pack_size << " + "
220 << leftovers << std::endl;
221 std::cout << " scratch: " << Scratch(params) << std::endl;
222 std::cout << " unpacked advance: " << UnpackedAdvance(params) << std::endl;
223 std::cout << " packed advance: " << PackedAdvance(params) << std::endl;
224 std::cout << " unpacked stride: " << UnpackedStride(params) << std::endl;
225 std::cout << " packed stride: " << PackedStride(params) << std::endl;
226 std::cout << " params:" << std::endl;
227 std::cout << " count: " << params.count << std::endl;
228 std::cout << " stride: " << params.stride << std::endl;
229 std::cout << " multiplicative_sum_offset: "
230 << params.multiplicative_sum_offset << std::endl;
231 std::cout << " additive_sum_offset: " << params.additive_sum_offset
232 << std::endl;
233 }
234#endif
235#endif
236};
237
238template <typename InType, int lanes_count, int pack_size, int leftovers>
239class Stream<InType, lanes_count, pack_size, leftovers, ColumnMajorWithSum> {
240 public:
241 static void Pack(const InType* in, const ColumnMajorWithSum& params,
242 InType* out) {
243#ifdef DEBUG
244#ifdef DEBUG_METAGEMM_VERBOSE
245 std::cout << "ColumnMajorWithSum(" << typeid(InType).name()
246 << ")::Pack() -- " << lanes_count << "x" << pack_size << " + "
247 << leftovers << std::endl;
248#endif
249#else
250 if (lanes_count != 0) {
251 std::cerr << "FATAL: ColumnMajorWithSum::Pack not implemented."
252 << std::endl;
253 std::exit(1);
254 }
255#endif
256 }
257
258 static int UnpackedAdvance(const ColumnMajorWithSum& params) {
259 return sizeof(InType) * pack_size * params.stride;
260 }
261
262 static int PackedAdvance(const ColumnMajorWithSum& params) {
263 return sizeof(InType) * pack_size * lanes_count;
264 }
265
266 static int UnpackedStride(const ColumnMajorWithSum& params) {
267 return sizeof(InType) * lanes_count;
268 }
269
270 static int PackedStride(const ColumnMajorWithSum& params) {
271 return 32 + AlignTo<32>(sizeof(InType) * lanes_count *
272 AlignTo<pack_size>(params.count));
273 }
274
275 static int Scratch(const ColumnMajorWithSum& params) {
276 return PackedStride(params);
277 }
278
279#ifdef DEBUG
280#ifdef DEBUG_METAGEMM_VERBOSE
281 static void Debug(const ColumnMajorWithSum& params) {
282 std::cout << "ColumnMajorWithSum(" << typeid(InType).name() << ")"
283 << std::endl;
284 std::cout << " dims: " << lanes_count << "x" << pack_size << " + "
285 << leftovers << std::endl;
286 std::cout << " scratch: " << Scratch(params) << std::endl;
287 std::cout << " unpacked advance: " << UnpackedAdvance(params) << std::endl;
288 std::cout << " packed advance: " << PackedAdvance(params) << std::endl;
289 std::cout << " unpacked stride: " << UnpackedStride(params) << std::endl;
290 std::cout << " packed stride: " << PackedStride(params) << std::endl;
291 std::cout << " params:" << std::endl;
292 std::cout << " count: " << params.count << std::endl;
293 std::cout << " stride: " << params.stride << std::endl;
294 std::cout << " multiplicative_sum_offset: "
295 << params.multiplicative_sum_offset << std::endl;
296 std::cout << " additive_sum_offset: " << params.additive_sum_offset
297 << std::endl;
298 }
299#endif
300#endif
301};
302
303} // namespace meta
304} // namespace gemmlowp
305
306#ifdef GEMMLOWP_NEON_32
307#include "streams_arm_32.h"
308#elif defined(GEMMLOWP_NEON_64)
309#include "streams_arm_64.h"
310#endif
311
312#endif // GEMMLOWP_META_STREAMS_H_
313