1 | /* Copyright 2019 Google LLC. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef RUY_RUY_TRMUL_PARAMS_H_ |
17 | #define RUY_RUY_TRMUL_PARAMS_H_ |
18 | |
19 | #include <cstdint> |
20 | |
21 | #include "ruy/mat.h" |
22 | #include "ruy/mul_params.h" |
23 | #include "ruy/path.h" |
24 | #include "ruy/side_pair.h" |
25 | #include "ruy/tune.h" |
26 | |
27 | namespace ruy { |
28 | |
29 | using RunKernelFn = void(Tuning, const SidePair<PEMat>&, const void*, |
30 | const SidePair<int>&, const SidePair<int>&, EMat*); |
31 | |
32 | using RunPackFn = void(Tuning, const EMat&, PEMat*, int, int); |
33 | |
// Returns the larger of `a` and `b`, usable in constant expressions.
//
// We require c++14, where std::max is already constexpr, so this helper should
// be redundant. It exists because TensorFlow continuous integration uses
// Ubuntu 16, whose libstdc++ does not provide a constexpr std::max.
constexpr int constexpr_max(int a, int b) {
  return (a < b) ? b : a;
}
38 | |
39 | // Under-estimating these values would be caught by a static_assert in |
40 | // StoreMulParams. Over-estimating these values cannot easily be caught, and |
41 | // would cause unnecessary inflation of the TrMulParams data structure. |
42 | constexpr int kMaxMulParamsAlignment = |
43 | constexpr_max(alignof(void*), alignof(double)); |
44 | constexpr int kMaxMulParamsSizeFloatingPointCase = |
45 | sizeof(MulParams<double, double>); |
46 | constexpr int kMaxMulParamsSizeRawIntegerCase = |
47 | sizeof(MulParams<std::int32_t, std::int32_t>); |
48 | constexpr int kMaxMulParamsSizeQuantizedIntegerCase = |
49 | sizeof(MulParams<std::int32_t, std::int16_t>); |
50 | constexpr int kMaxMulParamsSize = |
51 | constexpr_max(kMaxMulParamsSizeFloatingPointCase, |
52 | constexpr_max(kMaxMulParamsSizeRawIntegerCase, |
53 | kMaxMulParamsSizeQuantizedIntegerCase)); |
54 | |
55 | // OK to adjust as needed, but we want to avoid unnecessarily inflating that. |
56 | // Temporarily bumped from 32 to 48 as part of temporarily not using unions |
57 | // in MulParams. |
58 | static_assert(kMaxMulParamsSize <= 48, "" ); |
59 | |
60 | // Type-erased data needed for implementing TrMul. |
61 | struct TrMulParams { |
62 | TrMulParams() : run_pack{nullptr, nullptr}, is_prepacked{false, false} {} |
63 | // Helper functions for invoking the function pointers. |
64 | void RunPack(Side side, Tuning tuning, int start, int end) { |
65 | run_pack[side](tuning, src[side], &packed_matrix[side], start, end); |
66 | } |
67 | void RunKernel(Tuning tuning, const SidePair<int>& start, |
68 | const SidePair<int>& end) { |
69 | run_kernel(tuning, packed_matrix, mul_params_bytes, start, end, &dst); |
70 | } |
71 | |
72 | // path id, can be useful info for some fine-tuning, e.g. to guess reasonable |
73 | // cache sizes when not runtime-detectable. |
74 | Path path; |
75 | |
76 | // Function pointers to type-erased entry points for kernels and packers. |
77 | SidePair<RunPackFn*> run_pack; |
78 | RunKernelFn* run_kernel = nullptr; |
79 | |
80 | // Matrices and packed matrices. |
81 | SidePair<EMat> src; |
82 | EMat dst; |
83 | SidePair<PEMat> packed_matrix; |
84 | SidePair<bool> is_prepacked; |
85 | |
86 | // Bytes underlying the MulParams, used as type-erased storage for MulParams |
87 | // data as it isn't used until we reach the kernel code, where it is casted |
88 | // back to the original MulParams type. |
89 | alignas(kMaxMulParamsAlignment) char mul_params_bytes[kMaxMulParamsSize]; |
90 | }; |
91 | |
92 | } // namespace ruy |
93 | |
94 | #endif // RUY_RUY_TRMUL_PARAMS_H_ |
95 | |