1 | // Copyright 2015 The Gemmlowp Authors. All Rights Reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | // kernel_default.h: Chooses default GEMM and GEMV kernels for the |
16 | // host platform. |
17 | |
18 | #ifndef GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_ |
19 | #define GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_ |
20 | |
21 | #include "../public/bit_depth.h" |
22 | #include "common.h" |
23 | #include "kernel.h" |
24 | #include "kernel_reference.h" |
25 | |
namespace gemmlowp {

// Compile-time table mapping three properties of the operand ranges to a
// kernel type:
//   MaxProductIsLessThan4096 - max Lhs value * max Rhs value is < 4096.
//   IsUnsigned               - the Lhs range has no negative values.
//   LhsNonZero               - see DefaultKernel below for the exact rule.
//
// A kernel is registered for a given combination by explicitly specializing
// this template on the three bools (see GEMMLOWP_SET_DEFAULT_KERNEL). A
// combination with no registered kernel and no applicable fallback resolves
// to this empty primary template.
//
// The trailing Enable parameter is an implementation detail and is always
// left at its default (void) by users. It exists only so that the fully
// constrained <true, true, true> fallback below can be written as a *partial*
// specialization: that keeps it overridable by an explicit specialization and
// keeps its base class dependent, so that nothing is instantiated before the
// platform kernels are registered.
template <bool MaxProductIsLessThan4096, bool IsUnsigned, bool LhsNonZero,
          typename Enable = void>
struct DefaultKernelImpl {};

// Partial specialization implementing the logic that if we want to use
// a kernel for MaxProductIsLessThan4096 but do not have such a kernel, then we
// fall back to a generic kernel not taking advantage of
// MaxProductIsLessThan4096.
template <bool LhsNonZero, typename Enable>
struct DefaultKernelImpl<true, true, LhsNonZero, Enable>
    : DefaultKernelImpl<false, true, LhsNonZero, Enable> {};

// Partial specialization implementing the logic that if we want to use
// a kernel for LhsNonZero but do not have such a kernel, then we fall
// back to a generic kernel not taking advantage of LhsNonZero.
template <bool MaxProductIsLessThan4096, typename Enable>
struct DefaultKernelImpl<MaxProductIsLessThan4096, true, true, Enable>
    : DefaultKernelImpl<MaxProductIsLessThan4096, true, false, Enable> {};

// Tie-breaker for <true, true, true>. Both partial specializations above match
// that combination and neither is more specialized than the other, so without
// this one the instantiation is ambiguous and fails to compile. This
// specialization is more specialized than both, which resolves the ambiguity.
//
// Policy: drop MaxProductIsLessThan4096 first, so that a registered LhsNonZero
// kernel is still picked up; from there the usual LhsNonZero fallback applies.
// Either order would select a kernel whose preconditions are satisfied.
template <typename Enable>
struct DefaultKernelImpl<true, true, true, Enable>
    : DefaultKernelImpl<false, true, true, Enable> {};

// Selects the default kernel for the given BitDepthParams by computing the
// three DefaultKernelImpl flags from BitDepthParams::LhsRange and
// BitDepthParams::RhsRange (each must expose integral constants kMinValue and
// kMaxValue) and inheriting from the matching DefaultKernelImpl.
//
// Note that the third flag is set not only when the Lhs range excludes zero
// (kMinValue > 0), but also whenever the Lhs range is contained in
// [-127, 127].
template <typename BitDepthParams>
struct DefaultKernel
    : DefaultKernelImpl<(BitDepthParams::LhsRange::kMaxValue *
                             BitDepthParams::RhsRange::kMaxValue <
                         4096),
                        (BitDepthParams::LhsRange::kMinValue >= 0),
                        (BitDepthParams::LhsRange::kMinValue > 0 ||
                         (BitDepthParams::LhsRange::kMaxValue <= 127 &&
                          BitDepthParams::LhsRange::kMinValue > -128))> {};

}  // end namespace gemmlowp
57 | |
// Registers KernelType as the default kernel for one combination of the three
// gemmlowp::DefaultKernelImpl flags, by emitting an explicit specialization
// of DefaultKernelImpl that inherits from KernelType.
//
// The macro opens namespace gemmlowp itself, so it is meant to be invoked at
// global scope, after KernelType has been declared, and at most once per
// flag combination.
#define GEMMLOWP_SET_DEFAULT_KERNEL(ProductBelow4096, UnsignedOperands, \
                                    NonZeroLhs, KernelType)             \
  namespace gemmlowp {                                                  \
  template <>                                                           \
  struct DefaultKernelImpl<ProductBelow4096, UnsignedOperands,          \
                           NonZeroLhs> : KernelType {};                 \
  }
65 | |
66 | // User-provided int8 inputs is only supported in the NEON path currently. |
67 | #if defined GEMMLOWP_NEON_32 |
68 | #include "kernel_neon.h" |
69 | GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, NEON_32_Kernel12x4Depth2) |
70 | GEMMLOWP_SET_DEFAULT_KERNEL(true, true, false, |
71 | NEON_32_Kernel12x4Depth2Assuming12BitProducts) |
72 | GEMMLOWP_SET_DEFAULT_KERNEL(false, true, true, |
73 | NEON_32bit_GEMM_Int8Operands_LhsNonzero) |
74 | GEMMLOWP_SET_DEFAULT_KERNEL(false, false, true, |
75 | NEON_32bit_GEMM_Int8Operands_LhsNonzero_Int8Inputs) |
76 | #elif defined GEMMLOWP_NEON_64 |
77 | #include "kernel_neon.h" |
78 | #if defined GEMMLOWP_DOTPROD_KERNEL |
79 | GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, |
80 | NEON_64_Kernel12x8Depth4_dotprod) |
81 | #else |
82 | GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, NEON_64_Kernel12x8Depth2) |
83 | GEMMLOWP_SET_DEFAULT_KERNEL(false, true, true, |
84 | NEON_64bit_GEMM_Int8Operands_LhsNonzero) |
85 | #endif |
86 | GEMMLOWP_SET_DEFAULT_KERNEL(false, false, true, |
87 | NEON_64bit_GEMM_Int8Operands_LhsNonzero_Int8Inputs) |
88 | #elif defined(GEMMLOWP_MSA) |
89 | #include "kernel_msa.h" |
90 | GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, MSA_Kernel12x8Depth2) |
91 | GEMMLOWP_SET_DEFAULT_KERNEL(false, true, true, MSA_GEMM_Int8Operands_LhsNonzero) |
92 | #elif defined GEMMLOWP_SSE4_32 |
93 | #include "kernel_sse.h" |
94 | GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, SSE4_32_Kernel4x4Depth2) |
95 | #elif defined GEMMLOWP_SSE4_64 |
96 | #include "kernel_sse.h" |
97 | GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, SSE4_64_Kernel12x4Depth2) |
98 | #elif defined GEMMLOWP_AVX2_64 |
99 | #include "kernel_avx.h" |
100 | GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, AVX2_64_Kernel24x8Depth2) |
101 | #else |
102 | #include "kernel_reference.h" |
103 | namespace gemmlowp { |
104 | typedef ReferenceKernel<KernelFormat< |
105 | KernelSideFormat<CellFormat<4, 16, CellOrder::WidthMajor>, 1>, |
106 | KernelSideFormat<CellFormat<4, 16, CellOrder::WidthMajor>, 1> > > |
107 | DefaultReferenceKernel; |
108 | } |
109 | GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, DefaultReferenceKernel) |
110 | #endif |
111 | |
112 | #endif // GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_ |
113 | |