1// Copyright 2015 The Gemmlowp Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// kernel_default.h: Chooses default GEMM and GEMV kernels for the
16// host platform.
17
18#ifndef GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_
19#define GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_
20
21#include "../public/bit_depth.h"
22#include "common.h"
23#include "kernel.h"
24#include "kernel_reference.h"
25
namespace gemmlowp {

// DefaultKernelImpl is the compile-time table mapping three properties of the
// operand ranges to a kernel type. DefaultKernel (below) computes the flags:
//   - MaxProductIsLessThan4096: LhsRange::kMaxValue * RhsRange::kMaxValue
//     is below 4096.
//   - IsUnsigned: LhsRange::kMinValue is >= 0.
//   - LhsNonZero: LhsRange::kMinValue > 0, or the LHS range lies within
//     (-128, 127].
//
// The primary template is intentionally empty. Platforms register kernels by
// explicitly specializing it (see GEMMLOWP_SET_DEFAULT_KERNEL); flag
// combinations without a registered kernel are routed to a more generic
// entry by the partial specializations below.
//
// Dummy is an implementation detail and must be left at its default. It only
// exists so that the tie-breaking specialization for <true, true, true> can
// be a *partial* specialization: an explicit specialization would instantiate
// its base class immediately, i.e. before the platform-specific explicit
// specializations further down the file have been declared.
template <bool MaxProductIsLessThan4096, bool IsUnsigned, bool LhsNonZero,
          typename Dummy = void>
struct DefaultKernelImpl {};

// Partial specialization implementing the logic that if we want to use
// a kernel for MaxProductIsLessThan4096 but do not have such a kernel, then we
// fall back to a generic kernel not taking advantage of
// MaxProductIsLessThan4096.
template <bool LhsNonZero, typename Dummy>
struct DefaultKernelImpl<true, true, LhsNonZero, Dummy>
    : DefaultKernelImpl<false, true, LhsNonZero, Dummy> {};

// Partial specialization implementing the logic that if we want to use
// a kernel for LhsNonZero but do not have such a kernel, then we fall
// back to a generic kernel not taking advantage of LhsNonZero.
template <bool MaxProductIsLessThan4096, typename Dummy>
struct DefaultKernelImpl<MaxProductIsLessThan4096, true, true, Dummy>
    : DefaultKernelImpl<MaxProductIsLessThan4096, true, false, Dummy> {};

// Tie-breaker for <true, true, true>, which matches both fallbacks above.
// Neither of them is more specialized than the other, so without this
// specialization the instantiation is ambiguous and fails to compile.
// We drop MaxProductIsLessThan4096 first, so that a registered LhsNonZero
// kernel is preferred; if there is none, the LhsNonZero fallback above then
// drops that flag as well.
template <typename Dummy>
struct DefaultKernelImpl<true, true, true, Dummy>
    : DefaultKernelImpl<false, true, true, Dummy> {};

// DefaultKernel<BitDepthParams> is the default kernel for the given bit depth
// parameters. BitDepthParams must provide LhsRange and RhsRange types exposing
// integral constants kMinValue and kMaxValue; the three DefaultKernelImpl
// flags are derived from those constants and the resulting DefaultKernelImpl
// is inherited from.
template <typename BitDepthParams>
struct DefaultKernel
    : DefaultKernelImpl<(BitDepthParams::LhsRange::kMaxValue *
                             BitDepthParams::RhsRange::kMaxValue <
                         4096),
                        (BitDepthParams::LhsRange::kMinValue >= 0),
                        (BitDepthParams::LhsRange::kMinValue > 0 ||
                         (BitDepthParams::LhsRange::kMaxValue <= 127 &&
                          BitDepthParams::LhsRange::kMinValue > -128))> {};

}  // end namespace gemmlowp
57
// GEMMLOWP_SET_DEFAULT_KERNEL(ProductsBelow4096, LhsIsUnsigned, LhsIsNonZero,
//                             KernelType)
// registers KernelType as the default kernel for exactly one combination of
// flags, by emitting an explicit specialization of gemmlowp::DefaultKernelImpl
// that publicly derives from KernelType.
//
// The expansion opens `namespace gemmlowp` itself, so invoke the macro at
// global scope; KernelType is therefore looked up from inside that namespace.
// No trailing semicolon is needed after an invocation.
#define GEMMLOWP_SET_DEFAULT_KERNEL(ProductsBelow4096, LhsIsUnsigned,      \
                                    LhsIsNonZero, KernelType)              \
  namespace gemmlowp {                                                     \
  template <>                                                              \
  struct DefaultKernelImpl<ProductsBelow4096, LhsIsUnsigned, LhsIsNonZero> \
      : public KernelType {};                                              \
  }
65
66// User-provided int8 inputs is only supported in the NEON path currently.
67#if defined GEMMLOWP_NEON_32
68#include "kernel_neon.h"
69GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, NEON_32_Kernel12x4Depth2)
70GEMMLOWP_SET_DEFAULT_KERNEL(true, true, false,
71 NEON_32_Kernel12x4Depth2Assuming12BitProducts)
72GEMMLOWP_SET_DEFAULT_KERNEL(false, true, true,
73 NEON_32bit_GEMM_Int8Operands_LhsNonzero)
74GEMMLOWP_SET_DEFAULT_KERNEL(false, false, true,
75 NEON_32bit_GEMM_Int8Operands_LhsNonzero_Int8Inputs)
76#elif defined GEMMLOWP_NEON_64
77#include "kernel_neon.h"
78#if defined GEMMLOWP_DOTPROD_KERNEL
79GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false,
80 NEON_64_Kernel12x8Depth4_dotprod)
81#else
82GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, NEON_64_Kernel12x8Depth2)
83GEMMLOWP_SET_DEFAULT_KERNEL(false, true, true,
84 NEON_64bit_GEMM_Int8Operands_LhsNonzero)
85#endif
86GEMMLOWP_SET_DEFAULT_KERNEL(false, false, true,
87 NEON_64bit_GEMM_Int8Operands_LhsNonzero_Int8Inputs)
88#elif defined(GEMMLOWP_MSA)
89#include "kernel_msa.h"
90GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, MSA_Kernel12x8Depth2)
91GEMMLOWP_SET_DEFAULT_KERNEL(false, true, true, MSA_GEMM_Int8Operands_LhsNonzero)
92#elif defined GEMMLOWP_SSE4_32
93#include "kernel_sse.h"
94GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, SSE4_32_Kernel4x4Depth2)
95#elif defined GEMMLOWP_SSE4_64
96#include "kernel_sse.h"
97GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, SSE4_64_Kernel12x4Depth2)
98#elif defined GEMMLOWP_AVX2_64
99#include "kernel_avx.h"
100GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, AVX2_64_Kernel24x8Depth2)
101#else
102#include "kernel_reference.h"
103namespace gemmlowp {
104typedef ReferenceKernel<KernelFormat<
105 KernelSideFormat<CellFormat<4, 16, CellOrder::WidthMajor>, 1>,
106 KernelSideFormat<CellFormat<4, 16, CellOrder::WidthMajor>, 1> > >
107 DefaultReferenceKernel;
108}
109GEMMLOWP_SET_DEFAULT_KERNEL(false, true, false, DefaultReferenceKernel)
110#endif
111
112#endif // GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_
113