1/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16#ifndef TENSORFLOW_LITE_KERNELS_CPU_BACKEND_GEMM_GEMMLOWP_H_
17#define TENSORFLOW_LITE_KERNELS_CPU_BACKEND_GEMM_GEMMLOWP_H_
18
19#include <tuple>
20
21#include "tensorflow/lite/kernels/internal/compatibility.h"
22#ifndef TFLITE_WITH_RUY
23
24#include <cstdint>
25#include <type_traits>
26
27#include "public/gemmlowp.h"
28#include "tensorflow/lite/kernels/cpu_backend_context.h"
29#include "tensorflow/lite/kernels/cpu_backend_gemm_params.h"
30#include "tensorflow/lite/kernels/cpu_backend_gemm_ruy.h"
31
32namespace tflite {
33namespace cpu_backend_gemm {
34namespace detail {
35
// Trait that maps a destination scalar type to a gemmlowp saturating-cast
// output stage. Specializations expose the chosen stage as a nested `Type`.
//
// The primary template has no `Type` member, so requesting `::Type` for a
// destination scalar without a specialization fails to compile.
template <typename DstScalar>
struct GemmlowpSaturatingCastStage {};
38
39template <>
40struct GemmlowpSaturatingCastStage<std::uint8_t> {
41 using Type = gemmlowp::OutputStageSaturatingCastToUint8;
42};
43
44template <>
45struct GemmlowpSaturatingCastStage<std::int8_t> {
46 using Type = gemmlowp::OutputStageSaturatingCastToInt8;
47};
48
49template <>
50struct GemmlowpSaturatingCastStage<std::int16_t> {
51 using Type = gemmlowp::OutputStageSaturatingCastToInt16;
52};
53
// Trait that maps a scalar type to the gemmlowp bit-depth parameters passed
// to gemmlowp::GemmWithOutputPipeline. Specializations expose the chosen
// parameters as a nested `Type`.
//
// Within this file the trait is instantiated with the source (LHS/RHS)
// scalar type. The primary template has no `Type` member, so a source scalar
// without a specialization fails to compile.
template <typename SrcScalar>
struct GemmlowpBitDepthParams {};
56
57template <>
58struct GemmlowpBitDepthParams<std::uint8_t> {
59 using Type = gemmlowp::L8R8WithLhsNonzeroBitDepthParams;
60};
61
62template <>
63struct GemmlowpBitDepthParams<std::int8_t> {
64 using Type = gemmlowp::SignedL8R8WithLhsNonzeroBitDepthParams;
65};
66
67template <typename LhsScalar, typename RhsScalar, typename AccumScalar,
68 typename DstScalar, QuantizationFlavor quantization_flavor>
69struct GemmImplUsingGemmlowp {};
70
71template <typename LhsScalar, typename RhsScalar, typename AccumScalar,
72 typename DstScalar>
73struct GemmImplUsingGemmlowp<
74 LhsScalar, RhsScalar, AccumScalar, DstScalar,
75 QuantizationFlavor::kIntegerWithUniformMultiplier> {
76 static_assert(std::is_same<LhsScalar, RhsScalar>::value, "");
77 static_assert(std::is_same<AccumScalar, std::int32_t>::value, "");
78 using SrcScalar = LhsScalar;
79
80 static void Run(
81 const MatrixParams<SrcScalar>& lhs_params, const SrcScalar* lhs_data,
82 const MatrixParams<SrcScalar>& rhs_params, const SrcScalar* rhs_data,
83 const MatrixParams<DstScalar>& dst_params, DstScalar* dst_data,
84 const GemmParams<std::int32_t, DstScalar,
85 QuantizationFlavor::kIntegerWithUniformMultiplier>&
86 params,
87 CpuBackendContext* context) {
88 gemmlowp::MatrixMap<const SrcScalar, gemmlowp::MapOrder::RowMajor>
89 gemmlowp_lhs(lhs_data, lhs_params.rows, lhs_params.cols);
90 gemmlowp::MatrixMap<const SrcScalar, gemmlowp::MapOrder::ColMajor>
91 gemmlowp_rhs(rhs_data, rhs_params.rows, rhs_params.cols);
92 gemmlowp::MatrixMap<DstScalar, gemmlowp::MapOrder::ColMajor> gemmlowp_dst(
93 dst_data, dst_params.rows, dst_params.cols);
94
95 using ColVectorMap =
96 gemmlowp::VectorMap<const int32, gemmlowp::VectorShape::Col>;
97 gemmlowp::OutputStageScaleInt32ByFixedPointAndExponent scale_stage;
98 scale_stage.result_offset_after_shift = dst_params.zero_point;
99 scale_stage.result_fixedpoint_multiplier = params.multiplier_fixedpoint;
100 scale_stage.result_exponent = params.multiplier_exponent;
101 using SaturatingCastStageType =
102 typename GemmlowpSaturatingCastStage<DstScalar>::Type;
103 gemmlowp::OutputStageClamp clamp_stage;
104 clamp_stage.min = params.clamp_min;
105 clamp_stage.max = params.clamp_max;
106 SaturatingCastStageType saturating_cast_stage;
107 using BitDepthParams = typename GemmlowpBitDepthParams<SrcScalar>::Type;
108 if (params.bias) {
109 ColVectorMap bias_vector(params.bias, lhs_params.rows);
110 gemmlowp::OutputStageBiasAddition<ColVectorMap> bias_addition_stage;
111 bias_addition_stage.bias_vector = bias_vector;
112 auto output_pipeline = std::make_tuple(
113 bias_addition_stage, scale_stage, clamp_stage, saturating_cast_stage);
114 gemmlowp::GemmWithOutputPipeline<SrcScalar, DstScalar, BitDepthParams>(
115 context->gemmlowp_context(), gemmlowp_lhs, gemmlowp_rhs,
116 &gemmlowp_dst, -lhs_params.zero_point, -rhs_params.zero_point,
117 output_pipeline);
118 } else {
119 auto output_pipeline =
120 std::make_tuple(scale_stage, clamp_stage, saturating_cast_stage);
121 gemmlowp::GemmWithOutputPipeline<SrcScalar, DstScalar, BitDepthParams>(
122 context->gemmlowp_context(), gemmlowp_lhs, gemmlowp_rhs,
123 &gemmlowp_dst, -lhs_params.zero_point, -rhs_params.zero_point,
124 output_pipeline);
125 }
126 }
127};
128
129template <typename LhsScalar, typename RhsScalar, typename AccumScalar,
130 typename DstScalar>
131struct GemmImplUsingGemmlowp<LhsScalar, RhsScalar, AccumScalar, DstScalar,
132 QuantizationFlavor::kIntegerWithPerRowMultiplier> {
133 static_assert(std::is_same<LhsScalar, RhsScalar>::value, "");
134 static_assert(std::is_same<AccumScalar, std::int32_t>::value, "");
135 using SrcScalar = LhsScalar;
136
137 static void Run(
138 const MatrixParams<SrcScalar>& lhs_params, const SrcScalar* lhs_data,
139 const MatrixParams<SrcScalar>& rhs_params, const SrcScalar* rhs_data,
140 const MatrixParams<DstScalar>& dst_params, DstScalar* dst_data,
141 const GemmParams<std::int32_t, DstScalar,
142 QuantizationFlavor::kIntegerWithPerRowMultiplier>&
143 params,
144 CpuBackendContext* context) {
145 // gemmlowp support for this per-channel path is limited to NEON.
146 // We fall back to ruy outside of NEON.
147#ifdef GEMMLOWP_NEON
148 gemmlowp::MatrixMap<const SrcScalar, gemmlowp::MapOrder::RowMajor>
149 gemmlowp_lhs(lhs_data, lhs_params.rows, lhs_params.cols);
150 gemmlowp::MatrixMap<const SrcScalar, gemmlowp::MapOrder::ColMajor>
151 gemmlowp_rhs(rhs_data, rhs_params.rows, rhs_params.cols);
152 gemmlowp::MatrixMap<DstScalar, gemmlowp::MapOrder::ColMajor> gemmlowp_dst(
153 dst_data, dst_params.rows, dst_params.cols);
154
155 using ColVectorMap =
156 gemmlowp::VectorMap<const int32, gemmlowp::VectorShape::Col>;
157 ColVectorMap bias_vector(params.bias, lhs_params.rows);
158 gemmlowp::OutputStageBiasAddition<ColVectorMap> bias_addition_stage;
159 bias_addition_stage.bias_vector = bias_vector;
160 gemmlowp::OutputStageScaleInt32ByFixedPointAndExponentPC<
161 gemmlowp::VectorShape::Col>
162 scale_stage;
163 scale_stage.result_offset_after_shift = dst_params.zero_point;
164 scale_stage.result_fixedpoint_multiplier =
165 ColVectorMap(params.multiplier_fixedpoint_perchannel, dst_params.rows);
166 scale_stage.result_exponent =
167 ColVectorMap(params.multiplier_exponent_perchannel, dst_params.rows);
168 using SaturatingCastStageType =
169 typename GemmlowpSaturatingCastStage<DstScalar>::Type;
170 gemmlowp::OutputStageClamp clamp_stage;
171 clamp_stage.min = params.clamp_min;
172 clamp_stage.max = params.clamp_max;
173 SaturatingCastStageType saturating_cast_stage;
174 auto output_pipeline = std::make_tuple(bias_addition_stage, scale_stage,
175 clamp_stage, saturating_cast_stage);
176 using BitDepthParams = typename GemmlowpBitDepthParams<SrcScalar>::Type;
177 gemmlowp::GemmWithOutputPipeline<SrcScalar, DstScalar, BitDepthParams>(
178 context->gemmlowp_context(), gemmlowp_lhs, gemmlowp_rhs, &gemmlowp_dst,
179 -lhs_params.zero_point, -rhs_params.zero_point, output_pipeline);
180#else
181 GemmImplUsingRuy<LhsScalar, RhsScalar, AccumScalar, DstScalar,
182 QuantizationFlavor::kIntegerWithPerRowMultiplier>::
183 Run(lhs_params, lhs_data, rhs_params, rhs_data, dst_params, dst_data,
184 params, context);
185#endif
186 }
187};
188
189} // namespace detail
190} // namespace cpu_backend_gemm
191} // namespace tflite
192
193#endif // not TFLITE_WITH_RUY
194
195#endif // TENSORFLOW_LITE_KERNELS_CPU_BACKEND_GEMM_GEMMLOWP_H_
196