apply_multiplier.cc source code [tensorflow/external/ruy/ruy/apply_multiplier.cc]

1	/* Copyright 2020 Google LLC. All Rights Reserved.
2
3	Licensed under the Apache License, Version 2.0 (the "License");
4	you may not use this file except in compliance with the License.
5	You may obtain a copy of the License at
6
7	http://www.apache.org/licenses/LICENSE-2.0
8
9	Unless required by applicable law or agreed to in writing, software
10	distributed under the License is distributed on an "AS IS" BASIS,
11	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12	See the License for the specific language governing permissions and
13	limitations under the License.
14	==============================================================================*/
15
16	#include "ruy/apply_multiplier.h"
17
18	#include <cmath>
19	#include <cstdint>
20	#include <cstdlib>
21	#include <limits>
22
23	namespace ruy {
24	namespace detail {
25
26	// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
27	// Warning: this code is not meant to be bit-exact-normative.
28	// Please refer to the class comment of ruy::MulParams, in mul_params.h.
29	// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
30	// Simplified multiplier application function
31	//
32	// Double rounding and symmetric rounding are removed compared to reference.
33	// Double rounding seems unnecessary and can complicate implementations.
34	// Symmetric rounding also adds implementation complexity.
35	//
36	// Composed of a single rounding shift right and can lead to more HW
37	// friendly implementations.
38	//
39	// On NEON this can be translated to a SQDMULH + rounding shift right sequence.
40	// The use of SQDMULH rather than SQRDMULH gives a result that is
41	// equivalent to a single rounded shift since the truncating shift of SQDMULH
42	// can be combined with the rounding right shift via the formula (for k>=1):
43	// ((x>>31)+(1<<(k-1)))>>k = (x + (1<<(30+k))>>(31+k)
44	//
45	// Preconditions:
46	// - quantized_multiplier >= 0
47	// - shift is -31 to +7 (negative for right shift)
48	std::int32_t MultiplyByQuantizedMultiplier(std::int32_t x,
49	std::int32_t quantized_multiplier,
50	int shift) {
51	RUY_CHECK_GE(shift, -`31`);
52
53	int total_shift = `31` - shift;
54
55	std::int64_t x_64(x);
56	std::int64_t quantized_multiplier_64(quantized_multiplier);
57	std::int64_t round = (int64_t)`1` << (total_shift - `1`);
58	int64_t result = x_64 * quantized_multiplier_64 + round;
59	result = result >> total_shift;
60
61	RUY_DCHECK_GE(result, std::numeric_limits<std::int32_t>::lowest());
62	RUY_DCHECK_LE(result, std::numeric_limits<std::int32_t>::max());
63
64	return static_cast<std::int32_t>(result);
65	}
66
67	} // namespace detail
68
69	} // namespace ruy
70

Browse the source code of tensorflow/external/ruy/ruy/apply_multiplier.cc