bfloat16.cpp source code [oneDNN/src/common/bfloat16.cpp]

1	/*******************************************************************************
2	* Copyright 2021 Intel Corporation
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*******************************************************************************/
16
17	#include "common/bfloat16.hpp"
18
19	namespace dnnl {
20	namespace impl {
21
22	bfloat16_t &bfloat16_t::operator=(float f) {
23	#if DNNL_CPU_RUNTIME != DNNL_RUNTIME_NONE
24	if (try_cvt_float_to_bfloat16(this, &f)) { return *this; }
25	#endif
26	auto iraw = utils::bit_cast<std::array<uint16_t, `2`>>(f);
27	switch (std::fpclassify(f)) {
28	case FP_SUBNORMAL:
29	case FP_ZERO:
30	// sign preserving zero (denormal go to zero)
31	raw_bits_ = iraw[`1`];
32	raw_bits_ &= `0x8000`;
33	break;
34	case FP_INFINITE: raw_bits_ = iraw[`1`]; break;
35	case FP_NAN:
36	// truncate and set MSB of the mantissa force QNAN
37	raw_bits_ = iraw[`1`];
38	raw_bits_ \|= `1` << `6`;
39	break;
40	case FP_NORMAL:
41	// round to nearest even and truncate
42	const uint32_t rounding_bias = `0x00007FFF` + (iraw[`1`] & `0x1`);
43	const uint32_t int_raw
44	= utils::bit_cast<uint32_t>(f) + rounding_bias;
45	iraw = utils::bit_cast<std::array<uint16_t, `2`>>(int_raw);
46	raw_bits_ = iraw[`1`];
47	break;
48	}
49
50	return *this;
51	}
52
53	bfloat16_t::operator float() const {
54	std::array<uint16_t, `2`> iraw = {{`0`, raw_bits_}};
55	return utils::bit_cast<float>(iraw);
56	}
57
58	} // namespace impl
59	} // namespace dnnl
60

Browse the source code of oneDNN/src/common/bfloat16.cpp