1/*******************************************************************************
2* Copyright 2021 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#include "common/bfloat16.hpp"
18
19namespace dnnl {
20namespace impl {
21
22bfloat16_t &bfloat16_t::operator=(float f) {
23#if DNNL_CPU_RUNTIME != DNNL_RUNTIME_NONE
24 if (try_cvt_float_to_bfloat16(this, &f)) { return *this; }
25#endif
26 auto iraw = utils::bit_cast<std::array<uint16_t, 2>>(f);
27 switch (std::fpclassify(f)) {
28 case FP_SUBNORMAL:
29 case FP_ZERO:
30 // sign preserving zero (denormal go to zero)
31 raw_bits_ = iraw[1];
32 raw_bits_ &= 0x8000;
33 break;
34 case FP_INFINITE: raw_bits_ = iraw[1]; break;
35 case FP_NAN:
36 // truncate and set MSB of the mantissa force QNAN
37 raw_bits_ = iraw[1];
38 raw_bits_ |= 1 << 6;
39 break;
40 case FP_NORMAL:
41 // round to nearest even and truncate
42 const uint32_t rounding_bias = 0x00007FFF + (iraw[1] & 0x1);
43 const uint32_t int_raw
44 = utils::bit_cast<uint32_t>(f) + rounding_bias;
45 iraw = utils::bit_cast<std::array<uint16_t, 2>>(int_raw);
46 raw_bits_ = iraw[1];
47 break;
48 }
49
50 return *this;
51}
52
53bfloat16_t::operator float() const {
54 std::array<uint16_t, 2> iraw = {{0, raw_bits_}};
55 return utils::bit_cast<float>(iraw);
56}
57
58} // namespace impl
59} // namespace dnnl
60