1 | /******************************************************************************* |
2 | * Copyright 2021 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #include "common/bfloat16.hpp" |
18 | |
19 | namespace dnnl { |
20 | namespace impl { |
21 | |
22 | bfloat16_t &bfloat16_t::operator=(float f) { |
23 | #if DNNL_CPU_RUNTIME != DNNL_RUNTIME_NONE |
24 | if (try_cvt_float_to_bfloat16(this, &f)) { return *this; } |
25 | #endif |
26 | auto iraw = utils::bit_cast<std::array<uint16_t, 2>>(f); |
27 | switch (std::fpclassify(f)) { |
28 | case FP_SUBNORMAL: |
29 | case FP_ZERO: |
30 | // sign preserving zero (denormal go to zero) |
31 | raw_bits_ = iraw[1]; |
32 | raw_bits_ &= 0x8000; |
33 | break; |
34 | case FP_INFINITE: raw_bits_ = iraw[1]; break; |
35 | case FP_NAN: |
36 | // truncate and set MSB of the mantissa force QNAN |
37 | raw_bits_ = iraw[1]; |
38 | raw_bits_ |= 1 << 6; |
39 | break; |
40 | case FP_NORMAL: |
41 | // round to nearest even and truncate |
42 | const uint32_t rounding_bias = 0x00007FFF + (iraw[1] & 0x1); |
43 | const uint32_t int_raw |
44 | = utils::bit_cast<uint32_t>(f) + rounding_bias; |
45 | iraw = utils::bit_cast<std::array<uint16_t, 2>>(int_raw); |
46 | raw_bits_ = iraw[1]; |
47 | break; |
48 | } |
49 | |
50 | return *this; |
51 | } |
52 | |
53 | bfloat16_t::operator float() const { |
54 | std::array<uint16_t, 2> iraw = {{0, raw_bits_}}; |
55 | return utils::bit_cast<float>(iraw); |
56 | } |
57 | |
58 | } // namespace impl |
59 | } // namespace dnnl |
60 | |