1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | #ifndef GLOW_SUPPORT_FLOAT16_H |
17 | #define GLOW_SUPPORT_FLOAT16_H |
18 | |
19 | #include "fp16.h" |
20 | |
21 | #include <cstdint> |
22 | #include <iostream> |
23 | |
24 | namespace glow { |
25 | |
/// Maximum and minimum values for FP16; the minimum is the negated maximum.
constexpr float kMaxFP16 = 65504.0f;
constexpr float kMinFP16 = -kMaxFP16;
/// Smallest allowed scale in FP16 (at least for NNPI), i.e. the reciprocal of
/// the maximum FP16 value.
constexpr float kMinScaleFP16 = 1.f / kMaxFP16;
31 | |
32 | /// Use a proxy type in case we need to change it in the future. |
33 | using Float16Storage = uint16_t; |
34 | class float16 { |
35 | Float16Storage data_; |
36 | |
37 | public: |
38 | float16(float data = 0.0) { data_ = fp16_ieee_from_fp32_value(data); } |
39 | |
40 | /// Arithmetic operators. |
41 | float16 operator*(const float16 &b) const { |
42 | return float16(operator float() * float(b)); |
43 | } |
44 | float16 operator/(const float16 &b) const { |
45 | return float16(operator float() / float(b)); |
46 | } |
47 | float16 operator+(const float16 &b) const { |
48 | return float16(operator float() + float(b)); |
49 | } |
50 | float16 operator-(const float16 &b) const { |
51 | return float16(operator float() - float(b)); |
52 | } |
53 | float16 operator+=(const float16 &b) { |
54 | *this = *this + b; |
55 | return *this; |
56 | } |
57 | float16 operator-=(const float16 &b) { |
58 | *this = *this - b; |
59 | return *this; |
60 | } |
61 | float16 operator*=(const float16 &b) { |
62 | *this = *this * b; |
63 | return *this; |
64 | } |
65 | float16 operator/=(const float16 &b) { |
66 | *this = *this / b; |
67 | return *this; |
68 | } |
69 | |
70 | /// Comparisons. |
71 | bool operator<(const float16 &b) const { return operator float() < float(b); } |
72 | bool operator>(const float16 &b) const { return operator float() > float(b); } |
73 | bool operator==(const float16 &b) const { |
74 | return operator float() == float(b); |
75 | } |
76 | bool operator>=(const float16 &b) const { return !(operator<(b)); } |
77 | bool operator<=(const float16 &b) const { return !(operator>(b)); } |
78 | |
79 | /// Cast operators. |
80 | operator double() const { return double(operator float()); } |
81 | operator float() const { return fp16_ieee_to_fp32_value(data_); } |
82 | operator int64_t() const { |
83 | return static_cast<int64_t>(fp16_ieee_to_fp32_value(data_)); |
84 | } |
85 | operator int32_t() const { |
86 | return static_cast<int32_t>(fp16_ieee_to_fp32_value(data_)); |
87 | } |
88 | }; // End class float16. |
89 | |
90 | /// Allow float16_t to be passed to an ostream. |
91 | inline std::ostream &operator<<(std::ostream &os, const float16 &b) { |
92 | os << float(b); |
93 | return os; |
94 | } |
95 | |
96 | } // End namespace glow. |
97 | |
98 | #endif // GLOW_SUPPORT_FLOAT16_H |
99 | |