1/**
2 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16#ifndef GLOW_SUPPORT_FLOAT16_H
17#define GLOW_SUPPORT_FLOAT16_H
18
19#include "fp16.h"
20
21#include <cstdint>
22#include <iostream>
23
24namespace glow {
25
/// Largest finite value representable in FP16.
constexpr float kMaxFP16 = 65504.0f;
/// Most negative finite value representable in FP16 (mirror of kMaxFP16).
constexpr float kMinFP16 = -kMaxFP16;
/// Smallest allowed scale in FP16 (at least for NNPI): the reciprocal of the
/// largest finite FP16 value.
constexpr float kMinScaleFP16 = 1.f / kMaxFP16;
31
32/// Use a proxy type in case we need to change it in the future.
33using Float16Storage = uint16_t;
34class float16 {
35 Float16Storage data_;
36
37public:
38 float16(float data = 0.0) { data_ = fp16_ieee_from_fp32_value(data); }
39
40 /// Arithmetic operators.
41 float16 operator*(const float16 &b) const {
42 return float16(operator float() * float(b));
43 }
44 float16 operator/(const float16 &b) const {
45 return float16(operator float() / float(b));
46 }
47 float16 operator+(const float16 &b) const {
48 return float16(operator float() + float(b));
49 }
50 float16 operator-(const float16 &b) const {
51 return float16(operator float() - float(b));
52 }
53 float16 operator+=(const float16 &b) {
54 *this = *this + b;
55 return *this;
56 }
57 float16 operator-=(const float16 &b) {
58 *this = *this - b;
59 return *this;
60 }
61 float16 operator*=(const float16 &b) {
62 *this = *this * b;
63 return *this;
64 }
65 float16 operator/=(const float16 &b) {
66 *this = *this / b;
67 return *this;
68 }
69
70 /// Comparisons.
71 bool operator<(const float16 &b) const { return operator float() < float(b); }
72 bool operator>(const float16 &b) const { return operator float() > float(b); }
73 bool operator==(const float16 &b) const {
74 return operator float() == float(b);
75 }
76 bool operator>=(const float16 &b) const { return !(operator<(b)); }
77 bool operator<=(const float16 &b) const { return !(operator>(b)); }
78
79 /// Cast operators.
80 operator double() const { return double(operator float()); }
81 operator float() const { return fp16_ieee_to_fp32_value(data_); }
82 operator int64_t() const {
83 return static_cast<int64_t>(fp16_ieee_to_fp32_value(data_));
84 }
85 operator int32_t() const {
86 return static_cast<int32_t>(fp16_ieee_to_fp32_value(data_));
87 }
88}; // End class float16.
89
90/// Allow float16_t to be passed to an ostream.
91inline std::ostream &operator<<(std::ostream &os, const float16 &b) {
92 os << float(b);
93 return os;
94}
95
96} // End namespace glow.
97
98#endif // GLOW_SUPPORT_FLOAT16_H
99