1 | /** |
2 | * Copyright 2021 Alibaba, Inc. and its affiliates. All Rights Reserved. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | |
16 | * \author Hechong.xyf |
17 | * \date Dec 2019 |
18 | * \brief Interface of AiLego Utility Float Helper |
19 | */ |
20 | |
21 | #ifndef __AILEGO_UTILITY_FLOAT_HELPER_H__ |
22 | #define __AILEGO_UTILITY_FLOAT_HELPER_H__ |
23 | |
#include <ailego/internal/platform.h>

#include <cstring>
25 | |
26 | namespace ailego { |
27 | |
/*! Float Helper
 *
 *  Static conversion routines between FP16 values, carried as `uint16_t`,
 *  and FP32 values, carried as `float`. Only the two scalar overloads that
 *  take a `norm` argument are defined in this header; all other overloads
 *  are declared here and defined elsewhere.
 */
struct FloatHelper {
  // ---- Scalar conversions ----

  //! Convert a single FP16 value to FP32
  static float ToFP32(uint16_t val);

  //! Convert a single FP32 value to FP16
  static uint16_t ToFP16(float val);

  //! Convert a single FP16 value to FP32 with normalization
  //! The value is converted first and the FP32 result is divided by `norm`.
  static float ToFP32(uint16_t val, float norm) {
    return ToFP32(val) / norm;
  }

  //! Convert a single FP32 value to FP16 with normalization
  //! The value is divided by `norm` in FP32 first, then converted.
  static uint16_t ToFP16(float val, float norm) {
    return ToFP16(val / norm);
  }

  // ---- Array conversions ----

  //! Convert `size` FP16 values from `arr` to FP32 values in `out`
  static void ToFP32(const uint16_t *arr, size_t size, float *out);

  //! Convert FP16 to FP32 with normalization (array)
  //! NOTE(review): presumably applies `norm` per element the same way the
  //! scalar overload does; the definition is not in this header.
  static void ToFP32(const uint16_t *arr, size_t size, float norm, float *out);

  //! Convert `size` FP32 values from `arr` to FP16 values in `out`
  static void ToFP16(const float *arr, size_t size, uint16_t *out);

  //! Convert FP32 to FP16 with normalization (array)
  //! NOTE(review): presumably applies `norm` per element the same way the
  //! scalar overload does; the definition is not in this header.
  static void ToFP16(const float *arr, size_t size, float norm, uint16_t *out);
};
59 | |
60 | #if !defined(__aarch64__) |
61 | /*! Half-Precision Floating Point |
62 | */ |
63 | class Float16 { |
64 | public: |
65 | //! Constructor |
66 | Float16(void) : value_(0) {} |
67 | |
68 | //! Constructor |
69 | Float16(float val) : value_(FloatHelper::ToFP16(val)) {} |
70 | |
71 | //! Constructor |
72 | Float16(double val) : value_(FloatHelper::ToFP16(static_cast<float>(val))) {} |
73 | |
74 | //! Assigment |
75 | Float16 &operator=(float val) { |
76 | this->value_ = FloatHelper::ToFP16(val); |
77 | return *this; |
78 | } |
79 | |
80 | //! Assigment |
81 | Float16 &operator+=(float val) { |
82 | this->value_ = FloatHelper::ToFP16(FloatHelper::ToFP32(this->value_) + val); |
83 | return *this; |
84 | } |
85 | |
86 | //! Assigment |
87 | Float16 &operator-=(float val) { |
88 | this->value_ = FloatHelper::ToFP16(FloatHelper::ToFP32(this->value_) - val); |
89 | return *this; |
90 | } |
91 | |
92 | //! Assigment |
93 | Float16 &operator*=(float val) { |
94 | this->value_ = FloatHelper::ToFP16(FloatHelper::ToFP32(this->value_) * val); |
95 | return *this; |
96 | } |
97 | |
98 | //! Assigment |
99 | Float16 &operator/=(float val) { |
100 | this->value_ = FloatHelper::ToFP16(FloatHelper::ToFP32(this->value_) / val); |
101 | return *this; |
102 | } |
103 | |
104 | //! Retrieve value in FP32 |
105 | operator float() const { |
106 | return FloatHelper::ToFP32(this->value_); |
107 | } |
108 | |
109 | //! Equal operator |
110 | bool operator==(const Float16 &rhs) const { |
111 | return this->value_ == rhs.value_; |
112 | } |
113 | |
114 | //! No equal operator |
115 | bool operator!=(const Float16 &rhs) const { |
116 | return this->value_ != rhs.value_; |
117 | } |
118 | |
119 | //! Less than operator |
120 | bool operator<(const Float16 &rhs) const { |
121 | return FloatHelper::ToFP32(this->value_) < FloatHelper::ToFP32(rhs.value_); |
122 | } |
123 | |
124 | //! Less than or equal operator |
125 | bool operator<=(const Float16 &rhs) const { |
126 | return FloatHelper::ToFP32(this->value_) <= FloatHelper::ToFP32(rhs.value_); |
127 | } |
128 | |
129 | //! Greater than operator |
130 | bool operator>(const Float16 &rhs) const { |
131 | return FloatHelper::ToFP32(this->value_) > FloatHelper::ToFP32(rhs.value_); |
132 | } |
133 | |
134 | //! Greater than or equal operator |
135 | bool operator>=(const Float16 &rhs) const { |
136 | return FloatHelper::ToFP32(this->value_) >= FloatHelper::ToFP32(rhs.value_); |
137 | } |
138 | |
139 | //! Calculate the absolute value |
140 | static inline Float16 Absolute(const Float16 &x) { |
141 | Float16 abs; |
142 | abs.value_ = static_cast<uint16_t>(x.value_ & 0x7fff); |
143 | return abs; |
144 | } |
145 | |
146 | private: |
147 | uint16_t value_; |
148 | }; |
149 | #else |
150 | /*! Half-Precision Floating Point |
151 | */ |
152 | class Float16 { |
153 | public: |
154 | //! Constructor |
155 | Float16(void) : value_(0) {} |
156 | |
157 | //! Constructor |
158 | Float16(__fp16 val) : value_(val) {} |
159 | |
160 | //! Assigment |
161 | Float16 &operator=(__fp16 val) { |
162 | this->value_ = val; |
163 | return *this; |
164 | } |
165 | |
166 | //! Assigment |
167 | Float16 &operator+=(__fp16 val) { |
168 | this->value_ = this->value_ + val; |
169 | return *this; |
170 | } |
171 | |
172 | //! Assigment |
173 | Float16 &operator-=(__fp16 val) { |
174 | this->value_ = this->value_ - val; |
175 | return *this; |
176 | } |
177 | |
178 | //! Assigment |
179 | Float16 &operator*=(__fp16 val) { |
180 | this->value_ = this->value_ * val; |
181 | return *this; |
182 | } |
183 | |
184 | //! Assigment |
185 | Float16 &operator/=(__fp16 val) { |
186 | this->value_ = this->value_ / val; |
187 | return *this; |
188 | } |
189 | |
190 | //! Retrieve value in FP16 |
191 | operator __fp16() const { |
192 | return this->value_; |
193 | } |
194 | |
195 | //! Equal operator |
196 | bool operator==(const Float16 &rhs) const { |
197 | return this->value_ == rhs.value_; |
198 | } |
199 | |
200 | //! No equal operator |
201 | bool operator!=(const Float16 &rhs) const { |
202 | return this->value_ != rhs.value_; |
203 | } |
204 | |
205 | //! Less than operator |
206 | bool operator<(const Float16 &rhs) const { |
207 | return this->value_ < rhs.value_; |
208 | } |
209 | |
210 | //! Less than or equal operator |
211 | bool operator<=(const Float16 &rhs) const { |
212 | return this->value_ <= rhs.value_; |
213 | } |
214 | |
215 | //! Greater than operator |
216 | bool operator>(const Float16 &rhs) const { |
217 | return this->value_ > rhs.value_; |
218 | } |
219 | |
220 | //! Greater than or equal operator |
221 | bool operator>=(const Float16 &rhs) const { |
222 | return this->value_ >= rhs.value_; |
223 | } |
224 | |
225 | //! Calculate the absolute value |
226 | static inline Float16 Absolute(const Float16 &x) { |
227 | Float16 abs(x.value_); |
228 | uint16_t *p = reinterpret_cast<uint16_t *>(&abs.value_); |
229 | *p &= 0x7fff; |
230 | return abs; |
231 | } |
232 | |
233 | private: |
234 | __fp16 value_; |
235 | }; |
236 | #endif |
237 | |
238 | // Check size of Float16 |
239 | static_assert(sizeof(Float16) == 2, "Float16 must be aligned with 2 bytes" ); |
240 | |
241 | } // namespace ailego |
242 | |
243 | #endif // __AILEGO_UTILITY_FLOAT_HELPER_H__ |
244 | |