1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_CORE_FRAMEWORK_BFLOAT16_H_ |
17 | #define TENSORFLOW_CORE_FRAMEWORK_BFLOAT16_H_ |
18 | |
19 | #include "tensorflow/core/framework/numeric_types.h" |
20 | #include "tensorflow/core/platform/types.h" |
21 | |
22 | // Compact 16-bit encoding of floating point numbers. This representation uses |
23 | // 1 bit for the sign, 8 bits for the exponent and 7 bits for the mantissa. It |
24 | // is assumed that floats are in IEEE 754 format so the representation is just |
25 | // bits 16-31 of a single precision float. |
26 | // |
27 | // NOTE: The IEEE floating point standard defines a float16 format that |
28 | // is different than this format (it has fewer bits of exponent and more |
29 | // bits of mantissa). We don't use that format here because conversion |
30 | // to/from 32-bit floats is more complex for that format, and the |
31 | // conversion for this format is very simple. |
32 | // |
// Because of the existing IEEE float16 type, we do not name our representation
// "float16" but call it "bfloat16"; its bit pattern is the high 16 bits of the
// corresponding float (i.e. it can be stored in a uint16).
35 | // |
36 | // <-----our 16bits float-------> |
37 | // s e e e e e e e e f f f f f f f f f f f f f f f f f f f f f f f |
38 | // <------------------------------float--------------------------> |
39 | // 3 3 2 2 1 1 0 |
40 | // 1 0 3 2 5 4 0 |
41 | // |
42 | // |
43 | // This type only supports conversion back and forth with float. |
44 | // |
45 | // This file must be compilable by nvcc. |
46 | // |
47 | // The type is defined in framework/numeric_types.h. |
48 | |
namespace tensorflow {

// Converts `size` floats from `src` into bfloat16 values written to `dst`,
// rounding each value to the nearest representable bfloat16 (ties to even).
// `dst` must have room for `size` elements.
// NOTE(review): nothing here specifies overlap behavior — presumably `src`
// and `dst` must not overlap; confirm against the implementation.
void RoundFloatToBFloat16(const float* src, bfloat16* dst, int64_t size);
// Converts `size` floats from `src` into bfloat16 values written to `dst`
// by truncation. Notice this conversion is lossy since it truncates the
// float to 7 mantissa bits without rounding (faster than the rounding
// variant above, but less accurate).
void FloatToBFloat16(const float* src, bfloat16* dst, int64_t size);
// Converts `size` bfloat16 values from `src` into floats written to `dst`.
// This conversion is lossless: every bfloat16 is exactly representable as
// a float (the low 16 mantissa bits are simply zero-filled).
void BFloat16ToFloat(const bfloat16* src, float* dst, int64_t size);

} // namespace tensorflow
60 | |
61 | #endif // TENSORFLOW_CORE_FRAMEWORK_BFLOAT16_H_ |
62 | |