1 | /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #include "tensorflow/core/platform/cpu_feature_guard.h" |
17 | |
18 | #ifndef __ANDROID__ |
19 | #include <iostream> |
20 | #endif |
21 | #include <mutex> |
22 | #include <string> |
23 | |
24 | #include "absl/base/call_once.h" |
25 | #include "tensorflow/core/platform/byte_order.h" |
26 | #include "tensorflow/core/platform/cpu_info.h" |
27 | #include "tensorflow/core/platform/logging.h" |
28 | |
29 | namespace tensorflow { |
30 | namespace port { |
31 | namespace { |
32 | |
33 | // If the CPU feature isn't present, log a fatal error. |
34 | void CheckFeatureOrDie(CPUFeature feature, const std::string& feature_name) { |
35 | if (!TestCPUFeature(feature)) { |
36 | const auto error_msg = |
37 | "The TensorFlow library was compiled to use " + feature_name + |
38 | " instructions, but these aren't available on your machine." ; |
39 | #ifdef __ANDROID__ |
40 | // Some Android emulators seem to indicate they don't support SSE, so we |
41 | // only issue a warning to avoid crashes when testing. We use the logging |
42 | // framework here because std::cout and std::cerr made some Android targets |
43 | // crash. |
44 | LOG(WARNING) << error_msg; |
45 | #else |
46 | // Avoiding use of the logging framework here as that might trigger a SIGILL |
47 | // by itself. |
48 | std::cerr << error_msg << std::endl; |
49 | std::abort(); |
50 | #endif |
51 | } |
52 | } |
53 | |
54 | // Check if CPU feature is included in the TensorFlow binary. |
55 | void CheckIfFeatureUnused(CPUFeature feature, const std::string& feature_name, |
56 | std::string& missing_instructions) { |
57 | if (TestCPUFeature(feature)) { |
58 | missing_instructions.append(" " ); |
59 | missing_instructions.append(feature_name); |
60 | } |
61 | } |
62 | |
63 | // Raises an error if the binary has been compiled for a CPU feature (like AVX) |
64 | // that isn't available on the current machine. It also warns of performance |
65 | // loss if there's a feature available that's not being used. |
66 | // Depending on the compiler and initialization order, a SIGILL exception may |
67 | // occur before this code is reached, but this at least offers a chance to give |
68 | // a more meaningful error message. |
69 | class CPUFeatureGuard { |
70 | public: |
71 | CPUFeatureGuard() { |
72 | #ifdef __SSE__ |
73 | CheckFeatureOrDie(CPUFeature::SSE, "SSE" ); |
74 | #endif // __SSE__ |
75 | #ifdef __SSE2__ |
76 | CheckFeatureOrDie(CPUFeature::SSE2, "SSE2" ); |
77 | #endif // __SSE2__ |
78 | #ifdef __SSE3__ |
79 | CheckFeatureOrDie(CPUFeature::SSE3, "SSE3" ); |
80 | #endif // __SSE3__ |
81 | #ifdef __SSE4_1__ |
82 | CheckFeatureOrDie(CPUFeature::SSE4_1, "SSE4.1" ); |
83 | #endif // __SSE4_1__ |
84 | #ifdef __SSE4_2__ |
85 | CheckFeatureOrDie(CPUFeature::SSE4_2, "SSE4.2" ); |
86 | #endif // __SSE4_2__ |
87 | #ifdef __AVX__ |
88 | CheckFeatureOrDie(CPUFeature::AVX, "AVX" ); |
89 | #endif // __AVX__ |
90 | #ifdef __AVX2__ |
91 | CheckFeatureOrDie(CPUFeature::AVX2, "AVX2" ); |
92 | #endif // __AVX2__ |
93 | #ifdef __AVX512F__ |
94 | CheckFeatureOrDie(CPUFeature::AVX512F, "AVX512F" ); |
95 | #endif // __AVX512F__ |
96 | #ifdef __AVX512VNNI__ |
97 | CheckFeatureOrDie(CPUFeature::AVX512_VNNI, "AVX512_VNNI" ); |
98 | #endif // __AVX512VNNI__ |
99 | #ifdef __AVX512BF16__ |
100 | CheckFeatureOrDie(CPUFeature::AVX512_BF16, "AVX512_BF16" ); |
101 | #endif // __AVX512BF16__ |
102 | #ifdef __AVXVNNI__ |
103 | CheckFeatureOrDie(CPUFeature::AVX_VNNI, "AVX_VNNI" ); |
104 | #endif // __AVXVNNI__ |
105 | #ifdef __AMXTILE__ |
106 | CheckFeatureOrDie(CPUFeature::AMX_TILE, "AMX_TILE" ); |
107 | #endif // __AMXTILE__ |
108 | #ifdef __AMXINT8__ |
109 | CheckFeatureOrDie(CPUFeature::AMX_INT8, "AMX_INT8" ); |
110 | #endif // __AMXINT8__ |
111 | #ifdef __AMXBF16__ |
112 | CheckFeatureOrDie(CPUFeature::AMX_BF16, "AMX_BF16" ); |
113 | #endif // __AMXBF16__ |
114 | #ifdef __FMA__ |
115 | CheckFeatureOrDie(CPUFeature::FMA, "FMA" ); |
116 | #endif // __FMA__ |
117 | } |
118 | }; |
119 | |
// Namespace-scope instance of the guard. Constructing it runs the
// CheckFeatureOrDie() calls in CPUFeatureGuard's constructor; the object is
// not referenced anywhere else in this file.
CPUFeatureGuard g_cpu_feature_guard_singleton;

// Flag handed to absl::call_once() by InfoAboutUnusedCPUFeatures() so that the
// unused-feature report is produced only on the first call.
absl::once_flag g_cpu_feature_guard_warn_once_flag;
123 | |
124 | } // namespace |
125 | |
126 | void InfoAboutUnusedCPUFeatures() { |
127 | absl::call_once(g_cpu_feature_guard_warn_once_flag, [] { |
128 | std::string missing_instructions; |
129 | #if defined(_MSC_VER) && !defined(__clang__) |
130 | |
131 | #ifndef __AVX__ |
132 | CheckIfFeatureUnused(CPUFeature::AVX, "AVX" , missing_instructions); |
133 | #endif // __AVX__ |
134 | #ifndef __AVX2__ |
135 | CheckIfFeatureUnused(CPUFeature::AVX2, "AVX2" , missing_instructions); |
136 | #endif // __AVX2__ |
137 | |
138 | #else // if defined(_MSC_VER) && !defined(__clang__) |
139 | |
140 | #ifndef __SSE__ |
141 | CheckIfFeatureUnused(CPUFeature::SSE, "SSE" , missing_instructions); |
142 | #endif // __SSE__ |
143 | #ifndef __SSE2__ |
144 | CheckIfFeatureUnused(CPUFeature::SSE2, "SSE2" , missing_instructions); |
145 | #endif // __SSE2__ |
146 | #ifndef __SSE3__ |
147 | CheckIfFeatureUnused(CPUFeature::SSE3, "SSE3" , missing_instructions); |
148 | #endif // __SSE3__ |
149 | #ifndef __SSE4_1__ |
150 | CheckIfFeatureUnused(CPUFeature::SSE4_1, "SSE4.1" , missing_instructions); |
151 | #endif // __SSE4_1__ |
152 | #ifndef __SSE4_2__ |
153 | CheckIfFeatureUnused(CPUFeature::SSE4_2, "SSE4.2" , missing_instructions); |
154 | #endif // __SSE4_2__ |
155 | #ifndef __AVX__ |
156 | CheckIfFeatureUnused(CPUFeature::AVX, "AVX" , missing_instructions); |
157 | #endif // __AVX__ |
158 | #ifndef __AVX2__ |
159 | CheckIfFeatureUnused(CPUFeature::AVX2, "AVX2" , missing_instructions); |
160 | #endif // __AVX2__ |
161 | #ifndef __AVX512F__ |
162 | CheckIfFeatureUnused(CPUFeature::AVX512F, "AVX512F" , missing_instructions); |
163 | #endif // __AVX512F__ |
164 | #ifndef __AVX512VNNI__ |
165 | CheckIfFeatureUnused(CPUFeature::AVX512_VNNI, "AVX512_VNNI" , |
166 | missing_instructions); |
167 | #endif // __AVX512VNNI__ |
168 | #ifndef __AVX512BF16__ |
169 | CheckIfFeatureUnused(CPUFeature::AVX512_BF16, "AVX512_BF16" , |
170 | missing_instructions); |
171 | #endif // __AVX512BF16___ |
172 | #ifndef __AVXVNNI__ |
173 | CheckIfFeatureUnused(CPUFeature::AVX_VNNI, "AVX_VNNI" , |
174 | missing_instructions); |
175 | #endif // __AVXVNNI__ |
176 | #ifndef __AMXTILE__ |
177 | CheckIfFeatureUnused(CPUFeature::AMX_TILE, "AMX_TILE" , |
178 | missing_instructions); |
179 | #endif // __AMXTILE__ |
180 | #ifndef __AMXINT8__ |
181 | CheckIfFeatureUnused(CPUFeature::AMX_INT8, "AMX_INT8" , |
182 | missing_instructions); |
183 | #endif // __AMXINT8__ |
184 | #ifndef __AMXBF16__ |
185 | CheckIfFeatureUnused(CPUFeature::AMX_BF16, "AMX_BF16" , |
186 | missing_instructions); |
187 | #endif // __AMXBF16__ |
188 | #ifndef __FMA__ |
189 | CheckIfFeatureUnused(CPUFeature::FMA, "FMA" , missing_instructions); |
190 | #endif // __FMA__ |
191 | #endif // else of if defined(_MSC_VER) && !defined(__clang__) |
192 | if (!missing_instructions.empty()) { |
193 | LOG(INFO) << "This TensorFlow binary is optimized with " |
194 | << "oneAPI Deep Neural Network Library (oneDNN) " |
195 | << "to use the following CPU instructions in performance-" |
196 | << "critical operations: " << missing_instructions << std::endl |
197 | << "To enable them in other operations, rebuild TensorFlow " |
198 | << "with the appropriate compiler flags." ; |
199 | } |
200 | }); |
201 | } |
202 | |
203 | } // namespace port |
204 | } // namespace tensorflow |
205 | |