1/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16#include "tensorflow/core/platform/cpu_feature_guard.h"
17
18#ifndef __ANDROID__
19#include <iostream>
20#endif
21#include <mutex>
22#include <string>
23
24#include "absl/base/call_once.h"
25#include "tensorflow/core/platform/byte_order.h"
26#include "tensorflow/core/platform/cpu_info.h"
27#include "tensorflow/core/platform/logging.h"
28
29namespace tensorflow {
30namespace port {
31namespace {
32
33// If the CPU feature isn't present, log a fatal error.
34void CheckFeatureOrDie(CPUFeature feature, const std::string& feature_name) {
35 if (!TestCPUFeature(feature)) {
36 const auto error_msg =
37 "The TensorFlow library was compiled to use " + feature_name +
38 " instructions, but these aren't available on your machine.";
39#ifdef __ANDROID__
40 // Some Android emulators seem to indicate they don't support SSE, so we
41 // only issue a warning to avoid crashes when testing. We use the logging
42 // framework here because std::cout and std::cerr made some Android targets
43 // crash.
44 LOG(WARNING) << error_msg;
45#else
46 // Avoiding use of the logging framework here as that might trigger a SIGILL
47 // by itself.
48 std::cerr << error_msg << std::endl;
49 std::abort();
50#endif
51 }
52}
53
54// Check if CPU feature is included in the TensorFlow binary.
55void CheckIfFeatureUnused(CPUFeature feature, const std::string& feature_name,
56 std::string& missing_instructions) {
57 if (TestCPUFeature(feature)) {
58 missing_instructions.append(" ");
59 missing_instructions.append(feature_name);
60 }
61}
62
63// Raises an error if the binary has been compiled for a CPU feature (like AVX)
64// that isn't available on the current machine. It also warns of performance
65// loss if there's a feature available that's not being used.
66// Depending on the compiler and initialization order, a SIGILL exception may
67// occur before this code is reached, but this at least offers a chance to give
68// a more meaningful error message.
69class CPUFeatureGuard {
70 public:
71 CPUFeatureGuard() {
72#ifdef __SSE__
73 CheckFeatureOrDie(CPUFeature::SSE, "SSE");
74#endif // __SSE__
75#ifdef __SSE2__
76 CheckFeatureOrDie(CPUFeature::SSE2, "SSE2");
77#endif // __SSE2__
78#ifdef __SSE3__
79 CheckFeatureOrDie(CPUFeature::SSE3, "SSE3");
80#endif // __SSE3__
81#ifdef __SSE4_1__
82 CheckFeatureOrDie(CPUFeature::SSE4_1, "SSE4.1");
83#endif // __SSE4_1__
84#ifdef __SSE4_2__
85 CheckFeatureOrDie(CPUFeature::SSE4_2, "SSE4.2");
86#endif // __SSE4_2__
87#ifdef __AVX__
88 CheckFeatureOrDie(CPUFeature::AVX, "AVX");
89#endif // __AVX__
90#ifdef __AVX2__
91 CheckFeatureOrDie(CPUFeature::AVX2, "AVX2");
92#endif // __AVX2__
93#ifdef __AVX512F__
94 CheckFeatureOrDie(CPUFeature::AVX512F, "AVX512F");
95#endif // __AVX512F__
96#ifdef __AVX512VNNI__
97 CheckFeatureOrDie(CPUFeature::AVX512_VNNI, "AVX512_VNNI");
98#endif // __AVX512VNNI__
99#ifdef __AVX512BF16__
100 CheckFeatureOrDie(CPUFeature::AVX512_BF16, "AVX512_BF16");
101#endif // __AVX512BF16__
102#ifdef __AVXVNNI__
103 CheckFeatureOrDie(CPUFeature::AVX_VNNI, "AVX_VNNI");
104#endif // __AVXVNNI__
105#ifdef __AMXTILE__
106 CheckFeatureOrDie(CPUFeature::AMX_TILE, "AMX_TILE");
107#endif // __AMXTILE__
108#ifdef __AMXINT8__
109 CheckFeatureOrDie(CPUFeature::AMX_INT8, "AMX_INT8");
110#endif // __AMXINT8__
111#ifdef __AMXBF16__
112 CheckFeatureOrDie(CPUFeature::AMX_BF16, "AMX_BF16");
113#endif // __AMXBF16__
114#ifdef __FMA__
115 CheckFeatureOrDie(CPUFeature::FMA, "FMA");
116#endif // __FMA__
117 }
118};
119
120CPUFeatureGuard g_cpu_feature_guard_singleton;
121
122absl::once_flag g_cpu_feature_guard_warn_once_flag;
123
124} // namespace
125
126void InfoAboutUnusedCPUFeatures() {
127 absl::call_once(g_cpu_feature_guard_warn_once_flag, [] {
128 std::string missing_instructions;
129#if defined(_MSC_VER) && !defined(__clang__)
130
131#ifndef __AVX__
132 CheckIfFeatureUnused(CPUFeature::AVX, "AVX", missing_instructions);
133#endif // __AVX__
134#ifndef __AVX2__
135 CheckIfFeatureUnused(CPUFeature::AVX2, "AVX2", missing_instructions);
136#endif // __AVX2__
137
138#else // if defined(_MSC_VER) && !defined(__clang__)
139
140#ifndef __SSE__
141 CheckIfFeatureUnused(CPUFeature::SSE, "SSE", missing_instructions);
142#endif // __SSE__
143#ifndef __SSE2__
144 CheckIfFeatureUnused(CPUFeature::SSE2, "SSE2", missing_instructions);
145#endif // __SSE2__
146#ifndef __SSE3__
147 CheckIfFeatureUnused(CPUFeature::SSE3, "SSE3", missing_instructions);
148#endif // __SSE3__
149#ifndef __SSE4_1__
150 CheckIfFeatureUnused(CPUFeature::SSE4_1, "SSE4.1", missing_instructions);
151#endif // __SSE4_1__
152#ifndef __SSE4_2__
153 CheckIfFeatureUnused(CPUFeature::SSE4_2, "SSE4.2", missing_instructions);
154#endif // __SSE4_2__
155#ifndef __AVX__
156 CheckIfFeatureUnused(CPUFeature::AVX, "AVX", missing_instructions);
157#endif // __AVX__
158#ifndef __AVX2__
159 CheckIfFeatureUnused(CPUFeature::AVX2, "AVX2", missing_instructions);
160#endif // __AVX2__
161#ifndef __AVX512F__
162 CheckIfFeatureUnused(CPUFeature::AVX512F, "AVX512F", missing_instructions);
163#endif // __AVX512F__
164#ifndef __AVX512VNNI__
165 CheckIfFeatureUnused(CPUFeature::AVX512_VNNI, "AVX512_VNNI",
166 missing_instructions);
167#endif // __AVX512VNNI__
168#ifndef __AVX512BF16__
169 CheckIfFeatureUnused(CPUFeature::AVX512_BF16, "AVX512_BF16",
170 missing_instructions);
171#endif // __AVX512BF16___
172#ifndef __AVXVNNI__
173 CheckIfFeatureUnused(CPUFeature::AVX_VNNI, "AVX_VNNI",
174 missing_instructions);
175#endif // __AVXVNNI__
176#ifndef __AMXTILE__
177 CheckIfFeatureUnused(CPUFeature::AMX_TILE, "AMX_TILE",
178 missing_instructions);
179#endif // __AMXTILE__
180#ifndef __AMXINT8__
181 CheckIfFeatureUnused(CPUFeature::AMX_INT8, "AMX_INT8",
182 missing_instructions);
183#endif // __AMXINT8__
184#ifndef __AMXBF16__
185 CheckIfFeatureUnused(CPUFeature::AMX_BF16, "AMX_BF16",
186 missing_instructions);
187#endif // __AMXBF16__
188#ifndef __FMA__
189 CheckIfFeatureUnused(CPUFeature::FMA, "FMA", missing_instructions);
190#endif // __FMA__
191#endif // else of if defined(_MSC_VER) && !defined(__clang__)
192 if (!missing_instructions.empty()) {
193 LOG(INFO) << "This TensorFlow binary is optimized with "
194 << "oneAPI Deep Neural Network Library (oneDNN) "
195 << "to use the following CPU instructions in performance-"
196 << "critical operations: " << missing_instructions << std::endl
197 << "To enable them in other operations, rebuild TensorFlow "
198 << "with the appropriate compiler flags.";
199 }
200 });
201}
202
203} // namespace port
204} // namespace tensorflow
205