1 | /* Copyright 2018 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_H_ |
17 | #define TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_H_ |
18 | |
19 | // This header defines the macro TF_PLATFORM_STRINGS() which should be used |
20 | // once in each dynamically loadable TensorFlow module. It embeds static |
21 | // strings into the compilation unit that allow TensorFlow to determine what |
22 | // compilation options were in effect when the compilation unit was built. All |
23 | // compilation units within the same dynamically loadable library should be |
24 | // built with the same options (or at least, the strings should be embedded in |
25 | // the compilation unit built with the most restrictive options). |
26 | |
27 | // The platform strings embedded into a binary may be retrieved with the |
28 | // GetPlatformStrings function. |
29 | |
30 | // Rationale: |
31 | // We wish to load only those libraries that this CPU can execute. For |
32 | // example, we should not load a library compiled with 256-bit AVX instructions |
33 | // on a CPU that cannot execute them. |
34 | // |
35 | // One might think that one could dlopen() the library, and call a routine that |
36 | // would return which cpu type it was compiled for. Alas, this does not work, |
37 | // because at dlopen() time, a library containing C++ will execute constructors |
38 | // of class variables with static storage class. Even code that looks |
39 | // innocuous may use optional platform-specific instructions. For example, |
40 | // the fastest way to zero a region of memory might use optional instructions. |
41 | // |
42 | // One might think one could run a tool such as "objdump" to read flags from |
43 | // the libraries' headers, or perhaps disassemble each library to look for |
44 | // particular instructions. Unfortunately, the desired flags are not present |
45 | // in the headers, and disassembly can be prohibitively slow ("objdump -d" is |
46 | // very slow, for example). Moreover, a tool to examine the library may not |
47 | // be present on the system unless the user has installed special packages (for |
48 | // example, on Windows). |
49 | // |
50 | // Instead, we adopt a crude but straightforward solution: We require |
51 | // developers to use the macro TF_PLATFORM_STRINGS() in their library, to |
52 | // embed the compilation options as constant strings. The compiler's |
53 | // predefined macros pick which strings are included. We then search for the |
54 | // strings in the files, and then dlopen() only those libraries that have or |
55 | // lack strings as needed. |
56 | // |
57 | // We adopt the approach of placing in the binary a fairly raw copy of the |
58 | // predefined macros, rather than trying to interpret them in complex ways at |
59 | // compile time. This allows the loading binary to alter its interpretation of |
60 | // the strings without library developers having to recompile. |
61 | |
62 | #include <stdio.h> |
63 | |
64 | #include <string> |
65 | #include <vector> |
66 | |
67 | // Aside from the header guard, the internal macros defined here have the form: |
68 | // TF_PLAT_STR_* |
69 | |
70 | // Version (major.minor) of the list of tested macros. TF_PLATFORM_STRINGS() |
71 | // embeds it in the binary as TF_PLAT_STR_VERSION=<version>. |
72 | // If a macro is removed from the list of tested macros, increment the major |
73 | // version and set the minor version to zero. If a macro is added, increment the minor version. |
74 | #define TF_PLAT_STR_VERSION_ "1.0" |
75 | |
76 | // Prefix that marks each option string embedded in the binary. |
77 | // After the prefix, such strings have the form: |
78 | // [A-Za-z_0-9]+=<value> |
79 | // followed by a terminating nul. To simplify searching, this prefix is all |
80 | // ASCII, starts with a nul, and contains no character twice. |
81 | #define TF_PLAT_STR_MAGIC_PREFIX_ "\0S\\s\":^p*L}" |
82 | |
83 | // Applies the # (stringize) operator to x. Helper macro for TF_PLAT_STR_AS_STR_(), which passes it an already macro-expanded argument. |
84 | #define TF_PLAT_STR_STR_1_(x) #x |
85 | |
86 | // Yields a constant string corresponding to x, after macro expansion. The extra level of indirection through TF_PLAT_STR_STR_1_() lets x be expanded before # is applied. |
87 | #define TF_PLAT_STR_AS_STR_(x) TF_PLAT_STR_STR_1_(x) |
88 | |
89 | // Expands to nothing. It ends every TF_PLAT_STR_LIST_*() list so that all real entries can end with a line continuation, and it marks the end of a list for the awk script that generates platform_strings_computed.h. |
90 | #define TF_PLAT_STR_TERMINATOR_ |
91 | |
92 | // TF_PLAT_STR_(x) yields a constant string recording the value of the compilation option x: |
93 | // the magic prefix, the name of x, an equals sign, and the macro-expanded value of x. |
94 | // |
95 | // In gcc and clang, we might imagine using something like |
96 | // #define TF_PLAT_STR_(x) \ |
97 | // (sizeof (#x) != sizeof (TF_PLAT_STR_AS_STR_ (x))? \ |
98 | // TF_PLAT_STR_MAGIC_PREFIX_ #x "=" TF_PLAT_STR_AS_STR_ (x) : \ |
99 | // TF_PLAT_STR_MAGIC_PREFIX_ #x "=0"), |
100 | // but some compilers (notably MSVC) place both "foo" and "bar" in the binary |
101 | // when presented with |
102 | // (true? "foo" : "bar") |
103 | // so we must instead use #if to select only the strings we need, which is rather verbose. |
104 | #define TF_PLAT_STR_(x) TF_PLAT_STR_MAGIC_PREFIX_ #x "=" TF_PLAT_STR_AS_STR_(x) |
105 | |
106 | // Include the #if machinery that sets the macros used below. |
107 | // platform_strings_computed.h can be generated by filtering this header file |
108 | // through: |
109 | // awk ' |
110 | // header == "" { print; } |
111 | // /\*\// && header == "" { |
112 | // print "// Generated from platform_strings.h."; |
113 | // print ""; |
114 | // print "#ifndef TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_COMPUTED_H_"; |
115 | // print "#define TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_COMPUTED_H_"; |
116 | // print ""; |
117 | // header = 1; |
118 | // } |
119 | // /^#define TF_PLAT_STR_LIST_[a-zA-Z0-9_]*\(\) *\\$/ { active = 1; } |
120 | // /TF_PLAT_STR_TERMINATOR_/ { active = 0; } |
121 | // /^ *TF_PLAT_STR_[A-Za-z0-9_]* *\\$/ && active { |
122 | // x = $0; |
123 | // sub(/^ *TF_PLAT_STR_/, "", x); |
124 | // sub(/ *\\$/, "", x); |
125 | // printf ("#if defined(%s)\n", x); |
126 | // printf ("#define TF_PLAT_STR_%s TF_PLAT_STR_(%s)\n", x, x); |
127 | // printf ("#else\n"); |
128 | // printf ("#define TF_PLAT_STR_%s\n", x); |
129 | // printf ("#endif\n"); |
130 | // } |
131 | // END { |
132 | // print ""; |
133 | // print "#endif // TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_COMPUTED_H_"; |
134 | // }' |
135 | #include "tensorflow/core/platform/platform_strings_computed.h" |
136 | |
137 | // clang-format butchers the following lines. |
138 | // clang-format off |
139 | |
140 | // Optional features of x86_64 and 32-bit x86. Each TF_PLAT_STR_<macro> entry is set by platform_strings_computed.h: it is the option string for <macro> if <macro> is defined, and empty otherwise. On non-x86 targets this list is replaced by an empty one further down. |
141 | #define TF_PLAT_STR_LIST___x86_64__() \ |
142 | TF_PLAT_STR__M_IX86_FP \ |
143 | TF_PLAT_STR__NO_PREFETCHW \ |
144 | TF_PLAT_STR___3dNOW_A__ \ |
145 | TF_PLAT_STR___3dNOW__ \ |
146 | TF_PLAT_STR___ABM__ \ |
147 | TF_PLAT_STR___ADX__ \ |
148 | TF_PLAT_STR___AES__ \ |
149 | TF_PLAT_STR___AVX2__ \ |
150 | TF_PLAT_STR___AVX512BW__ \ |
151 | TF_PLAT_STR___AVX512CD__ \ |
152 | TF_PLAT_STR___AVX512DQ__ \ |
153 | TF_PLAT_STR___AVX512ER__ \ |
154 | TF_PLAT_STR___AVX512F__ \ |
155 | TF_PLAT_STR___AVX512IFMA__ \ |
156 | TF_PLAT_STR___AVX512PF__ \ |
157 | TF_PLAT_STR___AVX512VBMI__ \ |
158 | TF_PLAT_STR___AVX512VL__ \ |
159 | TF_PLAT_STR___AVX__ \ |
160 | TF_PLAT_STR___BMI2__ \ |
161 | TF_PLAT_STR___BMI__ \ |
162 | TF_PLAT_STR___CLFLUSHOPT__ \ |
163 | TF_PLAT_STR___CLZERO__ \ |
164 | TF_PLAT_STR___F16C__ \ |
165 | TF_PLAT_STR___FMA4__ \ |
166 | TF_PLAT_STR___FMA__ \ |
167 | TF_PLAT_STR___FP_FAST_FMA \ |
168 | TF_PLAT_STR___FP_FAST_FMAF \ |
169 | TF_PLAT_STR___FSGSBASE__ \ |
170 | TF_PLAT_STR___FXSR__ \ |
171 | TF_PLAT_STR___LWP__ \ |
172 | TF_PLAT_STR___LZCNT__ \ |
173 | TF_PLAT_STR___MMX__ \ |
174 | TF_PLAT_STR___MWAITX__ \ |
175 | TF_PLAT_STR___PCLMUL__ \ |
176 | TF_PLAT_STR___PKU__ \ |
177 | TF_PLAT_STR___POPCNT__ \ |
178 | TF_PLAT_STR___PRFCHW__ \ |
179 | TF_PLAT_STR___RDRND__ \ |
180 | TF_PLAT_STR___RDSEED__ \ |
181 | TF_PLAT_STR___RTM__ \ |
182 | TF_PLAT_STR___SHA__ \ |
183 | TF_PLAT_STR___SSE2_MATH__ \ |
184 | TF_PLAT_STR___SSE2__ \ |
185 | TF_PLAT_STR___SSE_MATH__ \ |
186 | TF_PLAT_STR___SSE__ \ |
187 | TF_PLAT_STR___SSE3__ \ |
188 | TF_PLAT_STR___SSE4A__ \ |
189 | TF_PLAT_STR___SSE4_1__ \ |
190 | TF_PLAT_STR___SSE4_2__ \ |
191 | TF_PLAT_STR___SSSE3__ \ |
192 | TF_PLAT_STR___TBM__ \ |
193 | TF_PLAT_STR___XOP__ \ |
194 | TF_PLAT_STR___XSAVEC__ \ |
195 | TF_PLAT_STR___XSAVEOPT__ \ |
196 | TF_PLAT_STR___XSAVES__ \ |
197 | TF_PLAT_STR___XSAVE__ \ |
198 | TF_PLAT_STR_TERMINATOR_ |
199 | |
200 | // Optional features of PowerPC (64- and 32-bit). Each TF_PLAT_STR_<macro> entry is set by platform_strings_computed.h: it is the option string for <macro> if <macro> is defined, and empty otherwise. On non-PowerPC targets this list is replaced by an empty one further down. |
201 | #define TF_PLAT_STR_LIST___powerpc64__() \ |
202 | TF_PLAT_STR__SOFT_DOUBLE \ |
203 | TF_PLAT_STR__SOFT_FLOAT \ |
204 | TF_PLAT_STR___ALTIVEC__ \ |
205 | TF_PLAT_STR___APPLE_ALTIVEC__ \ |
206 | TF_PLAT_STR___CRYPTO__ \ |
207 | TF_PLAT_STR___FLOAT128_HARDWARE__ \ |
208 | TF_PLAT_STR___FLOAT128_TYPE__ \ |
209 | TF_PLAT_STR___FP_FAST_FMA \ |
210 | TF_PLAT_STR___FP_FAST_FMAF \ |
211 | TF_PLAT_STR___HTM__ \ |
212 | TF_PLAT_STR___NO_FPRS__ \ |
213 | TF_PLAT_STR___NO_LWSYNC__ \ |
214 | TF_PLAT_STR___POWER8_VECTOR__ \ |
215 | TF_PLAT_STR___POWER9_VECTOR__ \ |
216 | TF_PLAT_STR___PPC405__ \ |
217 | TF_PLAT_STR___QUAD_MEMORY_ATOMIC__ \ |
218 | TF_PLAT_STR___RECIPF__ \ |
219 | TF_PLAT_STR___RECIP_PRECISION__ \ |
220 | TF_PLAT_STR___RECIP__ \ |
221 | TF_PLAT_STR___RSQRTEF__ \ |
222 | TF_PLAT_STR___RSQRTE__ \ |
223 | TF_PLAT_STR___TM_FENCE__ \ |
224 | TF_PLAT_STR___UPPER_REGS_DF__ \ |
225 | TF_PLAT_STR___UPPER_REGS_SF__ \ |
226 | TF_PLAT_STR___VEC__ \ |
227 | TF_PLAT_STR___VSX__ \ |
228 | TF_PLAT_STR_TERMINATOR_ |
229 | |
230 | // Optional features of aarch64 and 32-bit arm. Each TF_PLAT_STR_<macro> entry is set by platform_strings_computed.h: it is the option string for <macro> if <macro> is defined, and empty otherwise. Each macro must be listed only once; a repeated entry embeds the same option string twice. On non-ARM targets this list is replaced by an empty one further down. |
231 | #define TF_PLAT_STR_LIST___aarch64__() \ |
232 | TF_PLAT_STR___ARM_ARCH \ |
233 | TF_PLAT_STR___ARM_FEATURE_CLZ \ |
234 | TF_PLAT_STR___ARM_FEATURE_CRC32 \ |
236 | TF_PLAT_STR___ARM_FEATURE_CRYPTO \ |
237 | TF_PLAT_STR___ARM_FEATURE_DIRECTED_ROUNDING \ |
238 | TF_PLAT_STR___ARM_FEATURE_DSP \ |
239 | TF_PLAT_STR___ARM_FEATURE_FMA \ |
240 | TF_PLAT_STR___ARM_FEATURE_IDIV \ |
241 | TF_PLAT_STR___ARM_FEATURE_LDREX \ |
242 | TF_PLAT_STR___ARM_FEATURE_NUMERIC_MAXMIN \ |
243 | TF_PLAT_STR___ARM_FEATURE_QBIT \ |
244 | TF_PLAT_STR___ARM_FEATURE_QRDMX \ |
245 | TF_PLAT_STR___ARM_FEATURE_SAT \ |
246 | TF_PLAT_STR___ARM_FEATURE_SIMD32 \ |
247 | TF_PLAT_STR___ARM_FEATURE_UNALIGNED \ |
248 | TF_PLAT_STR___ARM_FP \ |
249 | TF_PLAT_STR___ARM_NEON_FP \ |
250 | TF_PLAT_STR___ARM_NEON__ \ |
251 | TF_PLAT_STR___ARM_WMMX \ |
252 | TF_PLAT_STR___IWMMXT2__ \ |
253 | TF_PLAT_STR___IWMMXT__ \ |
254 | TF_PLAT_STR___VFP_FP__ \ |
255 | TF_PLAT_STR_TERMINATOR_ |
256 | |
257 | // Generic features, including indication of architecture and OS. Unlike the per-architecture lists, this list is used on every target. |
258 | // The _M_* macros are defined by Visual Studio, which doesn't define |
259 | // __LITTLE_ENDIAN__ or __BYTE_ORDER__; Windows is assumed to be little endian. |
260 | // NOTE(review): the entries TF_PLAT_STR_____MSYS__ and TF_PLAT_STR___riscv___ test the macros ____MSYS__ and __riscv___, which look like typos for __MSYS__ and __riscv - confirm. Renaming them also requires regenerating platform_strings_computed.h. |
261 | #define TF_PLAT_STR_LIST___generic__() \ |
262 | TF_PLAT_STR_TARGET_IPHONE_SIMULATOR \ |
263 | TF_PLAT_STR_TARGET_OS_IOS \ |
264 | TF_PLAT_STR_TARGET_OS_IPHONE \ |
265 | TF_PLAT_STR__MSC_VER \ |
266 | TF_PLAT_STR__M_ARM \ |
267 | TF_PLAT_STR__M_ARM64 \ |
268 | TF_PLAT_STR__M_ARM_ARMV7VE \ |
269 | TF_PLAT_STR__M_ARM_FP \ |
270 | TF_PLAT_STR__M_IX86 \ |
271 | TF_PLAT_STR__M_X64 \ |
272 | TF_PLAT_STR__WIN32 \ |
273 | TF_PLAT_STR__WIN64 \ |
274 | TF_PLAT_STR___ANDROID__ \ |
275 | TF_PLAT_STR___APPLE__ \ |
276 | TF_PLAT_STR___BYTE_ORDER__ \ |
277 | TF_PLAT_STR___CYGWIN__ \ |
278 | TF_PLAT_STR___FreeBSD__ \ |
279 | TF_PLAT_STR___LITTLE_ENDIAN__ \ |
280 | TF_PLAT_STR___NetBSD__ \ |
281 | TF_PLAT_STR___OpenBSD__ \ |
282 | TF_PLAT_STR_____MSYS__ \ |
283 | TF_PLAT_STR___aarch64__ \ |
284 | TF_PLAT_STR___alpha__ \ |
285 | TF_PLAT_STR___arm__ \ |
286 | TF_PLAT_STR___i386__ \ |
287 | TF_PLAT_STR___i686__ \ |
288 | TF_PLAT_STR___ia64__ \ |
289 | TF_PLAT_STR___linux__ \ |
290 | TF_PLAT_STR___mips32__ \ |
291 | TF_PLAT_STR___mips64__ \ |
292 | TF_PLAT_STR___powerpc64__ \ |
293 | TF_PLAT_STR___powerpc__ \ |
294 | TF_PLAT_STR___riscv___ \ |
295 | TF_PLAT_STR___s390x__ \ |
296 | TF_PLAT_STR___sparc64__ \ |
297 | TF_PLAT_STR___sparc__ \ |
298 | TF_PLAT_STR___x86_64__ \ |
299 | TF_PLAT_STR_TERMINATOR_ |
300 | |
301 | #if !defined(__x86_64__) && !defined(_M_X64) && \ |
302 | !defined(__i386__) && !defined(_M_IX86) |
303 | #undef TF_PLAT_STR_LIST___x86_64__ |
304 | #define TF_PLAT_STR_LIST___x86_64__() /* Not an x86 target: the x86 list contributes no strings. */ |
305 | #endif /* not x86_64 or 32-bit x86 */ |
306 | #if !defined(__powerpc64__) && !defined(__powerpc__) |
307 | #undef TF_PLAT_STR_LIST___powerpc64__ |
308 | #define TF_PLAT_STR_LIST___powerpc64__() /* Not a PowerPC target: the PowerPC list contributes no strings. */ |
309 | #endif /* not PowerPC */ |
310 | #if !defined(__aarch64__) && !defined(_M_ARM64) && \ |
311 | !defined(__arm__) && !defined(_M_ARM) |
312 | #undef TF_PLAT_STR_LIST___aarch64__ |
313 | #define TF_PLAT_STR_LIST___aarch64__() /* Not an ARM target: the ARM list contributes no strings. */ |
314 | #endif /* not aarch64 or 32-bit arm */ |
315 | |
316 | // Macro to be used once in each dynamically loadable library. It defines the static array |
317 | // tf_cpu_option[], which holds the version string followed by the option strings of the four lists above. |
318 | // The BSS global variable tf_cpu_option_global and the class |
319 | // instance tf_cpu_option_avoid_omit_class are needed to prevent |
320 | // compilers/linkers such as clang from omitting the static variable |
321 | // tf_cpu_option[], which would otherwise appear to be unused. We cannot make |
322 | // tf_cpu_option[] global, because we then might get multiply-defined symbols |
323 | // if TF_PLATFORM_STRINGS() is used twice in the same library. |
324 | // (tf_cpu_option_global doesn't see such errors because it is |
325 | // defined in BSS, so multiple definitions are combined by the linker.) gcc's |
326 | // __attribute__((used)) is insufficient because it seems to be ignored by |
327 | // linkers. NOTE(review): C++ has no tentative definitions, so confirm that every supported toolchain really merges duplicate definitions of tf_cpu_option_global. |
328 | #define TF_PLATFORM_STRINGS() \ |
329 | static const char tf_cpu_option[] = \ |
330 | TF_PLAT_STR_MAGIC_PREFIX_ "TF_PLAT_STR_VERSION=" TF_PLAT_STR_VERSION_ \ |
331 | TF_PLAT_STR_LIST___x86_64__() \ |
332 | TF_PLAT_STR_LIST___powerpc64__() \ |
333 | TF_PLAT_STR_LIST___aarch64__() \ |
334 | TF_PLAT_STR_LIST___generic__() \ |
335 | ; \ |
336 | const char *tf_cpu_option_global; \ |
337 | namespace { \ |
338 | class TFCPUOptionHelper { \ |
339 | public: \ |
340 | TFCPUOptionHelper() { \ |
341 | /* Compilers/linkers remove unused variables aggressively. The */ \ |
342 | /* following gyrations subvert most such optimizations. */ \ |
343 | tf_cpu_option_global = tf_cpu_option; \ |
344 | /* Nothing is printed because the string starts with a nul; the call only makes tf_cpu_option look used. */ \ |
345 | printf("%s%s", tf_cpu_option, ""); \ |
346 | } \ |
347 | } tf_cpu_option_avoid_omit_class; \ |
348 | } /* anonymous namespace */ |
349 | // clang-format on |
350 | |
351 | namespace tensorflow { |
352 | |
353 | // Retrieves the platform strings from the file at `path` and appends them to |
354 | // the vector `*found`. A non-zero return value means that an error occurred |
355 | // reading the file, in which case `*found` may or may not have been modified; |
356 | // the returned error code is suitable for use with strerror(). |
357 | int GetPlatformStrings(const std::string& path, |
358 | std::vector<std::string>* found); |
359 | |
360 | } // namespace tensorflow |
361 | |
362 | #endif // TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_H_ |
363 | |