/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
15
16#ifndef TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_H_
17#define TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_H_
18
// This header defines the macro TF_PLATFORM_STRINGS() which should be used
// once in each dynamically loadable TensorFlow module. It embeds static
// strings into the compilation unit that allow TensorFlow to determine what
// compilation options were in effect when the compilation unit was built. All
// compilation units within the same dynamically loadable library should be
// built with the same options (or at least, the strings should be embedded in
// the compilation unit built with the most restrictive options).

// The platform strings embedded into a binary may be retrieved with the
// GetPlatformStrings function.

// Rationale:
// We wish to load only those libraries that this CPU can execute. For
// example, we should not load a library compiled with AVX2 instructions on a
// CPU that cannot execute them.
//
// One might think that one could dlopen() the library, and call a routine that
// would return which cpu type it was compiled for. Alas, this does not work,
// because at dlopen() time, a library containing C++ will execute constructors
// of class variables with static storage class. Even code that looks
// innocuous may use optional platform-specific instructions. For example,
// the fastest way to zero a region of memory might use optional instructions.
//
// One might think one could run a tool such as "objdump" to read flags from
// the libraries' headers, or perhaps disassemble each library to look for
// particular instructions. Unfortunately, the desired flags are not present
// in the headers, and disassembly can be prohibitively slow ("objdump -d" is
// very slow, for example). Moreover, a tool to examine the library may not
// be present on the system unless the user has installed special packages (for
// example, on Windows).
//
// Instead, we adopt a crude but straightforward solution: We require
// developers to use the macro TF_PLATFORM_STRINGS() in their library, to
// embed the compilation options as constant strings. The compiler's
// predefined macros pick which strings are included. We then search for the
// strings in the files, and then dlopen() only those libraries that have or
// lack strings as needed.
//
// We adopt the approach of placing in the binary a fairly raw copy of the
// predefined macros, rather than trying to interpret them in complex ways at
// compile time. This allows the loading binary to alter its interpretation of
// the strings without library developers having to recompile.
61
62#include <stdio.h>
63
64#include <string>
65#include <vector>
66
// Aside from the header guard, the internal macros defined here have the form:
//   TF_PLAT_STR_*

// Version of the list of tested macros. TF_PLATFORM_STRINGS() embeds it in the
// binary as the value of the "TF_PLAT_STR_VERSION" string.
//
// If a macro is removed from the list of tested macros, the major version in
// the following version number should be incremented, and the minor version
// set to zero. Otherwise, if a macro is added to the list of tested macros,
// the minor number should be incremented.
#define TF_PLAT_STR_VERSION_ "1.0"
75
// Prefix of each option string indicator in the binary.
// After the prefix, such strings have the form:
//   [A-Za-z_0-9]+=<value>
// followed by a terminating nul. To simplify searching, this prefix is all
// ASCII, starts with a nul, and contains no character twice.
//
// The eleven bytes are: nul  S  \  s  "  :  ^  p  *  L  }
#define TF_PLAT_STR_MAGIC_PREFIX_ "\0S\\s\":^p*L}"
82
// A helper macro for TF_PLAT_STR_AS_STR_(): stringifies its argument exactly
// as written, without macro-expanding it first.
#define TF_PLAT_STR_STR_1_(x) #x

// Yield a constant string corresponding to x, after macro expansion.
// The extra level of indirection through TF_PLAT_STR_STR_1_() is what lets the
// preprocessor expand x before it is stringified; a name that is not a macro
// is stringified unchanged.
#define TF_PLAT_STR_AS_STR_(x) TF_PLAT_STR_STR_1_(x)
88
// An empty definition to make lists more uniform: every TF_PLAT_STR_LIST_*()
// ends with it, so each real entry can end in a line continuation. It expands
// to nothing. The awk script that generates platform_strings_computed.h also
// uses it as its end-of-list marker.
#define TF_PLAT_STR_TERMINATOR_
91
// TF_PLAT_STR_(x) introduces a constant string indicating whether a
// particular compilation option has been turned on.
//
// The result is the string literal
//   TF_PLAT_STR_MAGIC_PREFIX_ "<name of x>" "=" "<expansion of x>"
// which the compiler concatenates with its neighbours.
//
// In gcc and clang, we might imagine using something like
//   #define TF_PLAT_STR_(x)
//     (sizeof (#x) != sizeof (TF_PLAT_STR_AS_STR_ (x))?
//      TF_PLAT_STR_MAGIC_PREFIX_ #x "=" TF_PLAT_STR_AS_STR_ (x) :
//      TF_PLAT_STR_MAGIC_PREFIX_ #x "=0"),
// (with line continuations), but some compilers (notably MSVC) place both
// "foo" and "bar" in the binary when presented with
//   (true? "foo" : "bar")
// so we must use #if to select the strings we need, which is rather verbose.
#define TF_PLAT_STR_(x) TF_PLAT_STR_MAGIC_PREFIX_ #x "=" TF_PLAT_STR_AS_STR_(x)
105
// Include the #if machinery that sets the macros used below.
// platform_strings_computed.h can be generated by filtering this header file
// through:
//      awk '
//      header == "" { print; }
//      /\*\// && header == "" {
//          print "// Generated from platform_strings.h.";
//          print "";
//          print "#ifndef TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_COMPUTED_H_";
//          print "#define TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_COMPUTED_H_";
//          print "";
//          header = 1;
//      }
//      /^#define TF_PLAT_STR_LIST_[a-zA-Z0-9_]*\(\) *\\$/ { active = 1; }
//      /TF_PLAT_STR_TERMINATOR_/ { active = 0; }
//      /^ *TF_PLAT_STR_[A-Za-z0-9_]* *\\$/ && active {
//          x = $0;
//          sub(/^ *TF_PLAT_STR_/, "", x);
//          sub(/ *\\$/, "", x);
//          printf ("#if defined(%s)\n", x);
//          printf ("#define TF_PLAT_STR_%s TF_PLAT_STR_(%s)\n", x, x);
//          printf ("#else\n");
//          printf ("#define TF_PLAT_STR_%s\n", x);
//          printf ("#endif\n");
//      }
//      END {
//          print "";
//          print "#endif  // TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_COMPUTED_H_";
//      }'
135#include "tensorflow/core/platform/platform_strings_computed.h"
136
// clang-format butchers the following lines.
// clang-format off

// x86_64 and x86_32 optional features.
//
// Each entry is one TF_PLAT_STR_<macro> name on its own continued line, which
// is the shape the generator script for platform_strings_computed.h matches.
// Keep comments outside the list and end the list with
// TF_PLAT_STR_TERMINATOR_.
#define TF_PLAT_STR_LIST___x86_64__() \
    TF_PLAT_STR__M_IX86_FP \
    TF_PLAT_STR__NO_PREFETCHW \
    TF_PLAT_STR___3dNOW_A__ \
    TF_PLAT_STR___3dNOW__ \
    TF_PLAT_STR___ABM__ \
    TF_PLAT_STR___ADX__ \
    TF_PLAT_STR___AES__ \
    TF_PLAT_STR___AVX2__ \
    TF_PLAT_STR___AVX512BW__ \
    TF_PLAT_STR___AVX512CD__ \
    TF_PLAT_STR___AVX512DQ__ \
    TF_PLAT_STR___AVX512ER__ \
    TF_PLAT_STR___AVX512F__ \
    TF_PLAT_STR___AVX512IFMA__ \
    TF_PLAT_STR___AVX512PF__ \
    TF_PLAT_STR___AVX512VBMI__ \
    TF_PLAT_STR___AVX512VL__ \
    TF_PLAT_STR___AVX__ \
    TF_PLAT_STR___BMI2__ \
    TF_PLAT_STR___BMI__ \
    TF_PLAT_STR___CLFLUSHOPT__ \
    TF_PLAT_STR___CLZERO__ \
    TF_PLAT_STR___F16C__ \
    TF_PLAT_STR___FMA4__ \
    TF_PLAT_STR___FMA__ \
    TF_PLAT_STR___FP_FAST_FMA \
    TF_PLAT_STR___FP_FAST_FMAF \
    TF_PLAT_STR___FSGSBASE__ \
    TF_PLAT_STR___FXSR__ \
    TF_PLAT_STR___LWP__ \
    TF_PLAT_STR___LZCNT__ \
    TF_PLAT_STR___MMX__ \
    TF_PLAT_STR___MWAITX__ \
    TF_PLAT_STR___PCLMUL__ \
    TF_PLAT_STR___PKU__ \
    TF_PLAT_STR___POPCNT__ \
    TF_PLAT_STR___PRFCHW__ \
    TF_PLAT_STR___RDRND__ \
    TF_PLAT_STR___RDSEED__ \
    TF_PLAT_STR___RTM__ \
    TF_PLAT_STR___SHA__ \
    TF_PLAT_STR___SSE2_MATH__ \
    TF_PLAT_STR___SSE2__ \
    TF_PLAT_STR___SSE_MATH__ \
    TF_PLAT_STR___SSE__ \
    TF_PLAT_STR___SSE3__ \
    TF_PLAT_STR___SSE4A__ \
    TF_PLAT_STR___SSE4_1__ \
    TF_PLAT_STR___SSE4_2__ \
    TF_PLAT_STR___SSSE3__ \
    TF_PLAT_STR___TBM__ \
    TF_PLAT_STR___XOP__ \
    TF_PLAT_STR___XSAVEC__ \
    TF_PLAT_STR___XSAVEOPT__ \
    TF_PLAT_STR___XSAVES__ \
    TF_PLAT_STR___XSAVE__ \
    TF_PLAT_STR_TERMINATOR_
199
// PowerPC (64- and 32-bit) optional features.
//
// Same layout rules as the other TF_PLAT_STR_LIST_*() macros: one
// TF_PLAT_STR_<macro> name per continued line, ending with
// TF_PLAT_STR_TERMINATOR_.
#define TF_PLAT_STR_LIST___powerpc64__() \
    TF_PLAT_STR__SOFT_DOUBLE \
    TF_PLAT_STR__SOFT_FLOAT \
    TF_PLAT_STR___ALTIVEC__ \
    TF_PLAT_STR___APPLE_ALTIVEC__ \
    TF_PLAT_STR___CRYPTO__ \
    TF_PLAT_STR___FLOAT128_HARDWARE__ \
    TF_PLAT_STR___FLOAT128_TYPE__ \
    TF_PLAT_STR___FP_FAST_FMA \
    TF_PLAT_STR___FP_FAST_FMAF \
    TF_PLAT_STR___HTM__ \
    TF_PLAT_STR___NO_FPRS__ \
    TF_PLAT_STR___NO_LWSYNC__ \
    TF_PLAT_STR___POWER8_VECTOR__ \
    TF_PLAT_STR___POWER9_VECTOR__ \
    TF_PLAT_STR___PPC405__ \
    TF_PLAT_STR___QUAD_MEMORY_ATOMIC__ \
    TF_PLAT_STR___RECIPF__ \
    TF_PLAT_STR___RECIP_PRECISION__ \
    TF_PLAT_STR___RECIP__ \
    TF_PLAT_STR___RSQRTEF__ \
    TF_PLAT_STR___RSQRTE__ \
    TF_PLAT_STR___TM_FENCE__ \
    TF_PLAT_STR___UPPER_REGS_DF__ \
    TF_PLAT_STR___UPPER_REGS_SF__ \
    TF_PLAT_STR___VEC__ \
    TF_PLAT_STR___VSX__ \
    TF_PLAT_STR_TERMINATOR_
229
// aarch64 and 32-bit arm optional features.
//
// Same layout rules as the other TF_PLAT_STR_LIST_*() macros: one
// TF_PLAT_STR_<macro> name per continued line, ending with
// TF_PLAT_STR_TERMINATOR_. Each macro is listed exactly once, so that its
// string is embedded at most once.
#define TF_PLAT_STR_LIST___aarch64__() \
    TF_PLAT_STR___ARM_ARCH \
    TF_PLAT_STR___ARM_FEATURE_CLZ \
    TF_PLAT_STR___ARM_FEATURE_CRC32 \
    TF_PLAT_STR___ARM_FEATURE_CRYPTO \
    TF_PLAT_STR___ARM_FEATURE_DIRECTED_ROUNDING \
    TF_PLAT_STR___ARM_FEATURE_DSP \
    TF_PLAT_STR___ARM_FEATURE_FMA \
    TF_PLAT_STR___ARM_FEATURE_IDIV \
    TF_PLAT_STR___ARM_FEATURE_LDREX \
    TF_PLAT_STR___ARM_FEATURE_NUMERIC_MAXMIN \
    TF_PLAT_STR___ARM_FEATURE_QBIT \
    TF_PLAT_STR___ARM_FEATURE_QRDMX \
    TF_PLAT_STR___ARM_FEATURE_SAT \
    TF_PLAT_STR___ARM_FEATURE_SIMD32 \
    TF_PLAT_STR___ARM_FEATURE_UNALIGNED \
    TF_PLAT_STR___ARM_FP \
    TF_PLAT_STR___ARM_NEON_FP \
    TF_PLAT_STR___ARM_NEON__ \
    TF_PLAT_STR___ARM_WMMX \
    TF_PLAT_STR___IWMMXT2__ \
    TF_PLAT_STR___IWMMXT__ \
    TF_PLAT_STR___VFP_FP__ \
    TF_PLAT_STR_TERMINATOR_
256
// Generic features, including indication of architecture and OS.
// The _M_* macros are defined by Visual Studio.
// Visual Studio doesn't define __LITTLE_ENDIAN__ or __BYTE_ORDER__;
// Windows is assumed to be little endian.
//
// Same layout rules as the other TF_PLAT_STR_LIST_*() macros: one
// TF_PLAT_STR_<macro> name per continued line, ending with
// TF_PLAT_STR_TERMINATOR_.
//
// NOTE(review): TF_PLAT_STR_____MSYS__ tests a macro spelled ____MSYS__ (four
// leading underscores) and TF_PLAT_STR___riscv___ tests __riscv___ (three
// trailing underscores). These look like typos (presumably for __MSYS__ and
// __riscv or __riscv__) -- confirm against the toolchains' predefined macros.
// Any rename here must be accompanied by regenerating
// platform_strings_computed.h, which defines the TF_PLAT_STR_<macro> names.
#define TF_PLAT_STR_LIST___generic__() \
    TF_PLAT_STR_TARGET_IPHONE_SIMULATOR \
    TF_PLAT_STR_TARGET_OS_IOS \
    TF_PLAT_STR_TARGET_OS_IPHONE \
    TF_PLAT_STR__MSC_VER \
    TF_PLAT_STR__M_ARM \
    TF_PLAT_STR__M_ARM64 \
    TF_PLAT_STR__M_ARM_ARMV7VE \
    TF_PLAT_STR__M_ARM_FP \
    TF_PLAT_STR__M_IX86 \
    TF_PLAT_STR__M_X64 \
    TF_PLAT_STR__WIN32 \
    TF_PLAT_STR__WIN64 \
    TF_PLAT_STR___ANDROID__ \
    TF_PLAT_STR___APPLE__ \
    TF_PLAT_STR___BYTE_ORDER__ \
    TF_PLAT_STR___CYGWIN__ \
    TF_PLAT_STR___FreeBSD__ \
    TF_PLAT_STR___LITTLE_ENDIAN__ \
    TF_PLAT_STR___NetBSD__ \
    TF_PLAT_STR___OpenBSD__ \
    TF_PLAT_STR_____MSYS__ \
    TF_PLAT_STR___aarch64__ \
    TF_PLAT_STR___alpha__ \
    TF_PLAT_STR___arm__ \
    TF_PLAT_STR___i386__ \
    TF_PLAT_STR___i686__ \
    TF_PLAT_STR___ia64__ \
    TF_PLAT_STR___linux__ \
    TF_PLAT_STR___mips32__ \
    TF_PLAT_STR___mips64__ \
    TF_PLAT_STR___powerpc64__ \
    TF_PLAT_STR___powerpc__ \
    TF_PLAT_STR___riscv___ \
    TF_PLAT_STR___s390x__ \
    TF_PLAT_STR___sparc64__ \
    TF_PLAT_STR___sparc__ \
    TF_PLAT_STR___x86_64__ \
    TF_PLAT_STR_TERMINATOR_
300
// Drop the architecture-specific lists that do not apply to the target being
// compiled: each list is redefined to expand to nothing unless one of the
// compiler's macros for that architecture family (gcc/clang or Visual Studio
// spelling, 64- or 32-bit) is defined.

// x86_64 / x86_32.
#if !defined(__x86_64__) && !defined(_M_X64) && \
    !defined(__i386__) && !defined(_M_IX86)
#undef TF_PLAT_STR_LIST___x86_64__
#define TF_PLAT_STR_LIST___x86_64__()
#endif
// PowerPC, 64- and 32-bit.
#if !defined(__powerpc64__) && !defined(__powerpc__)
#undef TF_PLAT_STR_LIST___powerpc64__
#define TF_PLAT_STR_LIST___powerpc64__()
#endif
// aarch64 / 32-bit arm.
#if !defined(__aarch64__) && !defined(_M_ARM64) && \
    !defined(__arm__) && !defined(_M_ARM)
#undef TF_PLAT_STR_LIST___aarch64__
#define TF_PLAT_STR_LIST___aarch64__()
#endif
315
// Macro to be used in each dynamically loadable library.
//
// It expands to namespace-scope definitions, so it must be used at file scope,
// and it calls printf(), so <stdio.h> must be included (this header does so).
//
// The BSS global variable tf_cpu_option_global and the class
// instance tf_cpu_option_avoid_omit_class are needed to prevent
// compilers/linkers such as clang from omitting the static variable
// tf_cpu_option[], which would otherwise appear to be unused. We cannot make
// tf_cpu_option[] global, because we then might get multiply-defined symbols
// if TF_PLATFORM_STRINGS() is used twice in the same library.
// (tf_cpu_option_global doesn't see such errors because it is
// defined in BSS, so multiple definitions are combined by the linker.) gcc's
// __attribute__((used)) is insufficient because it seems to be ignored by
// linkers.
#define TF_PLATFORM_STRINGS() \
    static const char tf_cpu_option[] = \
        TF_PLAT_STR_MAGIC_PREFIX_ "TF_PLAT_STR_VERSION=" TF_PLAT_STR_VERSION_ \
        TF_PLAT_STR_LIST___x86_64__() \
        TF_PLAT_STR_LIST___powerpc64__() \
        TF_PLAT_STR_LIST___aarch64__() \
        TF_PLAT_STR_LIST___generic__() \
        ; \
    const char *tf_cpu_option_global; \
    namespace { \
    class TFCPUOptionHelper { \
     public: \
      TFCPUOptionHelper() { \
        /* Compilers/linkers remove unused variables aggressively. The */ \
        /* following gyrations subvert most such optimizations. */ \
        tf_cpu_option_global = tf_cpu_option; \
        /* Nothing is printed because the string starts with a nul. */ \
        printf("%s%s", tf_cpu_option, ""); \
      } \
    } tf_cpu_option_avoid_omit_class; \
    } /* anonymous namespace */
// clang-format on
350
namespace tensorflow {

// Retrieves the platform strings from the file at the given path and appends
// them to the given vector.
//
//   path:  the file to read.
//   found: the vector to which the strings found are appended.
//
// Returns zero on success. If the returned int is non-zero, an error occurred
// reading the file and *found may or may not have been modified. The returned
// error code is suitable for use with strerror().
int GetPlatformStrings(const std::string& path,
                       std::vector<std::string>* found);

}  // namespace tensorflow
361
362#endif // TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_H_
363