1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_TSL_PLATFORM_PREFETCH_H_ |
17 | #define TENSORFLOW_TSL_PLATFORM_PREFETCH_H_ |
18 | |
19 | #include "tensorflow/tsl/platform/platform.h" |
20 | |
namespace tsl {
namespace port {

// Prefetching support
//
// A PrefetchHint describes how much temporal locality the caller expects for
// the prefetched data. The enumerator values are forwarded unchanged as the
// `locality` argument of __builtin_prefetch, so they must stay within [0, 3].
//
// Defined behavior on some microarchitectures:
//   PREFETCH_HINT_T0:
//     prefetch to all levels of the hierarchy (except on p4: prefetch to L2)
//   PREFETCH_HINT_NTA:
//     p4: fetch to L2, but limit to 1 way (out of the 8 ways)
//     core: skip L2, go directly to L1
//     k8 rev E and later: skip L2, can go to either of the 2-ways in L1
enum PrefetchHint {
  PREFETCH_HINT_T0 = 3,  // More temporal locality
  PREFETCH_HINT_T1 = 2,
  PREFETCH_HINT_T2 = 1,  // Less temporal locality
  PREFETCH_HINT_NTA = 0  // No temporal locality
};

// Requests that the memory at address `x` be prefetched for reading, using
// the temporal-locality level selected by the `hint` template argument.
//
// The call is purely advisory: on toolchains without a known prefetch builtin
// it compiles to nothing.
template <PrefetchHint hint>
void prefetch(const void* x);

// ---------------------------------------------------------------------------
// Inline implementation
// ---------------------------------------------------------------------------
template <PrefetchHint hint>
inline void prefetch(const void* x) {
// __GNUC__ is predefined by GCC and by GCC-compatible compilers, so the
// builtin is enabled without relying on build-system flags. __llvm__ is kept
// because Clang in MSVC-compatibility mode does not define __GNUC__.
// COMPILER_GCC and COMPILER_GCC3 are legacy, build-defined macros retained
// only for backward compatibility.
#if defined(__GNUC__) || defined(__llvm__) || defined(COMPILER_GCC) || \
    defined(COMPILER_GCC3)
  // Second argument 0 requests a prefetch for read access; `hint` is a
  // compile-time constant, as the builtin requires.
  __builtin_prefetch(x, 0, hint);
#else
  // You get no effect.  Feel free to add more sections above.
  (void)x;  // Avoid unused-parameter warnings in the no-op fallback.
#endif
}

}  // namespace port
}  // namespace tsl
58 | |
59 | #endif // TENSORFLOW_TSL_PLATFORM_PREFETCH_H_ |
60 | |