1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_CORE_GRAPH_MKL_GRAPH_UTIL_H_ |
17 | #define TENSORFLOW_CORE_GRAPH_MKL_GRAPH_UTIL_H_ |
18 | #ifdef INTEL_MKL |
19 | |
20 | #include "absl/base/call_once.h" |
21 | #include "tensorflow/core/framework/op_kernel.h" |
22 | #include "tensorflow/core/framework/types.pb.h" |
23 | #include "tensorflow/core/graph/graph.h" |
24 | #include "tensorflow/core/lib/core/status.h" |
25 | #include "tensorflow/core/platform/cpu_info.h" |
26 | #include "tensorflow/core/util/env_var.h" |
27 | |
28 | namespace tensorflow { |
// Since our ops are going to produce and also consume N additional tensors
// (Mkl) for N Tensorflow tensors, we can have the following different
// orderings among these 2N tensors.
32 | // |
33 | // E.g., for Tensorflow tensors A, B, and C, our ops will produce and |
34 | // consume A_m, B_m, and C_m additionally. |
35 | // |
36 | // INTERLEAVED: in this case 2N tensors are interleaved. So for above |
37 | // example, the ordering looks like: A, A_m, B, B_m, C, C_m. |
38 | // |
// CONTIGUOUS: in this case N Tensorflow tensors are contiguous, followed
//             by N Mkl tensors. So for the above example, the ordering looks
//             like: A, B, C, A_m, B_m, C_m
42 | // |
43 | // Following APIs map index of original Tensorflow tensors to their |
44 | // appropriate position based on selected ordering. For contiguous ordering, |
45 | // we need to know the total number of tensors (parameter total). |
46 | // |
// Ordering of the Mkl metadata tensors relative to the Tensorflow data
// tensors (see the discussion above for examples of both layouts).
typedef enum { TENSORS_INTERLEAVED, TENSORS_CONTIGUOUS } MklTfTensorOrdering;
// NOTE: Currently, we use contiguous ordering. If you change this, then you
// would need to change Mkl op definitions in nn_ops.cc.
static const MklTfTensorOrdering kTensorOrdering = TENSORS_CONTIGUOUS;
51 | |
52 | // Get index of MetaData tensor from index 'n' of Data tensor. |
53 | inline int DataIndexToMetaDataIndex(int n, int total_tensors) { |
54 | if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) { |
55 | // For interleaved ordering, Mkl tensor follows immediately after |
56 | // Tensorflow tensor. |
57 | return n + 1; |
58 | } else { |
59 | CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); |
60 | // For contiguous ordering, Mkl tensor is n+total_tensors / 2 away. |
61 | return n + total_tensors / 2; |
62 | } |
63 | } |
64 | |
65 | int inline GetTensorDataIndex(int n, int total_tensors) { |
66 | if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) { |
67 | return 2 * n; // index corresponding to nth input/output tensor |
68 | } else { |
69 | CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); |
70 | return n; |
71 | } |
72 | } |
73 | |
74 | int inline GetTensorMetaDataIndex(int n, int total_tensors) { |
75 | // Get index for TensorData first and then use mapping function |
76 | // to get TensorMetaData index from TensorData index. |
77 | int tidx = GetTensorDataIndex(n, total_tensors); |
78 | return DataIndexToMetaDataIndex(tidx, total_tensors); |
79 | } |
80 | |
81 | // check if the control between src and dst nodes already exists |
82 | bool inline DoesControlEdgeExist(const Node* src, const Node* dst) { |
83 | for (const Edge* edge : src->out_edges()) { |
84 | if (edge->IsControlEdge() && edge->dst() == dst) { |
85 | return true; |
86 | } |
87 | } |
88 | return false; |
89 | } |
90 | |
// In TF 2.8, oneDNN blocked format will not be supported; native format is
// therefore unconditionally enabled.
// TODO(intel_tf): Cleanup shall be done in future:
// (1) Remove this method;
// (2) Update related code wherever it is called.
bool inline NativeFormatEnabled() { return true; }
98 | |
99 | // Check if the data_format attribute in the node def represents 5D tensor |
100 | bool inline Check5DFormat(const NodeDef& ndef) { |
101 | string data_format; |
102 | TF_CHECK_OK(GetNodeAttr(ndef, "data_format" , &data_format)); |
103 | if (data_format.compare("NCDHW" ) == 0 || data_format.compare("NDHWC" ) == 0) { |
104 | return true; |
105 | } |
106 | return false; |
107 | } |
108 | |
109 | namespace mkl_op_registry { |
// MKL operators whose kernels are registered with 'MklLayoutDependentOp' label
// (e.g., MklConv2D) understand input tensors in MKL layout. These operators
// get additional meta-tensors for actual input tensors.
static const char* kMklLayoutDependentOpLabel = "MklLayoutDependentOp";
// Pattern form of the label, as it appears in the output of
// KernelsRegisteredForOp() (used for substring search in IsMklOp).
static const char* kMklLayoutDependentOpLabelPattern =
    "label='MklLayoutDependentOp'";
// MKL operators whose kernels are registered with 'MklNameChangeOp' label
// (e.g., MklMatMul, MklTranspose) do not understand input tensors in MKL
// layout. These operators do not get additional meta-tensors. The signatures of
// these operators are the same as the original TensorFlow operators that they
// correspond to. So these ops just go through a name change during graph
// rewrite pass.
static const char* kMklNameChangeOpLabel = "MklNameChangeOp";
static const char* kMklNameChangeOpLabelPattern = "label='MklNameChangeOp'";
// Label (and its pattern form) for quantized MKL kernels.
static const char* kMklQuantizedOpLabel = "QuantizedMklOp";
static const char* kMklQuantizedOpLabelPattern = "label='QuantizedMklOp'";

// Prefix that we add to Tensorflow op name to construct Mkl op name.
static const char* const kMklOpPrefix = "_Mkl";
// TODO(intel-tf): PR review feedback (penpornk)
// Can we add eager_mode (or is_eager) as an op attribute instead?
// This way we don't need to rename the op just to pass eager_mode
// through template parameter.
static const char* const kMklEagerOpPrefix = "_MklEager";

// Prefix that we add to TF op name to construct MKL op that does not
// depend on layout propagation. It will be used in both Eager and graph
// modes unless there is a reason to have additional op name with
// _MklEager prefix.
static const char* const kMklNativeOpPrefix = "_MklNative";
140 | |
141 | // Get the name of Mkl Native (does not depend on layout propagation) op |
142 | // from original TensorFlow op. |
143 | inline string GetMklNativeOpName(const string& name) { |
144 | // There are few operators that don't depend on layout propagation but are |
145 | // prefixed with _Mkl instead of _MklNative. |
146 | bool result = |
147 | (0 == name.compare("ConjugateTranspose" ) || |
148 | 0 == name.compare("BatchMatMul" ) || 0 == name.compare("BatchMatMulV2" ) || |
149 | 0 == name.compare("Einsum" ) || 0 == name.compare("MatMul" ) || |
150 | 0 == name.compare("Transpose" ) || 0 == name.compare("QuantizeV2" ) || |
151 | 0 == name.compare("Dequantize" ) || 0 == name.rfind("Quantized" , 0)); |
152 | |
153 | if (result) { |
154 | return string(kMklOpPrefix) + name; |
155 | } else { |
156 | return string(kMklNativeOpPrefix) + name; |
157 | } |
158 | } |
159 | |
160 | // Get the name of Mkl op from original TensorFlow op |
161 | // We prefix the original op with _Mkl or _MklNative to get Mkl op. |
162 | inline string GetMklOpName(const string& name) { |
163 | if (!NativeFormatEnabled()) { |
164 | return string(kMklOpPrefix) + name; |
165 | } else { |
166 | return GetMklNativeOpName(name); |
167 | } |
168 | } |
169 | |
170 | // Get the name of Mkl Eager op from original TensorFlow op |
171 | // We prefix 'MklEager' to the original op to get Mkl Eager op. |
172 | inline string GetMklEagerOpName(const string& name) { |
173 | return string(kMklEagerOpPrefix) + name; |
174 | } |
175 | |
// Returns true iff this CPU supports AVX512F, which oneDNN's bfloat16
// kernels require.
static inline bool IsBF16SupportedByOneDNNOnThisCPU() {
  return port::TestCPUFeature(port::CPUFeature::AVX512F);
}
179 | |
180 | static inline void BF16UnsupportedWarning() { |
181 | static absl::once_flag cpu_bfloat16_warn_once_flag; |
182 | absl::call_once(cpu_bfloat16_warn_once_flag, [] { |
183 | LOG(ERROR) << "oneDNN BFloat16 support are only on platforms with AVX512. " |
184 | "Falling back to default implementation if present." ; |
185 | }); |
186 | } |
187 | |
// Check whether opname with type T is registered as MKL operator
// that will go through name change or layout change pass.
//
// @input: name of the op
// @input: T datatype to be used for checking op
// @input: is_native_op selects which kernel label to look for
//         (name-change vs. layout-dependent)
// @return: true if opname is registered as MKL op that will go through name
// change or layout change pass; false otherwise
static inline bool IsMklOp(const string& op_name, DataType T,
                           bool is_native_op) {
  // Label pattern to search for in the kernel-registration string below.
  string label = is_native_op ? kMklNameChangeOpLabelPattern
                              : kMklLayoutDependentOpLabelPattern;
  // Cache key combines op name, label and dtype so each (op, label, T)
  // triple is resolved at most once per thread.
  string registered_kernels_key = op_name + label + std::to_string(T);
  // Per-thread cache, intentionally heap-allocated and never freed so it is
  // valid for the lifetime of the thread without destruction-order issues.
  // NOTE(review): uses absl::flat_hash_map but this header only includes
  // absl/base/call_once.h directly — presumably pulled in transitively.
  thread_local static auto* registered_kernels_map =
      new absl::flat_hash_map<string, bool>();
  auto kernel_element = registered_kernels_map->find(registered_kernels_key);
  bool kernel_registered = false;

  if (kernel_element == registered_kernels_map->end()) {
    // Cache miss: inspect the textual kernel registration for this op.
    string registered_kernels = KernelsRegisteredForOp(op_name);
    // String returned by KernelsRegisteredForOp looks like below:
    //
    // Op = _MklMatMul, kernels =
    // device='CPU'; label='MklNameChangeOp'; T in [DT_COMPLEX128]
    // device='CPU'; label='MklNameChangeOp'; T in [DT_COMPLEX64]
    // device='CPU'; label='MklNameChangeOp'; T in [DT_DOUBLE]
    // device='CPU'; label='MklNameChangeOp'; T in [DT_FLOAT]

    if (is_native_op &&
        registered_kernels.find(kMklQuantizedOpLabelPattern) != string::npos) {
      // Restrict quantized ops to QUINT8, QINT8 and DT_QINT32
      kernel_registered = (T == DT_QUINT8 || T == DT_QINT8 || T == DT_QINT32);
    }

    // Now we just construct a search string to match what we are looking for.
    string search_string =
        label + string("; T in [") + DataType_Name(T) + string("]");

    // NOTE: if this match succeeds, kernel_registered is recomputed and may
    // overwrite the quantized-label result above (the two label patterns
    // are distinct, so in practice at most one branch matches).
    if (registered_kernels.find(search_string) != string::npos) {
      kernel_registered = is_native_op
                              ? (T == DT_COMPLEX128 || T == DT_COMPLEX64 ||
                                 T == DT_DOUBLE || T == DT_FLOAT)
                              : T == DT_FLOAT;
      if (!kernel_registered) {
        if (T == DT_BFLOAT16) {
          if (IsBF16SupportedByOneDNNOnThisCPU()) {
            kernel_registered = true;
          } else {
            // Restrict bfloat16 ops to platforms with at least AVX512 support,
            // fall back to Eigen implementation otherwise.
            BF16UnsupportedWarning();
            kernel_registered = false;
          }
        }
      }
    }
    // Memoize the result (including negative results) for this thread.
    registered_kernels_map->insert(
        std::make_pair(registered_kernels_key, kernel_registered));
  } else {
    // Kernel is visited at least once. Return stored registration result.
    kernel_registered = kernel_element->second;
  }
  return kernel_registered;
}
251 | |
252 | // TODO(intel-tf): QuantizedConv2D is registered with input: QUINT8 |
253 | // filter:QINT8 for oneDNN integration. First a dummy kernel is created |
254 | // and then it is replaced by an actual kernel. |
255 | static inline bool IsMklQuantizedOp(const string& op_name, DataType Tinput, |
256 | DataType Tfilter) { |
257 | // Restrict quantized ops to QUINT8 and QINT8 for now |
258 | if (IsMklOp(op_name, Tinput, kMklQuantizedOpLabelPattern)) { |
259 | return (Tfilter == DT_QINT8); |
260 | } |
261 | return false; |
262 | } |
263 | |
264 | // Check if the operator with 'op_name' and type 'T' is an MKL operator that |
265 | // will either understand input tensors in MKL layout or will go through name |
266 | // rewrite that some operators go through. |
267 | static inline bool IsMklOp(const string& op_name, DataType T) { |
268 | return IsMklOp(op_name, T, true) || IsMklOp(op_name, T, false); |
269 | } |
270 | |
271 | static inline bool IsMklOp(const Node* n) { |
272 | DataType T; |
273 | return GetNodeAttr(n->def(), "T" , &T).ok() && IsMklOp(n->type_string(), T); |
274 | } |
275 | |
276 | // Check whether opname with type T is registered as MKL-compliant and |
277 | // is element-wise. |
278 | // |
279 | // @input: name of the op |
280 | // @input: T datatype to be used for checking op |
281 | // @return: true if opname is registered as element-wise Mkl op; |
282 | // false otherwise |
283 | static inline bool IsMklElementWiseOp(const string& op_name, DataType T) { |
284 | if (!IsMklOp(op_name, T)) { |
285 | return false; |
286 | } |
287 | bool result = (0 == op_name.compare(GetMklOpName("Add" )) || |
288 | 0 == op_name.compare(GetMklOpName("AddV2" )) || |
289 | 0 == op_name.compare(GetMklOpName("Sub" )) || |
290 | 0 == op_name.compare(GetMklOpName("Mul" )) || |
291 | 0 == op_name.compare(GetMklOpName("Maximum" )) || |
292 | 0 == op_name.compare(GetMklOpName("SquaredDifference" ))); |
293 | |
294 | return result; |
295 | } |
296 | } // namespace mkl_op_registry |
297 | } // namespace tensorflow |
298 | #endif // INTEL_MKL |
299 | #endif // TENSORFLOW_CORE_GRAPH_MKL_GRAPH_UTIL_H_ |
300 | |