1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #include "tensorflow/tsl/util/use_cudnn.h" |
17 | |
18 | #include <cstdint> |
19 | |
20 | #include "tensorflow/tsl/platform/str_util.h" |
21 | #include "tensorflow/tsl/platform/stringpiece.h" |
22 | #include "tensorflow/tsl/util/env_var.h" |
23 | |
24 | #if GOOGLE_CUDA |
25 | #include "third_party/gpus/cudnn/cudnn.h" |
26 | #endif // GOOGLE_CUDA |
27 | |
28 | namespace tsl { |
29 | |
// Expands to a function `bool func_name()` that reads the boolean environment
// variable `flag_name` (stringized), falling back to `default_value` when the
// variable is unset or unparsable. A parse failure is logged but not fatal —
// the default is returned. Note: no comments may appear inside the macro body
// itself, since line splicing happens before comment removal.
#define ADD_BOOL_CUDNN_FLAG(func_name, flag_name, default_value) \
  bool func_name() { \
    bool value = default_value; \
    Status status = ReadBoolFromEnvVar(#flag_name, default_value, &value); \
    if (!status.ok()) { \
      LOG(ERROR) << status; \
    } \
    return value; \
  }
39 | |
// Whether to use the cuDNN frontend (graph) API. The decision is made once,
// on first call, and cached for the lifetime of the process.
bool CudnnUseFrontend() {
  static const bool use_frontend = [] {
    bool enabled = false;
#if GOOGLE_CUDA
    // The frontend API requires cuDNN >= 8.1.0. It is enabled by default
    // only from 8.2.0 onward (8.1.x had issues with fused convolution);
    // either way the TF_CUDNN_USE_FRONTEND env var overrides the default.
    if (CUDNN_VERSION >= 8100) {
      const bool default_on = CUDNN_VERSION >= 8200;
      Status status =
          ReadBoolFromEnvVar("TF_CUDNN_USE_FRONTEND", default_on, &enabled);
      if (!status.ok()) {
        LOG(ERROR) << status;
      }
    }
#endif  // GOOGLE_CUDA
    return enabled;
  }();
  return use_frontend;
}
57 | |
// Whether to enable cuDNN runtime-compiled kernels, which support more
// general fusion patterns at the cost of extra warmup (compilation) time.
// Computed once on first call and cached thereafter.
// TODO(kaixih@nvidia): we can make it default when Cudnn further improves the
// runtime compilation overhead.
bool CudnnUseRuntimeFusion() {
  static const bool use_runtime_fusion = [] {
    bool enabled = false;
#if GOOGLE_CUDA
    // Runtime fusion requires cuDNN >= 8.4.0; below that the flag stays off
    // regardless of the environment. Default is off; users opt in via
    // TF_CUDNN_USE_RUNTIME_FUSION.
    if (CUDNN_VERSION >= 8400) {
      Status status =
          ReadBoolFromEnvVar("TF_CUDNN_USE_RUNTIME_FUSION", false, &enabled);
      if (!status.ok()) {
        LOG(ERROR) << status;
      }
    }
#endif  // GOOGLE_CUDA
    return enabled;
  }();
  return use_runtime_fusion;
}
78 | |
// Whether to autotune cuDNN convolution algorithms. Enabled by default;
// set TF_CUDNN_USE_AUTOTUNE=0 to disable.
ADD_BOOL_CUDNN_FLAG(CudnnUseAutotune, TF_CUDNN_USE_AUTOTUNE, true);
// Whether to auto-tune the Cudnn RNN forward and backward pass to pick
// statistically the best cudnnRNNAlgo_t and cudnnMathType_t.
// The flag is disabled when TF_DEBUG_CUDNN_RNN is turned on.
ADD_BOOL_CUDNN_FLAG(CudnnRnnUseAutotune, TF_CUDNN_RNN_USE_AUTOTUNE, true);
// Whether to disable the special-cased handling of 1x1 convolutions in cuDNN.
// Off (optimization enabled) by default.
ADD_BOOL_CUDNN_FLAG(CudnnDisableConv1x1Optimization,
                    TF_CUDNN_DISABLE_CONV_1X1_OPTIMIZATION, false);

// Whether to run Cudnn RNN forward and backward in debug mode, where users can
// force a specified cudnnRNNAlgo_t and cudnnMathType_t, when used together with
// the following two env vars:
// TF_DEBUG_CUDNN_RNN_USE_TENSOR_OPS
// TF_DEBUG_CUDNN_RNN_ALGO
// By default it is disabled and only intended for testing and profiling.
ADD_BOOL_CUDNN_FLAG(DebugCudnnRnn, TF_DEBUG_CUDNN_RNN, false);
// If using TENSOR_OP_MATH in Cudnn RNN for both forward and backward pass. Only
// effective when TF_DEBUG_CUDNN_RNN is true.
// Note none of the persistent RNN algorithms support TENSOR_OP_MATH before
// Cudnn 7.1. See the Nvidia Cudnn manual for more details.
ADD_BOOL_CUDNN_FLAG(DebugCudnnRnnUseTensorOps,
                    TF_DEBUG_CUDNN_RNN_USE_TENSOR_OPS, false);
#undef ADD_BOOL_CUDNN_FLAG
101 | |
// Expands to a function `int64_t func_name()` that reads the integer
// environment variable `flag_name` (stringized), falling back to
// `default_value` when unset or unparsable. Parse failures are logged, not
// fatal. As with the bool variant above, the macro body must stay free of
// comments (line splicing precedes comment removal).
#define ADD_INT64_CUDNN_FLAG(func_name, flag_name, default_value) \
  int64_t func_name() { \
    int64_t value = default_value; \
    Status status = ReadInt64FromEnvVar(#flag_name, default_value, &value); \
    if (!status.ok()) { \
      LOG(ERROR) << status; \
    } \
    return value; \
  }
// Cudnn RNN algorithm to use for both forward and backward pass. Only effective
// when TF_DEBUG_CUDNN_RNN is true. See the Nvidia Cudnn manual for allowed
// cudnnRNNAlgo_t values; -1 means "not forced".
ADD_INT64_CUDNN_FLAG(DebugCudnnRnnAlgo, TF_DEBUG_CUDNN_RNN_ALGO, -1);
#undef ADD_INT64_CUDNN_FLAG
116 | |
// Returns true if a grouped convolution with the given filter geometry should
// be dispatched to cuDNN: the channel count must be preserved
// (in_depth == out_depth) and the filter must be square with one of a few
// common sizes (1, 3, 5, or 7).
bool ShouldCudnnGroupedConvolutionBeUsed(const int32_t filter_rows,
                                         const int32_t filter_cols,
                                         const int32_t in_depth,
                                         const int32_t out_depth) {
  if (in_depth != out_depth || filter_rows != filter_cols) {
    return false;
  }
  switch (filter_rows) {
    case 1:
    case 3:
    case 5:
    case 7:
      return true;
    default:
      return false;
  }
}
125 | |
126 | } // namespace tsl |
127 | |