/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/tsl/util/use_cudnn.h"

#include <cstdint>

#include "tensorflow/tsl/platform/str_util.h"
#include "tensorflow/tsl/platform/stringpiece.h"
#include "tensorflow/tsl/util/env_var.h"

#if GOOGLE_CUDA
#include "third_party/gpus/cudnn/cudnn.h"
#endif  // GOOGLE_CUDA

namespace tsl {

#define ADD_BOOL_CUDNN_FLAG(func_name, flag_name, default_value)           \
  bool func_name() {                                                        \
    bool value = default_value;                                             \
    Status status = ReadBoolFromEnvVar(#flag_name, default_value, &value);  \
    if (!status.ok()) {                                                     \
      LOG(ERROR) << status;                                                 \
    }                                                                       \
    return value;                                                           \
  }
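
// Illustrative sketch (comment only, not compiled): an invocation such as
//   ADD_BOOL_CUDNN_FLAG(CudnnUseAutotune, TF_CUDNN_USE_AUTOTUNE, true);
// expands to roughly the following function, which reads the named
// environment variable and keeps the default value if the read fails:
//
//   bool CudnnUseAutotune() {
//     bool value = true;
//     Status status =
//         ReadBoolFromEnvVar("TF_CUDNN_USE_AUTOTUNE", true, &value);
//     if (!status.ok()) {
//       LOG(ERROR) << status;
//     }
//     return value;
//   }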

bool CudnnUseFrontend() {
  static bool result = [] {
    bool value = false;
#if GOOGLE_CUDA
    if (CUDNN_VERSION >= 8100) {
      // cuDNN 8.1.0 together with the frontend has issues with fused
      // convolution, so the frontend is only enabled by default for
      // cuDNN >= 8.2.0.
      Status status = ReadBoolFromEnvVar("TF_CUDNN_USE_FRONTEND",
                                         CUDNN_VERSION >= 8200, &value);
      if (!status.ok()) {
        LOG(ERROR) << status;
      }
    }
#endif  // GOOGLE_CUDA
    return value;
  }();
  return result;
}
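
// Note (illustrative): the TF_CUDNN_USE_FRONTEND environment variable
// overrides the version-based default above. For example, exporting
// TF_CUDNN_USE_FRONTEND=true opts in on cuDNN 8.1.x builds, and setting it
// to false opts out on cuDNN >= 8.2.0 builds; below cuDNN 8.1.0 the frontend
// stays disabled regardless of the variable.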

// Whether to enable cuDNN runtime-compiled kernels, which can support more
// general fusion patterns but may increase warmup time.
// TODO(kaixih@nvidia): we can make this the default once cuDNN further reduces
// the runtime compilation overhead.
bool CudnnUseRuntimeFusion() {
  static bool result = [] {
    bool value = false;
#if GOOGLE_CUDA
    if (CUDNN_VERSION >= 8400) {
      Status status =
          ReadBoolFromEnvVar("TF_CUDNN_USE_RUNTIME_FUSION", false, &value);
      if (!status.ok()) {
        LOG(ERROR) << status;
      }
    }
#endif  // GOOGLE_CUDA
    return value;
  }();
  return result;
}
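
// Note (illustrative): runtime fusion is opt-in; exporting
// TF_CUDNN_USE_RUNTIME_FUSION=true enables it, and the variable only has an
// effect when TensorFlow is built against cuDNN 8.4 or newer.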

ADD_BOOL_CUDNN_FLAG(CudnnUseAutotune, TF_CUDNN_USE_AUTOTUNE, true);
// Whether to auto-tune the cuDNN RNN forward and backward pass to pick the
// statistically best cudnnRNNAlgo_t and cudnnMathType_t.
// The flag is disabled when TF_DEBUG_CUDNN_RNN is turned on.
ADD_BOOL_CUDNN_FLAG(CudnnRnnUseAutotune, TF_CUDNN_RNN_USE_AUTOTUNE, true);
ADD_BOOL_CUDNN_FLAG(CudnnDisableConv1x1Optimization,
                    TF_CUDNN_DISABLE_CONV_1X1_OPTIMIZATION, false);

// Whether to run cuDNN RNN forward and backward in debug mode, where users can
// force a specified cudnnRNNAlgo_t and cudnnMathType_t, when used together
// with the following two env vars:
//   TF_DEBUG_CUDNN_RNN_USE_TENSOR_OPS
//   TF_DEBUG_CUDNN_RNN_ALGO
// By default it is disabled and only intended for testing and profiling.
ADD_BOOL_CUDNN_FLAG(DebugCudnnRnn, TF_DEBUG_CUDNN_RNN, false);
// Whether to use TENSOR_OP_MATH in cuDNN RNN for both the forward and backward
// pass. Only effective when TF_DEBUG_CUDNN_RNN is true.
// Note that none of the persistent RNN algorithms support TENSOR_OP_MATH
// before cuDNN 7.1. See the NVIDIA cuDNN manual for more details.
ADD_BOOL_CUDNN_FLAG(DebugCudnnRnnUseTensorOps,
                    TF_DEBUG_CUDNN_RNN_USE_TENSOR_OPS, false);
#undef ADD_BOOL_CUDNN_FLAG

#define ADD_INT64_CUDNN_FLAG(func_name, flag_name, default_value)           \
  int64_t func_name() {                                                      \
    int64_t value = default_value;                                           \
    Status status = ReadInt64FromEnvVar(#flag_name, default_value, &value);  \
    if (!status.ok()) {                                                      \
      LOG(ERROR) << status;                                                  \
    }                                                                        \
    return value;                                                            \
  }
// cuDNN RNN algorithm to use for both the forward and backward pass. Only
// effective when TF_DEBUG_CUDNN_RNN is true. See the NVIDIA cuDNN manual for
// the allowed cudnnRNNAlgo_t values.
ADD_INT64_CUDNN_FLAG(DebugCudnnRnnAlgo, TF_DEBUG_CUDNN_RNN_ALGO, -1);
#undef ADD_INT64_CUDNN_FLAG
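
// Illustrative sketch (assumed usage, not part of this file's API): with the
// debug flags above, a profiling run might set
//   TF_DEBUG_CUDNN_RNN=true TF_DEBUG_CUDNN_RNN_ALGO=0
// so that DebugCudnnRnn() returns true and DebugCudnnRnnAlgo() returns 0;
// when the variables are unset the functions return their defaults
// (false and -1, respectively).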

bool ShouldCudnnGroupedConvolutionBeUsed(const int32_t filter_rows,
                                         const int32_t filter_cols,
                                         const int32_t in_depth,
                                         const int32_t out_depth) {
  return in_depth == out_depth && filter_rows == filter_cols &&
         (filter_rows == 1 || filter_rows == 3 || filter_rows == 5 ||
          filter_rows == 7);
}
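
// For example (values chosen for illustration only): a square 3x3 filter with
// in_depth == out_depth == 64 satisfies the check above and returns true,
// while a non-square 1x7 filter, or a 3x3 filter with in_depth = 64 and
// out_depth = 128, returns false.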

}  // namespace tsl