1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #include "tensorflow/core/util/matmul_autotune.h" |
17 | |
18 | #include "tensorflow/core/framework/types.h" |
19 | #include "tensorflow/core/lib/core/stringpiece.h" |
20 | #include "tensorflow/core/util/env_var.h" |
21 | |
22 | namespace tensorflow { |
23 | bool MatmulAutotuneEnable() { |
24 | bool value; |
25 | Status status = |
26 | ReadBoolFromEnvVar("TF_MATMUL_AUTOTUNE_ENABLE" , false, &value); |
27 | if (!status.ok()) { |
28 | LOG(ERROR) << status.error_message(); |
29 | } |
30 | return value; |
31 | } |
32 | |
33 | bool MatmulDoFP32ComputationFP16Input() { |
34 | bool value; |
35 | // Feedback from NVIDIA: the "true floating point 16" compute capability is |
36 | // absent from compute capability SM 5.2. The native 16 bit floating point |
37 | // computation was introduced in SM 5.3 and higher compute capability. So |
38 | // for compatibility, set this to be true by default for now. |
39 | // TODO(yangzihao): In the future, we need to return three possibilities: |
40 | // user-set-true, user-set-false, user-no-setting. In the calling sites, |
41 | // check the compatibilities. Note that user-set-false with compute |
42 | // capability <= 5.2 will cause an error in the later cublasGemmEx() call. |
43 | Status status = |
44 | ReadBoolFromEnvVar("TF_FP16_MATMUL_USE_FP32_COMPUTE" , true, &value); |
45 | if (!status.ok()) { |
46 | LOG(ERROR) << status.error_message(); |
47 | } |
48 | return value; |
49 | } |
50 | |
51 | } // namespace tensorflow |
52 | |