1 | /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | #include "tensorflow/core/kernels/collective_nccl.h" |
16 | |
17 | #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM |
18 | |
19 | #include "tensorflow/core/common_runtime/collective_util.h" |
20 | #include "tensorflow/core/nccl/nccl_manager.h" |
21 | #include "tensorflow/core/platform/tracing.h" |
22 | #include "tensorflow/core/profiler/lib/traceme.h" |
23 | |
24 | namespace tensorflow { |
25 | |
26 | NcclBase::NcclBase(CollectiveType type, const string& name) |
27 | : type_(type), name_(name), col_ctx_(nullptr), col_params_(nullptr) {} |
28 | |
29 | Status NcclBase::InitializeCollectiveParams(CollectiveParams* col_params) { |
30 | if (type_ != col_params->instance.type) { |
31 | return errors::Internal("Expected initialized type " , type_, |
32 | " to match type in CollectiveParams " , |
33 | col_params->instance.type); |
34 | } |
35 | |
36 | const char* expected_name; |
37 | switch (type_) { |
38 | case REDUCTION_COLLECTIVE: |
39 | expected_name = "NcclReduce" ; |
40 | break; |
41 | case BROADCAST_COLLECTIVE: |
42 | expected_name = "NcclBroadcast" ; |
43 | break; |
44 | case GATHER_COLLECTIVE: |
45 | expected_name = "NcclGather" ; |
46 | break; |
47 | default: |
48 | return errors::Internal("Unexpected CollectiveType " , type_); |
49 | } |
50 | |
51 | if (expected_name != col_params->instance.impl_details.collective_name) { |
52 | return errors::Internal("Unexpected combination of collective type " , |
53 | col_params->instance.type, " and collective name " , |
54 | col_params->instance.impl_details.collective_name, |
55 | ", expected name " , expected_name); |
56 | } |
57 | |
58 | return OkStatus(); |
59 | } |
60 | |
61 | Status NcclBase::InitializeCollectiveContext( |
62 | std::shared_ptr<CollectiveContext> col_ctx) { |
63 | col_ctx_ = col_ctx; |
64 | col_params_ = col_ctx->col_params.get(); |
65 | return collective_util::InitializeDeviceAndLocality( |
66 | col_ctx->dev_mgr, col_ctx->device_name, &col_ctx->device, |
67 | &col_ctx->device_locality); |
68 | } |
69 | |
70 | } // namespace tensorflow |
71 | |
72 | #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM |
73 | |