1 | /* Copyright 2020 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_PERMUTER_H_ |
16 | #define TENSORFLOW_CORE_COMMON_RUNTIME_PERMUTER_H_ |
17 | |
18 | #include <deque> |
19 | #include <memory> |
20 | #include <string> |
21 | #include <vector> |
22 | |
23 | #include "tensorflow/core/common_runtime/base_collective_executor.h" |
24 | #include "tensorflow/core/framework/collective.h" |
25 | |
26 | namespace tensorflow { |
27 | class Device; |
28 | |
29 | // Implementation of collective permute. |
30 | // |
31 | // Permute takes |
32 | // - a list of devices participating in the collective |
33 | // - a permutation as a list of integers. |
34 | // - a tensor |
35 | // |
36 | // The list of devices replaces the need for group_key and group_size. The |
37 | // number of inputs only scales with the number of devices within one group. |
38 | // |
39 | // The integers in the permutation are based on indices of the list of devices. |
40 | // E.g. devices = {"GPU:0", "GPU:1"} and permutation = {1,0} means |
41 | // - devices[0] sends to devices[permutation[0]] and |
42 | // - devices[1] sends to devices[permutation[1]]. |
43 | // |
44 | // Each device sends exactly one tensor and receives exactly one tensor. |
45 | class Permuter : public CollectiveImplementationInterface { |
46 | public: |
47 | Permuter(); |
48 | ~Permuter() override = default; |
49 | |
50 | void Run(StatusCallback done) override; |
51 | |
52 | Status InitializeCollectiveParams(CollectiveParams* col_params) override { |
53 | return OkStatus(); |
54 | } |
55 | |
56 | // Initializes members of CollectiveContext not yet initialized, i.e. device |
57 | // and device_locality. Also saves the CollectiveContext in this object. |
58 | Status InitializeCollectiveContext( |
59 | std::shared_ptr<CollectiveContext> col_ctx) override; |
60 | |
61 | private: |
62 | std::shared_ptr<CollectiveContext> col_ctx_; |
63 | const CollectiveParams* col_params_; // Not owned |
64 | StatusCallback done_; |
65 | mutex mu_; |
66 | Status status_ TF_GUARDED_BY(mu_); |
67 | int counter_ TF_GUARDED_BY(mu_); |
68 | |
69 | void DispatchSend(int src_rank, int target_rank, const Tensor* tensor, |
70 | const StatusCallback& done); |
71 | |
72 | void DispatchRecv(int src_rank, int target_rank, Tensor* tensor, |
73 | const StatusCallback& done); |
74 | |
75 | // Atomically increments counter_ by one for sending, one for receiving. |
76 | // Invokes done when counter_ reaches 2. |
77 | // The purpose of checking counter_ is to ensure that done_ is called once. |
78 | StatusCallback CheckCounterAndCallDone(); |
79 | }; |
80 | |
81 | } // namespace tensorflow |
82 | #endif // TENSORFLOW_CORE_COMMON_RUNTIME_PERMUTER_H_ |
83 | |