1 | /* Copyright 2020 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_C_EAGER_IMMEDIATE_EXECUTION_DISTRIBUTED_MANAGER_H_ |
17 | #define TENSORFLOW_C_EAGER_IMMEDIATE_EXECUTION_DISTRIBUTED_MANAGER_H_ |
18 | |
19 | #include <string> |
20 | |
21 | #include "tensorflow/core/platform/status.h" |
22 | |
23 | namespace tensorflow { |
24 | class CoordinationServiceAgent; |
25 | class ImmediateExecutionContext; |
26 | class ServerDef; |
27 | class WorkerEnv; |
28 | class WorkerCacheInterface; |
29 | |
30 | class ImmediateExecutionDistributedManager { |
31 | public: |
32 | virtual ~ImmediateExecutionDistributedManager() {} |
33 | |
34 | // Set up distributed execution environment on local and remote tasks. |
35 | // When `reset_context` is true, initialize new cluster context state based on |
36 | // cluster configurations provided in `server_def`; otherwise, update existing |
37 | // context state with the provided `server_def`. |
38 | // Contexts created on remote tasks will be considered stale and garbage |
39 | // collected after `keep_alive_secs` of inactivity. |
40 | virtual Status SetOrUpdateServerDef(const ServerDef& server_def, |
41 | bool reset_context, |
42 | int keep_alive_secs) = 0; |
43 | |
44 | // Set up a multi-client distributed execution environment. Must be called on |
45 | // all tasks in the cluster. |
46 | // This call internally coordinates with other tasks to initialize the eager |
47 | // context and TF server for multi-client execution. |
48 | virtual Status EnableCollectiveOps(const ServerDef& server_def) = 0; |
49 | |
50 | // Check if the remote task is alive. |
51 | virtual Status CheckRemoteAlive(const std::string& remote_task_name, |
52 | bool* is_alive) = 0; |
53 | |
54 | // Get pointer to the coordination service agent instance. |
55 | virtual CoordinationServiceAgent* GetCoordinationServiceAgent() = 0; |
56 | }; |
57 | } // namespace tensorflow |
58 | |
59 | #endif // TENSORFLOW_C_EAGER_IMMEDIATE_EXECUTION_DISTRIBUTED_MANAGER_H_ |
60 | |