1 | /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_REMOTE_DEVICE_H_ |
17 | #define TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_REMOTE_DEVICE_H_ |
18 | |
#include <functional>
#include <memory>
#include <string>
#include <vector>

#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/protobuf.h"
#include "tensorflow/core/platform/stringpiece.h"
26 | |
27 | namespace tsl { |
28 | class Env; |
29 | } // namespace tsl |
30 | namespace tensorflow { |
31 | using Env = tsl::Env; |
32 | class DeviceAttributes; |
33 | class Device; |
34 | class WorkerCacheInterface; |
35 | |
36 | // This callback should have the same definition as DeviceMgr::LookupDevice |
37 | // It assigns *device with pointer to Device of the given 'name', where 'name' |
38 | // is either a full device name, or just the replica-local suffix. |
39 | typedef std::function<Status(StringPiece name, Device** device)> |
40 | LookupLocalDevice; |
41 | |
42 | // Creates Remote Devices for the provided device attributes. Helpful when the |
43 | // list of attributes is known, and doesn't need to be discovered via RPC. |
44 | void AsRemoteDevices( |
45 | Env* env, |
46 | const protobuf::RepeatedPtrField<DeviceAttributes>& device_attributes, |
47 | LookupLocalDevice lookup_local_device, |
48 | std::vector<std::unique_ptr<Device>>* remote_devices); |
49 | |
50 | // NewRemoteDevices discovers available devices on the |
51 | // 'worker_name'. The implementation uses 'channel_cache' to |
52 | // discover how to communicate with the 'worker_name' (via gRPC, for |
53 | // example). |
54 | // |
55 | // NewRemoteDevices does not block. |
56 | // |
57 | // On success, the 'done' callback is given the OK status and a vector |
58 | // of Device*. The caller should take ownership of these devices. |
59 | // |
60 | // Otherwise, the 'done' callback is given an error status and the |
61 | // vector is empty. |
62 | typedef std::function<void(const Status&, std::vector<Device*>*)> |
63 | NewRemoteDevicesDone; |
64 | void NewRemoteDevices(Env* env, WorkerCacheInterface* worker_cache, |
65 | const string& worker_name, NewRemoteDevicesDone done); |
66 | |
67 | // Create Remote Device based on the given attributes. |
68 | std::unique_ptr<Device> NewRemoteDevice(Env* env, |
69 | DeviceAttributes device_attribute); |
70 | } // namespace tensorflow |
71 | |
72 | #endif // TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_REMOTE_DEVICE_H_ |
73 | |