1/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15#include "tensorflow/core/distributed_runtime/device_resolver_distributed.h"
16
17#include "tensorflow/core/common_runtime/device_mgr.h"
18#include "tensorflow/core/framework/device_attributes.pb.h"
19#include "tensorflow/core/platform/errors.h"
20
21namespace tensorflow {
22
23DeviceResolverDistributed::DeviceResolverDistributed(const DeviceMgr* dev_mgr) {
24 mutex_lock l(mu_);
25 for (Device* device : dev_mgr->ListDevices()) {
26 attr_table_[device->name()] = device->attributes();
27 }
28}
29
30Status DeviceResolverDistributed::GetDeviceAttributes(
31 const string& device, DeviceAttributes* attributes) {
32 mutex_lock l(mu_);
33 auto it = attr_table_.find(device);
34 if (it == attr_table_.end()) {
35 return errors::NotFound(device, " not found");
36 }
37 *attributes = it->second;
38 return OkStatus();
39}
40
41Status DeviceResolverDistributed::GetAllDeviceAttributes(
42 const string& task, std::vector<DeviceAttributes>* attributes) {
43 mutex_lock l(mu_);
44 attributes->clear();
45 for (const auto& it : attr_table_) {
46 const string& device_name = it.first;
47 if (DeviceNameUtils::IsSameAddressSpace(task, device_name)) {
48 attributes->push_back(it.second);
49 }
50 }
51 if (attributes->empty()) {
52 return errors::NotFound(task, " not found in the cache");
53 }
54 return OkStatus();
55}
56
57Status DeviceResolverDistributed::UpdateDeviceAttributes(
58 const std::vector<DeviceAttributes>& attributes) {
59 mutex_lock l(mu_);
60 for (const DeviceAttributes& attr : attributes) {
61 auto item = attr_table_.insert({attr.name(), attr});
62 auto it = item.first;
63 bool success = item.second;
64 // Returns error if the device already exists in the cache and has a
65 // different incarnation.
66 if (!success && it->second.incarnation() != attr.incarnation()) {
67 return errors::FailedPrecondition(
68 attr.name(),
69 "exists in cache with a different incarnation. "
70 "This usually means the remote worker has restarted");
71 }
72 }
73 return OkStatus();
74}
75
76} // namespace tensorflow
77