1/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16#ifndef TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_MASTER_H_
17#define TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_MASTER_H_
18
19#include <unordered_map>
20
21#include "tensorflow/core/common_runtime/device.h"
22#include "tensorflow/core/distributed_runtime/call_options.h"
23#include "tensorflow/core/distributed_runtime/master_env.h"
24#include "tensorflow/core/distributed_runtime/master_session.h"
25#include "tensorflow/core/distributed_runtime/recent_request_ids.h"
26#include "tensorflow/core/lib/core/notification.h"
27#include "tensorflow/core/lib/gtl/map_util.h"
28#include "tensorflow/core/platform/macros.h"
29#include "tensorflow/core/platform/mutex.h"
30#include "tensorflow/core/platform/types.h"
31#include "tensorflow/core/protobuf/master.pb.h"
32#include "tensorflow/core/util/util.h"
33
34namespace tensorflow {
35
36class Master {
37 public:
38 explicit Master(MasterEnv* env, double session_gc_seconds);
39 virtual ~Master();
40
41 // Convenient typedef for a closure passing a Status.
42 typedef std::function<void(const Status&)> MyClosure;
43
44 void CreateSession(const CreateSessionRequest* req,
45 CreateSessionResponse* resp, MyClosure done);
46
47 void ExtendSession(const ExtendSessionRequest* req,
48 ExtendSessionResponse* resp, MyClosure done);
49
50 void PartialRunSetup(const PartialRunSetupRequest* req,
51 PartialRunSetupResponse* resp, MyClosure done);
52
53 void RunStep(CallOptions* opts, const RunStepRequestWrapper* req,
54 MutableRunStepResponseWrapper* resp, MyClosure done);
55
56 void CloseSession(const CloseSessionRequest* req, CloseSessionResponse* resp,
57 MyClosure done);
58
59 void ListDevices(const ListDevicesRequest* req, ListDevicesResponse* resp,
60 MyClosure done);
61
62 // See tensorflow::Reset() and the comment on ResetRequest.
63 void Reset(const ResetRequest* req, ResetResponse* resp, MyClosure done);
64
65 void MakeCallable(const MakeCallableRequest* req, MakeCallableResponse* resp,
66 MyClosure done);
67 void RunCallable(CallOptions* opts, const RunCallableRequest* req,
68 RunCallableResponse* resp, MyClosure done);
69 void ReleaseCallable(const ReleaseCallableRequest* req,
70 ReleaseCallableResponse* resp, MyClosure done);
71
72 private:
73 typedef Master ME;
74
75 // Not owned.
76 MasterEnv* env_ = nullptr;
77
78 // Owned.
79 mutex mu_;
80
81 // shutdown_ is set to true by the dtor.
82 condition_variable shutdown_cv_;
83 bool shutdown_ TF_GUARDED_BY(mu_) = false;
84 Thread* gc_thread_;
85
86 // Maps session handles to sessions.
87 std::unordered_map<string, MasterSession*> sessions_ TF_GUARDED_BY(mu_);
88
89 // Moving average of step times.
90 MovingAverage last_1000_steps_ TF_GUARDED_BY(mu_);
91
92 // Cumulative number of steps executed.
93 int64_t step_count_ TF_GUARDED_BY(mu_);
94
95 // If a session is not active for this many seconds, it will be
96 // closed automatically.
97 const double session_gc_seconds_;
98
99 // Used to track ids for incoming requests so we can detect duplicates.
100 RecentRequestIds recent_request_ids_;
101
102 // Call CleanupAll on all workers.
103 void CleanupWorkers(const ResetRequest& reset);
104
105 // Cleanup unused session.
106 void GC();
107
108 // Find master session by session handle, and increments the reference count
109 // on the returned MasterSession if not null.
110 MasterSession* FindMasterSession(const string& handle);
111
112 TF_DISALLOW_COPY_AND_ASSIGN(Master);
113};
114
115} // namespace tensorflow
116
117#endif // TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_MASTER_H_
118