1 | /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_MASTER_H_ |
17 | #define TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_MASTER_H_ |
18 | |
19 | #include <unordered_map> |
20 | |
21 | #include "tensorflow/core/common_runtime/device.h" |
22 | #include "tensorflow/core/distributed_runtime/call_options.h" |
23 | #include "tensorflow/core/distributed_runtime/master_env.h" |
24 | #include "tensorflow/core/distributed_runtime/master_session.h" |
25 | #include "tensorflow/core/distributed_runtime/recent_request_ids.h" |
26 | #include "tensorflow/core/lib/core/notification.h" |
27 | #include "tensorflow/core/lib/gtl/map_util.h" |
28 | #include "tensorflow/core/platform/macros.h" |
29 | #include "tensorflow/core/platform/mutex.h" |
30 | #include "tensorflow/core/platform/types.h" |
31 | #include "tensorflow/core/protobuf/master.pb.h" |
32 | #include "tensorflow/core/util/util.h" |
33 | |
34 | namespace tensorflow { |
35 | |
36 | class Master { |
37 | public: |
38 | explicit Master(MasterEnv* env, double session_gc_seconds); |
39 | virtual ~Master(); |
40 | |
41 | // Convenient typedef for a closure passing a Status. |
42 | typedef std::function<void(const Status&)> MyClosure; |
43 | |
44 | void CreateSession(const CreateSessionRequest* req, |
45 | CreateSessionResponse* resp, MyClosure done); |
46 | |
47 | void ExtendSession(const ExtendSessionRequest* req, |
48 | ExtendSessionResponse* resp, MyClosure done); |
49 | |
50 | void PartialRunSetup(const PartialRunSetupRequest* req, |
51 | PartialRunSetupResponse* resp, MyClosure done); |
52 | |
53 | void RunStep(CallOptions* opts, const RunStepRequestWrapper* req, |
54 | MutableRunStepResponseWrapper* resp, MyClosure done); |
55 | |
56 | void CloseSession(const CloseSessionRequest* req, CloseSessionResponse* resp, |
57 | MyClosure done); |
58 | |
59 | void ListDevices(const ListDevicesRequest* req, ListDevicesResponse* resp, |
60 | MyClosure done); |
61 | |
62 | // See tensorflow::Reset() and the comment on ResetRequest. |
63 | void Reset(const ResetRequest* req, ResetResponse* resp, MyClosure done); |
64 | |
65 | void MakeCallable(const MakeCallableRequest* req, MakeCallableResponse* resp, |
66 | MyClosure done); |
67 | void RunCallable(CallOptions* opts, const RunCallableRequest* req, |
68 | RunCallableResponse* resp, MyClosure done); |
69 | void ReleaseCallable(const ReleaseCallableRequest* req, |
70 | ReleaseCallableResponse* resp, MyClosure done); |
71 | |
72 | private: |
73 | typedef Master ME; |
74 | |
75 | // Not owned. |
76 | MasterEnv* env_ = nullptr; |
77 | |
78 | // Owned. |
79 | mutex mu_; |
80 | |
81 | // shutdown_ is set to true by the dtor. |
82 | condition_variable shutdown_cv_; |
83 | bool shutdown_ TF_GUARDED_BY(mu_) = false; |
84 | Thread* gc_thread_; |
85 | |
86 | // Maps session handles to sessions. |
87 | std::unordered_map<string, MasterSession*> sessions_ TF_GUARDED_BY(mu_); |
88 | |
89 | // Moving average of step times. |
90 | MovingAverage last_1000_steps_ TF_GUARDED_BY(mu_); |
91 | |
92 | // Cumulative number of steps executed. |
93 | int64_t step_count_ TF_GUARDED_BY(mu_); |
94 | |
95 | // If a session is not active for this many seconds, it will be |
96 | // closed automatically. |
97 | const double session_gc_seconds_; |
98 | |
99 | // Used to track ids for incoming requests so we can detect duplicates. |
100 | RecentRequestIds recent_request_ids_; |
101 | |
102 | // Call CleanupAll on all workers. |
103 | void CleanupWorkers(const ResetRequest& reset); |
104 | |
105 | // Cleanup unused session. |
106 | void GC(); |
107 | |
108 | // Find master session by session handle, and increments the reference count |
109 | // on the returned MasterSession if not null. |
110 | MasterSession* FindMasterSession(const string& handle); |
111 | |
112 | TF_DISALLOW_COPY_AND_ASSIGN(Master); |
113 | }; |
114 | |
115 | } // namespace tensorflow |
116 | |
117 | #endif // TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_MASTER_H_ |
118 | |