/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_WORKER_ENV_H_
#define TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_WORKER_ENV_H_

#include <memory>
#include <vector>

#include "tensorflow/core/platform/types.h"

namespace tsl {
class Env;
namespace thread {
class ThreadPool;
}  // namespace thread
}  // namespace tsl

namespace tensorflow {
using Env = tsl::Env;

namespace thread {
using tsl::thread::ThreadPool;
}  // namespace thread

class CollectiveExecutorMgrInterface;
class Device;
class DeviceMgr;
class RendezvousMgrInterface;
class SessionMgr;

// The worker environment class, which holds a bag of pointers to
// per-worker singletons.
//
// WorkerEnv does not own its raw member pointers; the one exception is
// collective_executor_mgr, which it owns via std::unique_ptr.
struct WorkerEnv {
  Env* env = nullptr;

  // session_mgr encapsulates state for each session.
  SessionMgr* session_mgr = nullptr;

  // The local devices of this worker. Devices are owned by the device_mgr.
  //
  // REQUIRES: !local_devices.empty().
  std::vector<Device*> local_devices;

  // In large-scale distributed training, singleton components (e.g.
  // Rendezvous) can become system bottlenecks. This field allows such
  // components to be sharded; see the routing sketch below. The value
  // scales with the number of tasks in the cluster and is always at
  // least 1.
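  //
  // For example, a sharded component might route per-step work as follows
  // (an illustrative sketch only; no component is required to use this
  // exact scheme):
  //   int shard = step_id % worker_env->experimental_num_shards;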
  int experimental_num_shards = 1;

  // device_mgr manages local devices (cpu and gpu). The WorkerService
  // is the network interface for managed devices.
  //
  // Note: where possible, use the device_mgr associated with your session
  // rather than this one. This device_mgr does not support
  // ClusterSpec-propagated sessions.
  DeviceMgr* device_mgr = nullptr;

  // A set of rendezvous keyed by step ids.
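  //
  // For example, a caller would typically look up the rendezvous for a
  // running step roughly like this (an illustrative sketch; error handling
  // and ref-counting omitted):
  //   RemoteRendezvous* rendezvous = worker_env->rendezvous_mgr->Find(step_id);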
  RendezvousMgrInterface* rendezvous_mgr = nullptr;

  // Generates per-step CollectiveExecutors and has access to utilities
  // supporting collective operations.
  std::unique_ptr<CollectiveExecutorMgrInterface> collective_executor_mgr;

  // A pool of threads for scheduling compute work.
  thread::ThreadPool* compute_pool = nullptr;
};
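
// Example: a minimal sketch of how a server might populate a WorkerEnv.
// `MyRendezvousMgr` is a hypothetical RendezvousMgrInterface implementation,
// and in real code the raw pointers below are owned by the surrounding
// server object:
//
//   WorkerEnv worker_env;
//   worker_env.env = Env::Default();
//   worker_env.device_mgr = device_mgr;  // DeviceMgr* owned by the server.
//   worker_env.local_devices = device_mgr->ListDevices();
//   worker_env.rendezvous_mgr = new MyRendezvousMgr(&worker_env);
//   worker_env.compute_pool = new thread::ThreadPool(
//       worker_env.env, "Compute", /*num_threads=*/8);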

}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_WORKER_ENV_H_