1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_CORE_FRAMEWORK_DEVICE_BASE_H_ |
17 | #define TENSORFLOW_CORE_FRAMEWORK_DEVICE_BASE_H_ |
18 | |
19 | #include <memory> |
20 | #include <string> |
21 | #include <vector> |
22 | |
23 | #include "absl/base/macros.h" |
24 | #include "absl/strings/string_view.h" |
25 | #include "tensorflow/core/framework/device_attributes.pb.h" |
26 | #include "tensorflow/core/framework/tensor.h" |
27 | #include "tensorflow/core/lib/core/errors.h" |
28 | #include "tensorflow/core/lib/core/refcount.h" |
29 | #include "tensorflow/core/lib/core/status.h" |
30 | #include "tensorflow/core/lib/core/stringpiece.h" |
31 | #include "tensorflow/core/platform/logging.h" |
32 | #include "tensorflow/core/platform/threadpool.h" |
33 | #include "tensorflow/core/util/device_name_utils.h" |
34 | |
35 | namespace Eigen { |
36 | struct ThreadPoolDevice; |
37 | } // end namespace Eigen |
38 | |
39 | namespace stream_executor { |
40 | class Stream; |
41 | } // namespace stream_executor |
42 | |
43 | namespace tsl { |
44 | class Env; |
45 | namespace thread { |
46 | class ThreadPool; |
47 | } // namespace thread |
48 | } // namespace tsl |
49 | namespace tensorflow { |
50 | |
51 | class Device; |
52 | class DeviceAttributes; |
53 | class EventMgr; |
54 | class OpKernelContext; |
55 | class ResourceMgr; |
56 | class ScopedAllocatorMgr; |
57 | class TensorProto; |
58 | |
59 | // A wrapper for an Eigen Gpu Device that includes per-op state. The |
60 | // class is defined even for non-GPU devices since the |
61 | // OpKernelContext::Params structure wants to fill it in. |
62 | class PerOpGpuDevice { |
63 | public: |
64 | virtual ~PerOpGpuDevice() {} |
65 | virtual const Eigen::GpuDevice& device() const = 0; |
66 | }; |
67 | |
68 | // A class that devices can subclass to pass around |
69 | // Device-specific context to OpKernels. |
70 | class DeviceContext : public core::RefCounted { |
71 | public: |
72 | ~DeviceContext() override {} |
73 | virtual stream_executor::Stream* stream() const { return nullptr; } |
74 | virtual void MaintainLifetimeOnStream(const Tensor* t, |
75 | stream_executor::Stream* stream) const { |
76 | } |
77 | |
78 | // "cpu_tensor" is a tensor on a CPU. Copies "cpu_tensor" into |
79 | // "device_tensor" which is on a non-CPU device "device". "device_tensor" |
80 | // must be allocated to be of the same size as "cpu_tensor". |
81 | virtual void CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device, |
82 | Tensor* device_tensor, StatusCallback done, |
83 | bool sync_dst_compute = true) const { |
84 | done(errors::Internal("Unrecognized device type in CPU-to-device Copy" )); |
85 | } |
86 | |
87 | // Same as CopyCPUTensorToDevice, but in a synchronous way. |
88 | Status CopyCPUTensorToDeviceSync(const Tensor* cpu_tensor, Device* device, |
89 | Tensor* device_tensor) const; |
90 | |
91 | // Copies a tensor in this device. |
92 | virtual void CopyTensorInSameDevice(const Tensor* input_tensor, |
93 | Device* device, Tensor* output_tensor, |
94 | StatusCallback done) const { |
95 | done(errors::Unimplemented("Copy in same device not implemented." )); |
96 | } |
97 | |
98 | // "device_tensor" is a tensor on a non-CPU device. Copies |
99 | // device_tensor into "cpu_tensor". "cpu_tensor" must be allocated |
100 | // to be of the same size as "device_tensor". |
101 | virtual void CopyDeviceTensorToCPU(const Tensor* device_tensor, |
102 | StringPiece tensor_name, Device* device, |
103 | Tensor* cpu_tensor, StatusCallback done) { |
104 | done(errors::Internal("Unrecognized device type in device-to-CPU Copy" )); |
105 | } |
106 | |
107 | // Same as `CopyDeviceTensorToCPU`, but blocks until the copy is done. |
108 | Status CopyDeviceTensorToCPUSync(const Tensor* device_tensor, |
109 | StringPiece tensor_name, Device* device, |
110 | Tensor* cpu_tensor); |
111 | |
112 | // If possible, wait for all events on *stream to complete then execute func. |
113 | // A non-OK Status is returned otherwise. The stream argument should be the |
114 | // one provided by AcceleratorDeviceInfo. This function is not applicable to |
115 | // devices that don't provide such a value. |
116 | virtual Status ThenExecute(Device* device, stream_executor::Stream* stream, |
117 | std::function<void()> func) { |
118 | return errors::Internal("ThenExecute not supported by device" ); |
119 | } |
120 | |
121 | // check if device is a pluggable device |
122 | virtual bool IsPluggableDevice() { return false; } |
123 | |
124 | // Returns the pinned host memory allocator for the device. |
125 | virtual Allocator* host_memory_allocator() const { return nullptr; } |
126 | }; |
127 | |
// Base class shared by all device implementations (CPU, GPU, TPU, plugins).
// Holds per-device state that the runtime and OpKernels query: worker thread
// pools, allocators, Eigen devices, and accelerator stream/context info.
class DeviceBase {
 public:
  // "env" must outlive this object; not owned.
  explicit DeviceBase(tsl::Env* env) : env_(env) {}
  virtual ~DeviceBase();

  tsl::Env* env() const { return env_; }

  // Thread pool used for CPU-side work on behalf of this device.
  struct CpuWorkerThreads {
    int num_threads = 0;
    tsl::thread::ThreadPool* workers = nullptr;  // not owned
  };

  // Does not take ownership.
  void set_tensorflow_cpu_worker_threads(CpuWorkerThreads* t) {
    cpu_worker_threads_ = t;
  }

  // CHECK-fails if set_tensorflow_cpu_worker_threads() was never called.
  virtual const CpuWorkerThreads* tensorflow_cpu_worker_threads() const {
    CHECK(cpu_worker_threads_ != nullptr);
    return cpu_worker_threads_;
  }

  // "stream" is used in special circumstances (such as the
  // constructors of Ops) where there is no available OpKernelContext.
  // "default_context" is used by OpKernelContext whenever a device does not
  // supply a DeviceContext for an op in TryGetDeviceContext() (e.g. when only
  // using a single stream.)
  // "event_mgr" is used to delay deallocation of temporary GPU buffers.
  // TODO(pbar) Work out how to move this out of DeviceBase.
  struct AcceleratorDeviceInfo {
    // Make sure all the defaults are NULL, so we can spot missing assignments.
    stream_executor::Stream* stream = nullptr;
    DeviceContext* default_context = nullptr;
    EventMgr* event_mgr = nullptr;
    int gpu_id = -1;
  };

  // Does not take ownership.
  void set_tensorflow_accelerator_device_info(
      AcceleratorDeviceInfo* device_info) {
    accelerator_device_info_ = device_info;
  }

  // Returns nullptr for devices that never called
  // set_tensorflow_accelerator_device_info() (e.g. plain CPU devices).
  virtual const AcceleratorDeviceInfo* tensorflow_accelerator_device_info()
      const {
    return accelerator_device_info_;
  }

  // The preferred thread pool for this device. If it is nullptr, the system
  // automatically assigns a thread pool for execution.
  virtual tsl::thread::ThreadPool* tensorflow_device_thread_pool() {
    return device_thread_pool_;
  }

  // Does not take ownership.
  void set_eigen_cpu_device(Eigen::ThreadPoolDevice* d);

  // Return the Allocator implementation to use based on the allocator
  // attributes requested. See allocator.h for more details.
  // LOG(FATAL)s here; concrete devices are expected to override.
  virtual Allocator* GetAllocator(AllocatorAttributes /*attr*/) {
    LOG(FATAL) << "GetAllocator() is not implemented.";
    return nullptr;
  }

  // This method is provided for backwards compatibility, and will be removed
  // in a future release.
  ABSL_DEPRECATED("Use `this->GetAllocator()` or `this->GetScopedAllocator()`.")
  Allocator* GetStepAllocator(AllocatorAttributes attr, ResourceMgr*) {
    // The ResourceMgr argument is ignored; kept only for source compatibility.
    return GetAllocator(attr);
  }

  // Return an Allocator prepared for use in particular places by graph
  // optimization. LOG(FATAL)s here; devices that support scoped allocation
  // must override.
  virtual Allocator* GetScopedAllocator(AllocatorAttributes attr,
                                        int64_t step_id) {
    LOG(FATAL) << "Device does not implement GetScopedAllocator()";
    return nullptr;
  }

  // Returns nullptr unless the device supports scoped allocation.
  virtual ScopedAllocatorMgr* GetScopedAllocatorMgr() const { return nullptr; }

  // True iff set_eigen_cpu_device() has been called at least once.
  virtual bool has_eigen_cpu_device() const {
    return !eigen_cpu_devices_.empty();
  }

  virtual const Eigen::ThreadPoolDevice* eigen_cpu_device();

  // Caller owns the return value. The OpKernelContext calls this even
  // for devices that do not implement an eigen_gpu_device. Overridden
  // by GPU devices to return a derived type.
  virtual PerOpGpuDevice* MakeGpuDevice() { return nullptr; }

  // For wrapper devices (e.g. remote/renamed): the device doing the real
  // work. Base implementation returns `this`.
  virtual DeviceBase* UnderlyingDevice() { return this; }
  virtual const DeviceBase* UnderlyingDevice() const { return this; }

  // This is overridden by GPU devices to reinitialize the derived
  // type returned by MakeGpuDevice.
  virtual Status ReinitializeGpuDevice(OpKernelContext* /*context*/,
                                       PerOpGpuDevice* /*device*/,
                                       DeviceContext* /*dc*/,
                                       Allocator* /*allocator*/) {
    return OkStatus();
  }

  // Unimplemented by default; implemented by Device (see declaration notes).
  virtual const DeviceAttributes& attributes() const;
  // NUMA node of this device, derived from attributes().locality().
  virtual int NumaNode() const { return attributes().locality().numa_node(); }
  virtual const std::string& name() const;
  virtual const DeviceNameUtils::ParsedName& parsed_name() const;

  // Updates `attributes()`, indicating the XLA global ID associated with this
  // device. This ID is unique across clients in a multi-client setup. For TPUs
  // this does not happen until the TPU system has been initialized.
  //
  // Implemented in Device; no-op here.
  virtual void set_xla_global_id(int64_t id) {}

  // Materializes the given TensorProto into 'tensor' stored in Device
  // memory. Most devices will want to override this.
  //
  // TODO(vrv): We should be able to put this function into
  // OpKernelContext and handle the copies from device memory via send
  // and receive nodes, instead of requiring that each device handle
  // the copies here as well as in copy ops.
  virtual Status MakeTensorFromProto(const TensorProto& tensor_proto,
                                     const AllocatorAttributes alloc_attrs,
                                     Tensor* tensor) {
    return errors::Internal("Device does not implement MakeTensorFromProto()");
  }

  // Some devices (i.e. GPUs) may free device memory prior to its actual use
  // being completed on the assumption that subsequent allocations can only be
  // used serially with respect to pending uses. If this function returns a
  // non-zero value it is the value of a device-specific counter such that any
  // device memory tagged with an earlier freed-at count is really unencumbered
  // by pending uses. For this to be useful the device memory allocator must
  // be tagging deallocated memory chunks using the same counter.
  virtual uint64 SafeAllocFrontier(uint64 old_value) { return 0; }

  // Copies `input_tensor` to `output_tensor`, where both tensors are on this
  // device. This function assumes that `output_tensor` has already been
  // allocated with a buffer that is large enough to hold `input_tensor`'s data.
  // Calls `done` from a device-specific thread after copy is finished, which
  // may be the same as calling thread.
  //
  // NOTE(ayushd): This function is for TensorFlow internal use only. Deep copy
  // is discouraged and should not be used in OpKernels.
  virtual void CopyTensorInSameDevice(const Tensor* input_tensor,
                                      Tensor* output_tensor,
                                      const DeviceContext* device_context,
                                      StatusCallback done) {
    done(errors::Internal("Device ", name(), " does not implement ",
                          "CopyTensorInSameDevice"));
  }

 protected:
  // Does not take ownership.
  void set_tensorflow_device_thread_pool(tsl::thread::ThreadPool* thread_pool) {
    device_thread_pool_ = thread_pool;
  }

 private:
  tsl::Env* const env_;                             // not owned
  CpuWorkerThreads* cpu_worker_threads_ = nullptr;  // not owned
  // Set by GPUs as well as by TPU devices.
  AcceleratorDeviceInfo* accelerator_device_info_ = nullptr;  // not owned
  tsl::thread::ThreadPool* device_thread_pool_ = nullptr;     // not owned
  std::vector<Eigen::ThreadPoolDevice*> eigen_cpu_devices_;
};
297 | |
// Methods to create and check for Symbolic execution devices.
// Such devices are mostly used for TF-XLA bridge. TF should not treat these as
// normal devices.
//
// Registers `device_name` as a symbolic execution device. Presumably
// idempotent for repeated names — confirm in the implementation.
void AddSymbolicExecutionDevice(absl::string_view device_name);
// Returns true iff `device_name` was previously registered via
// AddSymbolicExecutionDevice().
bool IsSymbolicExecutionDevice(absl::string_view device_name);
303 | |
304 | } // namespace tensorflow |
305 | |
306 | #endif // TENSORFLOW_CORE_FRAMEWORK_DEVICE_BASE_H_ |
307 | |