1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_TSL_FRAMEWORK_ALLOCATOR_H_ |
17 | #define TENSORFLOW_TSL_FRAMEWORK_ALLOCATOR_H_ |
18 | |
19 | #include <stdlib.h> |
20 | |
21 | #include <functional> |
22 | #include <limits> |
23 | |
24 | #include "absl/strings/string_view.h" |
25 | #include "absl/types/optional.h" |
26 | #include "tensorflow/tsl/framework/numeric_types.h" |
27 | #include "tensorflow/tsl/framework/type_traits.h" |
28 | #include "tensorflow/tsl/platform/logging.h" |
29 | #include "tensorflow/tsl/platform/macros.h" |
30 | #include "tensorflow/tsl/platform/numa.h" |
31 | #include "tensorflow/tsl/platform/types.h" |
32 | |
33 | namespace tsl { |
34 | |
35 | // Attributes for a single allocation call. Different calls to the same |
36 | // allocator could potentially have different allocation attributes. |
37 | struct AllocationAttributes { |
38 | AllocationAttributes() = default; |
39 | |
40 | AllocationAttributes(bool retry_on_failure, bool allocation_will_be_logged, |
41 | std::function<uint64()>* freed_by_func) |
42 | : retry_on_failure(retry_on_failure), |
43 | allocation_will_be_logged(allocation_will_be_logged), |
44 | freed_by_func(freed_by_func) {} |
45 | |
46 | // If the first attempt to allocate the memory fails, the allocation should |
47 | // wait and retry (with a timeout). |
48 | // |
49 | // This is usually set to true, but we may set it to false in cases where a |
50 | // failure has only performance impact (e.g. optional scratch space |
51 | // allocation). |
52 | bool retry_on_failure = true; |
53 | // If a Tensor is allocated without the following set to true, then |
54 | // it is logged as an unknown allocation. During execution Tensors |
55 | // should be allocated through the OpKernelContext which records |
56 | // which Op is performing the allocation, and sets this flag to |
57 | // true. |
58 | bool allocation_will_be_logged = false; |
59 | // EXPERIMENTAL: If provided, then evaluates to a timing count such that only |
60 | // a memory chunk whose freed_at_count is at this value or earlier may be |
61 | // returned. |
62 | std::function<uint64()>* freed_by_func = nullptr; // Not owned. |
63 | |
64 | TF_DISALLOW_COPY_AND_ASSIGN(AllocationAttributes); |
65 | }; |
66 | |
67 | // Runtime statistics collected by an allocator. Exactly the same as |
68 | // stream_executor::AllocatorStats, but independently defined to preserve the |
69 | // mutual independence of StreamExecutor and TensorFlow. |
70 | struct AllocatorStats { |
71 | int64_t num_allocs; // Number of allocations. |
72 | int64_t bytes_in_use; // Number of bytes in use. |
73 | int64_t peak_bytes_in_use; // The peak bytes in use. |
74 | int64_t largest_alloc_size; // The largest single allocation seen. |
75 | |
  // The upper limit of bytes of user-allocatable device memory, if such a
  // limit is known.
78 | absl::optional<int64_t> bytes_limit; |
79 | |
80 | // Stats for reserved memory usage. |
81 | int64_t bytes_reserved; // Number of bytes reserved. |
82 | int64_t peak_bytes_reserved; // The peak number of bytes reserved. |
  // The upper limit on the number of bytes of reservable memory,
  // if such a limit is known.
85 | absl::optional<int64_t> bytes_reservable_limit; |
86 | |
87 | int64_t largest_free_block_bytes; // Largest free block's size in heap. |
88 | |
89 | AllocatorStats() |
90 | : num_allocs(0), |
91 | bytes_in_use(0), |
92 | peak_bytes_in_use(0), |
93 | largest_alloc_size(0), |
94 | bytes_reserved(0), |
95 | peak_bytes_reserved(0), |
96 | largest_free_block_bytes(0) {} |
97 | |
98 | std::string DebugString() const; |
99 | }; |
100 | |
101 | // The type of the allocated memory. |
102 | enum class AllocatorMemoryType { |
103 | kUnknown = 0, // Memory type unknown. |
104 | kDevice = 1, // Memory on device. |
  kHostPageable = 2,  // Memory on host and it is pageable.
106 | kHostPinned = 3, // Memory on host and it is pinned. |
107 | }; |
108 | |
109 | // Allocator is an abstract interface for allocating and deallocating |
110 | // device memory. |
111 | class Allocator { |
112 | public: |
113 | // Align to 64 byte boundary. |
114 | static constexpr size_t kAllocatorAlignment = 64; |
115 | |
116 | virtual ~Allocator(); |
117 | |
  // Return a string identifying this allocator.
119 | virtual std::string Name() = 0; |
120 | |
121 | // Return an uninitialized block of memory that is "num_bytes" bytes |
122 | // in size. The returned pointer is guaranteed to be aligned to a |
123 | // multiple of "alignment" bytes. |
124 | // REQUIRES: "alignment" is a power of 2. |
125 | virtual void* AllocateRaw(size_t alignment, size_t num_bytes) = 0; |
126 | |
127 | // Return an uninitialized block of memory that is "num_bytes" bytes |
128 | // in size with specified allocation attributes. The returned pointer is |
129 | // guaranteed to be aligned to a multiple of "alignment" bytes. |
130 | // REQUIRES: "alignment" is a power of 2. |
131 | virtual void* AllocateRaw(size_t alignment, size_t num_bytes, |
132 | const AllocationAttributes& allocation_attr) { |
133 | // The default behavior is to use the implementation without any allocation |
134 | // attributes. |
135 | return AllocateRaw(alignment, num_bytes); |
136 | } |
137 | |
  // Deallocate the block of memory pointed to by "ptr".
  // REQUIRES: "ptr" was previously returned by a call to AllocateRaw.
140 | virtual void DeallocateRaw(void* ptr) = 0; |
141 | |
142 | // Returns true if this allocator tracks the sizes of allocations. |
143 | // RequestedSize and AllocatedSize must be overridden if |
144 | // TracksAllocationSizes is overridden to return true. |
145 | virtual bool TracksAllocationSizes() const { return false; } |
146 | |
147 | // Returns true if this allocator allocates an opaque handle rather than the |
148 | // requested number of bytes. |
149 | // |
150 | // This method returns false for most allocators, but may be used by |
151 | // special-case allocators that track tensor usage. If this method returns |
152 | // true, AllocateRaw() should be invoked for all values of `num_bytes`, |
153 | // including 0. |
154 | // |
155 | // NOTE: It is the caller's responsibility to track whether an allocated |
156 | // object is a buffer or an opaque handle. In particular, when this method |
157 | // returns `true`, users of this allocator must not run any constructors or |
158 | // destructors for complex objects, since there is no backing store for the |
159 | // tensor in which to place their outputs. |
160 | virtual bool AllocatesOpaqueHandle() const { return false; } |
161 | |
162 | // Returns the user-requested size of the data allocated at |
163 | // 'ptr'. Note that the actual buffer allocated might be larger |
164 | // than requested, but this function returns the size requested by |
165 | // the user. |
166 | // |
167 | // REQUIRES: TracksAllocationSizes() is true. |
168 | // |
169 | // REQUIRES: 'ptr!=nullptr' and points to a buffer previously |
170 | // allocated by this allocator. |
171 | virtual size_t RequestedSize(const void* ptr) const { |
172 | CHECK(false) << "allocator doesn't track sizes" ; |
173 | return size_t(0); |
174 | } |
175 | |
176 | // Returns the allocated size of the buffer at 'ptr' if known, |
177 | // otherwise returns RequestedSize(ptr). AllocatedSize(ptr) is |
178 | // guaranteed to be >= RequestedSize(ptr). |
179 | // |
180 | // REQUIRES: TracksAllocationSizes() is true. |
181 | // |
182 | // REQUIRES: 'ptr!=nullptr' and points to a buffer previously |
183 | // allocated by this allocator. |
184 | virtual size_t AllocatedSize(const void* ptr) const { |
185 | return RequestedSize(ptr); |
186 | } |
187 | |
188 | // Returns either 0 or an identifier assigned to the buffer at 'ptr' |
189 | // when the buffer was returned by AllocateRaw. If non-zero, the |
190 | // identifier differs from every other ID assigned by this |
191 | // allocator. |
192 | // |
193 | // REQUIRES: TracksAllocationSizes() is true. |
194 | // |
195 | // REQUIRES: 'ptr!=nullptr' and points to a buffer previously |
196 | // allocated by this allocator. |
197 | virtual int64_t AllocationId(const void* ptr) const { return 0; } |
198 | |
199 | // Returns the allocated size of the buffer at 'ptr' if known, |
200 | // otherwise returns 0. This method can be called when |
201 | // TracksAllocationSizes() is false, but can be extremely slow. |
202 | // |
203 | // REQUIRES: 'ptr!=nullptr' and points to a buffer previously |
204 | // allocated by this allocator. |
205 | virtual size_t AllocatedSizeSlow(const void* ptr) const { |
206 | if (TracksAllocationSizes()) { |
207 | return AllocatedSize(ptr); |
208 | } |
209 | return 0; |
210 | } |
211 | |
212 | // Fills in 'stats' with statistics collected by this allocator. |
213 | virtual absl::optional<AllocatorStats> GetStats() { return absl::nullopt; } |
214 | |
215 | // If implemented, clears the internal stats except for the `in_use` fields |
216 | // and sets the `peak_bytes_in_use` to be equal to the `bytes_in_use`. Returns |
217 | // true if implemented. |
218 | // |
219 | // REQUIRES: GetStats is overridden. |
220 | virtual bool ClearStats() TF_MUST_USE_RESULT { return false; } |
221 | |
222 | virtual void SetSafeFrontier(uint64 count) {} |
223 | |
  // For allocators that are stream aware, allows specifying the compute
  // stream this allocator is used for. This may also trigger memory
  // preallocation.
227 | virtual void SetStreamAndPreallocateMemory(void* stream) {} |
228 | |
229 | // Returns the type of the memory allocated by this allocator. |
230 | virtual AllocatorMemoryType GetMemoryType() const { |
231 | return AllocatorMemoryType::kUnknown; |
232 | } |
233 | }; |
234 | |
235 | // An implementation of Allocator that delegates all calls to another Allocator. |
236 | // |
237 | // Useful to clients who want to override part of the functionality of another |
238 | // allocator. |
239 | class AllocatorWrapper : public Allocator { |
240 | public: |
241 | explicit AllocatorWrapper(Allocator* wrapped) : wrapped_(wrapped) {} |
242 | |
243 | ~AllocatorWrapper() override {} |
244 | |
245 | // Returns the wrapped allocator to which all calls are delegated. |
246 | Allocator* wrapped() const { return wrapped_; } |
247 | |
248 | std::string Name() override { return wrapped_->Name(); } |
249 | |
250 | void* AllocateRaw(size_t alignment, size_t num_bytes) override { |
251 | return wrapped_->AllocateRaw(alignment, num_bytes); |
252 | } |
253 | |
254 | void* AllocateRaw(size_t alignment, size_t num_bytes, |
255 | const AllocationAttributes& allocation_attr) override { |
256 | return wrapped_->AllocateRaw(alignment, num_bytes, allocation_attr); |
257 | } |
258 | |
259 | void DeallocateRaw(void* ptr) override { wrapped_->DeallocateRaw(ptr); } |
260 | |
261 | bool TracksAllocationSizes() const override { |
262 | return wrapped_->TracksAllocationSizes(); |
263 | } |
264 | |
265 | bool AllocatesOpaqueHandle() const override { |
266 | return wrapped_->AllocatesOpaqueHandle(); |
267 | } |
268 | |
269 | size_t RequestedSize(const void* ptr) const override { |
270 | return wrapped_->RequestedSize(ptr); |
271 | } |
272 | |
273 | size_t AllocatedSize(const void* ptr) const override { |
274 | return wrapped_->AllocatedSize(ptr); |
275 | } |
276 | |
277 | int64_t AllocationId(const void* ptr) const override { |
278 | return wrapped_->AllocationId(ptr); |
279 | } |
280 | |
281 | size_t AllocatedSizeSlow(const void* ptr) const override { |
282 | return wrapped_->AllocatedSizeSlow(ptr); |
283 | } |
284 | |
285 | AllocatorMemoryType GetMemoryType() const override { |
286 | return wrapped_->GetMemoryType(); |
287 | } |
288 | |
289 | private: |
290 | Allocator* const wrapped_; |
291 | }; |
292 | |
293 | // A tensorflow Op may need access to different kinds of memory that |
294 | // are not simply a function of the device to which the Op has been |
295 | // assigned. For example, an Op executing on a GPU may still need |
296 | // to allocate CPU RAM for some purpose. Internal to the tensorflow |
// runtime we may choose to allocate CPU RAM from special regions
298 | // that have been prepared for higher performance in some use |
299 | // contexts, e.g. doing DMA with particular devices. For these |
300 | // reasons, the Device interface does not expose just one memory |
301 | // Allocator, but instead provides an accessor that takes a |
302 | // specification of the desired memory attributes in order to select |
303 | // an Allocator. |
304 | // |
305 | // Example use: |
306 | // // Allocator for ordinary device memory: |
307 | // Allocator* a = allocator(AllocatorAttributes()); |
308 | // ... |
309 | // // Allocator for CPU RAM, regardless of where Op is executing: |
310 | // AllocatorAttributes attr; |
311 | // attr.set_on_host(true); |
312 | // Allocator* a = allocator(attr); |
313 | struct AllocatorAttributes { |
314 | void set_on_host(bool v) { value |= (static_cast<int>(v)); } |
315 | bool on_host() const { return value & 0x1; } |
316 | void set_nic_compatible(bool v) { value |= (static_cast<int>(v) << 1); } |
317 | bool nic_compatible() const { return value & (0x1 << 1); } |
318 | void set_gpu_compatible(bool v) { value |= (static_cast<int>(v) << 2); } |
319 | bool gpu_compatible() const { return value & (0x1 << 2); } |
320 | void Merge(AllocatorAttributes other) { |
321 | value |= other.value; |
322 | if (scope_id != other.scope_id) { |
323 | CHECK(scope_id == 0 || other.scope_id == 0) |
324 | << "At least one scope_id should be zero to merge " |
325 | "AllocatorAttributes but found this.scope_id=" |
326 | << scope_id << " and other.scope_id=" << other.scope_id; |
327 | scope_id = scope_id == 0 ? other.scope_id : scope_id; |
328 | } |
329 | } |
  // Returns true if the fields set in *this are a subset of, or equal to,
  // those set in other.
332 | bool IsEqualOrLessRestrictiveThan(const AllocatorAttributes& other) const { |
333 | return (value | other.value) == other.value; |
334 | } |
335 | |
336 | // NOTE: The upper 8 bits of the value are reserved for |
337 | // device-specific uses. Implementors of a device can interpret these |
338 | // upper 8 bits in device-specific ways, and ops implemented for those |
339 | // devices are responsible for setting those 8 bits appropriately. |
340 | uint32 value = 0; |
341 | // EXPERIMENTAL: If this is greater than zero, then allocation is delegated to |
342 | // a named special-purpose allocator on the same device. |
343 | int32 scope_id = 0; |
344 | |
  // Returns a human-readable representation of this.
346 | std::string DebugString() const; |
347 | }; |
348 | |
349 | // Returns a trivial implementation of Allocator, which is a process singleton. |
350 | // Access through this function is only intended for use by restricted parts |
351 | // of the infrastructure. |
352 | Allocator* cpu_allocator_base(); |
353 | |
354 | // If available, calls ProcessState::GetCPUAllocator(numa_node). |
355 | // If not, falls back to cpu_allocator_base(). |
356 | // Intended for use in contexts where ProcessState is not visible at |
357 | // compile time. Where ProcessState is visible, it's preferable to |
358 | // call it directly. |
359 | Allocator* cpu_allocator(int numa_node = port::kNUMANoAffinity); |
360 | |
361 | // Enables AllocatorStats in the default CPU allocator implementation. By |
362 | // default, it's disabled. |
363 | void EnableCPUAllocatorStats(); |
364 | // Disables AllocatorStats in the default CPU allocator implementation. By |
365 | // default, it's disabled. |
366 | void DisableCPUAllocatorStats(); |
367 | bool CPUAllocatorStatsEnabled(); |
368 | |
369 | // Enables full statistics collection in the default CPU allocator |
370 | // implementation. By default, it's disabled. |
371 | void EnableCPUAllocatorFullStats(); |
372 | bool CPUAllocatorFullStatsEnabled(); |
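
// Example use (illustrative sketch): enable statistics before the allocations
// of interest, then inspect them via Allocator::GetStats().
//   EnableCPUAllocatorStats();
//   Allocator* a = cpu_allocator();
//   // ... allocations through `a` ...
//   if (absl::optional<AllocatorStats> stats = a->GetStats()) {
//     LOG(INFO) << stats->DebugString();
//   }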
373 | |
// An object that does the underlying suballoc/free of memory for a
// higher-level allocator. The expectation is that the higher-level allocator
// is doing some kind of cache or pool management so that it will call
// SubAllocator::Alloc and Free relatively infrequently, compared to the number
// of times its own AllocateRaw and DeallocateRaw methods are called.
379 | class SubAllocator { |
380 | public: |
381 | // Visitor gets called with a pointer to a memory area and its |
382 | // size in bytes. The index value will be numa_node for a CPU |
383 | // allocator and GPU id for a GPU allocator. |
384 | typedef std::function<void(void*, int index, size_t)> Visitor; |
385 | |
386 | SubAllocator(const std::vector<Visitor>& alloc_visitors, |
387 | const std::vector<Visitor>& free_visitors); |
388 | |
389 | virtual ~SubAllocator() {} |
  // Allocates at least num_bytes. Returns the actual number of bytes
  // allocated in bytes_received. The caller can safely use the full
  // bytes_received-sized buffer following the returned pointer.
393 | virtual void* Alloc(size_t alignment, size_t num_bytes, |
394 | size_t* bytes_received) = 0; |
395 | virtual void Free(void* ptr, size_t num_bytes) = 0; |
396 | |
397 | // Returns true if the BFC allocator can safely coalesce adjacent regions |
398 | // returned by this allocator. |
399 | virtual bool SupportsCoalescing() const = 0; |
400 | |
401 | // Returns the type of the memory allocated by this SubAllocator. |
402 | virtual AllocatorMemoryType GetMemoryType() const { |
403 | return AllocatorMemoryType::kUnknown; |
404 | } |
405 | |
406 | protected: |
  // Implementations of the Alloc() method must call this on the newly
  // allocated value.
409 | void VisitAlloc(void* ptr, int index, size_t num_bytes); |
410 | |
  // Implementations of the Free() method must call this on the value to be
  // freed, immediately before deallocation.
413 | void VisitFree(void* ptr, int index, size_t num_bytes); |
414 | |
415 | const std::vector<Visitor> alloc_visitors_; |
416 | const std::vector<Visitor> free_visitors_; |
417 | }; |
418 | |
419 | } // namespace tsl |
420 | |
421 | #endif // TENSORFLOW_TSL_FRAMEWORK_ALLOCATOR_H_ |
422 | |