/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/common_runtime/pool_allocator.h"

#include <errno.h>
#ifndef _MSC_VER
#include <strings.h>
#include <sys/mman.h>  // for munmap
#endif

#include <map>
#include <utility>

#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/mem.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/numa.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {

PoolAllocator::PoolAllocator(size_t pool_size_limit, bool auto_resize,
                             SubAllocator* allocator,
                             RoundUpInterface* size_rounder, string name)
    : name_(std::move(name)),
      has_size_limit_(pool_size_limit > 0),
      auto_resize_(auto_resize),
      pool_size_limit_(pool_size_limit),
      allocator_(allocator),
      size_rounder_(size_rounder) {
  if (auto_resize) {
    CHECK_LT(size_t{0}, pool_size_limit)
        << "size limit must be > 0 if auto_resize is true.";
  }
}
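
// Illustrative construction (a sketch, not part of this file): NoopRounder
// and BasicCPUAllocator are declared in pool_allocator.h, though their exact
// constructor signatures may differ across versions.
//
//   PoolAllocator pool(/*pool_size_limit=*/100, /*auto_resize=*/true,
//                      new BasicCPUAllocator(port::kNUMANoAffinity, {}, {}),
//                      new NoopRounder, "cpu_pool");
//
// With auto_resize=true the size limit must be positive; EvictOne() below may
// later raise it when eviction and pool-miss rates stay high.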

PoolAllocator::~PoolAllocator() { Clear(); }

namespace {
// Pools contain Chunks allocated from the underlying Allocator.
// Chunk alignment is always on kPoolAlignment boundaries. Each Chunk
// begins with a descriptor (ChunkPrefix) that gives its size and a
// pointer to itself. The pointer returned to the user is just past
// the ChunkPrefix. If the user asks for a larger alignment, we will
// increase the size of the chunk, then adjust the returned user
// pointer and also re-write the ChunkPrefix.chunk_ptr value
// immediately before it. This way the Chunk address and size can be
// recovered from the returned user pointer, regardless of alignment.
// Note that this dereferencing of the pointers means that we cannot
// handle GPU memory, only CPU memory.
struct ChunkPrefix {
  size_t num_bytes;
  void* chunk_ptr;
};
// kPoolAlignment cannot be less than the size of ChunkPrefix.
static const int kPoolAlignment = sizeof(ChunkPrefix);
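// Resulting chunk layout in the common case (alignment <= kPoolAlignment),
// shown as an illustration only:
//
//   chunk --> +---------------------------+
//             | ChunkPrefix               |
//             |   num_bytes               |
//             |   chunk_ptr --> chunk     |
//             +---------------------------+
//   user ---> | user data ...             |
//             +---------------------------+
//
// For a larger requested alignment the user pointer is pushed further
// forward and chunk_ptr is re-written immediately before it, so FindPrefix()
// below still recovers the chunk address.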

void* PrepareChunk(void* chunk, size_t alignment, size_t num_bytes) {
  ChunkPrefix* cp = reinterpret_cast<ChunkPrefix*>(chunk);
  cp->num_bytes = num_bytes;
  cp->chunk_ptr = chunk;
  void* user_ptr = reinterpret_cast<void*>(cp + 1);
  if (alignment > kPoolAlignment) {
    // Move user_ptr forward to the first satisfying offset, and write
    // chunk_ptr just before it.
    size_t aligned_ptr = reinterpret_cast<size_t>(user_ptr) + alignment;
    user_ptr = reinterpret_cast<void*>(aligned_ptr & ~(alignment - 1));
    (reinterpret_cast<ChunkPrefix*>(user_ptr) - 1)->chunk_ptr = chunk;
  }
  // Safety check that user_ptr is always past the ChunkPrefix.
  CHECK_GE(user_ptr, reinterpret_cast<ChunkPrefix*>(chunk) + 1);
  return user_ptr;
}

ChunkPrefix* FindPrefix(void* user_ptr) {
  ChunkPrefix* cp = reinterpret_cast<ChunkPrefix*>(user_ptr) - 1;
  return reinterpret_cast<ChunkPrefix*>(cp->chunk_ptr);
}
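// Worked example (illustrative, assuming a 64-bit build where
// sizeof(ChunkPrefix) == 16 and a 16-aligned chunk at, say, 0x...010):
//   PrepareChunk(chunk, /*alignment=*/64, n): user_ptr starts at chunk + 16
//   (0x...020); aligned_ptr = 0x...060 is masked down to 0x...040, so the
//   caller receives chunk + 48, and chunk's address is re-written into the
//   ChunkPrefix slot immediately before that pointer.
//   FindPrefix(chunk + 48) reads that slot and returns the ChunkPrefix at
//   chunk, recovering both the chunk address and num_bytes.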
}  // namespace

void* PoolAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
  if (num_bytes == 0) return nullptr;

  // If alignment is larger than kPoolAlignment, increase num_bytes so that we
  // are guaranteed to be able to return an aligned ptr by advancing user_ptr
  // without overrunning the end of the chunk.
  if (alignment > kPoolAlignment) {
    num_bytes += alignment;
  }
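  // Illustrative arithmetic: with 64-byte alignment and a 16-byte ChunkPrefix
  // (typical of a 64-bit build), a 100-byte request grows to
  // 100 + 64 + 16 = 180 bytes here, before size_rounder_->RoundUp() below.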
  num_bytes += sizeof(ChunkPrefix);
  num_bytes = size_rounder_->RoundUp(num_bytes);
  PtrRecord* pr = nullptr;
  if (has_size_limit_) {
    {
      mutex_lock lock(mutex_);
      auto iter = pool_.find(num_bytes);
      if (iter == pool_.end()) {
        allocated_count_++;
        // Deliberately fall out of lock scope before
        // calling the allocator. No further modification
        // to the pool will be performed.
      } else {
        get_from_pool_count_++;
        pr = iter->second;
        RemoveFromList(pr);
        pool_.erase(iter);
        // Fall out of lock scope and prepare the result without the
        // lock held.
      }
    }
  }
  if (pr != nullptr) {
    void* r = pr->ptr;
    delete pr;
    return PrepareChunk(r, alignment, num_bytes);
  } else {
    size_t bytes_received;
    void* ptr = allocator_->Alloc(kPoolAlignment, num_bytes, &bytes_received);
    return PrepareChunk(ptr, alignment, bytes_received);
  }
}

void PoolAllocator::DeallocateRaw(void* ptr) {
  if (ptr == nullptr) return;
  ChunkPrefix* cp = FindPrefix(ptr);
  CHECK_LE((void*)cp, (void*)ptr);
  if (!has_size_limit_ && !auto_resize_) {
    allocator_->Free(cp, cp->num_bytes);
  } else {
    mutex_lock lock(mutex_);
    ++put_count_;
    while (pool_.size() >= pool_size_limit_) {
      EvictOne();
    }
    PtrRecord* pr = new PtrRecord;
    pr->num_bytes = cp->num_bytes;
    pr->ptr = cp;
    AddToList(pr);
    pool_.insert(std::make_pair(cp->num_bytes, pr));
  }
}

void PoolAllocator::Clear() {
  if (has_size_limit_) {
    mutex_lock lock(mutex_);
    for (auto iter : pool_) {
      PtrRecord* pr = iter.second;
      allocator_->Free(pr->ptr, pr->num_bytes);
      delete pr;
    }
    pool_.clear();
    get_from_pool_count_ = 0;
    put_count_ = 0;
    allocated_count_ = 0;
    evicted_count_ = 0;
    lru_head_ = nullptr;
    lru_tail_ = nullptr;
  }
}

void PoolAllocator::RemoveFromList(PtrRecord* pr) {
  if (pr->prev == nullptr) {
    DCHECK_EQ(lru_head_, pr);
    lru_head_ = nullptr;
  } else {
    pr->prev->next = pr->next;
  }
  if (pr->next == nullptr) {
    DCHECK_EQ(lru_tail_, pr);
    lru_tail_ = pr->prev;
  } else {
    pr->next->prev = pr->prev;
    if (lru_head_ == nullptr) {
      lru_head_ = pr->next;
    }
  }
}

void PoolAllocator::AddToList(PtrRecord* pr) {
  pr->prev = nullptr;
  if (lru_head_ == nullptr) {
    CHECK(lru_tail_ == nullptr);
    lru_tail_ = pr;
    pr->next = nullptr;
  } else {
    pr->next = lru_head_;
    pr->next->prev = pr;
  }
  lru_head_ = pr;
}

void PoolAllocator::EvictOne() {
  DCHECK(lru_tail_ != nullptr);
  PtrRecord* prec = lru_tail_;
  RemoveFromList(prec);
  auto iter = pool_.find(prec->num_bytes);
  while (iter->second != prec) {
    ++iter;
    DCHECK(iter != pool_.end());
  }
  pool_.erase(iter);
  allocator_->Free(prec->ptr, prec->num_bytes);
  delete prec;
  ++evicted_count_;
  // Auto-resizing, and warning messages.
  static const double kTolerable = 2e-3;
  static const int kCheckInterval = 1000;
  static const double kIncreaseFactor = 1.1;
  static const int kMinPoolSize = 100;
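  // Illustrative reading of these constants: every 1000 evictions, if more
  // than 0.2% of puts led to an eviction, more than 0.2% of allocation
  // requests missed the pool, and auto_resize_ is set, then pool_size_limit_
  // grows by 10% (to at least kMinPoolSize) and the counters reset.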
  if (0 == evicted_count_ % kCheckInterval) {
    const double eviction_rate =
        evicted_count_ / static_cast<double>(put_count_);
    const int64_t alloc_request_count = allocated_count_ + get_from_pool_count_;
    const double alloc_rate =
        (alloc_request_count == 0)
            ? 0.0
            : allocated_count_ / static_cast<double>(alloc_request_count);
    // Can turn on for debugging purposes.
    const bool kShouldLog = false;
    if (kShouldLog) {
      LOG(INFO) << "PoolAllocator: After " << alloc_request_count
                << " get requests, put_count=" << put_count_
                << " evicted_count=" << evicted_count_
                << " eviction_rate=" << eviction_rate
                << " and unsatisfied allocation rate=" << alloc_rate;
    }
    if (auto_resize_ && (eviction_rate > kTolerable) &&
        (alloc_rate > kTolerable)) {
      size_t new_size_limit = (pool_size_limit_ < kMinPoolSize)
                                  ? kMinPoolSize
                                  : (kIncreaseFactor * pool_size_limit_);
      if (kShouldLog) {
        LOG(INFO) << "Raising pool_size_limit_ from " << pool_size_limit_
                  << " to " << new_size_limit;
      }
      pool_size_limit_ = new_size_limit;
      // Reset all the counters so that ratios are relative to new sizes
      // at next test interval.
      put_count_ = 0;
      allocated_count_ = 0;
      evicted_count_ = 0;
      get_from_pool_count_ = 0;
    }
  }
}

void* BasicCPUAllocator::Alloc(size_t alignment, size_t num_bytes,
                               size_t* bytes_received) {
  void* ptr = nullptr;
  *bytes_received = num_bytes;
  if (num_bytes > 0) {
    if (numa_node_ == port::kNUMANoAffinity) {
      ptr = port::AlignedMalloc(num_bytes, static_cast<int>(alignment));
    } else {
      ptr =
          port::NUMAMalloc(numa_node_, num_bytes, static_cast<int>(alignment));
    }
    VisitAlloc(ptr, numa_node_, num_bytes);
  }
  return ptr;
}

void BasicCPUAllocator::Free(void* ptr, size_t num_bytes) {
  if (num_bytes > 0) {
    VisitFree(ptr, numa_node_, num_bytes);
    if (numa_node_ == port::kNUMANoAffinity) {
      port::AlignedFree(ptr);
    } else {
      port::NUMAFree(ptr, num_bytes);
    }
  }
}
}  // namespace tensorflow