1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
// A simple CPU allocator that intercepts malloc/free calls from the MKL
// library and redirects them to the TensorFlow allocator.
18 | |
19 | #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_MKL_CPU_ALLOCATOR_H_ |
20 | #define TENSORFLOW_CORE_COMMON_RUNTIME_MKL_CPU_ALLOCATOR_H_ |
21 | |
22 | #ifdef INTEL_MKL |
23 | |
24 | #include <cstdlib> |
25 | |
26 | #include "tensorflow/core/common_runtime/bfc_allocator.h" |
27 | #include "tensorflow/core/common_runtime/pool_allocator.h" |
28 | #include "tensorflow/core/lib/strings/numbers.h" |
29 | #include "tensorflow/core/lib/strings/str_util.h" |
30 | #include "tensorflow/core/platform/mem.h" |
31 | #include "tensorflow/core/platform/numa.h" |
32 | #include "tensorflow/core/util/env_var.h" |
33 | #include "tensorflow/core/util/onednn_env_vars.h" |
34 | #ifdef _WIN32 |
35 | typedef unsigned int uint; |
36 | #endif |
37 | |
38 | namespace tensorflow { |
39 | |
40 | static bool mkl_small_allocator_collect_stats = false; |
41 | |
42 | class MklSubAllocator : public BasicCPUAllocator { |
43 | public: |
44 | MklSubAllocator() : BasicCPUAllocator(port::kNUMANoAffinity, {}, {}) {} |
45 | ~MklSubAllocator() override {} |
46 | }; |
47 | |
48 | // CPU allocator that handles small-size allocations by calling |
49 | // suballocator directly. Mostly, it is just a wrapper around a suballocator |
50 | // (that calls malloc and free directly) with support for bookkeeping. |
51 | class MklSmallSizeAllocator : public Allocator { |
52 | public: |
53 | MklSmallSizeAllocator(SubAllocator* sub_allocator, size_t total_memory, |
54 | const string& name) |
55 | : sub_allocator_(sub_allocator), name_(name) { |
56 | stats_.bytes_limit = total_memory; |
57 | } |
58 | ~MklSmallSizeAllocator() override {} |
59 | |
60 | TF_DISALLOW_COPY_AND_ASSIGN(MklSmallSizeAllocator); |
61 | |
62 | inline string Name() override { return name_; } |
63 | |
64 | void* AllocateRaw(size_t alignment, size_t num_bytes) override { |
65 | void* ptr = port::AlignedMalloc(num_bytes, alignment); |
66 | if (mkl_small_allocator_collect_stats) IncrementStats(num_bytes); |
67 | return ptr; |
68 | } |
69 | |
70 | void DeallocateRaw(void* ptr) override { |
71 | if (ptr == nullptr) { |
72 | LOG(ERROR) << "tried to deallocate nullptr" ; |
73 | return; |
74 | } |
75 | |
76 | if (mkl_small_allocator_collect_stats) { |
77 | const size_t alloc_size = port::MallocExtension_GetAllocatedSize(ptr); |
78 | DecrementStats(alloc_size); |
79 | } |
80 | port::AlignedFree(ptr); |
81 | } |
82 | |
83 | absl::optional<AllocatorStats> GetStats() override { |
84 | mutex_lock l(mutex_); |
85 | return stats_; |
86 | } |
87 | |
88 | bool ClearStats() override { |
89 | mutex_lock l(mutex_); |
90 | stats_.num_allocs = 0; |
91 | stats_.peak_bytes_in_use = 0; |
92 | stats_.largest_alloc_size = 0; |
93 | stats_.bytes_in_use = 0; |
94 | stats_.bytes_limit = 0; |
95 | return true; |
96 | } |
97 | |
98 | private: |
99 | // Increment statistics for the allocator handling small allocations. |
100 | inline void IncrementStats(size_t alloc_size) TF_LOCKS_EXCLUDED(mutex_) { |
101 | mutex_lock l(mutex_); |
102 | ++stats_.num_allocs; |
103 | stats_.bytes_in_use += alloc_size; |
104 | stats_.peak_bytes_in_use = |
105 | std::max(stats_.peak_bytes_in_use, stats_.bytes_in_use); |
106 | stats_.largest_alloc_size = |
107 | std::max(alloc_size, static_cast<size_t>(stats_.largest_alloc_size)); |
108 | } |
109 | |
110 | // Decrement statistics for the allocator handling small allocations. |
111 | inline void DecrementStats(size_t dealloc_size) TF_LOCKS_EXCLUDED(mutex_) { |
112 | mutex_lock l(mutex_); |
113 | stats_.bytes_in_use -= dealloc_size; |
114 | } |
115 | |
116 | SubAllocator* sub_allocator_; // Not owned by this class. |
117 | |
118 | // Mutex for protecting updates to map of allocations. |
119 | mutable mutex mutex_; |
120 | |
121 | // Allocator name |
122 | string name_; |
123 | |
124 | // Allocator stats for small allocs |
125 | AllocatorStats stats_ TF_GUARDED_BY(mutex_); |
126 | }; |
127 | |
/// CPU allocator for MKL that wraps BFC allocator and intercepts
/// and redirects memory allocation calls from MKL.
///
/// Allocations below kSmallAllocationsThreshold are served by a pass-through
/// small-size allocator; larger ones go to a BFC allocator. A hash map of
/// the BFC ("large") pointers lets DeallocateRaw route each pointer back to
/// the allocator that produced it.
class MklCPUAllocator : public Allocator {
 public:
  // Constructor and other standard functions

  /// Environment variable that the user can set to place an upper bound
  /// (in bytes) on memory allocated by this allocator.
  static constexpr const char* kMaxLimitStr = "TF_MKL_ALLOC_MAX_BYTES" ;

  /// Default upper limit on allocator size - 64GB
  static constexpr size_t kDefaultMaxLimit = 64LL << 30;

  // Note: Initialize() can CHECK-fail the process (e.g. on a malformed
  // TF_MKL_ALLOC_MAX_BYTES value).
  MklCPUAllocator() { TF_CHECK_OK(Initialize()); }

  ~MklCPUAllocator() override {
    delete small_size_allocator_;
    delete large_size_allocator_;
    // sub_allocator_ is NOT deleted here: ownership was transferred to
    // large_size_allocator_ via absl::WrapUnique in Initialize().
  }

  // Creates the small- and large-size allocators. The memory limit defaults
  // to physical RAM when sysconf can report it (kDefaultMaxLimit otherwise)
  // and may be overridden via TF_MKL_ALLOC_MAX_BYTES.
  Status Initialize() {
    VLOG(2) << "MklCPUAllocator: In MklCPUAllocator" ;

    // Set upper bound on memory allocation to physical RAM available on the
    // CPU unless explicitly specified by user
    uint64 max_mem_bytes = kDefaultMaxLimit;
#if defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
    max_mem_bytes =
        (uint64)sysconf(_SC_PHYS_PAGES) * (uint64)sysconf(_SC_PAGESIZE);
#endif
    char* user_mem_bytes = getenv(kMaxLimitStr);

    if (user_mem_bytes != NULL) {
      uint64 user_val = 0;
      if (!strings::safe_strtou64(user_mem_bytes, &user_val)) {
        return errors::InvalidArgument("Invalid memory limit (" , user_mem_bytes,
                                       ") specified for MKL allocator through " ,
                                       kMaxLimitStr);
      }
#if defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
      // A user limit above physical RAM is still honored; only warn, since
      // it may cause swapping and reduce performance.
      if (user_val > max_mem_bytes) {
        LOG(WARNING) << "The user specified a memory limit " << kMaxLimitStr
                     << "=" << user_val
                     << " greater than available physical memory: "
                     << max_mem_bytes
                     << ". This could significantly reduce performance!" ;
      }
#endif
      max_mem_bytes = user_val;
    }

    VLOG(1) << "MklCPUAllocator: Setting max_mem_bytes: " << max_mem_bytes;

    sub_allocator_ = new MklSubAllocator();

    // The SubAllocator is shared with (but not owned by) the small-size
    // allocator; ownership goes to the BFCAllocator below, so neither this
    // class nor MklSmallSizeAllocator deallocates it.
    small_size_allocator_ =
        new MklSmallSizeAllocator(sub_allocator_, max_mem_bytes, kName);

    BFCAllocator::Options large_allocator_opts;
    large_allocator_opts.allow_growth = kAllowGrowth;
    large_size_allocator_ =
        new BFCAllocator(absl::WrapUnique(sub_allocator_), max_mem_bytes, kName,
                         large_allocator_opts);
    return OkStatus();
  }

  inline string Name() override { return kName; }
  // A pointer is "small" iff it is absent from large_allocations_map_, i.e.
  // it was not handed out by the BFC (large) allocator.
  inline bool IsSmallSizeAllocation(const void* ptr) const
      TF_LOCKS_EXCLUDED(mutex_) {
    mutex_lock l(mutex_);
    return large_allocations_map_.find(ptr) == large_allocations_map_.end();
  }
  // AddLargeAllocMap and RemoveLargeAllocMap are always called with a lock held
  inline void AddLargeAllocMap(void* ptr, size_t num_bytes)
      TF_EXCLUSIVE_LOCKS_REQUIRED(mutex_) {
    if (ptr != nullptr) {
      std::pair<void*, size_t> map_val(ptr, num_bytes);
      large_allocations_map_.insert(map_val);
    }
  }
  inline void RemoveLargeAllocMap(void* ptr)
      TF_EXCLUSIVE_LOCKS_REQUIRED(mutex_) {
    auto map_iter = large_allocations_map_.find(ptr);
    if (map_iter != large_allocations_map_.end()) {
      large_allocations_map_.erase(map_iter);
    } else {
      // Pointer was not produced by the large allocator (or was already
      // freed); log and continue rather than crash.
      LOG(ERROR) << "tried to deallocate invalid pointer" ;
    }
    return;
  }

  inline void* AllocateRaw(size_t alignment, size_t num_bytes) override {
    // If the allocation size is less than threshold, call small allocator,
    // otherwise call large-size allocator (BFC). We found that BFC allocator
    // does not deliver good performance for small allocations when
    // inter_op_parallelism_threads is high.
    if (UseSystemAlloc() || num_bytes < kSmallAllocationsThreshold) {
      return small_size_allocator_->AllocateRaw(alignment, num_bytes);
    } else {
      // mutex_ is held across both the BFC allocation and the map insert so
      // no other thread can observe a large pointer missing from the map.
      mutex_lock l(mutex_);
      void* ptr = large_size_allocator_->AllocateRaw(alignment, num_bytes);
      AddLargeAllocMap(ptr, num_bytes);
      return ptr;
    }
  }
  inline void DeallocateRaw(void* ptr) override {
    // Check if ptr is for "small" allocation. If it is, then call Free
    // directly. Otherwise, call BFC to handle free.
    if (UseSystemAlloc() || IsSmallSizeAllocation(ptr)) {
      small_size_allocator_->DeallocateRaw(ptr);
    } else {
      mutex_lock l(mutex_);
      RemoveLargeAllocMap(ptr);
      large_size_allocator_->DeallocateRaw(ptr);
    }
  }
  // Aggregates statistics from the small- and large-size allocators.
  // NOTE(review): s_stats/l_stats are dereferenced without a has_value()
  // check; both sub-allocators currently always return engaged optionals —
  // confirm if either contract ever changes.
  absl::optional<AllocatorStats> GetStats() override {
    auto s_stats = small_size_allocator_->GetStats();
    auto l_stats = large_size_allocator_->GetStats();

    // Combine statistics from small-size and large-size allocator.
    mutex_lock l(mutex_);
    stats_.num_allocs = l_stats->num_allocs + s_stats->num_allocs;
    stats_.bytes_in_use = l_stats->bytes_in_use + s_stats->bytes_in_use;
    stats_.peak_bytes_in_use =
        l_stats->peak_bytes_in_use + s_stats->peak_bytes_in_use;

    // Since small-size allocations go to MklSmallSizeAllocator,
    // max_alloc_size from large_size_allocator would be the maximum
    // size allocated by MklCPUAllocator.
    stats_.largest_alloc_size = l_stats->largest_alloc_size;
    stats_.bytes_limit = std::max(s_stats->bytes_limit, l_stats->bytes_limit);
    return stats_;
  }

  // Clears stats on both sub-allocators; true only if both succeed.
  bool ClearStats() override {
    bool stats_cleared = small_size_allocator_->ClearStats();
    stats_cleared &= large_size_allocator_->ClearStats();
    return stats_cleared;
  }

 private:
  // Hooks provided by this allocator for memory allocation routines from MKL
  static inline void* MallocHook(size_t size) {
    VLOG(3) << "MklCPUAllocator: In MallocHook" ;
    return cpu_allocator()->AllocateRaw(kAlignment, size);
  }

  static inline void FreeHook(void* ptr) {
    VLOG(3) << "MklCPUAllocator: In FreeHook" ;
    cpu_allocator()->DeallocateRaw(ptr);
  }

  // calloc/realloc redirection is deliberately unsupported: reaching either
  // hook CHECK-fails the process with an UNIMPLEMENTED status.
  static inline void* CallocHook(size_t num, size_t size) {
    Status s = Status(error::Code::UNIMPLEMENTED,
                      "Unimplemented case for hooking MKL function." );
    TF_CHECK_OK(s);  // way to assert with an error message
    return nullptr;  // return a value and make static code analyzers happy
  }

  static inline void* ReallocHook(void* ptr, size_t size) {
    Status s = Status(error::Code::UNIMPLEMENTED,
                      "Unimplemented case for hooking MKL function." );
    TF_CHECK_OK(s);  // way to assert with an error message
    return nullptr;  // return a value and make static code analyzers happy
  }

  // Do we allow growth in BFC Allocator
  static const bool kAllowGrowth = true;

  // Name reported by Name() and passed to both sub-allocators.
  static constexpr const char* kName = "mklcpu" ;

  // The alignment that we need for the allocations
  static constexpr const size_t kAlignment = 64;

  Allocator* large_size_allocator_;              // owned by this class
  MklSmallSizeAllocator* small_size_allocator_;  // owned by this class.

  SubAllocator* sub_allocator_;  // not owned by this class
  mutable mutex mutex_;
  AllocatorStats stats_ TF_GUARDED_BY(mutex_);

  // Hash map to keep track of "BFC" allocations
  // We do not use BFC allocator for small allocations.
  std::unordered_map<const void*, size_t> large_allocations_map_
      TF_GUARDED_BY(mutex_);

  // Size in bytes that defines the upper-bound for "small" allocations.
  // Any allocation below this threshold is "small" allocation.
  static constexpr const size_t kSmallAllocationsThreshold = 4096;

  // Prevent copying and assignment
  TF_DISALLOW_COPY_AND_ASSIGN(MklCPUAllocator);
};
324 | |
325 | } // namespace tensorflow |
326 | |
327 | #endif // INTEL_MKL |
328 | |
329 | #endif // TENSORFLOW_CORE_COMMON_RUNTIME_MKL_CPU_ALLOCATOR_H_ |
330 | |