1 | /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #include <atomic> |
17 | |
18 | #include "tensorflow/core/profiler/lib/scoped_memory_debug_annotation.h" |
19 | #include "tensorflow/core/profiler/lib/traceme.h" |
20 | #include "tensorflow/tsl/framework/allocator.h" |
21 | #include "tensorflow/tsl/framework/allocator_registry.h" |
22 | #include "tensorflow/tsl/framework/tracking_allocator.h" |
23 | #include "tensorflow/tsl/platform/mem.h" |
24 | #include "tensorflow/tsl/platform/mutex.h" |
25 | #include "tensorflow/tsl/platform/strcat.h" |
26 | #include "tensorflow/tsl/platform/stringprintf.h" |
27 | #include "tensorflow/tsl/platform/types.h" |
28 | |
29 | namespace tsl { |
30 | |
// If true, cpu allocator collects more stats.
//
// This flag is flipped by EnableCPUAllocatorStats()/DisableCPUAllocatorStats()
// and read on every AllocateRaw()/DeallocateRaw() call, potentially from many
// threads concurrently. A plain bool would be a data race under the C++ memory
// model, so use std::atomic<bool>; stats collection is best-effort, so relaxed
// visibility of a toggle is acceptable.
static std::atomic<bool> cpu_allocator_collect_stats{false};

// Enables collection of CPU allocator statistics (see CPUAllocator below).
void EnableCPUAllocatorStats() { cpu_allocator_collect_stats = true; }
// Disables collection of CPU allocator statistics.
void DisableCPUAllocatorStats() { cpu_allocator_collect_stats = false; }
// Returns whether CPU allocator statistics are currently being collected.
bool CPUAllocatorStatsEnabled() { return cpu_allocator_collect_stats; }
37 | |
// Emit at most this many "total allocated memory" warnings per process.
static const int kMaxTotalAllocationWarnings = 1;

// Emit at most this many "large single allocation" warnings per process.
static const int kMaxSingleAllocationWarnings = 5;

// If cpu_allocator_collect_stats is true, warn when the total allocated memory
// exceeds this threshold (expressed as a fraction of available system RAM).
static const double kTotalAllocationWarningThreshold = 0.5;

// Individual allocations larger than this fraction of available system RAM
// will trigger a warning (even when stats collection is disabled).
static const double kLargeAllocationWarningThreshold = 0.1;
48 | |
49 | // Cache first invocation to port::AvailableRam, as it can be expensive. |
50 | static int64_t LargeAllocationWarningBytes() { |
51 | static int64_t value = static_cast<int64_t>(port::AvailableRam() * |
52 | kLargeAllocationWarningThreshold); |
53 | return value; |
54 | } |
55 | |
56 | static int64_t TotalAllocationWarningBytes() { |
57 | static int64_t value = static_cast<int64_t>(port::AvailableRam() * |
58 | kTotalAllocationWarningThreshold); |
59 | return value; |
60 | } |
61 | |
62 | namespace { |
63 | |
64 | // A default Allocator for CPU devices. ProcessState::GetCPUAllocator() will |
65 | // return a different version that may perform better, but may also lack the |
66 | // optional stats triggered by the functions above. TODO(tucker): migrate all |
67 | // uses of cpu_allocator() except tests to use ProcessState instead. |
68 | class CPUAllocator : public Allocator { |
69 | public: |
70 | CPUAllocator() |
71 | : single_allocation_warning_count_(0), |
72 | total_allocation_warning_count_(0) {} |
73 | |
74 | ~CPUAllocator() override {} |
75 | |
76 | string Name() override { return "cpu" ; } |
77 | |
78 | void* AllocateRaw(size_t alignment, size_t num_bytes) override { |
79 | if (num_bytes > static_cast<size_t>(LargeAllocationWarningBytes()) && |
80 | single_allocation_warning_count_ < kMaxSingleAllocationWarnings) { |
81 | ++single_allocation_warning_count_; |
82 | LOG(WARNING) << "Allocation of " << num_bytes << " exceeds " |
83 | << 100 * kLargeAllocationWarningThreshold |
84 | << "% of free system memory." ; |
85 | } |
86 | |
87 | void* p = port::AlignedMalloc(num_bytes, alignment); |
88 | if (cpu_allocator_collect_stats) { |
89 | const std::size_t alloc_size = port::MallocExtension_GetAllocatedSize(p); |
90 | mutex_lock l(mu_); |
91 | ++stats_.num_allocs; |
92 | stats_.bytes_in_use += alloc_size; |
93 | stats_.peak_bytes_in_use = |
94 | std::max<int64_t>(stats_.peak_bytes_in_use, stats_.bytes_in_use); |
95 | stats_.largest_alloc_size = |
96 | std::max<int64_t>(stats_.largest_alloc_size, alloc_size); |
97 | |
98 | if (stats_.bytes_in_use > TotalAllocationWarningBytes() && |
99 | total_allocation_warning_count_ < kMaxTotalAllocationWarnings) { |
100 | ++total_allocation_warning_count_; |
101 | LOG(WARNING) << "Total allocated memory " << stats_.bytes_in_use |
102 | << "exceeds " << 100 * kTotalAllocationWarningThreshold |
103 | << "% of free system memory" ; |
104 | } |
105 | if (p != nullptr) { |
106 | AddTraceMe("MemoryAllocation" , p, num_bytes, alloc_size); |
107 | } |
108 | } |
109 | return p; |
110 | } |
111 | |
112 | void DeallocateRaw(void* ptr) override { |
113 | if (cpu_allocator_collect_stats) { |
114 | const std::size_t alloc_size = |
115 | port::MallocExtension_GetAllocatedSize(ptr); |
116 | mutex_lock l(mu_); |
117 | stats_.bytes_in_use -= alloc_size; |
118 | AddTraceMe("MemoryDeallocation" , ptr, 0, alloc_size); |
119 | } |
120 | port::AlignedFree(ptr); |
121 | } |
122 | |
123 | void AddTraceMe(absl::string_view traceme_name, const void* chunk_ptr, |
124 | std::size_t req_bytes, std::size_t alloc_bytes) { |
125 | tensorflow::profiler::TraceMe::InstantActivity( |
126 | [this, traceme_name, chunk_ptr, req_bytes, |
127 | alloc_bytes]() TF_NO_THREAD_SAFETY_ANALYSIS { |
128 | const auto& annotation = tensorflow::profiler:: |
129 | ScopedMemoryDebugAnnotation::CurrentAnnotation(); |
130 | return tensorflow::profiler::TraceMeEncode( |
131 | traceme_name, {{"allocator_name" , Name()}, |
132 | {"bytes_reserved" , stats_.bytes_reserved}, |
133 | {"bytes_allocated" , stats_.bytes_in_use}, |
134 | {"peak_bytes_in_use" , stats_.peak_bytes_in_use}, |
135 | {"requested_bytes" , req_bytes}, |
136 | {"allocation_bytes" , alloc_bytes}, |
137 | {"addr" , reinterpret_cast<uint64>(chunk_ptr)}, |
138 | {"tf_op" , annotation.pending_op_name}, |
139 | {"id" , annotation.pending_step_id}, |
140 | {"region_type" , annotation.pending_region_type}, |
141 | {"data_type" , annotation.pending_data_type}, |
142 | {"shape" , annotation.pending_shape_func()}}); |
143 | }, |
144 | /*level=*/tensorflow::profiler::TraceMeLevel::kInfo); |
145 | } |
146 | |
147 | absl::optional<AllocatorStats> GetStats() override { |
148 | if (!cpu_allocator_collect_stats) return absl::nullopt; |
149 | mutex_lock l(mu_); |
150 | return stats_; |
151 | } |
152 | |
153 | bool ClearStats() override { |
154 | if (!cpu_allocator_collect_stats) return false; |
155 | mutex_lock l(mu_); |
156 | stats_.num_allocs = 0; |
157 | stats_.peak_bytes_in_use = stats_.bytes_in_use; |
158 | stats_.largest_alloc_size = 0; |
159 | return true; |
160 | } |
161 | |
162 | size_t AllocatedSizeSlow(const void* ptr) const override { |
163 | return port::MallocExtension_GetAllocatedSize(ptr); |
164 | } |
165 | |
166 | AllocatorMemoryType GetMemoryType() const override { |
167 | return AllocatorMemoryType::kHostPageable; |
168 | } |
169 | |
170 | private: |
171 | mutex mu_; |
172 | AllocatorStats stats_ TF_GUARDED_BY(mu_); |
173 | |
174 | // Use <atomic> for single allocations to avoid mutex contention when |
175 | // statistics are disabled. |
176 | std::atomic<int> single_allocation_warning_count_; |
177 | int total_allocation_warning_count_ TF_GUARDED_BY(mu_); |
178 | |
179 | TF_DISALLOW_COPY_AND_ASSIGN(CPUAllocator); |
180 | }; |
181 | |
182 | class CPUAllocatorFactory : public AllocatorFactory { |
183 | public: |
184 | Allocator* CreateAllocator() override { return new CPUAllocator; } |
185 | |
186 | SubAllocator* CreateSubAllocator(int numa_node) override { |
187 | return new CPUSubAllocator(new CPUAllocator); |
188 | } |
189 | |
190 | private: |
191 | class CPUSubAllocator : public SubAllocator { |
192 | public: |
193 | explicit CPUSubAllocator(CPUAllocator* cpu_allocator) |
194 | : SubAllocator({}, {}), cpu_allocator_(cpu_allocator) {} |
195 | |
196 | void* Alloc(size_t alignment, size_t num_bytes, |
197 | size_t* bytes_received) override { |
198 | *bytes_received = num_bytes; |
199 | return cpu_allocator_->AllocateRaw(alignment, num_bytes); |
200 | } |
201 | |
202 | void Free(void* ptr, size_t num_bytes) override { |
203 | cpu_allocator_->DeallocateRaw(ptr); |
204 | } |
205 | |
206 | bool SupportsCoalescing() const override { return false; } |
207 | |
208 | AllocatorMemoryType GetMemoryType() const override { |
209 | return cpu_allocator_->GetMemoryType(); |
210 | } |
211 | |
212 | private: |
213 | CPUAllocator* cpu_allocator_; |
214 | }; |
215 | }; |
216 | |
// Registers this factory as the default CPU allocator under the name
// "DefaultCPUAllocator" with priority 100; presumably other registrations
// with different priorities can override it — confirm semantics in
// allocator_registry.h.
REGISTER_MEM_ALLOCATOR("DefaultCPUAllocator" , 100, CPUAllocatorFactory);
218 | } // namespace |
219 | |
220 | } // namespace tsl |
221 | |