/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <atomic>

#include "tensorflow/core/profiler/lib/scoped_memory_debug_annotation.h"
#include "tensorflow/core/profiler/lib/traceme.h"
#include "tensorflow/tsl/framework/allocator.h"
#include "tensorflow/tsl/framework/allocator_registry.h"
#include "tensorflow/tsl/framework/tracking_allocator.h"
#include "tensorflow/tsl/platform/mem.h"
#include "tensorflow/tsl/platform/mutex.h"
#include "tensorflow/tsl/platform/strcat.h"
#include "tensorflow/tsl/platform/stringprintf.h"
#include "tensorflow/tsl/platform/types.h"

namespace tsl {

// If true, cpu allocator collects more stats.
static bool cpu_allocator_collect_stats = false;

void EnableCPUAllocatorStats() { cpu_allocator_collect_stats = true; }
void DisableCPUAllocatorStats() { cpu_allocator_collect_stats = false; }
bool CPUAllocatorStatsEnabled() { return cpu_allocator_collect_stats; }
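
// A minimal usage sketch (illustrative only): callers enable stats before
// allocating and can then inspect them through Allocator::GetStats(). This
// assumes the cpu_allocator() accessor declared in allocator.h.
//
//   tsl::EnableCPUAllocatorStats();
//   Allocator* a = tsl::cpu_allocator();
//   void* p = a->AllocateRaw(Allocator::kAllocatorAlignment, 1 << 20);
//   a->DeallocateRaw(p);
//   if (absl::optional<AllocatorStats> stats = a->GetStats()) {
//     LOG(INFO) << "bytes in use: " << stats->bytes_in_use;
//   }
//   tsl::DisableCPUAllocatorStats();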

// Warn at most this many times when total allocated memory exceeds the
// threshold below.
static const int kMaxTotalAllocationWarnings = 1;

// Warn at most this many times about individual large allocations.
static const int kMaxSingleAllocationWarnings = 5;

// If cpu_allocator_collect_stats is true, warn when total allocated memory
// exceeds this fraction of available system memory.
static const double kTotalAllocationWarningThreshold = 0.5;

// Individual allocations larger than this fraction of available system memory
// will trigger a warning.
static const double kLargeAllocationWarningThreshold = 0.1;

// Cache the result of the first call to port::AvailableRam(), as it can be
// expensive.
static int64_t LargeAllocationWarningBytes() {
  static int64_t value = static_cast<int64_t>(port::AvailableRam() *
                                              kLargeAllocationWarningThreshold);
  return value;
}

static int64_t TotalAllocationWarningBytes() {
  static int64_t value = static_cast<int64_t>(port::AvailableRam() *
                                              kTotalAllocationWarningThreshold);
  return value;
}
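
// Illustrative numbers only: on a machine reporting ~64 GiB of available RAM
// at first use, LargeAllocationWarningBytes() is roughly 6.4 GiB (10%) and
// TotalAllocationWarningBytes() is roughly 32 GiB (50%).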

namespace {

// A default Allocator for CPU devices. ProcessState::GetCPUAllocator() will
// return a different version that may perform better, but may also lack the
// optional stats triggered by the functions above. TODO(tucker): migrate all
// uses of cpu_allocator() except tests to use ProcessState instead.
class CPUAllocator : public Allocator {
 public:
  CPUAllocator()
      : single_allocation_warning_count_(0),
        total_allocation_warning_count_(0) {}

  ~CPUAllocator() override {}

  string Name() override { return "cpu"; }

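  // Allocates `num_bytes` aligned to `alignment` via port::AlignedMalloc.
  // Warns (a bounded number of times) when a single request exceeds
  // LargeAllocationWarningBytes(); when stats collection is enabled, also
  // updates the allocation statistics and emits a profiler event.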
  void* AllocateRaw(size_t alignment, size_t num_bytes) override {
    if (num_bytes > static_cast<size_t>(LargeAllocationWarningBytes()) &&
        single_allocation_warning_count_ < kMaxSingleAllocationWarnings) {
      ++single_allocation_warning_count_;
      LOG(WARNING) << "Allocation of " << num_bytes << " exceeds "
                   << 100 * kLargeAllocationWarningThreshold
                   << "% of free system memory.";
    }

    void* p = port::AlignedMalloc(num_bytes, alignment);
    if (cpu_allocator_collect_stats) {
      const std::size_t alloc_size = port::MallocExtension_GetAllocatedSize(p);
      mutex_lock l(mu_);
      ++stats_.num_allocs;
      stats_.bytes_in_use += alloc_size;
      stats_.peak_bytes_in_use =
          std::max<int64_t>(stats_.peak_bytes_in_use, stats_.bytes_in_use);
      stats_.largest_alloc_size =
          std::max<int64_t>(stats_.largest_alloc_size, alloc_size);

      if (stats_.bytes_in_use > TotalAllocationWarningBytes() &&
          total_allocation_warning_count_ < kMaxTotalAllocationWarnings) {
        ++total_allocation_warning_count_;
        LOG(WARNING) << "Total allocated memory " << stats_.bytes_in_use
                     << " exceeds " << 100 * kTotalAllocationWarningThreshold
                     << "% of free system memory.";
      }
      if (p != nullptr) {
        AddTraceMe("MemoryAllocation", p, num_bytes, alloc_size);
      }
    }
    return p;
  }

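  // Frees memory previously returned by AllocateRaw. When stats collection is
  // enabled, bytes_in_use is decremented by the allocator-reported size before
  // the memory is released.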
  void DeallocateRaw(void* ptr) override {
    if (cpu_allocator_collect_stats) {
      const std::size_t alloc_size =
          port::MallocExtension_GetAllocatedSize(ptr);
      mutex_lock l(mu_);
      stats_.bytes_in_use -= alloc_size;
      AddTraceMe("MemoryDeallocation", ptr, 0, alloc_size);
    }
    port::AlignedFree(ptr);
  }

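  // Emits an instant profiler event describing an allocation or deallocation,
  // tagged with the current stats and the pending ScopedMemoryDebugAnnotation
  // (op name, step id, region type, data type, shape). Called with mu_ held,
  // hence the TF_NO_THREAD_SAFETY_ANALYSIS on the capture below.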
  void AddTraceMe(absl::string_view traceme_name, const void* chunk_ptr,
                  std::size_t req_bytes, std::size_t alloc_bytes) {
    tensorflow::profiler::TraceMe::InstantActivity(
        [this, traceme_name, chunk_ptr, req_bytes,
         alloc_bytes]() TF_NO_THREAD_SAFETY_ANALYSIS {
          const auto& annotation = tensorflow::profiler::
              ScopedMemoryDebugAnnotation::CurrentAnnotation();
          return tensorflow::profiler::TraceMeEncode(
              traceme_name, {{"allocator_name", Name()},
                             {"bytes_reserved", stats_.bytes_reserved},
                             {"bytes_allocated", stats_.bytes_in_use},
                             {"peak_bytes_in_use", stats_.peak_bytes_in_use},
                             {"requested_bytes", req_bytes},
                             {"allocation_bytes", alloc_bytes},
                             {"addr", reinterpret_cast<uint64>(chunk_ptr)},
                             {"tf_op", annotation.pending_op_name},
                             {"id", annotation.pending_step_id},
                             {"region_type", annotation.pending_region_type},
                             {"data_type", annotation.pending_data_type},
                             {"shape", annotation.pending_shape_func()}});
        },
        /*level=*/tensorflow::profiler::TraceMeLevel::kInfo);
  }

  absl::optional<AllocatorStats> GetStats() override {
    if (!cpu_allocator_collect_stats) return absl::nullopt;
    mutex_lock l(mu_);
    return stats_;
  }

  bool ClearStats() override {
    if (!cpu_allocator_collect_stats) return false;
    mutex_lock l(mu_);
    stats_.num_allocs = 0;
    stats_.peak_bytes_in_use = stats_.bytes_in_use;
    stats_.largest_alloc_size = 0;
    return true;
  }

  size_t AllocatedSizeSlow(const void* ptr) const override {
    return port::MallocExtension_GetAllocatedSize(ptr);
  }

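  // Memory from this allocator is ordinary pageable host memory: it comes from
  // AlignedMalloc rather than a pinned (page-locked) allocation.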
  AllocatorMemoryType GetMemoryType() const override {
    return AllocatorMemoryType::kHostPageable;
  }

 private:
  mutex mu_;
  AllocatorStats stats_ TF_GUARDED_BY(mu_);

  // Use std::atomic for the single-allocation warning counter so AllocateRaw
  // can check and bump it without taking mu_, avoiding mutex contention when
  // statistics are disabled.
  std::atomic<int> single_allocation_warning_count_;
  int total_allocation_warning_count_ TF_GUARDED_BY(mu_);

  TF_DISALLOW_COPY_AND_ASSIGN(CPUAllocator);
};

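// Factory for the default CPU allocator registered below. CreateSubAllocator
// wraps a CPUAllocator in the SubAllocator interface so that pooling
// allocators (e.g. BFC-style allocators) can draw raw host memory from it.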
class CPUAllocatorFactory : public AllocatorFactory {
 public:
  Allocator* CreateAllocator() override { return new CPUAllocator; }

  SubAllocator* CreateSubAllocator(int numa_node) override {
    return new CPUSubAllocator(new CPUAllocator);
  }

 private:
  class CPUSubAllocator : public SubAllocator {
   public:
    explicit CPUSubAllocator(CPUAllocator* cpu_allocator)
        : SubAllocator({}, {}), cpu_allocator_(cpu_allocator) {}

    void* Alloc(size_t alignment, size_t num_bytes,
                size_t* bytes_received) override {
      *bytes_received = num_bytes;
      return cpu_allocator_->AllocateRaw(alignment, num_bytes);
    }

    void Free(void* ptr, size_t num_bytes) override {
      cpu_allocator_->DeallocateRaw(ptr);
    }

    bool SupportsCoalescing() const override { return false; }

    AllocatorMemoryType GetMemoryType() const override {
      return cpu_allocator_->GetMemoryType();
    }

   private:
    CPUAllocator* cpu_allocator_;
  };
};

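// Registers this factory as "DefaultCPUAllocator" at priority 100. The
// allocator registry selects the factory registered with the highest priority,
// so a more specialized factory registered at a higher priority takes
// precedence over this default.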
REGISTER_MEM_ALLOCATOR("DefaultCPUAllocator", 100, CPUAllocatorFactory);
}  // namespace

}  // namespace tsl