/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_
#define TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_

// Simple LRU pool allocators for various flavors of CPU RAM.

#include <atomic>
#include <map>
#include <memory>
#include <vector>

#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/lib/core/bits.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {

// Interface of an object that rounds up integers.
class RoundUpInterface {
 public:
  virtual ~RoundUpInterface() {}
  virtual size_t RoundUp(size_t num_bytes) = 0;
};

// Size-limited pool of memory buffers obtained from a SubAllocator
// instance.  Pool eviction policy is LRU.
class PoolAllocator : public Allocator {
 public:
  // "pool_size_limit" is the maximum number of returned, re-usable
  // memory buffers to keep in the pool.  If pool_size_limit == 0, the
  // pool is effectively a thin wrapper around the allocator.
  // If "auto_resize" is true, then the pool_size_limit will gradually
  // be raised so that deallocations happen very rarely, if at all.
  // Transitory start-up objects may deallocate, but the long-term
  // working-set should not.  Auto-resizing can raise pool_size_limit
  // but will never lower it.
  // "allocator" is the object that performs the underlying memory
  // malloc/free operations.  This object takes ownership of allocator
  // and of size_rounder.  (An illustrative construction sketch appears
  // near the end of this file.)
  PoolAllocator(size_t pool_size_limit, bool auto_resize,
                SubAllocator* allocator, RoundUpInterface* size_rounder,
                string name);
  ~PoolAllocator() override;

  string Name() override { return name_; }

  void* AllocateRaw(size_t alignment, size_t num_bytes) override;

  void DeallocateRaw(void* ptr) override;

  // Allocate an unused memory region of size "num_bytes".  Fetch from
  // the pool if available, otherwise call allocator_.
  void* Get(size_t num_bytes);

  // Return a no-longer needed memory region to the pool.  It is an error
  // to dereference "ptr" after this call.  If the pool is full, the least
  // recently used region will be deallocated.
  void Put(void* ptr, size_t num_bytes);

  // Reset the pool to empty.
  void Clear();

  // The following accessors permit monitoring the effectiveness of
  // the pool at avoiding repeated malloc/frees on the underlying
  // allocator.  Read locks are not taken on the theory that value
  // consistency with other threads is not important.

  // Number of Get() requests satisfied from pool.
  int64_t get_from_pool_count() const TF_NO_THREAD_SAFETY_ANALYSIS {
    return get_from_pool_count_;
  }
  // Number of Put() requests.
  int64_t put_count() const TF_NO_THREAD_SAFETY_ANALYSIS { return put_count_; }
  // Number of Get() requests requiring a fresh allocation.
  int64_t allocated_count() const TF_NO_THREAD_SAFETY_ANALYSIS {
    return allocated_count_;
  }
  // Number of pool evictions.
  int64_t evicted_count() const TF_NO_THREAD_SAFETY_ANALYSIS {
    return evicted_count_;
  }
  // Current size limit.
  size_t size_limit() const TF_NO_THREAD_SAFETY_ANALYSIS {
    return pool_size_limit_;
  }

  AllocatorMemoryType GetMemoryType() const override {
    return allocator_->GetMemoryType();
  }

 private:
  struct PtrRecord {
    void* ptr;
    size_t num_bytes;
    PtrRecord* prev;
    PtrRecord* next;
  };

  // Remove "pr" from the doubly-linked LRU list.
  void RemoveFromList(PtrRecord* pr) TF_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Add "pr" to the head of the doubly-linked LRU list.
  void AddToList(PtrRecord* pr) TF_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Delete the least recently used record.
  void EvictOne() TF_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  const string name_;
  const bool has_size_limit_;
  const bool auto_resize_;
  size_t pool_size_limit_;
  std::unique_ptr<SubAllocator> allocator_;
  std::unique_ptr<RoundUpInterface> size_rounder_;
  mutex mutex_;
  std::multimap<const size_t, PtrRecord*> pool_ TF_GUARDED_BY(mutex_);
  PtrRecord* lru_head_ TF_GUARDED_BY(mutex_) = nullptr;
  PtrRecord* lru_tail_ TF_GUARDED_BY(mutex_) = nullptr;
  int64_t get_from_pool_count_ TF_GUARDED_BY(mutex_) = 0;
  int64_t put_count_ TF_GUARDED_BY(mutex_) = 0;
  int64_t allocated_count_ TF_GUARDED_BY(mutex_) = 0;
  int64_t evicted_count_ TF_GUARDED_BY(mutex_) = 0;
};
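
// Example round trip through the pool (an illustrative sketch only; "pool"
// is a hypothetical PoolAllocator instance, e.g. wired up as sketched near
// the end of this file):
//
//   void* a = pool.AllocateRaw(/*alignment=*/64, /*num_bytes=*/4096);
//   pool.DeallocateRaw(a);                 // parks the buffer in the pool
//   void* b = pool.AllocateRaw(64, 4096);  // likely satisfied from the pool
//   pool.DeallocateRaw(b);
//
//   // The accessors above can be used to gauge how often the underlying
//   // SubAllocator was bypassed, e.g. a pool hit rate:
//   double hits = static_cast<double>(pool.get_from_pool_count());
//   double misses = static_cast<double>(pool.allocated_count());
//   double hit_rate = hits / (hits + misses);  // guard against 0 in practice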

// Do-nothing rounder.  Passes through sizes unchanged.
class NoopRounder : public RoundUpInterface {
 public:
  size_t RoundUp(size_t num_bytes) override { return num_bytes; }
};

// Power of 2 rounder: rounds up to nearest power of 2 size.
class Pow2Rounder : public RoundUpInterface {
 public:
  size_t RoundUp(size_t num_bytes) override {
    return 1uLL << Log2Ceiling64(num_bytes);
  }
};
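
// A brief sketch of the rounding behavior (illustrative values):
//
//   Pow2Rounder rounder;
//   rounder.RoundUp(1000);  // == 1024, the next power of 2
//   rounder.RoundUp(4096);  // == 4096, already a power of 2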

// SubAllocator that obtains pageable host (CPU) memory, optionally
// constrained to a specific NUMA node.
class BasicCPUAllocator : public SubAllocator {
 public:
  BasicCPUAllocator(int numa_node, const std::vector<Visitor>& alloc_visitors,
                    const std::vector<Visitor>& free_visitors)
      : SubAllocator(alloc_visitors, free_visitors), numa_node_(numa_node) {}

  ~BasicCPUAllocator() override {}

  void* Alloc(size_t alignment, size_t num_bytes,
              size_t* bytes_received) override;

  void Free(void* ptr, size_t num_bytes) override;

  bool SupportsCoalescing() const override { return false; }

  AllocatorMemoryType GetMemoryType() const override {
    return AllocatorMemoryType::kHostPageable;
  }

 private:
  int numa_node_;

  TF_DISALLOW_COPY_AND_ASSIGN(BasicCPUAllocator);
};
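
// Example wiring (an illustrative sketch, not a prescribed configuration;
// the pool size limit and NUMA node below are placeholder values):
//
//   PoolAllocator* pool = new PoolAllocator(
//       /*pool_size_limit=*/100, /*auto_resize=*/true,
//       new BasicCPUAllocator(/*numa_node=*/0, /*alloc_visitors=*/{},
//                             /*free_visitors=*/{}),
//       new Pow2Rounder, /*name=*/"cpu_pool");
//
// The PoolAllocator takes ownership of both the BasicCPUAllocator and the
// Pow2Rounder, per the unique_ptr members above.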

}  // namespace tensorflow
#endif  // TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_