1 | /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_ |
17 | #define TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_ |
18 | |
19 | // Simple LRU pool allocators for various flavors of CPU RAM. |
20 | |
21 | #include <atomic> |
22 | #include <map> |
23 | #include <memory> |
24 | #include <vector> |
25 | |
26 | #include "tensorflow/core/framework/allocator.h" |
27 | #include "tensorflow/core/lib/core/bits.h" |
28 | #include "tensorflow/core/platform/logging.h" |
29 | #include "tensorflow/core/platform/macros.h" |
30 | #include "tensorflow/core/platform/mutex.h" |
31 | #include "tensorflow/core/platform/types.h" |
32 | |
33 | namespace tensorflow { |
34 | |
// Abstract strategy for rounding a byte count up to a size chosen by the
// implementation.
class RoundUpInterface {
 public:
  virtual ~RoundUpInterface() = default;

  // Returns the rounded-up form of "num_bytes".
  virtual size_t RoundUp(size_t num_bytes) = 0;
};
41 | |
42 | // Size-limited pool of memory buffers obtained from a SubAllocator |
43 | // instance. Pool eviction policy is LRU. |
44 | class PoolAllocator : public Allocator { |
45 | public: |
46 | // "pool_size_limit" is the maximum number of returned, re-usable |
47 | // memory buffers to keep in the pool. If pool_size_limit == 0, the |
48 | // pool is effectively a thin wrapper around the allocator. |
49 | // If "auto_resize" is true, then the pool_size_limit will gradually |
50 | // be raised so that deallocations happen very rarely, if at all. |
51 | // Transitory start-up objects may deallocate, but the long-term |
52 | // working-set should not. Auto-resizing can raise pool_size_limit |
53 | // but will never lower it. |
54 | // "allocator" is the object that performs the underlying memory |
55 | // malloc/free operations. This object takes ownership of allocator. |
56 | PoolAllocator(size_t pool_size_limit, bool auto_resize, |
57 | SubAllocator* allocator, RoundUpInterface* size_rounder, |
58 | string name); |
59 | ~PoolAllocator() override; |
60 | |
61 | string Name() override { return name_; } |
62 | |
63 | void* AllocateRaw(size_t alignment, size_t num_bytes) override; |
64 | |
65 | void DeallocateRaw(void* ptr) override; |
66 | |
67 | // Allocate an unused memory region of size "num_bytes". Fetch from |
68 | // the pool if available, otherwise call allocator_. |
69 | void* Get(size_t num_bytes); |
70 | |
71 | // Return a no-longer needed memory region to the pool. It is an error |
72 | // to deference "ptr" after this call. If the pool is full, the least |
73 | // recently used region will be deallocated. |
74 | void Put(void* ptr, size_t num_bytes); |
75 | |
76 | // Reset the pool to empty. |
77 | void Clear(); |
78 | |
79 | // The following accessors permit monitoring the effectiveness of |
80 | // the pool at avoiding repeated malloc/frees on the underlying |
81 | // allocator. Read locks are not taken on the theory that value |
82 | // consistency with other threads is not important. |
83 | |
84 | // Number of Get() requests satisfied from pool. |
85 | int64_t get_from_pool_count() const TF_NO_THREAD_SAFETY_ANALYSIS { |
86 | return get_from_pool_count_; |
87 | } |
88 | // Number of Put() requests. |
89 | int64_t put_count() const TF_NO_THREAD_SAFETY_ANALYSIS { return put_count_; } |
90 | // Number of Get() requests requiring a fresh allocation. |
91 | int64_t allocated_count() const TF_NO_THREAD_SAFETY_ANALYSIS { |
92 | return allocated_count_; |
93 | } |
94 | // Number of pool evictions. |
95 | int64_t evicted_count() const TF_NO_THREAD_SAFETY_ANALYSIS { |
96 | return evicted_count_; |
97 | } |
98 | // Current size limit. |
99 | size_t size_limit() const TF_NO_THREAD_SAFETY_ANALYSIS { |
100 | return pool_size_limit_; |
101 | } |
102 | |
103 | AllocatorMemoryType GetMemoryType() const override { |
104 | return allocator_->GetMemoryType(); |
105 | } |
106 | |
107 | private: |
108 | struct PtrRecord { |
109 | void* ptr; |
110 | size_t num_bytes; |
111 | PtrRecord* prev; |
112 | PtrRecord* next; |
113 | }; |
114 | |
115 | // Remove "pr" from the double-linked LRU list. |
116 | void RemoveFromList(PtrRecord* pr) TF_EXCLUSIVE_LOCKS_REQUIRED(mutex_); |
117 | |
118 | // Add "pr" to the head of the double-linked LRU list. |
119 | void AddToList(PtrRecord* pr) TF_EXCLUSIVE_LOCKS_REQUIRED(mutex_); |
120 | |
121 | // Delete the least recently used record. |
122 | void EvictOne() TF_EXCLUSIVE_LOCKS_REQUIRED(mutex_); |
123 | |
124 | const string name_; |
125 | const bool has_size_limit_; |
126 | const bool auto_resize_; |
127 | size_t pool_size_limit_; |
128 | std::unique_ptr<SubAllocator> allocator_; |
129 | std::unique_ptr<RoundUpInterface> size_rounder_; |
130 | mutex mutex_; |
131 | std::multimap<const size_t, PtrRecord*> pool_ TF_GUARDED_BY(mutex_); |
132 | PtrRecord* lru_head_ TF_GUARDED_BY(mutex_) = nullptr; |
133 | PtrRecord* lru_tail_ TF_GUARDED_BY(mutex_) = nullptr; |
134 | int64_t get_from_pool_count_ TF_GUARDED_BY(mutex_) = 0; |
135 | int64_t put_count_ TF_GUARDED_BY(mutex_) = 0; |
136 | int64_t allocated_count_ TF_GUARDED_BY(mutex_) = 0; |
137 | int64_t evicted_count_ TF_GUARDED_BY(mutex_) = 0; |
138 | }; |
139 | |
140 | // Do-nothing rounder. Passes through sizes unchanged. |
141 | class NoopRounder : public RoundUpInterface { |
142 | public: |
143 | size_t RoundUp(size_t num_bytes) override { return num_bytes; } |
144 | }; |
145 | |
146 | // Power of 2 rounder: rounds up to nearest power of 2 size. |
147 | class Pow2Rounder : public RoundUpInterface { |
148 | public: |
149 | size_t RoundUp(size_t num_bytes) override { |
150 | return 1uLL << Log2Ceiling64(num_bytes); |
151 | } |
152 | }; |
153 | |
154 | class BasicCPUAllocator : public SubAllocator { |
155 | public: |
156 | BasicCPUAllocator(int numa_node, const std::vector<Visitor>& alloc_visitors, |
157 | const std::vector<Visitor>& free_visitors) |
158 | : SubAllocator(alloc_visitors, free_visitors), numa_node_(numa_node) {} |
159 | |
160 | ~BasicCPUAllocator() override {} |
161 | |
162 | void* Alloc(size_t alignment, size_t num_bytes, |
163 | size_t* bytes_received) override; |
164 | |
165 | void Free(void* ptr, size_t num_bytes) override; |
166 | |
167 | bool SupportsCoalescing() const override { return false; } |
168 | |
169 | AllocatorMemoryType GetMemoryType() const override { |
170 | return AllocatorMemoryType::kHostPageable; |
171 | } |
172 | |
173 | private: |
174 | int numa_node_; |
175 | |
176 | TF_DISALLOW_COPY_AND_ASSIGN(BasicCPUAllocator); |
177 | }; |
178 | |
179 | } // namespace tensorflow |
180 | #endif // TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_ |
181 | |