1/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16// Classes to maintain a static registry of memory allocator factories.
17#ifndef TENSORFLOW_TSL_FRAMEWORK_ALLOCATOR_REGISTRY_H_
18#define TENSORFLOW_TSL_FRAMEWORK_ALLOCATOR_REGISTRY_H_
19
#include <memory>
#include <string>
#include <vector>

#include "tensorflow/tsl/framework/allocator.h"
#include "tensorflow/tsl/platform/macros.h"
#include "tensorflow/tsl/platform/mutex.h"
#include "tensorflow/tsl/platform/numa.h"
27
// Forward declaration only: tensorflow::ProcessState is named as a friend of
// tsl::AllocatorFactoryRegistry below, but its definition is not visible to
// this header.
namespace tensorflow {
class ProcessState;
}  // namespace tensorflow
33
34namespace tsl {
35
36class AllocatorFactory {
37 public:
38 virtual ~AllocatorFactory() {}
39
40 // Returns true if the factory will create a functionally different
41 // SubAllocator for different (legal) values of numa_node.
42 virtual bool NumaEnabled() { return false; }
43
44 // Create an Allocator.
45 virtual Allocator* CreateAllocator() = 0;
46
47 // Create a SubAllocator. If NumaEnabled() is true, then returned SubAllocator
48 // will allocate memory local to numa_node. If numa_node == kNUMANoAffinity
49 // then allocated memory is not specific to any NUMA node.
50 virtual SubAllocator* CreateSubAllocator(int numa_node) = 0;
51};
52
53// ProcessState is defined in a package that cannot be a dependency of
54// framework. This definition allows us to access the one method we need.
55class ProcessStateInterface {
56 public:
57 virtual ~ProcessStateInterface() {}
58 virtual Allocator* GetCPUAllocator(int numa_node) = 0;
59};
60
61// A singleton registry of AllocatorFactories.
62//
63// Allocators should be obtained through ProcessState or cpu_allocator()
64// (deprecated), not directly through this interface. The purpose of this
65// registry is to allow link-time discovery of multiple AllocatorFactories among
66// which ProcessState will obtain the best fit at startup.
67class AllocatorFactoryRegistry {
68 public:
69 AllocatorFactoryRegistry() {}
70 ~AllocatorFactoryRegistry() {}
71
72 void Register(const char* source_file, int source_line, const string& name,
73 int priority, AllocatorFactory* factory);
74
75 // Returns 'best fit' Allocator. Find the factory with the highest priority
76 // and return an allocator constructed by it. If multiple factories have
77 // been registered with the same priority, picks one by unspecified criteria.
78 Allocator* GetAllocator();
79
80 // Returns 'best fit' SubAllocator. First look for the highest priority
81 // factory that is NUMA-enabled. If none is registered, fall back to the
82 // highest priority non-NUMA-enabled factory. If NUMA-enabled, return a
83 // SubAllocator specific to numa_node, otherwise return a NUMA-insensitive
84 // SubAllocator.
85 SubAllocator* GetSubAllocator(int numa_node);
86
87 // Returns the singleton value.
88 static AllocatorFactoryRegistry* singleton();
89
90 ProcessStateInterface* process_state() const { return process_state_; }
91
92 protected:
93 friend class tensorflow::ProcessState;
94 ProcessStateInterface* process_state_ = nullptr;
95
96 private:
97 mutex mu_;
98 bool first_alloc_made_ = false;
99 struct FactoryEntry {
100 const char* source_file;
101 int source_line;
102 string name;
103 int priority;
104 std::unique_ptr<AllocatorFactory> factory;
105 std::unique_ptr<Allocator> allocator;
106 // Index 0 corresponds to kNUMANoAffinity, other indices are (numa_node +
107 // 1).
108 std::vector<std::unique_ptr<SubAllocator>> sub_allocators;
109 };
110 std::vector<FactoryEntry> factories_ TF_GUARDED_BY(mu_);
111
112 // Returns any FactoryEntry registered under 'name' and 'priority',
113 // or 'nullptr' if none found.
114 const FactoryEntry* FindEntry(const string& name, int priority) const
115 TF_EXCLUSIVE_LOCKS_REQUIRED(mu_);
116
117 TF_DISALLOW_COPY_AND_ASSIGN(AllocatorFactoryRegistry);
118};
119
120class AllocatorFactoryRegistration {
121 public:
122 AllocatorFactoryRegistration(const char* file, int line, const string& name,
123 int priority, AllocatorFactory* factory) {
124 AllocatorFactoryRegistry::singleton()->Register(file, line, name, priority,
125 factory);
126 }
127};
128
// Declares a static AllocatorFactoryRegistration named
// allocator_factory_reg_<ctr>. 'factory' is spliced directly after 'new', so
// it must be something that can legally follow 'new' (e.g. a factory class
// name).
#define REGISTER_MEM_ALLOCATOR_UNIQ(ctr, file, line, name, priority, factory) \
  static AllocatorFactoryRegistration allocator_factory_reg_##ctr(            \
      file, line, name, priority, new factory)

// Extra level of macro expansion so that 'ctr' (e.g. __COUNTER__) is fully
// expanded before REGISTER_MEM_ALLOCATOR_UNIQ token-pastes it.
#define REGISTER_MEM_ALLOCATOR_UNIQ_HELPER(ctr, file, line, name, priority, \
                                           factory)                         \
  REGISTER_MEM_ALLOCATOR_UNIQ(ctr, file, line, name, priority, factory)

// Entry point: registers 'factory' under 'name' with 'priority', recording
// the current file and line and using __COUNTER__ to give each registration
// object a distinct identifier.
#define REGISTER_MEM_ALLOCATOR(name, priority, factory)                     \
  REGISTER_MEM_ALLOCATOR_UNIQ_HELPER(__COUNTER__, __FILE__, __LINE__, name, \
                                     priority, factory)
140
141} // namespace tsl
142
143#endif // TENSORFLOW_TSL_FRAMEWORK_ALLOCATOR_REGISTRY_H_
144