process_state.h source code [tensorflow/tensorflow/core/common_runtime/process_state.h]

/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
15
16	#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_PROCESS_STATE_H_
17	#define TENSORFLOW_CORE_COMMON_RUNTIME_PROCESS_STATE_H_
18
#include <array>
#include <atomic>
#include <functional>
#include <map>
#include <unordered_map>
#include <vector>

#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/allocator_registry.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/thread_annotations.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/protobuf/config.pb.h"
30
31	namespace tensorflow {
32
33	class PoolAllocator;
34
35	// Singleton that manages per-process state, e.g. allocation of
36	// shared resources.
37	class ProcessState : public ProcessStateInterface {
38	public:
39	static ProcessState* singleton();
40
41	// Descriptor for memory allocation attributes, used by optional
42	// runtime correctness analysis logic.
43	struct MemDesc {
44	enum MemLoc { CPU, GPU };
45	MemLoc loc;
46	int dev_index;
47	bool gpu_registered;
48	bool nic_registered;
49	MemDesc()
50	: loc(CPU),
51	dev_index(`0`),
52	gpu_registered(false),
53	nic_registered(false) {}
54	string DebugString();
55	};
56
57	// If NUMA Allocators are desired, call this before calling any
58	// Allocator accessor.
59	void EnableNUMA() { numa_enabled_ = true; }
60
61	// Returns what we know about the memory at ptr.
62	// If we know nothing, it's called CPU 0 with no other attributes.
63	MemDesc PtrType(const void* ptr);
64
65	// Returns the one CPUAllocator used for the given numa_node.
66	// Treats numa_node == kNUMANoAffinity as numa_node == 0.
67	Allocator* GetCPUAllocator(int numa_node) override;
68
69	// Registers alloc visitor for the CPU allocator(s).
70	// REQUIRES: must be called before GetCPUAllocator.
71	void AddCPUAllocVisitor(SubAllocator::Visitor v);
72
73	// Registers free visitor for the CPU allocator(s).
74	// REQUIRES: must be called before GetCPUAllocator.
75	void AddCPUFreeVisitor(SubAllocator::Visitor v);
76
77	typedef std::unordered_map<const void*, MemDesc> MDMap;
78
79	protected:
80	ProcessState();
81	virtual ~ProcessState() {}
82	friend class GPUProcessState;
83	friend class PluggableDeviceProcessState;
84
85	// If these flags need to be runtime configurable consider adding
86	// them to ConfigProto.
87	static constexpr bool FLAGS_brain_mem_reg_gpu_dma = true;
88	static constexpr bool FLAGS_brain_gpu_record_mem_types = false;
89
90	// Helper method for unit tests to reset the ProcessState singleton by
91	// cleaning up everything. Never use in production.
92	void TestOnlyReset();
93
94	static ProcessState* instance_;
95	bool numa_enabled_;
96
97	mutex mu_;
98
99	// Indexed by numa_node. If we want numa-specific allocators AND a
100	// non-specific allocator, maybe should index by numa_node+1.
101	std::vector<Allocator*> cpu_allocators_ TF_GUARDED_BY(mu_);
102	std::vector<SubAllocator::Visitor> cpu_alloc_visitors_ TF_GUARDED_BY(mu_);
103	std::vector<SubAllocator::Visitor> cpu_free_visitors_ TF_GUARDED_BY(mu_);
104
105	// A cache of cpu allocators indexed by a numa node. Used as a fast path to
106	// get CPU allocator by numa node id without locking the mutex. We can't use
107	// `cpu_allocators_` storage in the lock-free path because concurrent
108	// operation can deallocate the vector storage.
109	std::atomic<int> cpu_allocators_cached_;
110	std::array<Allocator*, `8`> cpu_allocators_cache_;
111
112	// Optional RecordingAllocators that wrap the corresponding
113	// Allocators for runtime attribute use analysis.
114	MDMap mem_desc_map_;
115	std::vector<Allocator*> cpu_al_ TF_GUARDED_BY(mu_);
116	};
117
118	namespace internal {
119	class RecordingAllocator : public Allocator {
120	public:
121	RecordingAllocator(ProcessState::MDMap* mm, Allocator* a,
122	ProcessState::MemDesc md, mutex* mu)
123	: mm_(mm), a_(a), md_(md), mu_(mu) {}
124
125	string Name() override { return a_->Name(); }
126	void* AllocateRaw(size_t alignment, size_t num_bytes) override {
127	void* p = a_->AllocateRaw(alignment, num_bytes);
128	mutex_lock l(*mu_);
129	(*mm_)[p] = md_;
130	return p;
131	}
132	void DeallocateRaw(void* p) override {
133	mutex_lock l(*mu_);
134	auto iter = mm_->find(p);
135	mm_->erase(iter);
136	a_->DeallocateRaw(p);
137	}
138	bool TracksAllocationSizes() const override {
139	return a_->TracksAllocationSizes();
140	}
141	size_t RequestedSize(const void* p) const override {
142	return a_->RequestedSize(p);
143	}
144	size_t AllocatedSize(const void* p) const override {
145	return a_->AllocatedSize(p);
146	}
147	absl::optional<AllocatorStats> GetStats() override { return a_->GetStats(); }
148	bool ClearStats() override { return a_->ClearStats(); }
149
150	AllocatorMemoryType GetMemoryType() const override {
151	return a_->GetMemoryType();
152	}
153
154	ProcessState::MDMap* mm_; // not owned
155	Allocator* a_; // not owned
156	ProcessState::MemDesc md_;
157	mutex* mu_;
158	};
159	} // namespace internal
160	} // namespace tensorflow
161	#endif // TENSORFLOW_CORE_COMMON_RUNTIME_PROCESS_STATE_H_
162

Browse the source code of tensorflow/tensorflow/core/common_runtime/process_state.h