CPUProfilingAllocator.h source code [pytorch/c10/mobile/CPUProfilingAllocator.h]

1	#pragma once
2
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <deque>
#include <memory>
#include <mutex>
#include <vector>

#include <c10/util/Exception.h>
#include <c10/util/SmallVector.h>
#include <c10/util/flat_hash_map.h>
11
12	namespace c10 {
13
14	/*
15	* Given a sequence of allocations in a thread, AllocationPlan records
16	* 1. size of each allocation
17	* 2. Lifetime of each allocation.
18	* 3. allocation offsets: Memory offset for each allocation in a single blob of
19	* memory
20	* 4. Total size of a blob of memory required to satisfy all the allocations.
21	*/
22	class C10_API AllocationPlan {
23	private:
24	// Records size of each allocation by their sequential allocation ids.
25	std::vector<uint64_t> allocation_sizes;
26	// This maps one allocation id (X) to another allocation id (Y).
27	// Allocation X is alive until allocation Y. From allocation Y onwards
28	// allocation X is not referenced.
29	// Thus Y is the id of the first allocation after X is freed.
30	// NB: When an allocation is recorded, along with recording its size,
31	// we also set the lifetime to be numeric_limits::max()
32	// This is to track allocations that are made during the scope of
33	// profiling but were not freed until after the scope ended.
34	// Such allocations are not managed by profiling allocator.
35	std::vector<uint64_t> allocation_lifetimes;
36	// Maps an allocation to some offset in a blob of memory.
37	std::vector<uint64_t> allocation_offsets;
38	uint64_t total_size{`0`};
39	void clear();
40	friend class AllocationPlanner;
41	friend class CPUProfilingAllocator;
42	};
43
44	/*
45	* Map of memory ptr to allocation id. This is auxiliary information only
46	* used to establish lifetime of allocations.
47	*/
48	class C10_API AllocationPlanner {
49	private:
50	AllocationPlan* allocation_plan_{nullptr};
51	// Maps allocated ptr to its allocation id.
52	// This is used when freeing the memory to look up the allocation id
53	// in order to establish the lifetime of a particular allocation.
54	ska::flat_hash_map<const void*, uint64_t> allocation_ptr_to_id_;
55	uint64_t allocation_id_{`0`};
56	bool validation_mode_{false};
57
58	bool validate_allocation(const uint64_t size, const void* ptr);
59	bool validate_free(const void* ptr);
60
61	public:
62	bool validation_success{true};
63
64	AllocationPlanner() = delete;
65	AllocationPlanner(AllocationPlan* plan, bool validate = false)
66	: allocation_plan_(plan), validation_mode_(validate) {}
67	void record_allocation(const uint64_t size, const void* ptr);
68	void record_free(const void* ptr);
69	void formulate_plan();
70	void clear();
71	};
72
73	// NOT THREAD SAFE profiling allocator.
74	class C10_API CPUProfilingAllocator {
75	private:
76	const AllocationPlan* plan_{nullptr};
77	uint64_t allocation_id_{`0`};
78	uint64_t current_size_{`0`};
79	void* blob_{nullptr};
80	ska::flat_hash_map<const void*, uint64_t> allocation_ptr_to_id_;
81
82	public:
83	~CPUProfilingAllocator();
84	void set_plan(const AllocationPlan* plan);
85	void unset_plan();
86	void* allocate(const size_t bytes);
87	void free(void* const ptr);
88	};
89
90	/*
91	* Usage: Profile allocations made by one run of the model.
92	* AllocationPlan plan;
93	* {
94	* WithProfileAllocationGuard profile_guard(&plan);
95	* module.forward(...);
96	* }
97	* plan now contains allocation plan.
98	*/
99	class C10_API WithProfileAllocationsGuard {
100	public:
101	WithProfileAllocationsGuard(AllocationPlan* plan);
102	~WithProfileAllocationsGuard();
103
104	private:
105	std::unique_ptr<AllocationPlanner> planner_;
106	};
107
108	/*
109	* Usage: Validate allocation plan made with WithProfileAllocationGuard
110	* bool plan_validation_success, success = true;
111	* for (some number of representative inputs)
112	* {
113	* WithValidateAllocationPlanGuard(&plan, &plan_validation_success);
114	* module.forward(...);
115	* success = success && plan_validation_success;
116	* }
117	* success == true means allocations are according to plan
118	* else for some inputs allocation pattern changed.
119	*/
120	class C10_API WithValidateAllocationPlanGuard {
121	public:
122	WithValidateAllocationPlanGuard(AllocationPlan* plan, bool* success);
123	~WithValidateAllocationPlanGuard();
124
125	private:
126	std::unique_ptr<AllocationPlanner> planner_;
127	bool* success_;
128	};
129
130	AllocationPlanner* GetThreadLocalAllocationPlanner();
131
132	/*
133	* Usage: Allocate tensors accordingly to allocation plan
134	* First make allocation plan.
135	* See WithProfileAllocationsGuard usage.
136	* Second validate allocation plan.
137	* See WithValidateAllocationPlanGuard usage.
138	* CPUProfilingAllocator profiling_allocator;
139	* {
140	* WithProfilingAllocatorGuard allocator_guard(&profiling_allocator, &plan);
141	* module.forward(...);
142	* }
143	*/
144	class C10_API WithProfilingAllocatorGuard {
145	public:
146	WithProfilingAllocatorGuard(
147	CPUProfilingAllocator* allocator,
148	const AllocationPlan* plan);
149	~WithProfilingAllocatorGuard();
150	};
151
152	CPUProfilingAllocator* GetThreadLocalProfilingAllocator();
153
154	} // namespace c10
155

Browse the source code of pytorch/c10/mobile/CPUProfilingAllocator.h