1#pragma once
2
3#include <algorithm>
4#include <deque>
5#include <memory>
6#include <mutex>
7
8#include <c10/util/Exception.h>
9#include <c10/util/SmallVector.h>
10#include <c10/util/flat_hash_map.h>
11
12namespace c10 {
13
14/*
15 * Given a sequence of allocations in a thread, AllocationPlan records
16 * 1. size of each allocation
17 * 2. Lifetime of each allocation.
18 * 3. allocation offsets: Memory offset for each allocation in a single blob of
19 * memory
20 * 4. Total size of a blob of memory required to satisfy all the allocations.
21 */
22class C10_API AllocationPlan {
23 private:
24 // Records size of each allocation by their sequential allocation ids.
25 std::vector<uint64_t> allocation_sizes;
26 // This maps one allocation id (X) to another allocation id (Y).
27 // Allocation X is alive until allocation Y. From allocation Y onwards
28 // allocation X is not referenced.
29 // Thus Y is the id of the first allocation after X is freed.
30 // NB: When an allocation is recorded, along with recording its size,
31 // we also set the lifetime to be numeric_limits::max()
32 // This is to track allocations that are made during the scope of
33 // profiling but were not freed until after the scope ended.
34 // Such allocations are not managed by profiling allocator.
35 std::vector<uint64_t> allocation_lifetimes;
36 // Maps an allocation to some offset in a blob of memory.
37 std::vector<uint64_t> allocation_offsets;
38 uint64_t total_size{0};
39 void clear();
40 friend class AllocationPlanner;
41 friend class CPUProfilingAllocator;
42};
43
44/*
45 * Map of memory ptr to allocation id. This is auxiliary information only
46 * used to establish lifetime of allocations.
47 */
48class C10_API AllocationPlanner {
49 private:
50 AllocationPlan* allocation_plan_{nullptr};
51 // Maps allocated ptr to its allocation id.
52 // This is used when freeing the memory to look up the allocation id
53 // in order to establish the lifetime of a particular allocation.
54 ska::flat_hash_map<const void*, uint64_t> allocation_ptr_to_id_;
55 uint64_t allocation_id_{0};
56 bool validation_mode_{false};
57
58 bool validate_allocation(const uint64_t size, const void* ptr);
59 bool validate_free(const void* ptr);
60
61 public:
62 bool validation_success{true};
63
64 AllocationPlanner() = delete;
65 AllocationPlanner(AllocationPlan* plan, bool validate = false)
66 : allocation_plan_(plan), validation_mode_(validate) {}
67 void record_allocation(const uint64_t size, const void* ptr);
68 void record_free(const void* ptr);
69 void formulate_plan();
70 void clear();
71};
72
73// NOT THREAD SAFE profiling allocator.
74class C10_API CPUProfilingAllocator {
75 private:
76 const AllocationPlan* plan_{nullptr};
77 uint64_t allocation_id_{0};
78 uint64_t current_size_{0};
79 void* blob_{nullptr};
80 ska::flat_hash_map<const void*, uint64_t> allocation_ptr_to_id_;
81
82 public:
83 ~CPUProfilingAllocator();
84 void set_plan(const AllocationPlan* plan);
85 void unset_plan();
86 void* allocate(const size_t bytes);
87 void free(void* const ptr);
88};
89
90/*
91 * Usage: Profile allocations made by one run of the model.
92 * AllocationPlan plan;
93 * {
94 * WithProfileAllocationGuard profile_guard(&plan);
95 * module.forward(...);
96 * }
97 * plan now contains allocation plan.
98 */
99class C10_API WithProfileAllocationsGuard {
100 public:
101 WithProfileAllocationsGuard(AllocationPlan* plan);
102 ~WithProfileAllocationsGuard();
103
104 private:
105 std::unique_ptr<AllocationPlanner> planner_;
106};
107
108/*
109 * Usage: Validate allocation plan made with WithProfileAllocationGuard
110 * bool plan_validation_success, success = true;
111 * for (some number of representative inputs)
112 * {
113 * WithValidateAllocationPlanGuard(&plan, &plan_validation_success);
114 * module.forward(...);
115 * success = success && plan_validation_success;
116 * }
117 * success == true means allocations are according to plan
118 * else for some inputs allocation pattern changed.
119 */
120class C10_API WithValidateAllocationPlanGuard {
121 public:
122 WithValidateAllocationPlanGuard(AllocationPlan* plan, bool* success);
123 ~WithValidateAllocationPlanGuard();
124
125 private:
126 std::unique_ptr<AllocationPlanner> planner_;
127 bool* success_;
128};
129
130AllocationPlanner* GetThreadLocalAllocationPlanner();
131
132/*
133 * Usage: Allocate tensors accordingly to allocation plan
134 * First make allocation plan.
135 * See WithProfileAllocationsGuard usage.
136 * Second validate allocation plan.
137 * See WithValidateAllocationPlanGuard usage.
138 * CPUProfilingAllocator profiling_allocator;
139 * {
140 * WithProfilingAllocatorGuard allocator_guard(&profiling_allocator, &plan);
141 * module.forward(...);
142 * }
143 */
144class C10_API WithProfilingAllocatorGuard {
145 public:
146 WithProfilingAllocatorGuard(
147 CPUProfilingAllocator* allocator,
148 const AllocationPlan* plan);
149 ~WithProfilingAllocatorGuard();
150};
151
152CPUProfilingAllocator* GetThreadLocalProfilingAllocator();
153
154} // namespace c10
155