1 | #include <c10/cuda/CUDACachingAllocator.h> |
2 | #include <torch/csrc/cuda/memory_snapshot.h> |
3 | #include <torch/csrc/jit/serialization/pickler.h> |
4 | namespace torch { |
5 | namespace cuda { |
6 | |
7 | using c10::Dict; |
8 | using c10::IValue; |
9 | using torch::jit::Pickler; |
10 | |
11 | using c10::cuda::CUDACachingAllocator::BlockInfo; |
12 | using c10::cuda::CUDACachingAllocator::History; |
13 | using c10::cuda::CUDACachingAllocator::SegmentInfo; |
14 | |
15 | namespace { |
16 | std::string write_pickle(const IValue& v) { |
17 | std::vector<char> result; |
18 | { |
19 | auto writer = [&](const char* data, size_t size) { |
20 | result.insert(result.end(), data, data + size); |
21 | }; |
22 | Pickler pickler(writer, nullptr, nullptr, nullptr, nullptr, false); |
23 | pickler.protocol(); |
24 | pickler.pushIValue(v); |
25 | pickler.stop(); |
26 | } |
27 | return std::string(result.begin(), result.end()); |
28 | } |
29 | Dict<IValue, IValue> new_dict() { |
30 | return Dict<IValue, IValue>(c10::AnyType::get(), c10::AnyType::get()); |
31 | } |
32 | c10::List<IValue> new_list() { |
33 | return List<IValue>(c10::AnyType::get()); |
34 | } |
35 | } // namespace |
36 | void _record_memory_history(bool enabled, int64_t alloc_trace_max_entries) { |
37 | c10::cuda::CUDACachingAllocator::recordHistory( |
38 | enabled, nullptr, alloc_trace_max_entries, false); |
39 | } |
40 | |
41 | std::string _memory_snapshot_pickled() { |
42 | IValue device_s = "device" ; |
43 | IValue address_s = "address" ; |
44 | IValue total_size_s = "total_size" ; |
45 | IValue allocated_size_s = "allocated_size" ; |
46 | IValue active_size_s = "active_size" ; |
47 | IValue requested_size_s = "requested_size" ; |
48 | IValue stream_s = "stream" ; |
49 | IValue segment_type_s = "segment_type" ; |
50 | IValue large_s = "large" ; |
51 | IValue small_s = "small" ; |
52 | IValue size_s = "size" ; |
53 | IValue state_s = "state" ; |
54 | IValue active_allocated_s = "active_allocated" ; |
55 | IValue active_pending_free_s = "active_pending_free" ; |
56 | IValue inactive_s = "inactive" ; |
57 | IValue addr_s = "addr" ; |
58 | IValue real_size_s = "real_size" ; |
59 | IValue filename_s = "filename" ; |
60 | IValue name_s = "name" ; |
61 | IValue line_s = "line" ; |
62 | IValue frames_s = "frames" ; |
63 | IValue history_s = "history" ; |
64 | IValue blocks_s = "blocks" ; |
65 | |
66 | auto empty_frames = new_list(); |
67 | |
68 | const auto segmentInfoToDict = [&](const SegmentInfo& segmentInfo) { |
69 | auto segmentDict = new_dict(); |
70 | segmentDict.insert(device_s, segmentInfo.device); |
71 | segmentDict.insert(address_s, segmentInfo.address); |
72 | segmentDict.insert(total_size_s, segmentInfo.total_size); |
73 | segmentDict.insert(allocated_size_s, segmentInfo.allocated_size); |
74 | segmentDict.insert(active_size_s, segmentInfo.active_size); |
75 | segmentDict.insert(requested_size_s, segmentInfo.requested_size); |
76 | segmentDict.insert(stream_s, int64_t(segmentInfo.stream)); |
77 | segmentDict.insert( |
78 | segment_type_s, (segmentInfo.is_large ? large_s : small_s)); |
79 | |
80 | auto blocks = new_list(); |
81 | for (const auto& blockInfo : segmentInfo.blocks) { |
82 | auto blockDict = new_dict(); |
83 | blockDict.insert(size_s, blockInfo.size); |
84 | blockDict.insert(requested_size_s, blockInfo.requested_size); |
85 | blockDict.insert( |
86 | state_s, |
87 | (blockInfo.allocated |
88 | ? active_allocated_s |
89 | : (blockInfo.active ? active_pending_free_s : inactive_s))); |
90 | if (blockInfo.history.size()) { |
91 | auto history = new_list(); |
92 | for (const History& h : blockInfo.history) { |
93 | auto history_entry = new_dict(); |
94 | history_entry.insert(addr_s, (int64_t)h.addr); |
95 | history_entry.insert(real_size_s, (int64_t)h.real_size); |
96 | if (h.context) { |
97 | history_entry.insert(frames_s, empty_frames); |
98 | } |
99 | history.push_back(std::move(history_entry)); |
100 | } |
101 | blockDict.insert(history_s, std::move(history)); |
102 | } |
103 | blocks.push_back(blockDict); |
104 | } |
105 | segmentDict.insert(blocks_s, blocks); |
106 | |
107 | return segmentDict; |
108 | }; |
109 | |
110 | auto snapshot = c10::cuda::CUDACachingAllocator::snapshot(); |
111 | |
112 | auto segments = new_list(); |
113 | for (const auto& segmentInfo : snapshot.segments) { |
114 | segments.push_back(segmentInfoToDict(segmentInfo)); |
115 | } |
116 | |
117 | auto traces = new_list(); |
118 | IValue action_s = "action" ; |
119 | IValue alloc_s = "alloc" ; |
120 | IValue free_requested_s = "free_requested" ; |
121 | IValue free_completed_s = "free_completed" ; |
122 | IValue segment_alloc_s = "segment_alloc" ; |
123 | IValue segment_free_s = "segment_free" ; |
124 | IValue snapshot_s = "snapshot" ; |
125 | IValue oom_s = "oom" ; |
126 | IValue device_free_s = "device_free" ; |
127 | |
128 | using namespace c10::cuda::CUDACachingAllocator; |
129 | |
130 | auto action_to_str = [&](TraceEntry::Action action) { |
131 | switch (action) { |
132 | case TraceEntry::ALLOC: |
133 | return alloc_s; |
134 | case TraceEntry::FREE_REQUESTED: |
135 | return free_requested_s; |
136 | case TraceEntry::FREE_COMPLETED: |
137 | return free_completed_s; |
138 | case TraceEntry::SEGMENT_ALLOC: |
139 | return segment_alloc_s; |
140 | case TraceEntry::SEGMENT_FREE: |
141 | return segment_free_s; |
142 | case TraceEntry::OOM: |
143 | return oom_s; |
144 | case TraceEntry::SNAPSHOT: |
145 | return snapshot_s; |
146 | } |
147 | throw std::runtime_error("unreachable" ); |
148 | }; |
149 | |
150 | for (const auto& traceInfo : snapshot.device_traces) { |
151 | auto trace = new_list(); |
152 | for (const auto& te : traceInfo) { |
153 | auto trace_entry = new_dict(); |
154 | trace_entry.insert(action_s, action_to_str(te.action_)); |
155 | trace_entry.insert( |
156 | TraceEntry::OOM == te.action_ ? device_free_s : addr_s, te.addr_); |
157 | trace_entry.insert(size_s, (int64_t)te.size_); |
158 | trace_entry.insert(stream_s, int64_t(te.stream_)); |
159 | trace.push_back(trace_entry); |
160 | } |
161 | traces.push_back(trace); |
162 | } |
163 | |
164 | auto result = new_dict(); |
165 | result.insert("segments" , segments); |
166 | result.insert("device_traces" , traces); |
167 | return write_pickle(result); |
168 | } |
169 | } // namespace cuda |
170 | } // namespace torch |
171 | |