#include <c10/cuda/CUDACachingAllocator.h>
#include <torch/csrc/cuda/memory_snapshot.h>
#include <torch/csrc/jit/serialization/pickler.h>

namespace torch {
namespace cuda {

using c10::Dict;
using c10::IValue;
using torch::jit::Pickler;

using c10::cuda::CUDACachingAllocator::BlockInfo;
using c10::cuda::CUDACachingAllocator::History;
using c10::cuda::CUDACachingAllocator::SegmentInfo;

namespace {
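// Serialize an IValue into a pickle-format string, streaming the bytes
// produced by the TorchScript Pickler through the writer callback.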
std::string write_pickle(const IValue& v) {
  std::vector<char> result;
  {
    auto writer = [&](const char* data, size_t size) {
      result.insert(result.end(), data, data + size);
    };
    Pickler pickler(writer, nullptr, nullptr, nullptr, nullptr, false);
    pickler.protocol();
    pickler.pushIValue(v);
    pickler.stop();
  }
  return std::string(result.begin(), result.end());
}
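// Generic containers keyed and valued by AnyType so that heterogeneous
// IValues (ints, strings, nested dicts and lists) can be stored together.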
Dict<IValue, IValue> new_dict() {
  return Dict<IValue, IValue>(c10::AnyType::get(), c10::AnyType::get());
}
c10::List<IValue> new_list() {
  return c10::List<IValue>(c10::AnyType::get());
}
} // namespace
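// Toggle allocation-history recording in the CUDA caching allocator;
// no context recorder is installed (nullptr) and the trace buffer is
// capped at alloc_trace_max_entries entries.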
void _record_memory_history(bool enabled, int64_t alloc_trace_max_entries) {
  c10::cuda::CUDACachingAllocator::recordHistory(
      enabled, nullptr, alloc_trace_max_entries, false);
}

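// Build a nested Dict/List (IValue) description of the allocator state
// (segments, their blocks, per-block history, and per-device trace events)
// and return it as a pickled string.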
std::string _memory_snapshot_pickled() {
  IValue device_s = "device";
  IValue address_s = "address";
  IValue total_size_s = "total_size";
  IValue allocated_size_s = "allocated_size";
  IValue active_size_s = "active_size";
  IValue requested_size_s = "requested_size";
  IValue stream_s = "stream";
  IValue segment_type_s = "segment_type";
  IValue large_s = "large";
  IValue small_s = "small";
  IValue size_s = "size";
  IValue state_s = "state";
  IValue active_allocated_s = "active_allocated";
  IValue active_pending_free_s = "active_pending_free";
  IValue inactive_s = "inactive";
  IValue addr_s = "addr";
  IValue real_size_s = "real_size";
  IValue filename_s = "filename";
  IValue name_s = "name";
  IValue line_s = "line";
  IValue frames_s = "frames";
  IValue history_s = "history";
  IValue blocks_s = "blocks";

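  // Shared placeholder for the "frames" field: a captured context is marked
  // with an empty list; stack frames are not decoded in this function.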
  auto empty_frames = new_list();

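  // Convert one allocator segment into a dict of its metadata plus a list of
  // block dicts (size, allocation state, and optional allocation history).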
  const auto segmentInfoToDict = [&](const SegmentInfo& segmentInfo) {
    auto segmentDict = new_dict();
    segmentDict.insert(device_s, segmentInfo.device);
    segmentDict.insert(address_s, segmentInfo.address);
    segmentDict.insert(total_size_s, segmentInfo.total_size);
    segmentDict.insert(allocated_size_s, segmentInfo.allocated_size);
    segmentDict.insert(active_size_s, segmentInfo.active_size);
    segmentDict.insert(requested_size_s, segmentInfo.requested_size);
    segmentDict.insert(stream_s, int64_t(segmentInfo.stream));
    segmentDict.insert(
        segment_type_s, (segmentInfo.is_large ? large_s : small_s));

    auto blocks = new_list();
    for (const auto& blockInfo : segmentInfo.blocks) {
      auto blockDict = new_dict();
      blockDict.insert(size_s, blockInfo.size);
      blockDict.insert(requested_size_s, blockInfo.requested_size);
      blockDict.insert(
          state_s,
          (blockInfo.allocated
               ? active_allocated_s
               : (blockInfo.active ? active_pending_free_s : inactive_s)));
      if (blockInfo.history.size()) {
        auto history = new_list();
        for (const History& h : blockInfo.history) {
          auto history_entry = new_dict();
          history_entry.insert(addr_s, (int64_t)h.addr);
          history_entry.insert(real_size_s, (int64_t)h.real_size);
          if (h.context) {
            history_entry.insert(frames_s, empty_frames);
          }
          history.push_back(std::move(history_entry));
        }
        blockDict.insert(history_s, std::move(history));
      }
      blocks.push_back(blockDict);
    }
    segmentDict.insert(blocks_s, blocks);

    return segmentDict;
  };

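  // Capture the current allocator state: every segment across devices plus
  // the recorded per-device allocation traces.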
  auto snapshot = c10::cuda::CUDACachingAllocator::snapshot();

  auto segments = new_list();
  for (const auto& segmentInfo : snapshot.segments) {
    segments.push_back(segmentInfoToDict(segmentInfo));
  }

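  // String keys and a helper to map each TraceEntry action onto the name it
  // is stored under in the snapshot.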
  auto traces = new_list();
  IValue action_s = "action";
  IValue alloc_s = "alloc";
  IValue free_requested_s = "free_requested";
  IValue free_completed_s = "free_completed";
  IValue segment_alloc_s = "segment_alloc";
  IValue segment_free_s = "segment_free";
  IValue snapshot_s = "snapshot";
  IValue oom_s = "oom";
  IValue device_free_s = "device_free";

  using namespace c10::cuda::CUDACachingAllocator;

  auto action_to_str = [&](TraceEntry::Action action) {
    switch (action) {
      case TraceEntry::ALLOC:
        return alloc_s;
      case TraceEntry::FREE_REQUESTED:
        return free_requested_s;
      case TraceEntry::FREE_COMPLETED:
        return free_completed_s;
      case TraceEntry::SEGMENT_ALLOC:
        return segment_alloc_s;
      case TraceEntry::SEGMENT_FREE:
        return segment_free_s;
      case TraceEntry::OOM:
        return oom_s;
      case TraceEntry::SNAPSHOT:
        return snapshot_s;
    }
    throw std::runtime_error("unreachable");
  };

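  // One list of trace entries per device. For OOM events te.addr_ holds the
  // number of free device bytes at the time of failure, so it is stored
  // under "device_free" instead of "addr".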
  for (const auto& traceInfo : snapshot.device_traces) {
    auto trace = new_list();
    for (const auto& te : traceInfo) {
      auto trace_entry = new_dict();
      trace_entry.insert(action_s, action_to_str(te.action_));
      trace_entry.insert(
          TraceEntry::OOM == te.action_ ? device_free_s : addr_s, te.addr_);
      trace_entry.insert(size_s, (int64_t)te.size_);
      trace_entry.insert(stream_s, int64_t(te.stream_));
      trace.push_back(trace_entry);
    }
    traces.push_back(trace);
  }

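  // Top-level snapshot layout: a list of segment dicts and a list of
  // per-device traces.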
  auto result = new_dict();
  result.insert("segments", segments);
  result.insert("device_traces", traces);
  return write_pickle(result);
}
} // namespace cuda
} // namespace torch