memory_snapshot.cpp source code [pytorch/torch/csrc/cuda/memory_snapshot.cpp]

1	#include <c10/cuda/CUDACachingAllocator.h>
2	#include <torch/csrc/cuda/memory_snapshot.h>
3	#include <torch/csrc/jit/serialization/pickler.h>
4	namespace torch {
5	namespace cuda {
6
7	using c10::Dict;
8	using c10::IValue;
9	using torch::jit::Pickler;
10
11	using c10::cuda::CUDACachingAllocator::BlockInfo;
12	using c10::cuda::CUDACachingAllocator::History;
13	using c10::cuda::CUDACachingAllocator::SegmentInfo;
14
15	namespace {
16	std::string write_pickle(const IValue& v) {
17	std::vector<char> result;
18	{
19	auto writer = [&](const char* data, size_t size) {
20	result.insert(result.end(), data, data + size);
21	};
22	Pickler pickler(writer, nullptr, nullptr, nullptr, nullptr, false);
23	pickler.protocol();
24	pickler.pushIValue(v);
25	pickler.stop();
26	}
27	return std::string (result.begin(), result.end());
28	}
29	Dict<IValue, IValue> new_dict() {
30	return Dict<IValue, IValue>(c10::AnyType::get(), c10::AnyType::get());
31	}
32	c10::List<IValue> new_list() {
33	return List<IValue>(c10::AnyType::get());
34	}
35	} // namespace
36	void _record_memory_history(bool enabled, int64_t alloc_trace_max_entries) {
37	c10::cuda::CUDACachingAllocator::recordHistory(
38	enabled, nullptr, alloc_trace_max_entries, false);
39	}
40
41	std::string _memory_snapshot_pickled() {
42	IValue device_s = "device";
43	IValue address_s = "address";
44	IValue total_size_s = "total_size";
45	IValue allocated_size_s = "allocated_size";
46	IValue active_size_s = "active_size";
47	IValue requested_size_s = "requested_size";
48	IValue stream_s = "stream";
49	IValue segment_type_s = "segment_type";
50	IValue large_s = "large";
51	IValue small_s = "small";
52	IValue size_s = "size";
53	IValue state_s = "state";
54	IValue active_allocated_s = "active_allocated";
55	IValue active_pending_free_s = "active_pending_free";
56	IValue inactive_s = "inactive";
57	IValue addr_s = "addr";
58	IValue real_size_s = "real_size";
59	IValue filename_s = "filename";
60	IValue name_s = "name";
61	IValue line_s = "line";
62	IValue frames_s = "frames";
63	IValue history_s = "history";
64	IValue blocks_s = "blocks";
65
66	auto empty_frames = new_list();
67
68	const auto segmentInfoToDict = [&](const SegmentInfo& segmentInfo) {
69	auto segmentDict = new_dict();
70	segmentDict.insert(device_s, segmentInfo.device);
71	segmentDict.insert(address_s, segmentInfo.address);
72	segmentDict.insert(total_size_s, segmentInfo.total_size);
73	segmentDict.insert(allocated_size_s, segmentInfo.allocated_size);
74	segmentDict.insert(active_size_s, segmentInfo.active_size);
75	segmentDict.insert(requested_size_s, segmentInfo.requested_size);
76	segmentDict.insert(stream_s, int64_t(segmentInfo.stream));
77	segmentDict.insert(
78	segment_type_s, (segmentInfo.is_large ? large_s : small_s));
79
80	auto blocks = new_list();
81	for (const auto& blockInfo : segmentInfo.blocks) {
82	auto blockDict = new_dict();
83	blockDict.insert(size_s, blockInfo.size);
84	blockDict.insert(requested_size_s, blockInfo.requested_size);
85	blockDict.insert(
86	state_s,
87	(blockInfo.allocated
88	? active_allocated_s
89	: (blockInfo.active ? active_pending_free_s : inactive_s)));
90	if (blockInfo.history.size()) {
91	auto history = new_list();
92	for (const History& h : blockInfo.history) {
93	auto history_entry = new_dict();
94	history_entry.insert(addr_s, (int64_t)h.addr);
95	history_entry.insert(real_size_s, (int64_t)h.real_size);
96	if (h.context) {
97	history_entry.insert(frames_s, empty_frames);
98	}
99	history.push_back(std::move(history_entry));
100	}
101	blockDict.insert(history_s, std::move(history));
102	}
103	blocks.push_back(blockDict);
104	}
105	segmentDict.insert(blocks_s, blocks);
106
107	return segmentDict;
108	};
109
110	auto snapshot = c10::cuda::CUDACachingAllocator::snapshot();
111
112	auto segments = new_list();
113	for (const auto& segmentInfo : snapshot.segments) {
114	segments.push_back(segmentInfoToDict (segmentInfo));
115	}
116
117	auto traces = new_list();
118	IValue action_s = "action";
119	IValue alloc_s = "alloc";
120	IValue free_requested_s = "free_requested";
121	IValue free_completed_s = "free_completed";
122	IValue segment_alloc_s = "segment_alloc";
123	IValue segment_free_s = "segment_free";
124	IValue snapshot_s = "snapshot";
125	IValue oom_s = "oom";
126	IValue device_free_s = "device_free";
127
128	using namespace c10::cuda::CUDACachingAllocator;
129
130	auto action_to_str = [&](TraceEntry::Action action) {
131	switch (action) {
132	case TraceEntry::ALLOC:
133	return alloc_s;
134	case TraceEntry::FREE_REQUESTED:
135	return free_requested_s;
136	case TraceEntry::FREE_COMPLETED:
137	return free_completed_s;
138	case TraceEntry::SEGMENT_ALLOC:
139	return segment_alloc_s;
140	case TraceEntry::SEGMENT_FREE:
141	return segment_free_s;
142	case TraceEntry::OOM:
143	return oom_s;
144	case TraceEntry::SNAPSHOT:
145	return snapshot_s;
146	}
147	throw std::runtime_error ("unreachable");
148	};
149
150	for (const auto& traceInfo : snapshot.device_traces) {
151	auto trace = new_list();
152	for (const auto& te : traceInfo) {
153	auto trace_entry = new_dict();
154	trace_entry.insert(action_s, action_to_str (te.action_));
155	trace_entry.insert(
156	TraceEntry::OOM == te.action_ ? device_free_s : addr_s, te.addr_);
157	trace_entry.insert(size_s, (int64_t)te.size_);
158	trace_entry.insert(stream_s, int64_t(te.stream_));
159	trace.push_back(trace_entry);
160	}
161	traces.push_back(trace);
162	}
163
164	auto result = new_dict();
165	result.insert("segments", segments);
166	result.insert("device_traces", traces);
167	return write_pickle(result);
168	}
169	} // namespace cuda
170	} // namespace torch
171

Browse the source code of pytorch/torch/csrc/cuda/memory_snapshot.cpp