1/**
2 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "GraphScheduler.h"
18
19#include "glow/Graph/Utils.h"
20#include "glow/Support/Debug.h"
21
22#include "llvm/Support/Casting.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/raw_ostream.h"
25
26#define DEBUG_TYPE "graph-scheduler"
27
28using llvm::cast;
29using llvm::dyn_cast;
30using llvm::isa;
31
32namespace glow {
33/// \returns true if a node \p N is scheduled already.
34bool ChildMemSizeBasedScheduler::isScheduled(const Node *N) const {
35 return std::find(scheduled_.begin(), scheduled_.end(), N) != scheduled_.end();
36}
37
38/// Computes the amount of memory required to keep the result
39/// of each node.
40void ChildMemSizeBasedScheduler::computeNodeResultsMemorySize() {
41 for (auto &N : G_.getNodes()) {
42 int64_t resultSize = 0;
43 for (size_t idx = 0, e = N.getNumResults(); idx < e; ++idx) {
44 resultSize += N.getType(idx)->getSizeInBytes();
45 }
46 resultMemSize_[&N] = resultSize;
47 DEBUG_GLOW(llvm::dbgs()
48 << "ResultSize of " << N.getName() << ":" << resultSize << "\n");
49 }
50}
51
/// Computes the max amount of memory required during the computation
/// of children for each node, i.e. the peak of live result memory while
/// evaluating the node's whole input subtree. Results are stored in
/// \ref maxMemSize_, keyed by node.
void ChildMemSizeBasedScheduler::computeNodeComputationMaxMemorySize() {
  // Traverse nodes in such a way, that dependencies are processed
  // before the node using them.
  GraphPostOrderVisitor visitor(G_);
  for (auto *N : visitor.getPostOrder()) {
    // Seed with the cost of the first input: either the memory needed while
    // computing it (maxMemSize_) or the memory holding its result
    // (resultMemSize_), whichever is larger. Leaf nodes start at 0.
    // NOTE(review): unlike the loop below, input 0 is not skipped when it is
    // a Storage node and its sizes are not asserted; `operator[]`
    // default-inserts 0 for unseen keys, which happens to match what a
    // Storage-skip would produce — confirm this asymmetry is intentional.
    int64_t maxSize = (N->getNumInputs() > 0)
                          ? std::max(resultMemSize_[N->getNthInput(0)],
                                     maxMemSize_[N->getNthInput(0)])
                          : 0;
    for (size_t idx = 1, e = N->getNumInputs(); idx < e; ++idx) {
      const auto &input = N->getNthInput(idx);
      // Skip operands that do not require memory allocations for storing
      // their results.
      if (isa<Storage>(input))
        continue;
      // Post-order traversal guarantees both sizes were computed already.
      assert(resultMemSize_.count(input) > 0);
      assert(maxMemSize_.count(input) > 0);
      // While this input is being computed, the results of all previously
      // accounted inputs stay live: add this input's result footprint, then
      // take the peak against the memory needed to compute the input itself.
      maxSize += resultMemSize_[input];
      if (maxSize < maxMemSize_[input])
        maxSize = maxMemSize_[input];
    }
    maxMemSize_[N] = maxSize;
    DEBUG_GLOW(llvm::dbgs()
               << "MaxSize of " << N->getName() << ":" << maxSize << "\n");
  }
}
80
/// Order children by (maxSize - resultSize). It gives more
/// priority to the nodes that free more memory after
/// their computation. Appends \p node and its not-yet-scheduled
/// dependencies to \ref scheduled_ in a valid execution order.
void ChildMemSizeBasedScheduler::orderChildNodesAndSchedule(Node *node) {
  // Use `worklist` as a stack-like container to hold nodes for processing. Each
  // node is going to appear in the container at least twice:
  // 1) To put its children (and sometimes parents) in the worklist
  // 2) To schedule the node itself (i.e. append it to `scheduled_` list)
  // At the end of stage #1, the node is added in `readyNodes` set. By checking
  // this set the algorithm figures out if it should perform stage #1 or #2 with
  // the node it pulls out of the worklist.
  // Since the algorithm does some special handling of node's parents, a node
  // can get into the worklist more than twice. To ensure that stage #2 happens
  // only after all children of the node are scheduled, `readyNodes` keeps
  // node's worklist index instead of just node's pointer.
  std::vector<Node *> worklist;
  std::unordered_set<size_t> readyNodes;
  worklist.push_back(node);

  while (worklist.size()) {
    auto *N = worklist.back();
    worklist.pop_back();
    // Index of the slot N was just popped from. Everything pushed above this
    // slot has already been popped, so this index uniquely identifies this
    // particular occurrence of N and can be matched against `readyNodes`.
    size_t idx = worklist.size();

    // Do not explicitly schedule storage nodes.
    if (isa<Storage>(N)) {
      continue;
    }
    // Each child should be scheduled just once. Also drop any ready-marker
    // this occurrence may have left, so it cannot be matched by a later
    // occurrence that lands on the same slot index.
    if (isScheduled(N)) {
      readyNodes.erase(idx);
      continue;
    }

    // If node is marked as ready, all its children have been scheduled, so it
    // can be scheduled now too.
    if (readyNodes.count(idx)) {
      readyNodes.erase(idx);
      scheduled_.push_back(N);
      continue;
    }

    // If this node has a user which does not have any users and which does not
    // require any additional memory, schedule it here, because we don't want to
    // extend the lifetime of this value for no reason. We want to execute and
    // get rid of this node as soon as possible to reduce the memory pressure.
    for (NodeUse &use : N->getUsers()) {
      Node *user = use.getUser();
      // Users may be scattered across different functions.
      // Only accounts for the ones in that function.
      if (&G_ != user->getParent()) {
        continue;
      }
      // Skip users that have users of their own: scheduling such a node here
      // could violate its downstream dependencies.
      if (user->getNumUsers()) {
        continue;
      }
      // Schedule a node if it does not require any additional memory.
      if (resultMemSize_[user] == 0) {
        worklist.push_back(user);
      }
    }

    // Push the node again in the stack. By the time it's going to pop up again,
    // all children are going to be scheduled, so mark it as ready.
    readyNodes.insert(worklist.size());
    worklist.push_back(N);

    // Take care about node's children. They sit above N in the stack, so they
    // are processed (and scheduled) before N pops up again.
    size_t childrenStartIdx = worklist.size();
    for (int i = 0, e = N->getNumInputs(); i < e; ++i) {
      worklist.push_back(N->getNthInput(i));
    }

    // The predicate is an input as well and must be available before N runs.
    if (N->hasPredicate()) {
      worklist.push_back(N->getPredicate());
    }

    // We don't model memory dependencies, but we still need to honor them.
    // Make sure a node mutating any of its inputs happens after the last
    // non-mutating use of the operand being mutated. Some examples of such
    // nodes would be SaveNode and QuantizationProfileNode.
    for (unsigned i = 0, e = N->getNumInputs(); i < e; ++i) {
      // We don't care about inputs that are not mutated by the node.
      if (!N->isOverwrittenNthInput(i)) {
        continue;
      }
      auto mutatedInput = N->getNthInput(i);
      auto *destination = mutatedInput.getNode();
      // Push every other in-function user of the mutated value above N on the
      // stack, so it is scheduled before N (the worklist is LIFO).
      for (NodeUse &use : destination->getUsers()) {
        Node *user = use.getUser();
        if (user == N) {
          continue;
        }
        // Nodes may have users scattered across different functions.
        // Only accounts for the ones in that function.
        if (&G_ != user->getParent()) {
          continue;
        }
        worklist.push_back(user);
      }
    }

    // Order children by (maxSize - resultSize). It gives more
    // priority to the nodes that free more memory after
    // their computation.
    // Bubble sort over the freshly pushed segment: entries with a larger
    // (maxSize - resultSize) drift toward the top of the stack, so they are
    // popped — and therefore scheduled — earlier.
    for (size_t j = childrenStartIdx, e = worklist.size(); j < e; ++j) {
      for (size_t i = j; i > childrenStartIdx; --i) {
        auto &currentChild = worklist[i];
        auto &prevChild = worklist[i - 1];
        if (maxMemSize_[currentChild] - resultMemSize_[currentChild] <=
            maxMemSize_[prevChild] - resultMemSize_[prevChild]) {
          std::swap(currentChild, prevChild);
        }
      }
    }

    DEBUG_GLOW(llvm::dbgs() << "\nAbout to schedule children of "
                            << N->getName() << "\n";
               llvm::dbgs() << "Children are:\n");
    DEBUG_GLOW(
        for (size_t i = childrenStartIdx, e = worklist.size(); i < e; ++i) {
          auto &child = worklist[i];
          llvm::dbgs() << "Child " << child->getName() << ": "
                       << maxMemSize_[child] - resultMemSize_[child] << "\n";
        });
  }
}
210
211void ChildMemSizeBasedScheduler::scheduleNodes() {
212 /// Try to schedule all root nodes.
213 for (auto &N : G_.getNodes()) {
214 if (N.getNumUsers() == 0)
215 orderChildNodesAndSchedule(&N);
216 }
217}
218
219void ChildMemSizeBasedScheduler::schedule() {
220 computeNodeResultsMemorySize();
221 computeNodeComputationMaxMemorySize();
222 scheduleNodes();
223}
224} // namespace glow
225