HostManagerTest.cpp source code [glow/tests/unittests/HostManagerTest.cpp]

1	/**
2	* Copyright (c) Glow Contributors. See CONTRIBUTORS file.
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*/
16	#include "BackendTestUtils.h"
17
18	#include "glow/ExecutionContext/ExecutionContext.h"
19	#include "glow/Flags/Flags.h"
20	#include "glow/Runtime/HostManager/HostManager.h"
21
22	#include "gtest/gtest.h"
23
24	#include <future>
25	#include <thread>
26
27	using namespace glow;
28	using namespace glow::runtime;
/// Pair of two lists of uniquely-owned DAGNodes.
/// NOTE(review): this alias is not referenced in the visible part of the file;
/// confirm it is still needed.
29	using DAGNodePairTy = std::pair<std::vector<std::unique_ptr<DAGNode>>,
30	std::vector<std::unique_ptr<DAGNode>>>;
31
32	class HostManagerTest : public ::testing::TestWithParam<std::string> {
33	public:
34	void SetUp() override { backendName_ = GetParam(); }
35	std::string backendName_;
36	};
37
38	std::vector<std::unique_ptr<DeviceConfig>>
39	generateConfigs(std::string backendName, unsigned numConfigs = `1`) {
40	std::vector<std::unique_ptr<DeviceConfig>> configs;
41	for (unsigned i = `0`; i < numConfigs; i++) {
42	auto deviceConfig = glow::make_unique<DeviceConfig>(backendName);
43	deviceConfig ->deviceID = i;
44	configs.push_back(std::move(deviceConfig));
45	}
46	return configs;
47	}
48
49	std::unique_ptr<Module> setupModule(unsigned functionCount) {
50	std::unique_ptr<Module> module = glow::make_unique<Module>();
51	for (unsigned int i = `0`; i < functionCount; i++) {
52	Function *F = module ->createFunction("function" + std::to_string(i));
53	auto *X = module ->createPlaceholder(ElemKind::FloatTy, {`3`},
54	"X" + std::to_string(i), false);
55	auto *pow = F->createPow("Pow" + std::to_string(i), X, `2.0`);
56	F->createSave("save" + std::to_string(i), pow);
57	}
58	return module;
59	}
60
61	std::unique_ptr<HostManager>
62	createHostManager(llvm::StringRef backendName,
63	HostConfig hostConfig = HostConfig ()) {
64	std::vector<std::unique_ptr<DeviceConfig>> configs =
65	generateConfigs(std::string (backendName), `1`);
66	std::unique_ptr<HostManager> hostManager =
67	glow::make_unique<HostManager>(std::move(configs), hostConfig);
68	return hostManager;
69	}
70
71	Error addNetwork(HostManager *manager, std::string name) {
72	std::unique_ptr<Module> module = glow::make_unique<Module>();
73	Function *F = module ->createFunction(name);
74	auto *X =
75	module ->createPlaceholder(ElemKind::FloatTy, {`3`}, "X_" + name, false);
76	auto *pow = F->createPow("Pow_" + name, X, `2.0`);
77	F->createSave("save" + name, pow);
78
79	// Expect this to be an Error because multiple networks with the same name
80	// have been added to HostManager
81	CompilationContext cctx;
82	return manager->addNetwork(std::move(module), cctx);
83	}
84
85	void addAndRemoveNetwork(HostManager manager, unsigned* int functionNumber) {
86	std::string name = "function" + std::to_string(functionNumber);
87	ERR_TO_BOOL(addNetwork(manager, name));
88	// Removal can return an error if the network is in the process of being
89	// added. That is fine we expect it in this test.
90	ERR_TO_BOOL(manager->removeNetwork(name));
91	}
92
93	TEST_P(HostManagerTest, newHostManager) {
94	CHECK_IF_ENABLED();
95	createHostManager(backendName_);
96	}
97
98	TEST_P(HostManagerTest, addNetwork) {
99	CHECK_IF_ENABLED();
100	auto module = setupModule(`6`);
101	auto hostManager = createHostManager(backendName_);
102	CompilationContext cctx;
103	ASSERT_FALSE(ERR_TO_BOOL(hostManager ->addNetwork(std::move(module), cctx)));
104	}
105
106	TEST_P(HostManagerTest, queueOverflow) {
107	CHECK_IF_ENABLED();
108	std::unique_ptr<Module> module = glow::make_unique<Module>();
109
110	Function *F = module ->createFunction("main");
111	auto X = module ->createPlaceholder(ElemKind::FloatTy, {`10`}, "X", false*);
112	auto *pow = F->createPow("Pow1", X, `2.0`);
113	pow = F->createPow("Pow1", pow, `2.0`);
114	auto *save = F->createSave("save", pow);
115	std::vector<std::unique_ptr<ExecutionContext>> contexts;
116	for (int i = `0`; i < `100`; ++i) {
117	std::unique_ptr<ExecutionContext> context =
118	glow::make_unique<ExecutionContext>();
119	auto *XTensor = context ->getPlaceholderBindings()->allocate(X);
120	XTensor->getHandle() = {`1.`, `2.`, `3.`, `1.`, `2.`, `3.`, `1.`, `2.`, `3.`, `1.`};
121	context ->getPlaceholderBindings()->allocate(save->getPlaceholder());
122	contexts.emplace_back(std::move(context));
123	}
124
125	HostConfig hostConfig;
126	hostConfig.maxQueueSize = `1`;
127	hostConfig.maxActiveRequests = `1`;
128	auto hostManager = createHostManager(backendName_, hostConfig);
129	CompilationContext cctx;
130	ASSERT_FALSE(ERR_TO_BOOL(hostManager ->addNetwork(std::move(module), cctx)));
131
132	std::vector<std::promise<void>> requests(`100`);
133	std::list<std::future<void>> futures;
134	for (auto &r : requests) {
135	futures.emplace_back(r.get_future());
136	}
137
138	for (int i = `0`; i < `100`; ++i) {
139	auto &context = contexts [i];
140	auto &request = requests [i];
141	hostManager ->runNetwork(
142	"main", std::move(context),
143	[&request](RunIdentifierTy runID, Error err,
144	std::unique_ptr<ExecutionContext> context_) {
145	TRACE_EVENT_SCOPE(context_->getTraceContext(), TraceLevel::RUNTIME,
146	"HostManager::runNetwork");
147	ERR_TO_BOOL(std::move(err));
148	request.set_value();
149	});
150	}
151
152	for (auto &f : futures) {
153	f.wait();
154	}
155	}
156
157	TEST_P(HostManagerTest, runNetwork) {
158	CHECK_IF_ENABLED();
159	std::unique_ptr<Module> module = glow::make_unique<Module>();
160	std::unique_ptr<ExecutionContext> context =
161	glow::make_unique<ExecutionContext>();
162
163	Function *F = module ->createFunction("main");
164	auto X = module ->createPlaceholder(ElemKind::FloatTy, {`3`}, "X", false*);
165	auto *XTensor = context ->getPlaceholderBindings()->allocate(X);
166	XTensor->getHandle() = {`1.`, `2.`, `3.`};
167	auto *pow = F->createPow("Pow1", X, `2.0`);
168	auto *save = F->createSave("save", pow);
169	auto *saveTensor =
170	context ->getPlaceholderBindings()->allocate(save->getPlaceholder());
171
172	auto hostManager = createHostManager(backendName_);
173	CompilationContext cctx;
174	ASSERT_FALSE(ERR_TO_BOOL(hostManager ->addNetwork(std::move(module), cctx)));
175
176	std::promise<void> runNetwork;
177	auto ready = runNetwork.get_future();
178
179	std::unique_ptr<Error> runErr;
180	hostManager ->runNetwork("main", std::move(context),
181	[&runNetwork, &saveTensor, &context, &runErr](
182	RunIdentifierTy runID, Error err,
183	std::unique_ptr<ExecutionContext> context_) {
184	auto HX = saveTensor->getHandle();
185	EXPECT_NEAR(HX.at({`0`}), `1`, `1E-5`);
186	EXPECT_NEAR(HX.at({`1`}), `4`, `1E-5`);
187	EXPECT_NEAR(HX.at({`2`}), `9`, `1E-5`);
188	context = std::move(context_);
189	runErr = glow::make_unique<Error>(std::move(err));
190	runNetwork.set_value();
191	});
192
193	ready.wait();
194	EXPECT_FALSE(ERR_TO_BOOL(std::move(*DCHECK_NOTNULL(runErr.get()))));
195
196	// reset runErr
197	runErr = nullptr;
198
199	std::promise<void> newRun;
200	ready = newRun.get_future();
201	hostManager ->runNetwork("main", std::move(context),
202	[&newRun, &saveTensor, &runErr](
203	RunIdentifierTy runID, Error err,
204	std::unique_ptr<ExecutionContext> context_) {
205	auto HX = saveTensor->getHandle();
206	EXPECT_NEAR(HX.at({`0`}), `1`, `1E-5`);
207	EXPECT_NEAR(HX.at({`1`}), `4`, `1E-5`);
208	EXPECT_NEAR(HX.at({`2`}), `9`, `1E-5`);
209	runErr = glow::make_unique<Error>(std::move(err));
210	newRun.set_value();
211	});
212
213	ready.wait();
214	EXPECT_FALSE(ERR_TO_BOOL(std::move(*DCHECK_NOTNULL(runErr.get()))));
215	}
216
217	/// Test that HostManager properly handles concurrent add/remove requests with
218	/// unique network names.
219	TEST_P(HostManagerTest, ConcurrentAddRemoveUnique) {
220	CHECK_IF_ENABLED();
221	constexpr auto numThreads = `6`;
222	constexpr auto numItersPerThread = `20`;
223	auto hostManager = createHostManager(backendName_);
224	std::atomic<unsigned> counter{`0`};
225	std::vector<std::thread> threads;
226	for (auto i = `0`; i < numThreads; ++i) {
227	threads.emplace_back([&]() {
228	for (auto j = `0`; j < numItersPerThread; ++j) {
229	addAndRemoveNetwork(hostManager.get(), ++counter);
230	}
231	});
232	}
233
234	for (auto &t : threads) {
235	t.join();
236	}
237	}
238
239	/// Test that HostManager properly handles concurrent add/remove requests with a
240	/// duplicate network name.
241	TEST_P(HostManagerTest, ConcurrentAddRemoveDuplicate) {
242	CHECK_IF_ENABLED();
243	constexpr auto numThreads = `6`;
244	constexpr auto numItersPerThread = `20`;
245	auto hostManager = createHostManager(backendName_);
246	std::vector<std::thread> threads;
247	for (auto i = `0`; i < numThreads; ++i) {
248	threads.emplace_back([&]() {
249	for (auto j = `0`; j < numItersPerThread; ++j) {
250	addAndRemoveNetwork(hostManager.get(), `0`);
251	}
252	});
253	}
254
255	for (auto &t : threads) {
256	t.join();
257	}
258	}
259
260	/// Run several requests concurrently.
261	TEST_P(HostManagerTest, runNetworkConcurrent) {
262	CHECK_IF_ENABLED();
263	std::unique_ptr<Module> module = glow::make_unique<Module>();
264
265	Function *F = module ->createFunction("main");
266	auto X = module ->createPlaceholder(ElemKind::FloatTy, {`3`}, "X", false*);
267	auto *pow = F->createPow("Pow1", X, `2.0`);
268	F->createSave("save", pow);
269	auto *savePH = module ->getPlaceholderByNameSlow("save");
270
271	auto hostManager = createHostManager(backendName_);
272	CompilationContext cctx;
273
274	ASSERT_FALSE(ERR_TO_BOOL(hostManager ->addNetwork(std::move(module), cctx)));
275
276	std::vector<std::future<void>> ready;
277	for (int i = `0`; i < `50`; i++) {
278	auto runNetwork = std::make_shared<std::promise<void>>();
279	ready.push_back(runNetwork ->get_future());
280	std::unique_ptr<ExecutionContext> context =
281	glow::make_unique<ExecutionContext>();
282	auto *XTensor = context ->getPlaceholderBindings()->allocate(X);
283	XTensor->getHandle() = {`1.`, `2.`, `3.`};
284	auto *saveTensor = context ->getPlaceholderBindings()->allocate(savePH);
285	hostManager ->runNetwork(
286	"main", std::move(context),
287	[runNetwork, saveTensor](RunIdentifierTy runID, Error err,
288	std::unique_ptr<ExecutionContext> context_) {
289	auto HX = saveTensor->getHandle();
290	EXPECT_NEAR(HX.at({`0`}), `1`, `1E-5`);
291	EXPECT_NEAR(HX.at({`1`}), `4`, `1E-5`);
292	EXPECT_NEAR(HX.at({`2`}), `9`, `1E-5`);
293	EXPECT_FALSE(std::move(err));
294	runNetwork ->set_value();
295	});
296	}
297
298	for (auto &r : ready) {
299	r.wait();
300	}
301	}
302
303	TEST_P(HostManagerTest, testSaturateHost) {
304	CHECK_IF_ENABLED();
305	std::unique_ptr<Module> module = glow::make_unique<Module>();
306
307	Function *F = module ->createFunction("main");
308	auto X = module ->createPlaceholder(ElemKind::FloatTy, {`3`}, "X", false*);
309	auto *pow = F->createPow("Pow1", X, `2.0`);
310	F->createSave("save", pow);
311	auto *savePH = module ->getPlaceholderByNameSlow("save");
312
313	std::vector<std::unique_ptr<DeviceConfig>> configs =
314	generateConfigs(backendName_, `2`);
315	std::unique_ptr<HostManager> hostManager =
316	glow::make_unique<HostManager>(std::move(configs), HostConfig ());
317
318	CompilationContext cctx;
319	cctx.saturateHost = true;
320	ASSERT_FALSE(ERR_TO_BOOL(hostManager ->addNetwork(std::move(module), cctx)));
321
322	std::vector<std::future<void>> ready;
323	for (int i = `0`; i < `50`; i++) {
324	auto runNetwork = std::make_shared<std::promise<void>>();
325	ready.push_back(runNetwork ->get_future());
326	std::unique_ptr<ExecutionContext> context =
327	glow::make_unique<ExecutionContext>();
328	auto *XTensor = context ->getPlaceholderBindings()->allocate(X);
329	XTensor->getHandle() = {`1.`, `2.`, `3.`};
330	auto *saveTensor = context ->getPlaceholderBindings()->allocate(savePH);
331	hostManager ->runNetwork(
332	"main", std::move(context),
333	[runNetwork, saveTensor](RunIdentifierTy, Error err,
334	std::unique_ptr<ExecutionContext>) {
335	auto HX = saveTensor->getHandle();
336	EXPECT_NEAR(HX.at({`0`}), `1`, `1E-5`);
337	EXPECT_NEAR(HX.at({`1`}), `4`, `1E-5`);
338	EXPECT_NEAR(HX.at({`2`}), `9`, `1E-5`);
339	EXPECT_FALSE(std::move(err));
340	runNetwork ->set_value();
341	});
342	}
343
344	for (auto &r : ready) {
345	r.wait();
346	}
347	}
348
349	/// Test that the HostManager respects it's configuration parameters.
350	TEST_P(HostManagerTest, ConfigureHostManager) {
351	CHECK_IF_ENABLED();
352	HostConfig config;
353	config.maxActiveRequests = `1`;
354	config.maxQueueSize = `0`;
355	auto hostManager = createHostManager("Interpreter", std::move(config));
356
357	EXPECT_FALSE(ERR_TO_BOOL(addNetwork(hostManager.get(), "main")));
358
359	auto context = glow::make_unique<ExecutionContext>();
360	auto context2 = glow::make_unique<ExecutionContext>();
361
362	std::unique_ptr<Error> runErr;
363
364	std::shared_ptr<std::mutex> lock = std::make_shared<std::mutex>();
365	std::unique_lock<std::mutex> guard(*lock);
366
367	/// Don't care a about the first one.
368	hostManager ->runNetwork("main", std::move(context),
369	[lock](RunIdentifierTy runID, Error err,
370	std::unique_ptr<ExecutionContext> context_) {
371	ERR_TO_BOOL(std::move(err));
372	});
373
374	hostManager ->runNetwork(
375	"main", std::move(context2),
376	[&runErr](RunIdentifierTy runID, Error err,
377	std::unique_ptr<ExecutionContext> context_) {
378	runErr = glow::make_unique<Error>(std::move(err));
379	});
380	guard.unlock();
381	// Don't need a future, error CB called inline.
382	EXPECT_TRUE(ERR_TO_BOOL(std::move(*DCHECK_NOTNULL(runErr.get()))));
383	}
384
385	/// Test that the HostManager properly enqueues requests.
386	TEST_P(HostManagerTest, QueueTest) {
387	CHECK_IF_ENABLED();
388	HostConfig config;
389	// Setup the hostmanager to allow 1 active and 2 queued requests for a total
390	// of 3 requests in the system.
391	config.maxActiveRequests = `1`;
392	auto hostManager = createHostManager("Interpreter", std::move(config));
393
394	EXPECT_FALSE(ERR_TO_BOOL(addNetwork(hostManager.get(), "main")));
395
396	auto context = glow::make_unique<ExecutionContext>();
397	auto context2 = glow::make_unique<ExecutionContext>();
398	auto context3 = glow::make_unique<ExecutionContext>();
399	auto context4 = glow::make_unique<ExecutionContext>();
400	std::promise<unsigned> run1p, run2p, run3p, dispatched;
401	auto dispatchDone = dispatched.get_future();
402	auto run1f = run1p.get_future();
403	auto run2f = run2p.get_future();
404	auto run3f = run3p.get_future();
405	std::atomic<unsigned> counter{`0`};
406
407	// The first will go right to dispatch since there will be no inflight
408	// requests.
409	hostManager ->runNetwork("main", std::move(context),
410	[&run1p, &counter, &dispatchDone](
411	RunIdentifierTy runID, Error err,
412	std::unique_ptr<ExecutionContext> context) {
413	EXIT_ON_ERR(std::move(err));
414	run1p.set_value(counter ++);
415	dispatchDone.wait();
416	});
417	// Set the priority of the second to 1.
418	hostManager ->runNetwork(
419	"main", std::move(context2),
420	[&run2p, &counter](RunIdentifierTy runID, Error err,
421	std::unique_ptr<ExecutionContext> context) {
422	EXIT_ON_ERR(std::move(err));
423	run2p.set_value(counter ++);
424	},
425	`1`);
426
427	// Set the priority of the run3 to 0 so it should be first in the queue
428	// after run1.
429	hostManager ->runNetwork(
430	"main", std::move(context3),
431	[&run3p, &counter](RunIdentifierTy runID, Error err,
432	std::unique_ptr<ExecutionContext> context) {
433	EXIT_ON_ERR(std::move(err));
434	run3p.set_value(counter ++);
435	},
436	`0`);
437	/// Wait for all three to finish.
438	dispatched.set_value(`0`);
439	auto res1 = run1f.get();
440	auto res2 = run2f.get();
441	auto res3 = run3f.get();
442	// Should expect them to finish in order: 1, 3, 2. Check atomic value
443	EXPECT_GT(res3, res1);
444	EXPECT_GT(res2, res3);
445	}
446
447	/// Test that the enabling partition replication through user defined
448	/// partitioning works.
449	TEST_P(HostManagerTest, testPartitionConfigReplication) {
450	CHECK_IF_ENABLED();
451	std::unique_ptr<Module> module = glow::make_unique<Module>();
452	std::unique_ptr<ExecutionContext> context =
453	glow::make_unique<ExecutionContext>();
454
455	Function *F = module ->createFunction("main");
456	auto X = module ->createPlaceholder(ElemKind::FloatTy, {`3`}, "X", false*);
457	auto *XTensor = context ->getPlaceholderBindings()->allocate(X);
458	XTensor->getHandle() = {`1.`, `2.`, `3.`};
459	auto *pow = F->createPow("Pow", X, `2.0`);
460	auto *save = F->createSave("save", pow);
461	auto savePH = save->getPlaceholder();
462
463	std::vector<std::unique_ptr<DeviceConfig>> configs =
464	generateConfigs(backendName_, `2`);
465	std::unique_ptr<HostManager> hostManager =
466	glow::make_unique<HostManager>(std::move(configs), HostConfig ());
467	CompilationContext cctx;
468
469	// Setup forced partitioning.
470	PartitionConfig partitionConfig;
471	partitionConfig.funcName = "main";
472	partitionConfig.numOfPartitions = `2`;
473	partitionConfig.backendNames = {backendName_, backendName_};
474	partitionConfig.partitionNames = {"p0", "p1"};
475	partitionConfig.nodeToPartition = {{"Pow", `0`}, {"save", `3`}};
476	partitionConfig.logicalIDs = {{`0`}, {`1`}};
477	partitionConfig.replicationCount [`0`] = `2`;
478	cctx.partitionConfig = &partitionConfig;
479
480	ASSERT_FALSE(ERR_TO_BOOL(hostManager ->addNetwork(std::move(module), cctx)));
481
482	std::vector<std::future<void>> ready;
483	for (int i = `0`; i < `50`; i++) {
484	auto runNetwork = std::make_shared<std::promise<void>>();
485	ready.push_back(runNetwork ->get_future());
486	std::unique_ptr<ExecutionContext> context =
487	glow::make_unique<ExecutionContext>();
488	auto *XTensor = context ->getPlaceholderBindings()->allocate(X);
489	XTensor->getHandle() = {`1.`, `2.`, `3.`};
490	auto *saveTensor = context ->getPlaceholderBindings()->allocate(savePH);
491	hostManager ->runNetwork(
492	"main", std::move(context),
493	[runNetwork, saveTensor](RunIdentifierTy runID, Error err,
494	std::unique_ptr<ExecutionContext> context_) {
495	auto HX = saveTensor->getHandle();
496	EXPECT_NEAR(HX.at({`0`}), `1`, `1E-5`);
497	EXPECT_NEAR(HX.at({`1`}), `4`, `1E-5`);
498	EXPECT_NEAR(HX.at({`2`}), `9`, `1E-5`);
499	EXPECT_FALSE(std::move(err));
500	runNetwork ->set_value();
501	});
502	}
503
504	for (auto &r : ready) {
505	r.wait();
506	}
507	}
508
509	/// Test replication for a single partition network.
510	TEST_P(HostManagerTest, testSinglePartitionReplication) {
511	CHECK_IF_ENABLED();
512	std::unique_ptr<Module> module = glow::make_unique<Module>();
513	std::unique_ptr<ExecutionContext> context =
514	glow::make_unique<ExecutionContext>();
515
516	Function *F = module ->createFunction("main");
517	auto X = module ->createPlaceholder(ElemKind::FloatTy, {`3`}, "X", false*);
518	auto *XTensor = context ->getPlaceholderBindings()->allocate(X);
519	XTensor->getHandle() = {`1.`, `2.`, `3.`};
520	auto *pow = F->createPow("Pow1", X, `2.0`);
521	auto *save = F->createSave("save", pow);
522	auto *savePH = save->getPlaceholder();
523
524	auto hostManager = createHostManager(backendName_);
525	CompilationContext cctx;
526	cctx.replicationCount = `2`;
527	ASSERT_FALSE(ERR_TO_BOOL(hostManager ->addNetwork(std::move(module), cctx)));
528
529	std::vector<std::future<void>> ready;
530	for (int i = `0`; i < `50`; i++) {
531	auto runNetwork = std::make_shared<std::promise<void>>();
532	ready.push_back(runNetwork ->get_future());
533	std::unique_ptr<ExecutionContext> context =
534	glow::make_unique<ExecutionContext>();
535	auto *XTensor = context ->getPlaceholderBindings()->allocate(X);
536	XTensor->getHandle() = {`1.`, `2.`, `3.`};
537	auto *saveTensor = context ->getPlaceholderBindings()->allocate(savePH);
538	hostManager ->runNetwork(
539	"main", std::move(context),
540	[runNetwork, saveTensor](RunIdentifierTy runID, Error err,
541	std::unique_ptr<ExecutionContext> context_) {
542	auto HX = saveTensor->getHandle();
543	EXPECT_NEAR(HX.at({`0`}), `1`, `1E-5`);
544	EXPECT_NEAR(HX.at({`1`}), `4`, `1E-5`);
545	EXPECT_NEAR(HX.at({`2`}), `9`, `1E-5`);
546	EXPECT_FALSE(std::move(err));
547	runNetwork ->set_value();
548	});
549	}
550
551	for (auto &r : ready) {
552	r.wait();
553	}
554	}
555
556	// This test creates a network that is split into four partitions. P0,P1,P2,P3
557	// and three devices D0,D1,D2. P0 is loaded on D0, P1 and P2 are loaded on D2
558	// and P3 is loaded on D2. This test then enables both DRT and P2P
559	// optimizations. We then run the network twice to test the alternating static
560	// assignments.
561	TEST_P(HostManagerTest, testStaticAssignmentP2PandDRT) {
562	CHECK_IF_ENABLED();
563	std::unique_ptr<Module> module = glow::make_unique<Module>();
564	std::unique_ptr<ExecutionContext> context =
565	glow::make_unique<ExecutionContext>();
566
567	Function *F = module ->createFunction("main");
568	auto X = module ->createPlaceholder(ElemKind::FloatTy, {`3`}, "X", false*);
569	auto *XTensor = context ->getPlaceholderBindings()->allocate(X);
570	XTensor->getHandle() = {`1.`, `2.`, `3.`};
571	auto *pow = F->createPow("Pow1", X, `2.0`);
572	auto pow2 = F->createPow("Pow2", pow, `2.0`);
573	auto pow3 = F->createPow("Pow3", pow2, `1.0`);
574	auto *save = F->createSave("save", pow3);
575	auto *saveTensor =
576	context ->getPlaceholderBindings()->allocate(save->getPlaceholder());
577
578	std::vector<std::unique_ptr<DeviceConfig>> configs =
579	generateConfigs(backendName_, `3`);
580	std::unique_ptr<HostManager> hostManager =
581	glow::make_unique<HostManager>(std::move(configs), HostConfig ());
582	CompilationContext cctx;
583	cctx.enableP2P = true;
584	cctx.enableDRT = true;
585
586	// Setup forced partitioning.
587	PartitionConfig partitionConfig;
588	partitionConfig.funcName = "main";
589	partitionConfig.numOfPartitions = `4`;
590	partitionConfig.backendNames = {backendName_, backendName_, backendName_,
591	backendName_};
592	partitionConfig.partitionNames = {"p0", "p1", "p2", "p3"};
593	partitionConfig.nodeToPartition = {
594	{"Pow1", `0`}, {"Pow2", `1`}, {"Pow3", `2`}, {"Pow1__1", `0`},
595	{"Pow2__1", `1`}, {"Pow3__1", `2`}, {"save", `3`}, {"save_save", `3`}};
596	partitionConfig.logicalIDs = {{`0`}, {`1`}, {`1`}, {`2`}};
597	cctx.partitionConfig = &partitionConfig;
598
599	ASSERT_FALSE(ERR_TO_BOOL(hostManager ->addNetwork(std::move(module), cctx)));
600
601	std::promise<void> runNetwork;
602	auto ready = runNetwork.get_future();
603
604	std::unique_ptr<Error> runErr;
605	hostManager ->runNetwork("main", std::move(context),
606	[&runNetwork, &saveTensor, &context, &runErr](
607	RunIdentifierTy runID, Error err,
608	std::unique_ptr<ExecutionContext> context_) {
609	auto HX = saveTensor->getHandle();
610	EXPECT_NEAR(HX.at({`0`}), `1`, `1E-5`);
611	EXPECT_NEAR(HX.at({`1`}), `16`, `1E-5`);
612	EXPECT_NEAR(HX.at({`2`}), `81`, `1E-5`);
613	context = std::move(context_);
614	runErr = glow::make_unique<Error>(std::move(err));
615	runNetwork.set_value();
616	});
617
618	ready.wait();
619	EXPECT_FALSE(ERR_TO_BOOL(std::move(*DCHECK_NOTNULL(runErr.get()))));
620
621	// reset runErr
622	runErr = nullptr;
623
624	std::promise<void> newRun;
625	ready = newRun.get_future();
626	hostManager ->runNetwork("main", std::move(context),
627	[&newRun, &saveTensor, &runErr](
628	RunIdentifierTy runID, Error err,
629	std::unique_ptr<ExecutionContext> context_) {
630	auto HX = saveTensor->getHandle();
631	EXPECT_NEAR(HX.at({`0`}), `1`, `1E-5`);
632	EXPECT_NEAR(HX.at({`1`}), `16`, `1E-5`);
633	EXPECT_NEAR(HX.at({`2`}), `81`, `1E-5`);
634	runErr = glow::make_unique<Error>(std::move(err));
635	newRun.set_value();
636	});
637
638	ready.wait();
639	EXPECT_FALSE(ERR_TO_BOOL(std::move(*DCHECK_NOTNULL(runErr.get()))));
640	}
641
642	// This test creates a network that is split into four partitions. P0,P1,P2,P3
643	// and three devices D0,D1,D2. P0 is loaded on D0, P1 and P2 are loaded on D2
644	// and P3 is loaded on D2. This test then enables the DRT optimization without
645	// P2P. We then run the network twice to test the alternating static
646	// assignments.
647	TEST_P(HostManagerTest, testStaticAssignmentDeviceResidentTensorOnly) {
648	CHECK_IF_ENABLED();
649	std::unique_ptr<Module> module = glow::make_unique<Module>();
650	std::unique_ptr<ExecutionContext> context =
651	glow::make_unique<ExecutionContext>();
652
653	Function *F = module ->createFunction("main");
654	auto X = module ->createPlaceholder(ElemKind::FloatTy, {`3`}, "X", false*);
655	auto *XTensor = context ->getPlaceholderBindings()->allocate(X);
656	XTensor->getHandle() = {`1.`, `2.`, `3.`};
657	auto *pow = F->createPow("Pow1", X, `2.0`);
658	auto pow2 = F->createPow("Pow2", pow, `2.0`);
659	auto pow3 = F->createPow("Pow3", pow2, `1.0`);
660	auto *save = F->createSave("save", pow3);
661	auto *saveTensor =
662	context ->getPlaceholderBindings()->allocate(save->getPlaceholder());
663
664	std::vector<std::unique_ptr<DeviceConfig>> configs =
665	generateConfigs(backendName_, `3`);
666	std::unique_ptr<HostManager> hostManager =
667	glow::make_unique<HostManager>(std::move(configs), HostConfig ());
668	CompilationContext cctx;
669	cctx.enableDRT = true;
670
671	// Setup forced partitioning.
672	PartitionConfig partitionConfig;
673	partitionConfig.funcName = "main";
674	partitionConfig.numOfPartitions = `4`;
675	partitionConfig.backendNames = {backendName_, backendName_, backendName_,
676	backendName_};
677	partitionConfig.partitionNames = {"p0", "p1", "p2", "p3"};
678	partitionConfig.nodeToPartition = {
679	{"Pow1", `0`}, {"Pow2", `1`}, {"Pow3", `2`}, {"Pow1__1", `0`},
680	{"Pow2__1", `1`}, {"Pow3__1", `2`}, {"save", `3`}, {"save_save", `3`}};
681	partitionConfig.logicalIDs = {{`0`}, {`1`}, {`1`}, {`2`}};
682	cctx.partitionConfig = &partitionConfig;
683
684	ASSERT_FALSE(ERR_TO_BOOL(hostManager ->addNetwork(std::move(module), cctx)));
685
686	std::promise<void> runNetwork;
687	auto ready = runNetwork.get_future();
688
689	std::unique_ptr<Error> runErr;
690	hostManager ->runNetwork("main", std::move(context),
691	[&runNetwork, &saveTensor, &context, &runErr](
692	RunIdentifierTy runID, Error err,
693	std::unique_ptr<ExecutionContext> context_) {
694	auto HX = saveTensor->getHandle();
695	EXPECT_NEAR(HX.at({`0`}), `1`, `1E-5`);
696	EXPECT_NEAR(HX.at({`1`}), `16`, `1E-5`);
697	EXPECT_NEAR(HX.at({`2`}), `81`, `1E-5`);
698	context = std::move(context_);
699	runErr = glow::make_unique<Error>(std::move(err));
700	runNetwork.set_value();
701	});
702
703	ready.wait();
704	EXPECT_FALSE(ERR_TO_BOOL(std::move(*DCHECK_NOTNULL(runErr.get()))));
705
706	// reset runErr
707	runErr = nullptr;
708
709	std::promise<void> newRun;
710	ready = newRun.get_future();
711	hostManager ->runNetwork("main", std::move(context),
712	[&newRun, &saveTensor, &runErr](
713	RunIdentifierTy runID, Error err,
714	std::unique_ptr<ExecutionContext> context_) {
715	auto HX = saveTensor->getHandle();
716	EXPECT_NEAR(HX.at({`0`}), `1`, `1E-5`);
717	EXPECT_NEAR(HX.at({`1`}), `16`, `1E-5`);
718	EXPECT_NEAR(HX.at({`2`}), `81`, `1E-5`);
719	runErr = glow::make_unique<Error>(std::move(err));
720	newRun.set_value();
721	});
722
723	ready.wait();
724	EXPECT_FALSE(ERR_TO_BOOL(std::move(*DCHECK_NOTNULL(runErr.get()))));
725	}
726
727	// This test creates a network that is split into four partitions. P0,P1,P2,P3
728	// and three devices D0,D1,D2. P0 is loaded on D0, P1 and P2 are loaded on D2
729	// and P3 is loaded on D2. This test then enables the P2P optimization without
730	// DRT. We then run the network twice to test the alternating static
731	// assignments.
732	TEST_P(HostManagerTest, testStaticAssignmentP2POnly) {
733	CHECK_IF_ENABLED();
734	std::unique_ptr<Module> module = glow::make_unique<Module>();
735	std::unique_ptr<ExecutionContext> context =
736	glow::make_unique<ExecutionContext>();
737
738	Function *F = module ->createFunction("main");
739	auto X = module ->createPlaceholder(ElemKind::FloatTy, {`3`}, "X", false*);
740	auto *XTensor = context ->getPlaceholderBindings()->allocate(X);
741	XTensor->getHandle() = {`1.`, `2.`, `3.`};
742	auto *pow = F->createPow("Pow1", X, `2.0`);
743	auto pow2 = F->createPow("Pow2", pow, `2.0`);
744	auto pow3 = F->createPow("Pow3", pow2, `1.0`);
745	auto *save = F->createSave("save", pow3);
746	auto *saveTensor =
747	context ->getPlaceholderBindings()->allocate(save->getPlaceholder());
748
749	std::vector<std::unique_ptr<DeviceConfig>> configs =
750	generateConfigs(backendName_, `3`);
751	std::unique_ptr<HostManager> hostManager =
752	glow::make_unique<HostManager>(std::move(configs), HostConfig ());
753	CompilationContext cctx;
754	cctx.enableP2P = true;
755
756	// Setup forced partitioning.
757	PartitionConfig partitionConfig;
758	partitionConfig.funcName = "main";
759	partitionConfig.numOfPartitions = `4`;
760	partitionConfig.backendNames = {backendName_, backendName_, backendName_,
761	backendName_};
762	partitionConfig.partitionNames = {"p0", "p1", "p2", "p3"};
763	partitionConfig.nodeToPartition = {
764	{"Pow1", `0`}, {"Pow2", `1`}, {"Pow3", `2`}, {"Pow1__1", `0`},
765	{"Pow2__1", `1`}, {"Pow3__1", `2`}, {"save", `3`}, {"save_save", `3`}};
766	partitionConfig.logicalIDs = {{`0`}, {`1`}, {`1`}, {`2`}};
767	cctx.partitionConfig = &partitionConfig;
768
769	ASSERT_FALSE(ERR_TO_BOOL(hostManager ->addNetwork(std::move(module), cctx)));
770
771	std::promise<void> runNetwork;
772	auto ready = runNetwork.get_future();
773
774	std::unique_ptr<Error> runErr;
775	hostManager ->runNetwork("main", std::move(context),
776	[&runNetwork, &saveTensor, &context, &runErr](
777	RunIdentifierTy runID, Error err,
778	std::unique_ptr<ExecutionContext> context_) {
779	auto HX = saveTensor->getHandle();
780	EXPECT_NEAR(HX.at({`0`}), `1`, `1E-5`);
781	EXPECT_NEAR(HX.at({`1`}), `16`, `1E-5`);
782	EXPECT_NEAR(HX.at({`2`}), `81`, `1E-5`);
783	context = std::move(context_);
784	runErr = glow::make_unique<Error>(std::move(err));
785	runNetwork.set_value();
786	});
787
788	ready.wait();
789	EXPECT_FALSE(ERR_TO_BOOL(std::move(*DCHECK_NOTNULL(runErr.get()))));
790
791	// reset runErr
792	runErr = nullptr;
793
794	std::promise<void> newRun;
795	ready = newRun.get_future();
796	hostManager ->runNetwork("main", std::move(context),
797	[&newRun, &saveTensor, &runErr](
798	RunIdentifierTy runID, Error err,
799	std::unique_ptr<ExecutionContext> context_) {
800	auto HX = saveTensor->getHandle();
801	EXPECT_NEAR(HX.at({`0`}), `1`, `1E-5`);
802	EXPECT_NEAR(HX.at({`1`}), `16`, `1E-5`);
803	EXPECT_NEAR(HX.at({`2`}), `81`, `1E-5`);
804	runErr = glow::make_unique<Error>(std::move(err));
805	newRun.set_value();
806	});
807
808	ready.wait();
809	EXPECT_FALSE(ERR_TO_BOOL(std::move(*DCHECK_NOTNULL(runErr.get()))));
810	}
811
812	// This test creates a network that is split into two partitions. P0,P1. P0 is
813	// loaded on one device, P1 is loaded on two devices. This test then enables
814	// static assignment which allows for P2P testing. We then run the network
815	// multiple requests concurrently.
816	TEST_P(HostManagerTest, testStaticAssignmentP2PandDRTConcurrent) {
817	CHECK_IF_ENABLED();
818	std::unique_ptr<Module> module = glow::make_unique<Module>();
819
820	Function *F = module ->createFunction("main");
821	auto X = module ->createPlaceholder(ElemKind::FloatTy, {`3`}, "X", false*);
822	auto *pow = F->createPow("Pow1", X, `2.0`);
823	F->createSave("save", pow);
824	auto *savePH = module ->getPlaceholderByNameSlow("save");
825
826	std::vector<std::unique_ptr<DeviceConfig>> configs =
827	generateConfigs(backendName_, `3`);
828	std::unique_ptr<HostManager> hostManager =
829	glow::make_unique<HostManager>(std::move(configs), HostConfig ());
830	CompilationContext cctx;
831	cctx.enableDRT = true;
832	cctx.enableP2P = true;
833
834	// Setup forced partitioning.
835	PartitionConfig partitionConfig;
836	partitionConfig.funcName = "main";
837	partitionConfig.numOfPartitions = `2`;
838	partitionConfig.backendNames = {backendName_, backendName_};
839	partitionConfig.partitionNames = {"p0", "p1"};
840	partitionConfig.nodeToPartition = {{"Pow1", `0`}, {"save", `1`}};
841	partitionConfig.logicalIDs = {{`0`}, {`1`, `2`}};
842	cctx.partitionConfig = &partitionConfig;
843
844	ASSERT_FALSE(ERR_TO_BOOL(hostManager ->addNetwork(std::move(module), cctx)));
845
846	std::vector<std::future<void>> ready;
847	for (int i = `0`; i < `50`; i++) {
848	auto runNetwork = std::make_shared<std::promise<void>>();
849	ready.push_back(runNetwork ->get_future());
850	std::unique_ptr<ExecutionContext> context =
851	glow::make_unique<ExecutionContext>();
852	auto *XTensor = context ->getPlaceholderBindings()->allocate(X);
853	XTensor->getHandle() = {`1.`, `2.`, `3.`};
854	auto *saveTensor = context ->getPlaceholderBindings()->allocate(savePH);
855	hostManager ->runNetwork(
856	"main", std::move(context),
857	[runNetwork, saveTensor](RunIdentifierTy runID, Error err,
858	std::unique_ptr<ExecutionContext> context_) {
859	auto HX = saveTensor->getHandle();
860	EXPECT_NEAR(HX.at({`0`}), `1`, `1E-5`);
861	EXPECT_NEAR(HX.at({`1`}), `4`, `1E-5`);
862	EXPECT_NEAR(HX.at({`2`}), `9`, `1E-5`);
863	EXPECT_FALSE(std::move(err));
864	runNetwork ->set_value();
865	});
866	}
867
868	for (auto &r : ready) {
869	r.wait();
870	}
871	}
872
873	/// This tests that the HostMangaer registry works and is able to report what
874	/// devices a network is loaded on.
875	TEST_P(HostManagerTest, testHostManagerRegistry) {
876	CHECK_IF_ENABLED();
877	std::unique_ptr<Module> module = glow::make_unique<Module>();
878
879	Function *F = module ->createFunction("main");
880	auto X = module ->createPlaceholder(ElemKind::FloatTy, {`3`}, "X", false*);
881	auto *pow = F->createPow("Pow1", X, `2.0`);
882	F->createSave("save", pow);
883	module ->getPlaceholderByNameSlow("save");
884
885	std::unique_ptr<Module> module2 = glow::make_unique<Module>();
886
887	Function *F2 = module2 ->createFunction("main2");
888	auto X2 = module2 ->createPlaceholder(ElemKind::FloatTy, {`3`}, "X2", false*);
889	auto *pow2 = F2->createPow("Pow2", X2, `2.0`);
890	F2->createSave("save2", pow2);
891	module2 ->getPlaceholderByNameSlow("save2");
892
893	std::vector<std::unique_ptr<DeviceConfig>> configs =
894	generateConfigs(backendName_, `2`);
895	std::unique_ptr<HostManager> hostManager =
896	glow::make_unique<HostManager>(std::move(configs), HostConfig ());
897
898	CompilationContext cctx;
899	cctx.saturateHost = true;
900	ASSERT_FALSE(ERR_TO_BOOL(hostManager ->addNetwork(std::move(module), cctx)));
901	cctx.saturateHost = false;
902	ASSERT_FALSE(ERR_TO_BOOL(hostManager ->addNetwork(std::move(module2), cctx)));
903	glow::runtime::ManagerRegistry()->registerHostManager(hostManager.get());
904	auto testHM = glow::runtime::ManagerRegistry()->getHostManager();
905	auto loading = testHM->getDevicePartitionMapping("main");
906	auto loading2 = testHM->getDevicePartitionMapping("main2");
907	EXPECT_EQ(loading["main"].size(), `2`);
908	EXPECT_EQ(loading2["main2"].size(), `1`);
909	}
910
911	TEST_P(HostManagerTest, testTimeout) {
912	CHECK_IF_ENABLED();
913
914	if (backendName_ == "NNPI") {
915	// Skip this test if running on ICEREF, since we want to test the device
916	// timeout.
917	auto useInfAPI = getenv("USE_INF_API");
918	if (!useInfAPI \|\| strcmp(useInfAPI, "1")) {
919	GTEST_SKIP();
920	}
921	// Set the timeout to very short so we fail intentionally.
922	glow::runtime::flags::NNPITimeoutMs = `1`;
923	}
924
925	std::unique_ptr<Module> module = glow::make_unique<Module>();
926	std::unique_ptr<ExecutionContext> context =
927	glow::make_unique<ExecutionContext>();
928
929	Function *F = module ->createFunction("main");
930	auto X = module ->createPlaceholder(ElemKind::FloatTy, {`3`}, "X", false*);
931	auto *XTensor = context ->getPlaceholderBindings()->allocate(X);
932	XTensor->getHandle() = {`1.`, `2.`, `3.`};
933	auto *pow = F->createPow("Poww", X, `2.0`);
934	for (unsigned i = `0`; i < `1000`; i++) {
935	pow = F->createPow("pow" + std::to_string(i), pow, `1.0`);
936	}
937	auto *save = F->createSave("save", pow);
938	context ->getPlaceholderBindings()->allocate(save->getPlaceholder());
939
940	auto hostManager = createHostManager(backendName_);
941
942	CompilationContext cctx;
943	ASSERT_FALSE(ERR_TO_BOOL(hostManager ->addNetwork(std::move(module), cctx)));
944
945	std::promise<void> runNetwork;
946	auto ready = runNetwork.get_future();
947
948	std::unique_ptr<Error> runErr;
949	hostManager ->runNetwork("main", std::move(context),
950	[&runNetwork, &context, &runErr](
951	RunIdentifierTy runID, Error err,
952	std::unique_ptr<ExecutionContext> context_) {
953	context = std::move(context_);
954	runErr = glow::make_unique<Error>(std::move(err));
955	runNetwork.set_value();
956	});
957
958	ready.wait();
959	EXPECT_TRUE(ERR_TO_BOOL(std::move(*DCHECK_NOTNULL(runErr.get()))));
960	}
961
// Instantiates the value-parameterized HostManagerTest suite declared at the
// top of this file. NOTE(review): the macro is defined outside this file
// (presumably in BackendTestUtils.h) and is assumed to supply the backend
// names that SetUp() reads via GetParam() -- confirm there.
962	INSTANTIATE_BACKEND_TEST(HostManagerTest);
963

Browse the source code of glow/tests/unittests/HostManagerTest.cpp