/**
 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "ExecutorCore.h"

#include "ExecutorCoreHelperFunctions.h"
#include "Loader.h"

#include "glow/Base/Image.h"
#include "glow/Base/TensorSerialization.h"
#include "glow/Converter/TypeAToTypeBFunctionConverter.h"
#include "glow/Importer/Caffe2ModelLoader.h"
#include "glow/Importer/ONNXModelLoader.h"
#include "glow/Optimizer/IROptimizer/CommandLine.h"
#include "glow/Support/Support.h"

#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"

#include <atomic>
#include <cfloat>
#include <fstream>
#include <future>
#include <iostream>
#include <memory>
#include <mutex>
#include <queue>
#include <sstream>
#include <thread>

extern llvm::cl::opt<unsigned> traceLevel;

using namespace glow;

namespace {

class PostProcessExecutor : public PostProcessOutputDataExtension {
public:
  /// Iterates over registered extensions for processing and printing results
  /// and executes them.
  /// \return the accumulated error count; a value greater than 0 indicates
  /// that one or more errors have occurred.
  int processOutputs(const llvm::StringMap<Placeholder *> &PHM,
                     PlaceholderBindings &bindings,
                     VecVecRef<std::string> inputImageBatchFilenames) override;

  /// Registers Post Processing Output extensions.
  void registerPostProcessOutputExtensions(
      const std::vector<PostProcessExtFuncPtr> &extVector);

private:
  UniquePtrVec<PostProcessOutputDataExtension> extensions_;
};
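
// A minimal application-side extension might look like the following sketch
// (the class name and body are hypothetical; the signature matches the
// interface used above):
//
//   class CountOutputs : public PostProcessOutputDataExtension {
//     int processOutputs(const llvm::StringMap<Placeholder *> &PHM,
//                        PlaceholderBindings &bindings,
//                        VecVecRef<std::string> files) override {
//       llvm::outs() << "Got " << PHM.size() << " output placeholder(s).\n";
//       return 0; // No errors.
//     }
//   };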

class PreProcessInputExecutor : public PreProcessInputDataExtension {
public:
  /// Iterates over PreProcessInputDataExtension extensions and executes them
  /// one by one.
  void processInputTensor(llvm::ArrayRef<Tensor *> inputImageData,
                          size_t startId, size_t endId,
                          size_t batchSz) override;

  /// Registers Input Data Preprocessing Extensions.
  void registerInputDataPreProcessingExtension(
      const std::vector<
          std::function<std::unique_ptr<PreProcessInputDataExtension>()>>
          &extVector);

private:
  UniquePtrVec<PreProcessInputDataExtension> extensions_;
};

void PostProcessExecutor::registerPostProcessOutputExtensions(
    const std::vector<PostProcessExtFuncPtr> &extVector) {
  for (auto &f : extVector) {
    extensions_.push_back(f());
  }
}

} // namespace

/// Iterates over registered extensions for processing and printing results
/// and executes them.
int PostProcessExecutor::processOutputs(
    const llvm::StringMap<Placeholder *> &PHM, PlaceholderBindings &bindings,
    VecVecRef<std::string> inputImageBatchFilenames) {
  int numErrors = 0;
  for (auto &f : extensions_) {
    numErrors += f->processOutputs(PHM, bindings, inputImageBatchFilenames);
  }
  return numErrors;
}

/// Iterates over PreProcessInputDataExtension extensions and executes them
/// one by one.
void PreProcessInputExecutor::processInputTensor(
    llvm::ArrayRef<Tensor *> inputImageData, size_t startId, size_t endId,
    size_t batchSz) {
  for (auto &f : extensions_) {
    f->processInputTensor(inputImageData, startId, endId, batchSz);
  }
}

void PreProcessInputExecutor::registerInputDataPreProcessingExtension(
    const std::vector<
        std::function<std::unique_ptr<PreProcessInputDataExtension>()>>
        &extVector) {
  for (auto &f : extVector) {
    extensions_.push_back(f());
  }
}

Executor::Executor(std::string appName, int argc, char **argv) {
  appName_ = appName;
  // Clear all external storage for variables set by command-line args. This
  // is necessary in order to support multiple calls to parse the command
  // line; it seems that clearing the command line options is not possible,
  // thus, we clear their external storage only. With each successive call
  // to parse the arguments, arguments keep piling up in the ::cl argument
  // list; however, external storage will be set by the arguments from the
  // current call only.
  // NOTE: llvm::cl::ResetAllOptionOccurrences() or opt.reset() should do the
  // job but they don't work.
  // TODO: Loader should provide a function to register callbacks.
  initExecutorCoreCmdArgVars();
  initImageCmdArgVars();
  // Verify/initialize command line parameters, and then the loader
  // initializes the ExecutionEngine and Function.
  parseCommandLine(argc, argv);
  processImageCmdArgVars(modelInputsOpt.size());
}
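
// Typical application flow (sketch): construct an Executor, register any
// extensions, then execute. MyPostProcessor is a hypothetical
// PostProcessOutputDataExtension subclass (e.g. like the sketch near the top
// of this file); PostProcessExtFuncPtr is used here as a factory returning a
// unique_ptr, as its use above suggests:
//
//   Executor core("my-app", argc, argv);
//   core.registerPostProcessOutputExtension(
//       []() { return glow::make_unique<MyPostProcessor>(); });
//   return core.executeNetwork();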

/// Registers a Loader Extension that will be invoked after the model is
/// loaded. If multiple extensions are registered, they will be executed in
/// the order they were registered.
void Executor::registerLoaderExtension(
    std::function<std::unique_ptr<LoaderExtension>()> func) {
  loaderextensions_.push_back(func);
}

/// Registers an extension that will be invoked on the Tensor containing the
/// current batch of input data. If multiple extensions are registered, they
/// will be executed in the order they were registered.
void Executor::registerInputDataPreProcessingExtension(
    std::function<std::unique_ptr<PreProcessInputDataExtension>()> func) {
  ppInputDataExtensions_.push_back(func);
}

/// Registers an extension that will be invoked for each execution of the
/// network. If multiple extensions are registered, they will be executed in
/// the order they were registered.
void Executor::registerPostProcessOutputExtension(PostProcessExtFuncPtr func) {
  ppOutputDataExtensions_.push_back(func);
}

/// Iterates over the registered factory lambdas and registers a fresh
/// extension instance with each Loader created in the main dispatch loop.
void Executor::addLoaderExtensions(Loader &ld) {
  for (auto &f : loaderextensions_) {
    ld.registerExtension(f());
  }
}

void parseInputFiles(VecVec<std::string> &inputImageFiles) {
  if (inputImageListFileOpt.empty() && inputImageDirs.empty() &&
      inputTensorListFile.empty() && inputImageFilenamesOpt.size() == 0) {
    llvm::errs() << "Args: Either the positional image list or "
                    "-input-image-dir or "
                    "-input-image-list-file or "
                    "-input-tensor-list-file "
                    "must be used to specify input images.\n";
    return;
  }

  if (!inputImageDirs.empty() &&
      (!inputImageListFileOpt.empty() || inputImageFilenamesOpt.size() != 0)) {
    LOG(FATAL) << "Args: Specifying images using -input-image-dir cannot be "
                  "combined with "
                  "-input-image-list-file or the positional image list.\n";
  }

  if (!inputImageListFileOpt.empty() && inputImageFilenamesOpt.size() != 0) {
    LOG(FATAL) << "Args: The positional image list cannot be combined with "
                  "-input-image-list-file to specify input images.\n";
  }

  int32_t numInputNames = modelInputsOpt.size();

  // If a positional list of images is given, we support one input only;
  // assign the list to the 1st input vector.
  if (inputImageFilenamesOpt.size() != 0) {
    CHECK_EQ(numInputNames, 1) << "When using the positional image list, "
                                  "only single-input networks are supported.";
    inputImageFiles.push_back(inputImageFilenamesOpt);
    return;
  }

  if (!inputTensorListFile.empty()) {
    CHECK_EQ(inputImageFilenamesOpt.size(), 0)
        << "When using -input-tensor-list-file, all input images must be "
           "specified using the -input-tensor-list-file option.";
    CHECK_EQ(inputImageListFileOpt.size(), 0)
        << "When using -input-tensor-list-file, all input images must be "
           "specified using the -input-tensor-list-file option.";
    CHECK_EQ(numInputNames, 1) << "When using -input-tensor-list-file, only "
                                  "single-input networks are supported.";
    std::vector<std::string> imageFiles;
    parseInputList(inputTensorListFile, imageFiles);
    inputImageFiles.push_back(imageFiles);
    return;
  }

  if (!inputImageDirs.empty()) {
    CHECK_EQ(numInputNames, 1) << "When using an image dir, only single-input "
                                  "networks are supported.";
    for (const auto &inputImageDir : inputImageDirs) {
      std::vector<std::string> imageFiles;
      parseInputDir(inputImageDir, imageFiles);
      inputImageFiles.push_back(imageFiles);
    }
    return;
  }

  // Images are given as a vector of image list files, one list per input.
  CHECK_EQ(numInputNames, inputImageListFileOpt.size())
      << "Args: the number of inputs and the number of input image lists "
         "must match.";

  size_t numInputImages = 0;
  for (int i = 0; i < numInputNames; i++) {
    std::vector<std::string> imageFiles;
    parseInputList(inputImageListFileOpt[i], imageFiles);
    inputImageFiles.push_back(imageFiles);
    if (i > 0) {
      CHECK_EQ(numInputImages, inputImageFiles[i].size())
          << "Each image list file should have the same number of images.";
    } else {
      numInputImages = inputImageFiles[i].size();
    }
  }
}
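
// Illustrative flag combinations accepted by parseInputFiles():
// - Multi-input networks: one -input-image-list-file per model input, all
//   lists the same length.
// - Single-input networks only: the positional image list, -input-image-dir,
//   or -input-tensor-list-file, as enforced by the checks above.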

/// Parse the command line, then load, build, and execute the network.
int Executor::executeNetwork() {
  parseInputFiles(inputImageFilenames_);

  if (excludedFirstWarmupRuns && excludedFirstWarmupRuns >= warmup) {
    llvm::errs() << "Excluding all warmup runs does not make sense\n";
    return 1;
  }
  // Stream input mode.
  const bool streamInputFilenamesMode = inputImageFilenamesOpt.size() == 1 &&
                                        inputImageFilenamesOpt.front() == "-";

  CHECK(!(streamInputFilenamesMode && emittingBundle()))
      << "Cannot emit a bundle and also stream inputs.";

  // If tracing is enabled, create a TraceContext to merge each run's events
  // into.
  if (!tracePath.empty()) {
    traceContext = glow::make_unique<TraceContext>(TraceLevel::STANDARD);
  }

  // Mini-batch mode.
  const bool miniBatchMode = miniBatch > 0;
  CHECK(((!miniBatchMode) || (!streamInputFilenamesMode)))
      << "The minibatch option is not compatible with the stream input "
         "image mode.";
  CHECK(((!miniBatchMode) || (inputImageFilenames_[0].size() % miniBatch == 0)))
      << "The number of input images must be a multiple of the mini-batch.";

  CHECK(((!iterationsOpt) || (!miniBatchMode) ||
         (iterationsOpt % miniBatch == 0)))
      << "The benchmark count must be a multiple of the mini-batch.";
  CHECK(!preloadAllImages || miniBatchMode)
      << "preload-all-images can only be used with minibatch.";

  const bool singleBatchRepeatedMode = repeatSingleBatchCount > 0;
  CHECK(!(streamInputFilenamesMode && singleBatchRepeatedMode))
      << "singleBatchRepeatedMode is not compatible with "
         "streamInputFilenamesMode.";

  // When the mini-batch mode is enabled do not allow debug instrumentation.
  if (miniBatchMode) {
    CHECK(!instrumentDebug)
        << "The minibatch option is not compatible with debug instrumentation.";
  }

  CHECK(!preloadAllImages || (modelInputsOpt.size() == 1))
      << "Preloading all images doesn't support networks with multiple inputs.";

  CHECK(!iterationsOpt || (modelInputsOpt.size() == 1))
      << "Benchmark mode doesn't support networks with multiple inputs.";

  // Print the path of the model being run.
  llvm::outs() << "Model: " << Loader::getModelOptPath() << "\n";
  std::mutex ioMu;
  int numErrors = 0;

  if (runAllInputsOnAllDevices) {
    if (numDevices != miniBatchThreads) {
      llvm::outs() << "Setting " << miniBatchThreads.ArgStr << " to match "
                   << numDevices.ArgStr << " (" << numDevices
                   << ") as required by " << runAllInputsOnAllDevices.ArgStr
                   << "\n";
      miniBatchThreads.getValue() = numDevices;
    }
  }

  // If preloading, load and process all images here into
  // preloadedInputImageData.
  Tensor preloadedInputImageData;
  if (preloadAllImages) {
    Loader loader;
    PreProcessInputExecutor ppImageExecutor;
    addLoaderExtensions(loader);
    ppImageExecutor.registerInputDataPreProcessingExtension(
        ppInputDataExtensions_);

    if (!inputTensorListFile.empty()) {
      loadInputImageFromFileWithType(
          inputImageFilenames_[0], &preloadedInputImageData, imageLayoutOpt[0]);
    } else {
      // Load and process the image data into the preloaded input Tensor.
      loadImagesAndPreprocess(inputImageFilenames_, {&preloadedInputImageData});
      ppImageExecutor.processInputTensor({&preloadedInputImageData}, 0,
                                         inputImageFilenames_[0].size(),
                                         preloadedInputImageData.dims()[0]);
    }
  }
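
  // Note: preloadedInputImageData now holds every input image in a single
  // tensor; each worker thread below takes per-minibatch views of it via
  // getUnowned() instead of reloading images from disk.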

  // Process a set of minibatches with indices [startIndex, endIndex).
  auto processImageRange = [&](size_t startIndex, size_t endIndex, size_t TID) {
    std::unique_ptr<ExecutionContext> exContext =
        glow::make_unique<ExecutionContext>();
    PlaceholderBindings &bindings = *exContext->getPlaceholderBindings();
    if (traceContext) {
      exContext->setTraceContext(
          glow::make_unique<TraceContext>(TraceLevel::STANDARD));
    }
    // If runAllInputsOnAllDevices, then assign the thread with this TID to
    // device TID, e.g. TID 2 is assigned to device 2.
    Loader loader = runAllInputsOnAllDevices ? Loader(TID) : Loader();
    PostProcessExecutor ppResultExecutor;
    PreProcessInputExecutor ppImageExecutor;

    // Register all the extensions for this thread.
    addLoaderExtensions(loader);
    ppResultExecutor.registerPostProcessOutputExtensions(
        ppOutputDataExtensions_);
    ppImageExecutor.registerInputDataPreProcessingExtension(
        ppInputDataExtensions_);

    // Used to make sure we only compile once, and run only once if not
    // streaming.
    bool isFirstRun = true;

    // Perform graph profiling initialization if needed.
    // if (profilingGraph()) {
    //   loader.initGraphProfiling(
    //       bindings,
    //       miniBatch > 0 ? miniBatch : inputImageFilenames_[0].size(),
    //       inputImageFilenames_[0].size());
    // }

    // These will be set during the first run.
    llvm::StringMap<Placeholder *> iPHM;
    llvm::StringMap<Placeholder *> oPHM;
    std::vector<Placeholder *> inPHs;
    std::vector<Placeholder *> outPHs;

    size_t miniBatchIndex = startIndex;
    std::vector<Tensor> inputData(modelInputsOpt.size());
    if (preloadAllImages) {
      inputData[0] = preloadedInputImageData.getUnowned();
    }

    VecVec<std::string> inputImageBatchFilenames;
    if ((!miniBatchMode) &&
        (!streamInputFilenamesMode || singleBatchRepeatedMode)) {
      inputImageBatchFilenames = inputImageFilenames_;
    } else if (singleBatchRepeatedMode) {
      for (size_t i = 0, e = modelInputsOpt.size(); i < e; i++) {
        std::vector<std::string> names(inputImageFilenames_[0].begin(),
                                       inputImageFilenames_[0].begin() +
                                           miniBatch);
        inputImageBatchFilenames.push_back(names);
      }
    }
    if (!tracePath.empty()) {
      loader.getHostManager()->setTraceContext(
          glow::make_unique<TraceContext>(traceLevel));
      Error err = loader.getHostManager()->startDeviceTrace();
      if (err) {
        LOG(INFO) << "Failed to start device trace.";
        numErrors = 1;
        return;
      } else {
        llvm::outs() << "Device trace started.\n";
      }
    }

    // Pass input tensors around as an array of pointers.
    std::vector<Tensor *> inputImageData;
    for (auto &data : inputData) {
      inputImageData.push_back(&data);
    }

    unsigned repeatedLoopCountRemaining = repeatSingleBatchCount;

    auto loopCond = [&]() {
      // If in stream mode then get the next image filenames if they exist,
      // otherwise exit.
      if (streamInputFilenamesMode) {
        return getNextStdinImageFilenames(inputImageBatchFilenames);
      }

      // If a single batch is loaded once and then repeated, keep running
      // until repeatedLoopCountRemaining reaches zero.
      if (singleBatchRepeatedMode) {
        return repeatedLoopCountRemaining-- != 0;
      }

      // If in miniBatchMode then continue if we have already preloaded all
      // images (we will break inside the loop once done); otherwise get the
      // next miniBatch image filenames if they exist, and exit if they don't.
      if (miniBatchMode) {
        return getNextMiniBatch(inputImageBatchFilenames, inputImageFilenames_,
                                miniBatchIndex, miniBatch, endIndex);
      }

      // Enter at least once, e.g. to just dump a bundle.
      return isFirstRun;
    };
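
    // In summary: stream mode loops until stdin stops providing filenames;
    // single-batch-repeated mode loops repeatSingleBatchCount times (note
    // the post-decrement above); minibatch mode walks [startIndex, endIndex);
    // otherwise the body runs exactly once.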

    while (loopCond()) {
      if (!preloadAllImages && (!singleBatchRepeatedMode || isFirstRun)) {
        // Load and process the image data into the inputImageData Tensor.
        if (!inputTensorListFile.empty()) {
          loadInputImageFromFileWithType(inputImageBatchFilenames[0],
                                         inputImageData[0], imageLayoutOpt[0]);
        } else {
          loadImagesAndPreprocess(inputImageBatchFilenames, inputImageData);
          ppImageExecutor.processInputTensor(inputImageData, startIndex,
                                             endIndex,
                                             inputImageData[0]->dims()[0]);
        }
      }

      // Note: At this point miniBatchIndex is the end index, so subtract
      // miniBatch to get the start index.
      const dim_t startMiniBatchIndex = miniBatchIndex - miniBatch;

      ShapeVector imageShape(inputImageData[0]->getType().dims().begin(),
                             inputImageData[0]->getType().dims().end());
      if (miniBatch) {
        imageShape[0] = miniBatch;
      } else if (iterationsOpt) {
        imageShape[0] = iterationsOpt;
      }

      // If we are benchmarking, reset the image data to the batch size we
      // need.
      if (iterationsOpt) {
        auto resetTensor = [](Tensor *tensor) {
          ShapeVector imageSize(tensor->getType().dims().begin(),
                                tensor->getType().dims().end());
          imageSize[0] = miniBatch ? miniBatch : iterationsOpt;
          tensor->reset(ElemKind::FloatTy, imageSize);
        };
        std::for_each(inputImageData.begin(), inputImageData.end(),
                      resetTensor);
      }

      // If this is the first run, then we need to build and compile the model.
      if (isFirstRun) {
        isFirstRun = false;

        std::vector<TypeRef> types;
        auto preloadTy =
            Type::newShape(inputImageData[0]->getType(), imageShape);

        if (preloadAllImages) {
          types.push_back(&preloadTy);
        } else {
          // Get the types of all input tensors.
          for_each(inputImageData.begin(), inputImageData.end(),
                   [&](auto *t) { types.push_back(&t->getType()); });
        }

        // Build and compile the graph, then get input and output Placeholders.
        std::tie(iPHM, oPHM) =
            buildAndCompileAndGetInAndOutPair(loader, bindings, types);

        // If in bundle mode, the bundle has been saved by the above call, so
        // we can safely return.
        if (emittingBundle()) {
          LOG(INFO) << "Emit bundle mode is on. Network is compiled only.";
          return;
        }

        // Obtain the input/output placeholders from the input/output maps.
        // For the inputs we got a map, but we need an array, taking entries
        // from the map in the order specified by modelInputsOpt.
        for (size_t i = 0, e = modelInputsOpt.size(); i < e; i++) {
          auto it = iPHM.find(modelInputsOpt[i]);
          CHECK(it != iPHM.end())
              << "Couldn't find placeholder: " << modelInputsOpt[i];
          CHECK((*it).second) << "Placeholder in input map is NULL.";
          inPHs.push_back((*it).second);
        }
        for_each(oPHM.begin(), oPHM.end(), [&](auto &p) {
          CHECK(p.second) << "Placeholder in output map is NULL.";
          outPHs.push_back(p.second);
        });
      }

      // If preloading all images, set up a new Tensor that is a slice view
      // of the 1st (and only) input tensor, and point the inputImageData
      // array of tensor pointers at it for the rest of this iteration.
      Tensor inputImageDataBatch;
      if (preloadAllImages) {
        std::vector<dim_t> imgSliceStart(imageShape.size(), 0);
        imgSliceStart[0] = startMiniBatchIndex;
        inputImageDataBatch =
            inputImageData[0]->getUnowned(imageShape, imgSliceStart);
        inputImageData[0] = &inputImageDataBatch;
      }
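
      // For example, with miniBatch == 10 the batch whose miniBatchIndex is
      // 30 views rows [20, 30) of the preloaded tensor; no image data is
      // copied.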

      // Compile done.
      CHECK(!inPHs.empty()) << "Input must be valid.";
      CHECK(!outPHs.empty()) << "Output must be valid.";
      CHECK_EQ(inPHs.size(), inputImageData.size())
          << "The number of input placeholders and tensors must match.";
      for (size_t i = 0, e = inputImageData.size(); i < e; i++) {
        CHECK(inPHs[i]->dims() == inputImageData[i]->dims())
            << "New input shape does not match the compiled function: "
            << inPHs[i]->dims() << " vs " << inputImageData[i]->dims();
      }

      // Convert the raw input to fp16. This must be done every time we get
      // new image data.
      if (convertInAndOutToFp16) {
        for (auto &t : inputImageData) {
          t->convertToType(ElemKind::Float16Ty);
        }
      }

      // If we are benchmarking, we are done with the while loop.
      if (iterationsOpt) {
        break;
      }

      // Minibatch inference initialization of loader extensions.
      loader.inferInitMiniBatch(bindings, startMiniBatchIndex, miniBatch);

      // About to run inference, so update the input image Placeholders'
      // backing Tensors with the current input batch.
      updateInputPlaceholders(bindings, inPHs, inputImageData);

      // Perform the inference execution, updating the output tensors.
      auto batchSize = inputImageData[0]->dims()[0];
      loader.runInference(exContext.get(), batchSize);
      if (traceContext) {
        traceContext->merge(exContext->getTraceContext());
      }

      // Process the output of the network. Each app can do its own
      // post-processing depending on the type of the network.
      {
        std::lock_guard<std::mutex> lock(ioMu);
        numErrors += ppResultExecutor.processOutputs(oPHM, bindings,
                                                     inputImageBatchFilenames);
      }

      // Minibatch inference end-of-batch processing for loader extensions.
      loader.inferEndMiniBatch(bindings, startMiniBatchIndex, miniBatch);
    }

    if (iterationsOpt) {
      // All image tensors are loaded up to be run at once in benchmark mode.
      UniquePtrVec<ExecutionContext> contexts =
          setupContextPool(outPHs, inPHs[0], *inputImageData[0]);

      std::string name = loader.getFunctionName();
      std::unique_ptr<llvm::Timer> restRunsTimer = nullptr;
      std::unique_ptr<llvm::Timer> firstRunsTimer = nullptr;
      std::unique_ptr<double> bestRunTime = nullptr;
      if (timeOpt) {
        if (excludedFirstWarmupRuns) {
          firstRunsTimer.reset(
              new llvm::Timer("First Runs", "First inference runs"));
          restRunsTimer.reset(
              new llvm::Timer("Rest Inferences", "Rest of the inference runs"));
        } else {
          restRunsTimer.reset(
              new llvm::Timer("Inferences", "All inference runs"));
        }
        bestRunTime.reset(new double);
        *bestRunTime = DBL_MAX;
      }
      unsigned requestCount = miniBatch ? iterationsOpt / miniBatch : 1;
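      // E.g., iterationsOpt == 512 with miniBatch == 64 issues 8 requests of
      // 64 images each; without minibatching, the whole run is a single
      // request.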

      runBenchmark(name, loader, std::move(contexts), requestCount, warmup,
                   restRunsTimer.get(), firstRunsTimer.get(),
                   bestRunTime.get());
      if (timeOpt) {
        double wallTime = restRunsTimer->getTotalTime().getWallTime();
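        // The divisor counts every item restRunsTimer covered: iterationsOpt
        // benchmark items plus warmup items, minus the excluded first runs,
        // which were accumulated in firstRunsTimer instead.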
        llvm::outs() << llvm::formatv(
            "Average wall time per item (s): {0:f4}\n",
            wallTime / (iterationsOpt + warmup - excludedFirstWarmupRuns));
        llvm::outs() << llvm::formatv(
            "            Best wall time (s): {0:f4}\n", *bestRunTime);
      }
    }

    if (profilingGraph()) {
      loader.generateAndSerializeProfilingInfos(bindings);
    }

    if (!tracePath.empty()) {
      Error err = loader.getHostManager()->stopDeviceTrace();
      if (err) {
        LOG(INFO) << "Failed to stop device trace.";
        numErrors = 1;
        return;
      } else {
        traceContext->merge(loader.getHostManager()->getTraceContext());
      }
    }
  };

  // We will force single-threaded execution if:
  // - Minibatch mode and runAllInputsOnAllDevices are disabled;
  // - We are going to emit a bundle and not run inference;
  // - We are collecting an inference profile.
  // Otherwise, there can be several minibatches of equal size.
  const bool multiThreadingAllowed =
      (runAllInputsOnAllDevices || miniBatchMode) && !emittingBundle() &&
      !profilingGraph();
  const size_t numBatches =
      miniBatchMode ? inputImageFilenames_[0].size() / miniBatch : 1u;
  const size_t numThreads =
      runAllInputsOnAllDevices
          ? miniBatchThreads
          : (multiThreadingAllowed
                 ? std::min(size_t(miniBatchThreads), numBatches)
                 : 1u);
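  // E.g., 64 images with miniBatch == 8 give numBatches == 8; with
  // miniBatchThreads == 4 this yields 4 threads of 2 minibatches each.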
  if (miniBatchThreads > 1 && !multiThreadingAllowed) {
    llvm::outs() << "WARNING: multi-threaded execution is not possible. Make "
                    "sure that the minibatch size is specified and you are "
                    "not trying to dump a profile or emit a bundle.\n";
  }

  llvm::outs() << "Running " << numThreads << " thread(s).\n";
  // Avoid default-constructing placeholder threads; reserve capacity and
  // push_back only the real worker threads below.
  std::vector<std::thread> threads;
  threads.reserve(numThreads);
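  // Ceiling division: e.g., 7 minibatches across 3 threads give 3 minibatches
  // per thread, with the last thread taking the remainder (1 minibatch).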
  const size_t miniBatchesPerThread =
      (numBatches + numThreads - 1) / numThreads;
  for (size_t i = 0; i < numThreads; i++) {
    size_t startIndex, endIndex;
    if (!runAllInputsOnAllDevices && numThreads > 1) {
      startIndex = i * miniBatchesPerThread * miniBatch;
      endIndex = std::min((i + 1) * miniBatchesPerThread * miniBatch,
                          inputImageFilenames_[0].size());
    } else {
      startIndex = 0;
      endIndex = inputImageFilenames_[0].size();
    }
    auto worker = [&processImageRange, startIndex, endIndex, i]() {
      processImageRange(startIndex, endIndex, i);
    };
    threads.push_back(std::thread(worker));
  }

  for (auto &t : threads) {
    if (t.joinable()) {
      t.join();
    }
  }

  if (!tracePath.empty()) {
    traceContext->dump(tracePath, appName_);
  }

  return numErrors;
}