1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include "glow/Base/Image.h" |
18 | #include "glow/Converter/TypeAToTypeBFunctionConverter.h" |
19 | #include "glow/Graph/Nodes.h" |
20 | #include "glow/Importer/Caffe2ModelLoader.h" |
21 | #include "glow/Importer/ONNXModelLoader.h" |
22 | #include "glow/Support/Support.h" |
23 | |
24 | #include "llvm/ADT/StringSwitch.h" |
25 | #include "llvm/Support/CommandLine.h" |
26 | #include "llvm/Support/FileSystem.h" |
27 | #include "llvm/Support/Format.h" |
28 | #include "llvm/Support/Timer.h" |
29 | #include "llvm/Support/raw_ostream.h" |
30 | |
31 | #include <atomic> |
32 | #include <cfloat> |
33 | #include <fstream> |
34 | #include <future> |
35 | #include <iostream> |
36 | #include <memory> |
37 | #include <mutex> |
38 | #include <queue> |
39 | #include <sstream> |
40 | #include <thread> |
41 | |
42 | #include "ExecutorCoreHelperFunctions.h" |
43 | |
using namespace glow;

/// Image loader options.
llvm::cl::OptionCategory executorCat("Executor Options");

// Either positional or list file or input-image-list-file will ultimately
// create a double vector containing an image list per input, stored here.
VecVec<std::string> inputImageFilenames_;

/// Positional image filenames; a single '-' enables streaming mode where new
/// filename lists are read from stdin after a one-time compile.
llvm::cl::list<std::string> inputImageFilenamesOpt(
    llvm::cl::Positional,
    llvm::cl::desc("<input files> (note: specifying '-' enables streaming "
                   "mode, where the model is compiled once and then can be run "
                   "many times with new input filenames passed via stdin)"),
    llvm::cl::ZeroOrMore);

/// Directories whose regular files are all treated as input images
/// (repeatable; see parseInputDir below).
llvm::cl::list<std::string> inputImageDirs(
    "input-image-dir",
    llvm::cl::desc(
        "Name of directory containing images. Can be used multiple times."),
    llvm::cl::value_desc("dir_name"), llvm::cl::Optional, llvm::cl::ZeroOrMore,
    llvm::cl::cat(executorCat));

/// External storage backing the -input-image-list-file option so other
/// translation units can read (and initExecutorCoreCmdArgVars can reset) it.
std::vector<std::string> inputImageListFileOpt;
static llvm::cl::list<std::string, std::vector<std::string>>
    inputImageListFileF(
        "input-image-list-file",
        llvm::cl::desc(
            "List of files containing list of images (one image per line)"),
        llvm::cl::value_desc("string_name"), llvm::cl::ZeroOrMore,
        llvm::cl::CommaSeparated, llvm::cl::cat(executorCat),
        llvm::cl::location(inputImageListFileOpt));

/// File listing raw input tensors, one per line.
llvm::cl::opt<std::string> inputTensorListFile(
    "input-tensor-list-file",
    llvm::cl::desc(
        "Name of the file containing list of tensors (one tensor per line)"),
    llvm::cl::value_desc("string_name"), llvm::cl::Optional,
    llvm::cl::cat(executorCat));

/// Mini-batch size; the model is compiled for this batch size and the input
/// image list must be a multiple of it.
llvm::cl::opt<unsigned> miniBatch(
    "minibatch",
    llvm::cl::desc(
        "Size of mini-batches. Split the input image list into a set of "
        "mini-batches. The input model is compiled for an input tensor batch "
        "size equal to the specified mini-batch size and mini-batches of "
        "images are inferred separately. The number of input images must be a "
        "multiple of the mini-batch size. By default, mini-batch is set to 1."),
    llvm::cl::Optional, llvm::cl::init(1), llvm::cl::cat(executorCat));

/// Upper bound on worker threads used to process mini-batches in parallel.
llvm::cl::opt<unsigned> miniBatchThreads(
    "minibatch-threads",
    llvm::cl::desc(
        "Max number of threads used to process mini-batches. If "
        "minibatch-threads is greater than 1, and we are working in minibatch "
        "mode, then several worker threads are created to process the "
        "minibatches. Then the minibatches are distributed between these "
        "threads, and each thread processes its set of minibatches "
        "independently."
        " By default, the number of threads is 1, and no parallelization is "
        "happening. These are things to be aware of:\n"
        "\t- The actual number of worker threads can be less than specified by "
        "this option (for example, if specified number of threads is greater "
        "than number of minibatches to process). Their number may also be "
        "forced to 1 in some cases (see below);\n"
        "\t- Currently, dumping profile and emitting bundle force "
        "single-threaded mode;\n"
        "\t- If a model has operations that make reduction across images in "
        "the batch, it is a user's responsibility to make sure that this model "
        "is not processed in multi-threaded mode. Otherwise, the correctness "
        "of results is not guaranteed."),
    llvm::cl::Optional, llvm::cl::init(1), llvm::cl::cat(executorCat));

/// Number of pre-allocated ExecutionContexts used by setupContextPool.
llvm::cl::opt<unsigned> poolSize(
    "pool-size",
    llvm::cl::desc("Size of context pool for the benchmark; default:10"),
    llvm::cl::Optional, llvm::cl::init(10), llvm::cl::cat(executorCat));

/// When set, the network's input/output placeholders are converted to fp16
/// (see buildAndCompileAndGetInAndOutPair).
llvm::cl::opt<bool> convertInAndOutToFp16(
    "convert-inout-to-fp16",
    llvm::cl::desc(
        "Convert the input and output tensors of the network to fp16"),
    llvm::cl::cat(executorCat));

/// Destination for trace logs; tracing is active iff this is non-empty.
llvm::cl::opt<std::string> tracePath("trace-path",
                                     llvm::cl::desc("Write trace logs to disk"),
                                     llvm::cl::init(""),
                                     llvm::cl::cat(executorCat));

/// Adds per-operator instrumentation at compile time (see cctx.backendOpts).
llvm::cl::opt<bool>
    autoInstrument("auto-instrument",
                   llvm::cl::desc("Add instrumentation for operator tracing"),
                   llvm::cl::Optional, llvm::cl::init(false),
                   llvm::cl::cat(executorCat));

/// Bit-field trace verbosity, forwarded to each context's TraceContext.
llvm::cl::opt<unsigned> traceLevel(
    "trace-level",
    llvm::cl::desc(
        "Set tracing level (bit-field, see TraceEvents.h for details)"),
    llvm::cl::Optional, llvm::cl::init((unsigned)TraceLevel::NONE),
    llvm::cl::cat(executorCat));

/// Number of warm-up passes run before timed benchmark passes.
llvm::cl::opt<unsigned> warmup(
    "warmup", llvm::cl::desc("How many passes to do to warm everything up"),
    llvm::cl::init(0), llvm::cl::value_desc("W"), llvm::cl::cat(executorCat));

/// How many initial warm-up runs are excluded from the reported total time
/// (consumed by the timers in runInference).
llvm::cl::opt<unsigned> excludedFirstWarmupRuns(
    "excluded-first-warmup-runs",
    llvm::cl::desc("Exclude the time of the given number of first warmup runs "
                   "from the total time"),
    llvm::cl::Optional, llvm::cl::init(0), llvm::cl::cat(executorCat));

/// Load every input image into memory before inference starts.
llvm::cl::opt<bool>
    preloadAllImages("preload-all-images",
                     llvm::cl::desc("Pre-load all images before inference"),
                     llvm::cl::init(false), llvm::cl::cat(executorCat));

/// Name of the output placeholder to postprocess when the model has several
/// outputs (see getOutputForPostProcessing).
llvm::cl::opt<std::string> modelOutputName(
    "output-name",
    llvm::cl::desc("The name of the variable for the model's output."),
    llvm::cl::value_desc("string_name"), llvm::cl::Optional,
    llvm::cl::cat(executorCat));

/// Testing aid: repeat one batch n times instead of consuming the input set.
llvm::cl::opt<unsigned> repeatSingleBatchCount(
    "repeat-single-batch-count",
    llvm::cl::desc(
        "Repeat a single batch input n times. Used for testing purposes. If "
        "used without minibatch then the whole input set is used as the batch "
        "size and repeated n times. Otherwise the first minibatch is repeated "
        "and all other inputs are ignored."),
    llvm::cl::init(0), llvm::cl::cat(executorCat));
175 | |
176 | /// Read all images from \p inputImageDir into \p imageFilenames. |
177 | void parseInputDir(const std::string &inputImageDir, |
178 | std::vector<std::string> &imageFilenames) { |
179 | CHECK(llvm::sys::fs::is_directory(inputImageDir)) |
180 | << strFormat("Path '%s' is not a directory!" , inputImageDir.data()); |
181 | std::error_code code; |
182 | llvm::sys::fs::directory_iterator dirIt(inputImageDir, code); |
183 | std::vector<std::string> imageFiles; |
184 | while (!code && dirIt != llvm::sys::fs::directory_iterator()) { |
185 | auto path = dirIt->path(); |
186 | if (llvm::sys::fs::is_regular_file(path)) { |
187 | imageFiles.emplace_back(path); |
188 | } |
189 | dirIt.increment(code); |
190 | } |
191 | // The paths retrieved by the directory iterator are not sorted. |
192 | // Sort the paths alphabetically in increasing order and add them |
193 | // to the overall list of image filenames. |
194 | std::sort(imageFiles.begin(), imageFiles.end()); |
195 | for (auto &imageFile : imageFiles) { |
196 | imageFilenames.push_back(imageFile); |
197 | } |
198 | } |
199 | |
200 | /// Clear external storage for cmd args defined in Loader. |
201 | void initExecutorCoreCmdArgVars() { |
202 | inputImageFilenames_.clear(); |
203 | inputImageListFileOpt.clear(); |
204 | modelInputsOpt.clear(); |
205 | } |
206 | |
/// Do any special processing for cmd args defined in ExecutorCore.
/// Intentionally empty: no post-parse fixups are currently needed here —
/// presumably kept as a hook so callers have a uniform init sequence
/// (TODO confirm against the callers in ExecutorCore).
void processExecutorCoreCmdArgVars() {}
209 | |
210 | /// Read all images from \p inputListFile in to \p imageFilenames. |
211 | void parseInputList(const std::string &inputListFile, |
212 | std::vector<std::string> &imageFilenames) { |
213 | |
214 | std::ifstream inFile; |
215 | inFile.open(inputListFile); |
216 | if (!inFile.good()) { |
217 | llvm::outs() << "Could not open input-image-list-file: " << inputListFile |
218 | << ", exiting.\n" ; |
219 | std::exit(1); |
220 | } |
221 | |
222 | while (!inFile.eof()) { |
223 | std::string img; |
224 | getline(inFile, img); |
225 | if (!img.empty()) { |
226 | imageFilenames.push_back(img); |
227 | } |
228 | } |
229 | inFile.close(); |
230 | } |
231 | |
232 | /// Write a prompt to stdout asking for filenames for classification. Read in |
233 | /// those filenames and add them to \p filenames. \p filenames is cleared before |
234 | /// adding the new set of filenames from stdin. \returns false if the passed in |
235 | /// line was empty. |
236 | bool getNextStdinImageFilenames(VecVec<std::string> &filenamesVec) { |
237 | std::vector<std::string> filenames; |
238 | // Clear out old filenames before adding new ones. |
239 | filenamesVec.clear(); |
240 | |
241 | llvm::outs() << "Enter image filenames to classify: " ; |
242 | |
243 | // Add in each filename to the vector. |
244 | std::string filenamesRaw; |
245 | getline(std::cin, filenamesRaw); |
246 | std::istringstream iss(filenamesRaw); |
247 | std::string filename; |
248 | while (iss >> filename) { |
249 | filenames.push_back(filename); |
250 | } |
251 | if (!filenames.empty()) { |
252 | filenamesVec.push_back(filenames); |
253 | } |
254 | return !filenames.empty(); |
255 | } |
256 | |
257 | /// Generate in \p imageLists the list of lists (for each input) of filenames |
258 | /// corresponding to the next mini-batch of size \p miniBatchSize extracted from |
259 | /// \p totalImageLists at index \p minibatchIndex. /returns true if the index is |
260 | /// valid, false otherwise. In case the function returns true, \p minibatchIndex |
261 | /// is incremented by \p miniBatchSize. Stop upon reaching \p miniBatchLimit. |
262 | bool getNextMiniBatch(VecVec<std::string> &imageLists, |
263 | VecVecRef<std::string> totalImageLists, |
264 | size_t &miniBatchIndex, size_t miniBatchSize, |
265 | size_t miniBatchLimit) { |
266 | if (miniBatchIndex >= miniBatchLimit) { |
267 | return false; |
268 | } |
269 | |
270 | imageLists.clear(); |
271 | for (const auto &totalImageList : totalImageLists) { |
272 | size_t batchIdx = miniBatchIndex; |
273 | size_t batchSize = miniBatchSize; |
274 | size_t endIndex = batchIdx + batchSize; |
275 | std::vector<std::string> imageList; |
276 | for (size_t index = batchIdx; index < endIndex; index++) { |
277 | imageList.push_back(totalImageList[index]); |
278 | } |
279 | imageLists.push_back(imageList); |
280 | } |
281 | miniBatchIndex += miniBatchSize; |
282 | return true; |
283 | } |
284 | |
285 | Placeholder * |
286 | getOutputForPostProcessing(const llvm::StringMap<Placeholder *> &PHM) { |
287 | if (PHM.size() == 1) { |
288 | return PHM.begin()->second; |
289 | } |
290 | if (modelOutputName.empty()) { |
291 | static bool warningPrinted = false; |
292 | if (!warningPrinted) { |
293 | warningPrinted = true; |
294 | llvm::outs() |
295 | << "WARNING: Multiple outputs found and none is selected. " |
296 | "Any postprocessing will be DISABLED!\n" |
297 | "Use '-output-name' to select output for postprocessing\n" ; |
298 | } |
299 | return nullptr; |
300 | } |
301 | auto ph = PHM.find(modelOutputName); |
302 | if (ph == PHM.end()) { |
303 | static bool warning_printed = false; |
304 | if (!warning_printed) { |
305 | warning_printed = true; |
306 | llvm::outs() << "WARNING: Name specified not found in outputs. " |
307 | "Any postprocessing will be DISABLED: " |
308 | << modelOutputName << "\n" ; |
309 | } |
310 | return nullptr; |
311 | } |
312 | return ph->second; |
313 | } |
314 | |
315 | /// Given \p loader, the \p bindings, and \p inputImageType, build the graph |
316 | /// from the provided protobuf file found via \p loader. Then compiles and |
317 | /// \returns a pair of pointers to the input Placeholder and output Nodes Map. |
318 | std::pair<llvm::StringMap<Placeholder *>, llvm::StringMap<Placeholder *>> |
319 | buildAndCompileAndGetInAndOutPair(Loader &loader, PlaceholderBindings &bindings, |
320 | llvm::ArrayRef<TypeRef> inputImageType) { |
321 | // Load model. |
322 | loader.loadModel(&bindings, inputImageType); |
323 | |
324 | // Allocate tensors to back all inputs and outputs. |
325 | bindings.allocate(loader.getModule()->getPlaceholders()); |
326 | |
327 | // Convert the placeholders for now. The backing Tensor's data will be |
328 | // converted later. |
329 | if (convertInAndOutToFp16) { |
330 | PrecisionConfiguration precConfig; |
331 | TypeAToTypeBFunctionConverter converter(*loader.getFunction(), |
332 | ElemKind::FloatTy, |
333 | ElemKind::Float16Ty, precConfig); |
334 | for (auto *placeholder : loader.getModule()->getPlaceholders()) { |
335 | converter.convertPlaceholder(*placeholder, &bindings); |
336 | } |
337 | } |
338 | |
339 | // Compile the model, and perform quantization/emit a bundle/dump debug info |
340 | // if requested from command line. |
341 | CompilationContext cctx = loader.getCompilationContext(); |
342 | cctx.bindings = &bindings; |
343 | cctx.backendOpts.autoInstrument = autoInstrument; |
344 | loader.compile(cctx); |
345 | |
346 | // Get input/output placeholder maps. |
347 | llvm::StringMap<Placeholder *> inpMap = loader.getInputPlaceholderMap(); |
348 | llvm::StringMap<Placeholder *> outMap = loader.getOutputPlaceholderMap(); |
349 | return std::make_pair(inpMap, outMap); |
350 | } |
351 | |
/// Setup the pool of contexts needed for a benchmark run.
/// \p outputPHV lists the output placeholders each context must allocate;
/// \p inputImagePH and \p inputImageData give the input placeholder and a
/// tensor whose type sizes each context's private input tensor.
/// NOTE(review): \p outputPHV is taken by value, copying the vector on every
/// call; a const reference would avoid that — confirm against the header
/// declaration before changing the signature.
UniquePtrVec<ExecutionContext>
setupContextPool(const std::vector<Placeholder *> outputPHV,
                 Placeholder *inputImagePH, Tensor &inputImageData) {
  UniquePtrVec<ExecutionContext> contexts;
  // Size of the pool, the smaller of poolSize or the actual number of
  // requests. (If miniBatch were 0, a single context is created.)
  unsigned iterations =
      miniBatch ? std::min(int(poolSize), int(iterationsOpt / miniBatch)) : 1;
  // Setup pool of inference requests to be run.
  for (unsigned i = 0; i < iterations; i++) {
    auto newContext = glow::make_unique<ExecutionContext>();
    newContext->setTraceContext(glow::make_unique<TraceContext>(traceLevel));
    auto ph = newContext->getPlaceholderBindings();
    // Each context owns an input tensor of the same type as the sample data.
    ph->insert(inputImagePH, Tensor(inputImageData.getType()));
    for (auto *outputPH : outputPHV) {
      ph->allocate(outputPH);
    }
    contexts.push_back(std::move(newContext));
  }
  return contexts;
}
374 | |
/// Serializes merges of per-run trace events into the global \p traceContext.
std::mutex eventLock;
/// Global accumulator for trace events across all runs (consumed elsewhere).
std::unique_ptr<TraceContext> traceContext;

/// Run inference request on HostManager. This method builds a runNetwork
/// request for the \p hostManager, this is a recursive call, in the callback
/// provided to the HostManager this function can call itself if the desired
/// number of warmups and requests has not yet been dispatched.
/// \p inflight counts requests currently in the pipeline; the last one to
/// finish fulfills \p runPromise. \p dispatched counts requests still to be
/// issued. \p firstRunsTimer / \p restRunsTimer split timing around the first
/// `excludedFirstWarmupRuns` runs; \p bestRunTime, if non-null, receives the
/// smallest single-run wall time observed.
static void runInference(runtime::HostManager *hostManager, std::string name,
                         std::unique_ptr<ExecutionContext> batch,
                         std::promise<void> &runPromise,
                         std::atomic<unsigned> &inflight,
                         std::atomic<int> &dispatched, unsigned warmUp,
                         llvm::Timer *restRunsTimer = nullptr,
                         llvm::Timer *firstRunsTimer = nullptr,
                         double *bestRunTime = nullptr) {
  // Counts completed runs across ALL threads/requests (function-local static).
  static std::atomic<unsigned> firstRunsDone(0);
  auto start = TraceEvent::now();
  // Start the "first runs" timer while the excluded warmup runs are still in
  // progress; afterwards start the "rest" timer exactly once (hasTriggered).
  if (firstRunsTimer != nullptr && !firstRunsTimer->isRunning() &&
      firstRunsDone < excludedFirstWarmupRuns) {
    firstRunsTimer->startTimer();
  } else if (restRunsTimer != nullptr &&
             firstRunsDone >= excludedFirstWarmupRuns &&
             !restRunsTimer->hasTriggered()) {
    restRunsTimer->startTimer();
  }

  // Per-request timer used to track the best (fastest) single run.
  // NOTE(review): raw new/delete — the timer leaks if the callback below is
  // never invoked; consider RAII if HostManager can drop requests.
  llvm::Timer *bestRunTimer = nullptr;
  if (bestRunTime != nullptr) {
    bestRunTimer = new llvm::Timer("Best Run", "Best Inference Run");
    bestRunTimer->startTimer();
  }

  hostManager->runNetwork(
      name, std::move(batch),
      // The callback owns re-dispatching: it may call runInference again.
      [&runPromise, &inflight, &dispatched, hostManager, name, warmUp,
       restRunsTimer, firstRunsTimer, bestRunTime, bestRunTimer,
       start](runtime::RunIdentifierTy, Error err,
              std::unique_ptr<ExecutionContext> contextPtr) {
        EXIT_ON_ERR(std::move(err));
        if (!tracePath.empty()) {
          if (!warmUp) {
            // Guard the shared global traceContext.
            std::lock_guard<std::mutex> l(eventLock);
            // Temporary (AIBench relies on inference_e2e metric)
            // Later we switch AIBench to the metric from
            // HostManager::dispatchNextRun()
            traceContext->logCompleteTraceEvent("inference_e2e",
                                                TraceLevel::RUNTIME, start);
            // Merge this run's TraceEvents into the global
            // TraceContext.
            traceContext->merge(contextPtr->getTraceContext());
          } else {
            // Warmup runs: discard their trace events entirely.
            contextPtr->getTraceContext()->getTraceEvents().clear();
          }
        }
        firstRunsDone++;
        // Stop the "first runs" timer when the last excluded warmup finishes.
        if (firstRunsTimer != nullptr && firstRunsTimer->isRunning() &&
            firstRunsDone == excludedFirstWarmupRuns) {
          firstRunsTimer->stopTimer();
        }
        // Record this run's wall time and keep the minimum seen so far.
        if (bestRunTime != nullptr) {
          bestRunTimer->stopTimer();
          double wallTime = bestRunTimer->getTotalTime().getWallTime();
          if (wallTime < *bestRunTime)
            *bestRunTime = wallTime;
          bestRunTimer->clear();
          delete bestRunTimer;
        }

        // Kick off another run.
        if (dispatched.fetch_sub(1) > 0) {
          inflight++;
          // Recurse with one fewer warmup credit (clamped at 0).
          runInference(hostManager, name, std::move(contextPtr), runPromise,
                       inflight, dispatched, warmUp > 0 ? warmUp - 1 : 0,
                       restRunsTimer, firstRunsTimer, bestRunTime);
        } else if (restRunsTimer != nullptr) {
          restRunsTimer->stopTimer();
        }

        // Last in-flight request releases the waiter in runBenchmark.
        if (--inflight == 0) {
          runPromise.set_value();
        }
      });
}
458 | |
459 | /// Run the requested number of benchmark requests \p requestCount prepended by |
460 | /// \p warmUp cycles |
461 | /// through the HostManager from the \p loader using the provided context pool |
462 | /// \p contexts and wait for all runs to complete. |
463 | void runBenchmark(std::string name, Loader &loader, |
464 | std::vector<std::unique_ptr<ExecutionContext>> contexts, |
465 | unsigned requestCount, unsigned warmUp, |
466 | llvm::Timer *restRunsTimer = nullptr, |
467 | llvm::Timer *firstRunsTimer = nullptr, |
468 | double *bestRunTime = nullptr) { |
469 | runtime::HostManager *hostManager = loader.getHostManager(); |
470 | std::atomic<unsigned> inflight(0); |
471 | std::atomic<int> dispatched(requestCount + warmUp * contexts.size()); |
472 | std::promise<void> runPromise; |
473 | auto fut = runPromise.get_future(); |
474 | |
475 | // Kick off initial pool of requests. |
476 | for (size_t i = 0, e = contexts.size(); i < e; i++) { |
477 | auto batch = std::move(contexts[i]); |
478 | inflight++; |
479 | dispatched--; |
480 | runInference(hostManager, name, std::move(batch), runPromise, inflight, |
481 | dispatched, warmUp, restRunsTimer, firstRunsTimer, |
482 | bestRunTime); |
483 | } |
484 | |
485 | // Wait for all to finish. |
486 | fut.wait(); |
487 | } |
488 | |