/**
 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "glow/Flags/Flags.h"

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/CommandLine.h"
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <map>

/* Flags should generally go in the most specific namespace that makes sense.
 * That is, if a flag is specific to torch_glow, it should go in the
 * glow::torch_glow::flags namespace. Flags that are generic in nature but are
 * only supported in a specific context should go in that domain's namespace.
 * An example is AcceptUnarySLS living in the glow::nnpi::flags namespace, as
 * that's the only domain for which it is supported. In the same vein, flags
 * should be made as generic as possible.
 */
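/* For illustration, a hypothetical backend-agnostic flag (the name below is
 * made up for this sketch, not a real Glow flag) would be declared here as:
 *
 *   namespace glow {
 *   namespace flags {
 *   bool EnableMyFeature = false;
 *   } // namespace flags
 *   } // namespace glow
 *
 * and then registered with gflags further below via the DEFINE_bool /
 * DEFINE_validator pattern used throughout this file.
 */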
namespace glow {
namespace flags {

// Generic Constants
int32_t NumDevices = 1;
bool ScanDevices = false;
bool SaturateHost = false;
bool EnableQuantParamChanges = true;
size_t MaxActiveRequests = 48;
size_t MaxActiveRequestsPerInstance = 48;
size_t MaxQueueSize = 200;
size_t ExecutorThreads = 10;
bool DelayAndRecordConstantModification = false;
bool UseTrackedDummyQuantParams = false;
bool EnablePartialTensors = true;
bool UseCustomOpsForExport = true;
std::string BackendSpecificOpts = "";
bool EnableLoadBalancedPartitioning = true;
bool SkipProvisioning = false;
bool DisableLayoutVerifying = false;
bool DisableFreeCompilationResource = false;
bool SinkTanhBelowConcat = false;

// FP16 Constants
bool ConvertToFP16 = false;
bool SkipBiasFp32tofp16Convert = false;
bool ConvertPlaceholdersToFP16 = false;
bool ConvertConstantsToFP16 = true;
bool ConvertFusedScaleOffsetToFP16 = false;
bool ClipToFP16 = false;
bool SkipInputsOnClipToFP16 = true;
bool ForceSLSToFP16Accum = true;
bool ClipQuantRangeToFP16 = false;
bool ClipZeroScaleFP16 = false;

// FP32 Constants
bool ConvertFusedScaleOffsetToFP32 = false;

// Debug Constants
int32_t NumDebugTracesPerDump = 100;
bool DumpDebugTraces = false;
bool LogPartition = true;
bool DumpPartition = false;
bool DumpCompilationLog = false;
bool DumpBackendSpecificIRJSON = false;
bool DumpGraph = false;
std::string DumpGraphPath = "./";
bool DumpInitialLoadedGraph = false;

// Sparse NN Partitioning Scheme Constants
int32_t SparseNNPartitioningSchemeNumCards = 1;
int64_t SparseNNPartitioningSchemeSLSTableKBytesPerCard = 1;
int32_t SparseNNPartitioningSchemeNumCoresSLS = 1;
int32_t SparseNNPartitioningSchemeNumCoresOther = 1;
bool UseSparseNNPartitioningScheme = false;
bool SparseNNPartitioningAddSLSConcats = false;
bool SparseNNPartitioningBalancePerfModel = false;
bool SparseNNPartitioningPairLNWithSLS = false;
bool SparseNNPartitioningPairTileWithSLS = false;
std::string SparseNNPartitioningPairSLSWith = "";
int32_t SparseNNPartitioningConcatSplitSize = 1;
bool SparseNNParallelizeReshapeOnBatchDim = true;

// DAG Optimizer Constants
bool UseDAGOptimizer = false;
int32_t DAGOptimizerNumParallelChunks = 1;
std::string DAGOptimizerPlacementTaggingAlgorithm = "None";
std::string DAGOptimizerParallelizationTaggingAlgorithm = "None";

} // namespace flags
} // namespace glow

namespace glow {
namespace nnpi {
namespace flags {
int32_t ModelParallelSplitAlignment = 1;
int32_t NumParallelChunks = 0; // Zero val for an ugly hack in NNPI.cpp
bool LowerAllBatchMatMul = false;
bool AcceptUnarySLS = false;
bool SpecializeAllOneSLS = false;
bool DisableTransforms = false;
bool EnableCustomIAKernels = false;
bool EnableCustomDSPKernels = false;
bool DumpCompilerData = false;
bool UsePerPartitionIcetConfig = false;
std::string InjectedIAOpKernelPath = "";
bool DumpCustomKernelFiles = false;

} // namespace flags
} // namespace nnpi
} // namespace glow

namespace glow {
namespace interpreter {
namespace flags {
bool LowerBatchMatMul = true;
bool LowerLayerNormalization = true;
} // namespace flags
} // namespace interpreter
} // namespace glow

namespace glow {
namespace torch_glow {
namespace flags {
bool ImaginaryFlag = false; // Placeholder flag
} // namespace flags
} // namespace torch_glow
} // namespace glow

namespace glow {
namespace onnxifi {
namespace flags {
std::string BackendName = "";
bool SaveModel = false;
bool SaveIO = false;
bool SaveDAG = false;
bool SaveDAGWithConstants = false;
bool SaveDAGInZipMode = false;
} // namespace flags
} // namespace onnxifi
} // namespace glow

namespace glow {
namespace runtime {
namespace flags {

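// Note: the device memory amounts below are expressed in KiB, so a value of
// 7 << 20 KiB corresponds to 7 GiB (2^20 KiB per GiB).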
unsigned CPUMemory = 0;
unsigned HabanaMemory = 7 << 20;
unsigned NNPIMemory = 16 << 20;
unsigned NNPITimeoutMs = 0;

std::string AvailableDevices = "";
unsigned InterpreterMemory = 0;
bool EnableP2P = false;
bool EnableDRT = false;
unsigned DeviceInitTimeoutMs = 5000;
unsigned SanitizeInputsPercent = 0;
uint64_t BigTableThresholdBytes = 104857600; // 100MB
unsigned NumCompilationThreads = 1;
} // namespace flags
} // namespace runtime
} // namespace glow

/*
 * Note: Validators are used to assign the flag values rather than direct
 * assignment because direct assignment is subject to the static
 * initialization order fiasco.
 */
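/* Each flag below therefore follows the same pattern: the gflags default is
 * read from the namespaced variable above, and the validator copies any
 * parsed value back into that variable. Schematically (glow_some_flag and
 * SomeFlag are placeholder names for this sketch):
 *
 *   DEFINE_bool(glow_some_flag, glow::flags::SomeFlag, "description");
 *   DEFINE_validator(glow_some_flag, [](const char *, bool val) {
 *     glow::flags::SomeFlag = val;
 *     return true;
 *   });
 */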
DEFINE_int32(glow_num_devices, glow::flags::NumDevices,
             "Number of devices for Glow backend");
DEFINE_validator(glow_num_devices, [](const char *, int32_t val) {
  glow::flags::NumDevices = val;
  return true;
});
DEFINE_bool(glow_scan_devices, glow::flags::ScanDevices,
            "Scan available devices for Glow backend");
DEFINE_validator(glow_scan_devices, [](const char *, bool val) {
  glow::flags::ScanDevices = val;
  return true;
});
DEFINE_int32(glow_snn_partitioning_num_cards,
             glow::flags::SparseNNPartitioningSchemeNumCards,
             "Number of devices to distribute tables across in SparseNN "
             "partitioning");
DEFINE_validator(glow_snn_partitioning_num_cards,
                 [](const char *, int32_t val) {
                   glow::flags::SparseNNPartitioningSchemeNumCards = val;
                   return true;
                 });
DEFINE_int32(glow_snn_partitioning_kbytes_per_card,
             glow::flags::SparseNNPartitioningSchemeSLSTableKBytesPerCard,
             "KBytes per card used for SLS tables in SparseNN partitioning");
DEFINE_validator(glow_snn_partitioning_kbytes_per_card, [](const char *,
                                                           int32_t val) {
  glow::flags::SparseNNPartitioningSchemeSLSTableKBytesPerCard = val;
  return true;
});
DEFINE_int32(
    glow_snn_partitioning_num_cores_sls,
    glow::flags::SparseNNPartitioningSchemeNumCoresSLS,
    "Number of cores to assign to SLS partition in SparseNN partitioning");
DEFINE_validator(glow_snn_partitioning_num_cores_sls,
                 [](const char *, int32_t val) {
                   glow::flags::SparseNNPartitioningSchemeNumCoresSLS = val;
                   return true;
                 });
DEFINE_int32(
    glow_snn_partitioning_num_cores_other,
    glow::flags::SparseNNPartitioningSchemeNumCoresOther,
    "Number of cores to assign to non-SLS partition in SparseNN partitioning");
DEFINE_validator(glow_snn_partitioning_num_cores_other,
                 [](const char *, int32_t val) {
                   glow::flags::SparseNNPartitioningSchemeNumCoresOther = val;
                   return true;
                 });
DEFINE_bool(glow_dump_debug_traces, glow::flags::DumpDebugTraces,
            "Dump traces to /tmp");
DEFINE_validator(glow_dump_debug_traces, [](const char *, bool val) {
  glow::flags::DumpDebugTraces = val;
  return true;
});
DEFINE_int32(glow_num_debug_traces_per_dump, glow::flags::NumDebugTracesPerDump,
             "Maximum number of traces in each debug dump.");
DEFINE_validator(glow_num_debug_traces_per_dump, [](const char *, int32_t val) {
  glow::flags::NumDebugTracesPerDump = val;
  return true;
});
DEFINE_string(glow_onnxifi_backend, glow::onnxifi::flags::BackendName,
              "Glow backend used for ONNXIFI");
DEFINE_validator(glow_onnxifi_backend,
                 [](const char *, const std::string &val) {
                   glow::onnxifi::flags::BackendName = val;
                   return true;
                 });
DEFINE_string(
    glow_available_devices, glow::runtime::flags::AvailableDevices,
    "Comma-separated list of devices which should be used, e.g. 2,3,4");
DEFINE_validator(glow_available_devices,
                 [](const char *, const std::string &val) {
                   glow::runtime::flags::AvailableDevices = val;
                   return true;
                 });
DEFINE_bool(glow_global_fp16, glow::flags::ConvertToFP16,
            "Enable fp16 lowering for all ops on the net");
DEFINE_validator(glow_global_fp16, [](const char *, bool val) {
  glow::flags::ConvertToFP16 = val;
  return true;
});
DEFINE_bool(glow_skip_bias_fp32tofp16_convert,
            glow::flags::SkipBiasFp32tofp16Convert,
            "Skip fp32 -> fp16 conversion for Bias in FC");
DEFINE_validator(glow_skip_bias_fp32tofp16_convert, [](const char *, bool val) {
  glow::flags::SkipBiasFp32tofp16Convert = val;
  return true;
});
DEFINE_bool(torch_glow_imaginary_flag, glow::torch_glow::flags::ImaginaryFlag,
            "Placeholder flag for exercising the torch_glow flag plumbing");
DEFINE_validator(torch_glow_imaginary_flag, [](const char *, bool val) {
  glow::torch_glow::flags::ImaginaryFlag = val;
  return true;
});
DEFINE_bool(glow_global_fp16_placeholders,
            glow::flags::ConvertPlaceholdersToFP16,
            "Enable fp16 conversion for Placeholders");
DEFINE_validator(glow_global_fp16_placeholders, [](const char *, bool val) {
  glow::flags::ConvertPlaceholdersToFP16 = val;
  return true;
});
DEFINE_bool(glow_global_fp16_constants, glow::flags::ConvertConstantsToFP16,
            "Enable fp16 conversion for Constants");
DEFINE_validator(glow_global_fp16_constants, [](const char *, bool val) {
  glow::flags::ConvertConstantsToFP16 = val;
  return true;
});
DEFINE_bool(glow_global_fused_scale_offset_fp16,
            glow::flags::ConvertFusedScaleOffsetToFP16,
            "Enable fp16 lowering for all op inputs using fused scale/offset");
DEFINE_validator(glow_global_fused_scale_offset_fp16,
                 [](const char *, bool val) {
                   glow::flags::ConvertFusedScaleOffsetToFP16 = val;
                   return true;
                 });
DEFINE_bool(
    glow_global_fused_scale_offset_fp32,
    glow::flags::ConvertFusedScaleOffsetToFP32,
    "Enable converting scale/offset in SLS input data from fp16 to fp32");
DEFINE_validator(glow_global_fused_scale_offset_fp32,
                 [](const char *, bool val) {
                   glow::flags::ConvertFusedScaleOffsetToFP32 = val;
                   return true;
                 });
DEFINE_bool(
    glow_global_force_sls_fp16_accum, glow::flags::ForceSLSToFP16Accum,
    "Force all SLS/SLWS ops to use FP16 accumulation. True by default.");
DEFINE_validator(glow_global_force_sls_fp16_accum, [](const char *, bool val) {
  glow::flags::ForceSLSToFP16Accum = val;
  return true;
});
DEFINE_bool(glow_enable_quant_param_changes,
            glow::flags::EnableQuantParamChanges,
            "Enable quantization param changes during optimizations");
DEFINE_validator(glow_enable_quant_param_changes, [](const char *, bool val) {
  glow::flags::EnableQuantParamChanges = val;
  return true;
});
DEFINE_bool(glow_use_sparsenn_partitioning_scheme,
            glow::flags::UseSparseNNPartitioningScheme,
            "Force glow to use SparseNN partitioning scheme");
DEFINE_validator(glow_use_sparsenn_partitioning_scheme,
                 [](const char *, bool val) {
                   glow::flags::UseSparseNNPartitioningScheme = val;
                   return true;
                 });
DEFINE_bool(glow_sparsenn_partitioning_add_sls_concats,
            glow::flags::SparseNNPartitioningAddSLSConcats,
            "Add extra concats inside of SLS partitions for more efficient "
            "inter-partition transfers");
DEFINE_validator(glow_sparsenn_partitioning_add_sls_concats,
                 [](const char *, bool val) {
                   glow::flags::SparseNNPartitioningAddSLSConcats = val;
                   return true;
                 });
DEFINE_bool(glow_sparsenn_partitioning_balance_perf_model,
            glow::flags::SparseNNPartitioningBalancePerfModel,
            "Balance SLS tables across cards using a perf model");
DEFINE_validator(glow_sparsenn_partitioning_balance_perf_model,
                 [](const char *, bool val) {
                   glow::flags::SparseNNPartitioningBalancePerfModel = val;
                   return true;
                 });
DEFINE_bool(glow_sparsenn_partitioning_pair_ln_with_sls,
            glow::flags::SparseNNPartitioningPairLNWithSLS,
            "Put layer normalization nodes immediately following SLS into SLS "
            "partitions");
DEFINE_validator(glow_sparsenn_partitioning_pair_ln_with_sls,
                 [](const char *, bool val) {
                   glow::flags::SparseNNPartitioningPairLNWithSLS = val;
                   return true;
                 });
DEFINE_bool(
    glow_sparsenn_partitioning_pair_tile_with_sls,
    glow::flags::SparseNNPartitioningPairTileWithSLS,
    "Put tile nodes immediately following SLS for user embeddings into SLS "
    "partitions");
DEFINE_validator(glow_sparsenn_partitioning_pair_tile_with_sls,
                 [](const char *, bool val) {
                   glow::flags::SparseNNPartitioningPairTileWithSLS = val;
                   return true;
                 });
DEFINE_string(
    glow_sparsenn_partitioning_pair_sls_with,
    glow::flags::SparseNNPartitioningPairSLSWith,
    "Put the specified nodes immediately following SLS into SLS partitions. "
    "Supported for LayerNorm, Tile, Concat, and Tanh nodes. "
    "Comma-separated list of node names, e.g. LayerNorm,Tile.");
DEFINE_validator(glow_sparsenn_partitioning_pair_sls_with,
                 [](const char *, const std::string &val) {
                   glow::flags::SparseNNPartitioningPairSLSWith = val;
                   return true;
                 });
DEFINE_int32(glow_sparsenn_partitioning_concat_split_size,
             glow::flags::SparseNNPartitioningConcatSplitSize,
             "Number of inputs per split when splitting concats that are "
             "moved into SLS partitions");
DEFINE_validator(glow_sparsenn_partitioning_concat_split_size,
                 [](const char *, const int32_t val) {
                   glow::flags::SparseNNPartitioningConcatSplitSize = val;
                   return true;
                 });
DEFINE_bool(glow_sparsenn_parallelize_reshape_on_batch_dim,
            glow::flags::SparseNNParallelizeReshapeOnBatchDim,
            "Force parallelizing the reshape operators on the batch dimension");
DEFINE_validator(glow_sparsenn_parallelize_reshape_on_batch_dim,
                 [](const char *, bool val) {
                   glow::flags::SparseNNParallelizeReshapeOnBatchDim = val;
                   return true;
                 });
DEFINE_bool(glow_clip_fp16, glow::flags::ClipToFP16,
            "Force glow to clip fp16 values to min/max");
DEFINE_validator(glow_clip_fp16, [](const char *, bool val) {
  glow::flags::ClipToFP16 = val;
  return true;
});
DEFINE_bool(glow_clip_fp16_skip_inputs, glow::flags::SkipInputsOnClipToFP16,
            "Force glow to skip clipping fp16 Node inputs to min/max");
DEFINE_validator(glow_clip_fp16_skip_inputs, [](const char *, bool val) {
  glow::flags::SkipInputsOnClipToFP16 = val;
  return true;
});
DEFINE_bool(glow_saturate_host, glow::flags::SaturateHost,
            "Try to use all available devices on the host");
DEFINE_validator(glow_saturate_host, [](const char *, bool val) {
  glow::flags::SaturateHost = val;
  return true;
});
DEFINE_bool(
    glow_save_onnxifi_dag, glow::onnxifi::flags::SaveDAG,
    "Whether to serialize the DAG that has been optimized and partitioned.");
DEFINE_validator(glow_save_onnxifi_dag, [](const char *, bool val) {
  glow::onnxifi::flags::SaveDAG = val;
  return true;
});
DEFINE_bool(glow_save_onnxifi_dag_with_constants,
            glow::onnxifi::flags::SaveDAGWithConstants,
            "Whether to serialize constants in the DAG that has been optimized "
            "and partitioned.");
DEFINE_validator(glow_save_onnxifi_dag_with_constants,
                 [](const char *, bool val) {
                   glow::onnxifi::flags::SaveDAGWithConstants = val;
                   return true;
                 });
DEFINE_bool(glow_save_onnxifi_dag_in_zip_mode,
            glow::onnxifi::flags::SaveDAGInZipMode,
            "Whether to serialize the DAG that has been optimized and "
            "partitioned in ZIP mode.");
DEFINE_validator(glow_save_onnxifi_dag_in_zip_mode, [](const char *, bool val) {
  glow::onnxifi::flags::SaveDAGInZipMode = val;
  return true;
});
DEFINE_bool(
    glow_delay_and_record_constant_modification,
    glow::flags::DelayAndRecordConstantModification,
    "Whether to delay and record constant modification for serialization.");
DEFINE_validator(glow_delay_and_record_constant_modification,
                 [](const char *, bool val) {
                   glow::flags::DelayAndRecordConstantModification = val;
                   return true;
                 });
DEFINE_bool(glow_use_tracked_dummy_quant_params,
            glow::flags::UseTrackedDummyQuantParams,
            "Whether to use uniqued dummy quant params when loading the model, "
            "which are then mapped to loaded names for serialization.");
DEFINE_validator(glow_use_tracked_dummy_quant_params,
                 [](const char *, bool val) {
                   glow::flags::UseTrackedDummyQuantParams = val;
                   return true;
                 });
DEFINE_bool(glow_clip_zero_scale_fp16, glow::flags::ClipZeroScaleFP16,
            "Whether to clip quantization-parameter scales below 1/65504 up "
            "to that value.");
DEFINE_validator(glow_clip_zero_scale_fp16, [](const char *, bool val) {
  glow::flags::ClipZeroScaleFP16 = val;
  return true;
});
DEFINE_bool(glow_clip_quant_range_to_fp16, glow::flags::ClipQuantRangeToFP16,
            "Whether to clip quantization parameters to fit inside the fp16 "
            "range.");
DEFINE_validator(glow_clip_quant_range_to_fp16, [](const char *, bool val) {
  glow::flags::ClipQuantRangeToFP16 = val;
  return true;
});
DEFINE_int32(glow_max_active_requests, glow::flags::MaxActiveRequests,
             "Maximum number of active requests before the host manager "
             "starts queuing");
DEFINE_validator(glow_max_active_requests, [](const char *, int32_t val) {
  glow::flags::MaxActiveRequests = val;
  return true;
});
DEFINE_int32(glow_max_active_requests_per_instance,
             glow::flags::MaxActiveRequestsPerInstance,
             "Maximum number of active requests per instance of a network.");
DEFINE_validator(glow_max_active_requests_per_instance,
                 [](const char *, int32_t val) {
                   glow::flags::MaxActiveRequestsPerInstance = val;
                   return true;
                 });
DEFINE_int32(
    glow_max_queue_size, glow::flags::MaxQueueSize,
    "Maximum number of pending requests in glow's host manager queue before "
    "new requests are rejected");
DEFINE_validator(glow_max_queue_size, [](const char *, int32_t val) {
  glow::flags::MaxQueueSize = val;
  return true;
});
DEFINE_int32(glow_executor_threads, glow::flags::ExecutorThreads,
             "Number of executor threads for host manager");
DEFINE_validator(glow_executor_threads, [](const char *, int32_t val) {
  glow::flags::ExecutorThreads = val;
  return true;
});
DEFINE_bool(glow_partitioner_enable_load_balance,
            glow::flags::EnableLoadBalancedPartitioning,
            "Enable a partitioner pass to optimize for load balance in "
            "addition to memory capacity constraints");
DEFINE_validator(glow_partitioner_enable_load_balance,
                 [](const char *, bool val) {
                   glow::flags::EnableLoadBalancedPartitioning = val;
                   return true;
                 });
DEFINE_bool(glow_skip_provisioning, glow::flags::SkipProvisioning,
            "Skip provisioning. Used for AOT opts or debugging.");
DEFINE_validator(glow_skip_provisioning, [](const char *, bool val) {
  glow::flags::SkipProvisioning = val;
  return true;
});
DEFINE_bool(glow_sink_tanh_below_concat, glow::flags::SinkTanhBelowConcat,
            "Sink tanh ops below concat.");
DEFINE_validator(glow_sink_tanh_below_concat, [](const char *, bool val) {
  glow::flags::SinkTanhBelowConcat = val;
  return true;
});
DEFINE_bool(glow_save_onnxifi_model, glow::onnxifi::flags::SaveModel,
            "Package the glow function and weights right before lowering");
DEFINE_validator(glow_save_onnxifi_model, [](const char *, bool val) {
  glow::onnxifi::flags::SaveModel = val;
  return true;
});
DEFINE_bool(glow_save_onnxifi_io, glow::onnxifi::flags::SaveIO,
            "Save the input and output result around ONNXIFI boundary");
DEFINE_validator(glow_save_onnxifi_io, [](const char *, bool val) {
  glow::onnxifi::flags::SaveIO = val;
  return true;
});
DEFINE_bool(glow_enable_partial_tensors, glow::flags::EnablePartialTensors,
            "Enable support for partial tensors");
DEFINE_validator(glow_enable_partial_tensors, [](const char *, bool val) {
  glow::flags::EnablePartialTensors = val;
  return true;
});
DEFINE_bool(glow_use_custom_ops_for_export, glow::flags::UseCustomOpsForExport,
            "Use custom ONNX ops when exporting Glow protos.");
DEFINE_validator(glow_use_custom_ops_for_export, [](const char *, bool val) {
  glow::flags::UseCustomOpsForExport = val;
  return true;
});
DEFINE_bool(glow_dump_graph, glow::flags::DumpGraph,
            "Dump the glow Graph into files before compilation");
DEFINE_validator(glow_dump_graph, [](const char *, bool val) {
  glow::flags::DumpGraph = val;
  return true;
});
DEFINE_string(glow_dump_graph_path, glow::flags::DumpGraphPath,
              "Directory path for the dumped graphs.");
DEFINE_validator(glow_dump_graph_path,
                 [](const char *, const std::string &val) {
                   glow::flags::DumpGraphPath = val;
                   return true;
                 });
DEFINE_bool(glow_dump_initial_loaded_graph, glow::flags::DumpInitialLoadedGraph,
            "Dump the glow Graph right after onnxification");
DEFINE_validator(glow_dump_initial_loaded_graph, [](const char *, bool val) {
  glow::flags::DumpInitialLoadedGraph = val;
  return true;
});
DEFINE_bool(glow_use_dag_optimizer, glow::flags::UseDAGOptimizer,
            "Whether to call the DAG optimizer");
DEFINE_validator(glow_use_dag_optimizer, [](const char *, bool val) {
  glow::flags::UseDAGOptimizer = val;
  return true;
});
DEFINE_int32(glow_dag_optimizer_num_parallel_chunks,
             glow::flags::DAGOptimizerNumParallelChunks,
             "Number of parallel chunks for DAGOptimizer parallelization");
DEFINE_validator(glow_dag_optimizer_num_parallel_chunks,
                 [](const char *, int32_t val) {
                   glow::flags::DAGOptimizerNumParallelChunks = val;
                   return true;
                 });
DEFINE_string(glow_dag_optimizer_placement_tagging_algorithm,
              glow::flags::DAGOptimizerPlacementTaggingAlgorithm,
              "Name of placement tagging algorithm to run in DAGOptimizer");
DEFINE_validator(glow_dag_optimizer_placement_tagging_algorithm,
                 [](const char *, const std::string &val) {
                   glow::flags::DAGOptimizerPlacementTaggingAlgorithm = val;
                   return true;
                 });

DEFINE_string(
    glow_dag_optimizer_parallelization_tagging_algorithm,
    glow::flags::DAGOptimizerParallelizationTaggingAlgorithm,
    "Name of parallelization tagging algorithm to run in DAGOptimizer");
DEFINE_validator(glow_dag_optimizer_parallelization_tagging_algorithm,
                 [](const char *, const std::string &val) {
                   glow::flags::DAGOptimizerParallelizationTaggingAlgorithm =
                       val;
                   return true;
                 });
// Defined in glow/lib/Backends/NNPI/NNPI.cpp
DEFINE_bool(glow_use_per_partition_icet_config,
            glow::nnpi::flags::UsePerPartitionIcetConfig,
            "Read an icet_config.json file for each partition");
DEFINE_validator(glow_use_per_partition_icet_config,
                 [](const char *, bool val) {
                   glow::nnpi::flags::UsePerPartitionIcetConfig = val;
                   return true;
                 });
DEFINE_bool(glow_dump_nnpi_compiler_data, glow::nnpi::flags::DumpCompilerData,
            "Dump the NNPI compiler data into files before NNPI compilation");
DEFINE_validator(glow_dump_nnpi_compiler_data, [](const char *, bool val) {
  glow::nnpi::flags::DumpCompilerData = val;
  return true;
});
DEFINE_bool(glow_nnpi_specialize_all_one_sls,
            glow::nnpi::flags::SpecializeAllOneSLS,
            "Whether to import SLS ops with AllOne attribute to NNPI.");
DEFINE_validator(glow_nnpi_specialize_all_one_sls, [](const char *, bool val) {
  glow::nnpi::flags::SpecializeAllOneSLS = val;
  return true;
});
DEFINE_bool(glow_disable_nnpi_transforms, glow::nnpi::flags::DisableTransforms,
            "Disable running NNPIBackend::transformPostLowering().");
DEFINE_validator(glow_disable_nnpi_transforms, [](const char *, bool val) {
  glow::nnpi::flags::DisableTransforms = val;
  return true;
});
DEFINE_bool(glow_enable_nnpi_custom_ia_kernels,
            glow::nnpi::flags::EnableCustomIAKernels,
            "Enable running NNPIBackend::transformPrivate().");
DEFINE_validator(glow_enable_nnpi_custom_ia_kernels,
                 [](const char *, bool val) {
                   glow::nnpi::flags::EnableCustomIAKernels = val;
                   return true;
                 });
DEFINE_bool(glow_enable_nnpi_custom_dsp_kernels,
            glow::nnpi::flags::EnableCustomDSPKernels,
            "Enable running NNPIBackend::transformPrivate().");
DEFINE_validator(glow_enable_nnpi_custom_dsp_kernels,
                 [](const char *, bool val) {
                   glow::nnpi::flags::EnableCustomDSPKernels = val;
                   return true;
                 });

DEFINE_string(glow_injected_ia_op_kernel_path,
              glow::nnpi::flags::InjectedIAOpKernelPath,
              "Path to IA kernels library to use");
DEFINE_validator(glow_injected_ia_op_kernel_path,
                 [](const char *, const std::string &val) {
                   glow::nnpi::flags::InjectedIAOpKernelPath = val;
                   return true;
                 });

DEFINE_bool(glow_dump_custom_kernel_files,
            glow::nnpi::flags::DumpCustomKernelFiles,
            "Enable dumping the compiled custom IA and DSP kernels to file.");
DEFINE_validator(glow_dump_custom_kernel_files, [](const char *, bool val) {
  glow::nnpi::flags::DumpCustomKernelFiles = val;
  return true;
});

DEFINE_bool(glow_nnpi_lower_all_batch_matmul,
            glow::nnpi::flags::LowerAllBatchMatMul,
            "Whether to override default lowering for NNPI and always lower "
            "BatchMatMul to a series of MatMuls.");
DEFINE_validator(glow_nnpi_lower_all_batch_matmul, [](const char *, bool val) {
  glow::nnpi::flags::LowerAllBatchMatMul = val;
  return true;
});
DEFINE_bool(glow_nnpi_accept_unary_sls, glow::nnpi::flags::AcceptUnarySLS,
            "Whether to accept unary SLS ops during ONNXIFI loading.");
DEFINE_validator(glow_nnpi_accept_unary_sls, [](const char *, bool val) {
  glow::nnpi::flags::AcceptUnarySLS = val;
  return true;
});
DEFINE_int32(glow_nnpi_num_parallel_chunks,
             glow::nnpi::flags::NumParallelChunks,
             "Number of parallel chunks for NNPI");
DEFINE_validator(glow_nnpi_num_parallel_chunks, [](const char *, int32_t val) {
  glow::nnpi::flags::NumParallelChunks = val;
  return true;
});
DEFINE_int32(glow_nnpi_model_parallel_split_alignment,
             glow::nnpi::flags::ModelParallelSplitAlignment,
             "Alignment value for model parallel splits");
DEFINE_validator(glow_nnpi_model_parallel_split_alignment,
                 [](const char *, int32_t val) {
                   glow::nnpi::flags::ModelParallelSplitAlignment = val;
                   return true;
                 });
DEFINE_int32(glow_nnpi_memory, glow::runtime::flags::NNPIMemory,
             "Amount of DRAM to allocate per NNPI device in KiB");
DEFINE_validator(glow_nnpi_memory, [](const char *, int32_t val) {
  glow::runtime::flags::NNPIMemory = val;
  return true;
});
DEFINE_int32(glow_nnpi_timeout_ms, glow::runtime::flags::NNPITimeoutMs,
             "Timeout threshold for inference in milliseconds. Default 0 "
             "means infinity");
DEFINE_validator(glow_nnpi_timeout_ms, [](const char *, int32_t val) {
  glow::runtime::flags::NNPITimeoutMs = val;
  return true;
});

DEFINE_bool(glow_interpreter_lower_batch_matmul,
            glow::interpreter::flags::LowerBatchMatMul,
            "Lower BatchMatMul nodes.");
DEFINE_validator(glow_interpreter_lower_batch_matmul,
                 [](const char *, bool val) {
                   glow::interpreter::flags::LowerBatchMatMul = val;
                   return true;
                 });
DEFINE_bool(glow_interpreter_lower_layer_normalization,
            glow::interpreter::flags::LowerLayerNormalization,
            "Lower LayerNormalization nodes.");
DEFINE_validator(glow_interpreter_lower_layer_normalization,
                 [](const char *, bool val) {
                   glow::interpreter::flags::LowerLayerNormalization = val;
                   return true;
                 });

DEFINE_int32(glow_interpreter_memory, glow::runtime::flags::InterpreterMemory,
             "Amount of DRAM to allocate per Interpreter in KiB");
DEFINE_validator(glow_interpreter_memory, [](const char *, int32_t val) {
  glow::runtime::flags::InterpreterMemory = val;
  return true;
});
DEFINE_int32(glow_cpu_memory, glow::runtime::flags::CPUMemory,
             "Amount of DRAM to allocate per CPU in KiB");
DEFINE_validator(glow_cpu_memory, [](const char *, int32_t val) {
  glow::runtime::flags::CPUMemory = val;
  return true;
});

DEFINE_int32(glow_habana_memory, glow::runtime::flags::HabanaMemory,
             "Amount of DRAM to allocate per Habana device in KiB");
DEFINE_validator(glow_habana_memory, [](const char *, int32_t val) {
  glow::runtime::flags::HabanaMemory = val;
  return true;
});

DEFINE_int32(
    glow_num_compilation_threads, glow::runtime::flags::NumCompilationThreads,
    "Maximum number of threads to spawn per call to Backend::compileFunctions");
DEFINE_validator(glow_num_compilation_threads, [](const char *, int32_t val) {
  glow::runtime::flags::NumCompilationThreads = val;
  return true;
});

DEFINE_bool(glow_log_partition, glow::flags::LogPartition,
            "Enable logging partition info");
DEFINE_validator(glow_log_partition, [](const char *, bool val) {
  glow::flags::LogPartition = val;
  return true;
});
DEFINE_bool(glow_enable_p2p, glow::runtime::flags::EnableP2P,
            "Enable peer-to-peer support");
DEFINE_validator(glow_enable_p2p, [](const char *, bool val) {
  glow::runtime::flags::EnableP2P = val;
  return true;
});
DEFINE_bool(glow_enable_drt, glow::runtime::flags::EnableDRT,
            "Enable device resident tensor support");
DEFINE_validator(glow_enable_drt, [](const char *, bool val) {
  glow::runtime::flags::EnableDRT = val;
  return true;
});
DEFINE_int32(glow_device_init_timeout_ms,
             glow::runtime::flags::DeviceInitTimeoutMs,
             "Timeout threshold for device initialization in milliseconds. "
             "Default 5000");
DEFINE_validator(glow_device_init_timeout_ms, [](const char *, int32_t val) {
  glow::runtime::flags::DeviceInitTimeoutMs = val;
  return true;
});
DEFINE_uint64(
    glow_partition_big_table_threshold_bytes,
    glow::runtime::flags::BigTableThresholdBytes,
    "Threshold used by the partitioning algorithm to determine big tables. "
    "Default 104857600 (100MB)");
DEFINE_validator(glow_partition_big_table_threshold_bytes,
                 [](const char *, uint64_t val) {
                   glow::runtime::flags::BigTableThresholdBytes = val;
                   return true;
                 });
DEFINE_int32(glow_enable_sanitize_inputs,
             glow::runtime::flags::SanitizeInputsPercent,
             "Percentage of inferences whose inputs should be sanitized "
             "(0-100)");
DEFINE_validator(glow_enable_sanitize_inputs, [](const char *, int32_t val) {
  if (val < 0 || val > 100) {
    return false;
  }

  glow::runtime::flags::SanitizeInputsPercent = val;
  return true;
});

DEFINE_bool(glow_dump_partition, glow::flags::DumpPartition,
            "Enable dumping the graph of each partition");
DEFINE_validator(glow_dump_partition, [](const char *, bool val) {
  glow::flags::DumpPartition = val;
  return true;
});
DEFINE_bool(glow_dump_compilation_log, glow::flags::DumpCompilationLog,
            "Dump the glow compilation log into /tmp during compilation");
DEFINE_validator(glow_dump_compilation_log, [](const char *, bool val) {
  glow::flags::DumpCompilationLog = val;
  return true;
});
DEFINE_bool(glow_dump_backend_specific_ir_json,
            glow::flags::DumpBackendSpecificIRJSON,
            "Dump the backend-specific IR JSON file");
DEFINE_validator(glow_dump_backend_specific_ir_json,
                 [](const char *, bool val) {
                   glow::flags::DumpBackendSpecificIRJSON = val;
                   return true;
                 });
DEFINE_string(glow_backend_specific_opts, glow::flags::BackendSpecificOpts,
              "Glow backend specific options. Comma-separated list of "
              "key=value pairs, e.g. key1=val1,key2=val2.");
DEFINE_validator(glow_backend_specific_opts,
                 [](const char *, const std::string &val) {
                   glow::flags::BackendSpecificOpts = val;
                   return true;
                 });

bool glow::flags::processBackendSpecificOpts(
    std::map<std::string, std::string> &optsMap, llvm::StringRef optsStr) {
  if (optsStr.empty()) {
    return true;
  }
  llvm::SmallVector<llvm::StringRef, 4> splitOpts;
  optsStr.split(splitOpts, ',');

  for (const llvm::StringRef &opt : splitOpts) {
    LOG(INFO) << "Adding backend specific option: " << opt.str();
    auto keyValPair = opt.split('=');
    if (keyValPair.second.empty()) {
      LOG(ERROR) << "No '=' found in backend-specific opt " << opt.str();
      return false;
    }
    optsMap.emplace(keyValPair.first, keyValPair.second);
  }
  return true;
}
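
/* A minimal usage sketch (the caller shown here is hypothetical), assuming a
 * flag value of "key1=val1,key2=val2" was parsed by gflags:
 *
 *   std::map<std::string, std::string> opts;
 *   if (!glow::flags::processBackendSpecificOpts(
 *           opts, glow::flags::BackendSpecificOpts)) {
 *     LOG(ERROR) << "Malformed backend-specific options";
 *   }
 *   // On success, opts maps "key1" -> "val1" and "key2" -> "val2"; any
 *   // entry without an '=' causes the function to return false.
 */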

namespace {
llvm::cl::OptionCategory flagsLibCat("Glow Flags Lib CmdLine Options");
/// Allows enabling DRT support.
llvm::cl::opt<bool, /* ExternalStorage */ true>
    enableDRT("enable-DRT",
              llvm::cl::desc(
                  "Deprecated. Enables DRT support. Alias to glow_enable_drt."),
              llvm::cl::Optional,
              llvm::cl::location(glow::runtime::flags::EnableDRT),
              llvm::cl::cat(flagsLibCat));

/// Allows enabling P2P support.
llvm::cl::opt<bool, /* ExternalStorage */ true>
    enableP2P("enable-P2P",
              llvm::cl::desc(
                  "Deprecated. Enables P2P support. Alias to glow_enable_p2p."),
              llvm::cl::Optional,
              llvm::cl::location(glow::runtime::flags::EnableP2P),
              llvm::cl::cat(flagsLibCat));
} // namespace