/**
 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "glow/Flags/Flags.h"

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/CommandLine.h"
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <map>

/* Flags should generally go in as specific a namespace as makes sense.
 * That is, if a flag is specific to torch_glow, it should go in the
 * torch_glow::flags namespace. Flags that are generic in nature, but are only
 * supported in specific contexts, can go in a specific domain. An example is
 * AcceptUnarySLS living in the glow::nnpi::flags namespace, as that's the only
 * domain for which it is supported. That said, flags should be kept as generic
 * as possible; a hypothetical example of the convention follows.
 */
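/* Minimal sketch of the convention (the flag below is hypothetical and not
 * part of Glow): a flag that is only meaningful to torch_glow would be
 * declared as
 *
 *   namespace glow {
 *   namespace torch_glow {
 *   namespace flags {
 *   bool EnableSomeTorchGlowFeature = false;
 *   } // namespace flags
 *   } // namespace torch_glow
 *   } // namespace glow
 *
 * whereas a flag that applies to every backend belongs in glow::flags below.
 */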
namespace glow {
namespace flags {

// Generic Constants
int32_t NumDevices = 1;
bool ScanDevices = false;
bool SaturateHost = false;
bool EnableQuantParamChanges = true;
size_t MaxActiveRequests = 48;
size_t MaxActiveRequestsPerInstance = 48;
size_t MaxQueueSize = 200;
size_t ExecutorThreads = 10;
bool DelayAndRecordConstantModification = false;
bool UseTrackedDummyQuantParams = false;
bool EnablePartialTensors = true;
bool UseCustomOpsForExport = true;
std::string BackendSpecificOpts = "";
bool EnableLoadBalancedPartitioning = true;
bool SkipProvisioning = false;
bool DisableLayoutVerifying = false;
bool DisableFreeCompilationResource = false;
bool SinkTanhBelowConcat = false;

// FP16 Constants
bool ConvertToFP16 = false;
bool SkipBiasFp32tofp16Convert = false;
bool ConvertPlaceholdersToFP16 = false;
bool ConvertConstantsToFP16 = true;
bool ConvertFusedScaleOffsetToFP16 = false;
bool ClipToFP16 = false;
bool SkipInputsOnClipToFP16 = true;
bool ForceSLSToFP16Accum = true;
bool ClipQuantRangeToFP16 = false;
bool ClipZeroScaleFP16 = false;

// FP32 Constants
bool ConvertFusedScaleOffsetToFP32 = false;

// Debug Constants
int32_t NumDebugTracesPerDump = 100;
bool DumpDebugTraces = false;
bool LogPartition = true;
bool DumpPartition = false;
bool DumpCompilationLog = false;
bool DumpBackendSpecificIRJSON = false;
bool DumpGraph = false;
std::string DumpGraphPath = "./";
bool DumpInitialLoadedGraph = false;

// SparseNN Partitioning Scheme Constants
int32_t SparseNNPartitioningSchemeNumCards = 1;
int64_t SparseNNPartitioningSchemeSLSTableKBytesPerCard = 1;
int32_t SparseNNPartitioningSchemeNumCoresSLS = 1;
int32_t SparseNNPartitioningSchemeNumCoresOther = 1;
bool UseSparseNNPartitioningScheme = false;
bool SparseNNPartitioningAddSLSConcats = false;
bool SparseNNPartitioningBalancePerfModel = false;
bool SparseNNPartitioningPairLNWithSLS = false;
bool SparseNNPartitioningPairTileWithSLS = false;
std::string SparseNNPartitioningPairSLSWith = "";
int32_t SparseNNPartitioningConcatSplitSize = 1;
bool SparseNNParallelizeReshapeOnBatchDim = true;

// DAG Optimizer Constants
bool UseDAGOptimizer = false;
int32_t DAGOptimizerNumParallelChunks = 1;
std::string DAGOptimizerPlacementTaggingAlgorithm = "None";
std::string DAGOptimizerParallelizationTaggingAlgorithm = "None";

} // namespace flags
} // namespace glow

namespace glow {
namespace nnpi {
namespace flags {
int32_t ModelParallelSplitAlignment = 1;
int32_t NumParallelChunks = 0; // Zero val for an ugly hack in NNPI.cpp
bool LowerAllBatchMatMul = false;
bool AcceptUnarySLS = false;
bool SpecializeAllOneSLS = false;
bool DisableTransforms = false;
bool EnableCustomIAKernels = false;
bool EnableCustomDSPKernels = false;
bool DumpCompilerData = false;
bool UsePerPartitionIcetConfig = false;
std::string InjectedIAOpKernelPath = "";
bool DumpCustomKernelFiles = false;

} // namespace flags
} // namespace nnpi
} // namespace glow

namespace glow {
namespace interpreter {
namespace flags {
bool LowerBatchMatMul = true;
bool LowerLayerNormalization = true;
} // namespace flags
} // namespace interpreter
} // namespace glow

namespace glow {
namespace torch_glow {
namespace flags {
bool ImaginaryFlag = false; // Placeholder flag
} // namespace flags
} // namespace torch_glow
} // namespace glow

namespace glow {
namespace onnxifi {
namespace flags {
std::string BackendName = "";
bool SaveModel = false;
bool SaveIO = false;
bool SaveDAG = false;
bool SaveDAGWithConstants = false;
bool SaveDAGInZipMode = false;
} // namespace flags
} // namespace onnxifi
} // namespace glow

namespace glow {
namespace runtime {
namespace flags {

unsigned CPUMemory = 0;
unsigned HabanaMemory = 7 << 20;
unsigned NNPIMemory = 16 << 20;
unsigned NNPITimeoutMs = 0;

std::string AvailableDevices = "";
unsigned InterpreterMemory = 0;
bool EnableP2P = false;
bool EnableDRT = false;
unsigned DeviceInitTimeoutMs = 5000;
unsigned SanitizeInputsPercent = 0;
uint64_t BigTableThresholdBytes = 104857600; // 100MB
unsigned NumCompilationThreads = 1;
} // namespace flags
} // namespace runtime
} // namespace glow

/*
 * Note: Validators are used to assign the flag values to the namespaced
 * variables because direct assignment appears to trigger the static
 * initialization order fiasco. A sketch of the pattern follows.
 */
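/*
 * Minimal sketch of the pattern used throughout this file (the flag name
 * glow_example_flag and the variable SomeValue are illustrative only):
 *
 *   DEFINE_int32(glow_example_flag, glow::flags::SomeValue, "description");
 *   DEFINE_validator(glow_example_flag, [](const char *, int32_t val) {
 *     glow::flags::SomeValue = val; // copy the parsed value into the variable
 *     return true;                  // accept the value
 *   });
 *
 * gflags runs the validator whenever the flag's value is set (including from
 * the command line), so the namespaced variable stays in sync without relying
 * on static initialization order.
 */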
DEFINE_int32(glow_num_devices, glow::flags::NumDevices,
             "Number of devices for Glow backend");
DEFINE_validator(glow_num_devices, [](const char *, int32_t val) {
  glow::flags::NumDevices = val;
  return true;
});
DEFINE_bool(glow_scan_devices, glow::flags::ScanDevices,
            "Scan available devices for Glow backend");
DEFINE_validator(glow_scan_devices, [](const char *, bool val) {
  glow::flags::ScanDevices = val;
  return true;
});
DEFINE_int32(glow_snn_partitioning_num_cards,
             glow::flags::SparseNNPartitioningSchemeNumCards,
             "Number of devices to distribute tables across in SparseNN "
             "partitioning");
DEFINE_validator(glow_snn_partitioning_num_cards,
                 [](const char *, int32_t val) {
                   glow::flags::SparseNNPartitioningSchemeNumCards = val;
                   return true;
                 });
DEFINE_int32(
    glow_snn_partitioning_kbytes_per_card,
    glow::flags::SparseNNPartitioningSchemeSLSTableKBytesPerCard,
    "Kilobytes per card used for SLS tables in SparseNN partitioning");
DEFINE_validator(glow_snn_partitioning_kbytes_per_card, [](const char *,
                                                           int32_t val) {
  glow::flags::SparseNNPartitioningSchemeSLSTableKBytesPerCard = val;
  return true;
});
DEFINE_int32(
    glow_snn_partitioning_num_cores_sls,
    glow::flags::SparseNNPartitioningSchemeNumCoresSLS,
    "Number of cores to assign to SLS partition in SparseNN partitioning");
DEFINE_validator(glow_snn_partitioning_num_cores_sls,
                 [](const char *, int32_t val) {
                   glow::flags::SparseNNPartitioningSchemeNumCoresSLS = val;
                   return true;
                 });
DEFINE_int32(
    glow_snn_partitioning_num_cores_other,
    glow::flags::SparseNNPartitioningSchemeNumCoresOther,
    "Number of cores to assign to non-SLS partition in SparseNN partitioning");
DEFINE_validator(glow_snn_partitioning_num_cores_other,
                 [](const char *, int32_t val) {
                   glow::flags::SparseNNPartitioningSchemeNumCoresOther = val;
                   return true;
                 });
DEFINE_bool(glow_dump_debug_traces, glow::flags::DumpDebugTraces,
            "Dump traces to /tmp");
DEFINE_validator(glow_dump_debug_traces, [](const char *, bool val) {
  glow::flags::DumpDebugTraces = val;
  return true;
});
DEFINE_int32(glow_num_debug_traces_per_dump, glow::flags::NumDebugTracesPerDump,
             "Maximum number of traces in each debug dump.");
DEFINE_validator(glow_num_debug_traces_per_dump, [](const char *, int32_t val) {
  glow::flags::NumDebugTracesPerDump = val;
  return true;
});
DEFINE_string(glow_onnxifi_backend, glow::onnxifi::flags::BackendName,
              "Glow backend used for ONNXIFI");
DEFINE_validator(glow_onnxifi_backend,
                 [](const char *, const std::string &val) {
                   glow::onnxifi::flags::BackendName = val;
                   return true;
                 });
DEFINE_string(
    glow_available_devices, glow::runtime::flags::AvailableDevices,
    "Comma-separated list of device IDs to use, e.g. 2,3,4");
DEFINE_validator(glow_available_devices,
                 [](const char *, const std::string &val) {
                   glow::runtime::flags::AvailableDevices = val;
                   return true;
                 });
DEFINE_bool(glow_global_fp16, glow::flags::ConvertToFP16,
            "Enable fp16 lowering for all ops on the net");
DEFINE_validator(glow_global_fp16, [](const char *, bool val) {
  glow::flags::ConvertToFP16 = val;
  return true;
});
DEFINE_bool(glow_skip_bias_fp32tofp16_convert,
            glow::flags::SkipBiasFp32tofp16Convert,
            "Skip fp32 -> fp16 conversion for the bias in FC");
DEFINE_validator(glow_skip_bias_fp32tofp16_convert, [](const char *, bool val) {
  glow::flags::SkipBiasFp32tofp16Convert = val;
  return true;
});
DEFINE_bool(torch_glow_imaginary_flag, glow::torch_glow::flags::ImaginaryFlag,
            "Placeholder flag for torch_glow");
DEFINE_validator(torch_glow_imaginary_flag, [](const char *, bool val) {
  glow::torch_glow::flags::ImaginaryFlag = val;
  return true;
});
DEFINE_bool(glow_global_fp16_placeholders,
            glow::flags::ConvertPlaceholdersToFP16,
            "Enable fp16 conversion for Placeholders");
DEFINE_validator(glow_global_fp16_placeholders, [](const char *, bool val) {
  glow::flags::ConvertPlaceholdersToFP16 = val;
  return true;
});
DEFINE_bool(glow_global_fp16_constants, glow::flags::ConvertConstantsToFP16,
            "Enable fp16 conversion for Constants");
DEFINE_validator(glow_global_fp16_constants, [](const char *, bool val) {
  glow::flags::ConvertConstantsToFP16 = val;
  return true;
});
DEFINE_bool(glow_global_fused_scale_offset_fp16,
            glow::flags::ConvertFusedScaleOffsetToFP16,
            "Enable fp16 lowering for all op inputs using fused scale/offset");
DEFINE_validator(glow_global_fused_scale_offset_fp16,
                 [](const char *, bool val) {
                   glow::flags::ConvertFusedScaleOffsetToFP16 = val;
                   return true;
                 });
DEFINE_bool(
    glow_global_fused_scale_offset_fp32,
    glow::flags::ConvertFusedScaleOffsetToFP32,
    "Enable converting scale/offset in SLS's input data from fp16 to fp32");
DEFINE_validator(glow_global_fused_scale_offset_fp32,
                 [](const char *, bool val) {
                   glow::flags::ConvertFusedScaleOffsetToFP32 = val;
                   return true;
                 });
DEFINE_bool(
    glow_global_force_sls_fp16_accum, glow::flags::ForceSLSToFP16Accum,
    "Force all SLS/SLWS ops to use FP16 accumulation. True by default.");
DEFINE_validator(glow_global_force_sls_fp16_accum, [](const char *, bool val) {
  glow::flags::ForceSLSToFP16Accum = val;
  return true;
});
DEFINE_bool(glow_enable_quant_param_changes,
            glow::flags::EnableQuantParamChanges,
            "Enable quantization param changes during optimizations");
DEFINE_validator(glow_enable_quant_param_changes, [](const char *, bool val) {
  glow::flags::EnableQuantParamChanges = val;
  return true;
});
DEFINE_bool(glow_use_sparsenn_partitioning_scheme,
            glow::flags::UseSparseNNPartitioningScheme,
            "Force glow to use SparseNN partitioning scheme");
DEFINE_validator(glow_use_sparsenn_partitioning_scheme,
                 [](const char *, bool val) {
                   glow::flags::UseSparseNNPartitioningScheme = val;
                   return true;
                 });
DEFINE_bool(glow_sparsenn_partitioning_add_sls_concats,
            glow::flags::SparseNNPartitioningAddSLSConcats,
            "Add extra concats inside of SLS partitions for more efficient "
            "inter-partition transfers");
DEFINE_validator(glow_sparsenn_partitioning_add_sls_concats,
                 [](const char *, bool val) {
                   glow::flags::SparseNNPartitioningAddSLSConcats = val;
                   return true;
                 });
DEFINE_bool(glow_sparsenn_partitioning_balance_perf_model,
            glow::flags::SparseNNPartitioningBalancePerfModel,
            "Balance SLS tables across cards using a perf model");
DEFINE_validator(glow_sparsenn_partitioning_balance_perf_model,
                 [](const char *, bool val) {
                   glow::flags::SparseNNPartitioningBalancePerfModel = val;
                   return true;
                 });
DEFINE_bool(glow_sparsenn_partitioning_pair_ln_with_sls,
            glow::flags::SparseNNPartitioningPairLNWithSLS,
            "Put layer normalization nodes immediately following SLS into SLS "
            "partitions");
DEFINE_validator(glow_sparsenn_partitioning_pair_ln_with_sls,
                 [](const char *, bool val) {
                   glow::flags::SparseNNPartitioningPairLNWithSLS = val;
                   return true;
                 });
DEFINE_bool(
    glow_sparsenn_partitioning_pair_tile_with_sls,
    glow::flags::SparseNNPartitioningPairTileWithSLS,
    "Put tile nodes immediately following SLS for user embeddings into SLS "
    "partitions");
DEFINE_validator(glow_sparsenn_partitioning_pair_tile_with_sls,
                 [](const char *, bool val) {
                   glow::flags::SparseNNPartitioningPairTileWithSLS = val;
                   return true;
                 });
DEFINE_string(
    glow_sparsenn_partitioning_pair_sls_with,
    glow::flags::SparseNNPartitioningPairSLSWith,
    "Put the specified nodes immediately following SLS into SLS partitions. "
    "Supported for LayerNorm, Tile, Concat, and Tanh nodes. "
    "Comma-separated list of node names, e.g. LayerNorm,Tile.");
DEFINE_validator(glow_sparsenn_partitioning_pair_sls_with,
                 [](const char *, const std::string &val) {
                   glow::flags::SparseNNPartitioningPairSLSWith = val;
                   return true;
                 });
DEFINE_int32(glow_sparsenn_partitioning_concat_split_size,
             glow::flags::SparseNNPartitioningConcatSplitSize,
             "Number of inputs to split each concat into when it is moved "
             "into SLS partitions");
DEFINE_validator(glow_sparsenn_partitioning_concat_split_size,
                 [](const char *, const int32_t val) {
                   glow::flags::SparseNNPartitioningConcatSplitSize = val;
                   return true;
                 });
DEFINE_bool(glow_sparsenn_parallelize_reshape_on_batch_dim,
            glow::flags::SparseNNParallelizeReshapeOnBatchDim,
            "Force parallelizing the reshape operators on the batch dimension");
DEFINE_validator(glow_sparsenn_parallelize_reshape_on_batch_dim,
                 [](const char *, bool val) {
                   glow::flags::SparseNNParallelizeReshapeOnBatchDim = val;
                   return true;
                 });
DEFINE_bool(glow_clip_fp16, glow::flags::ClipToFP16,
            "Force glow to clip fp16 values to min/max");
DEFINE_validator(glow_clip_fp16, [](const char *, bool val) {
  glow::flags::ClipToFP16 = val;
  return true;
});
DEFINE_bool(glow_clip_fp16_skip_inputs, glow::flags::SkipInputsOnClipToFP16,
            "Force glow to skip clipping fp16 Node inputs to min/max");
DEFINE_validator(glow_clip_fp16_skip_inputs, [](const char *, bool val) {
  glow::flags::SkipInputsOnClipToFP16 = val;
  return true;
});
DEFINE_bool(glow_saturate_host, glow::flags::SaturateHost,
            "Try to use all available devices on the host");
DEFINE_validator(glow_saturate_host, [](const char *, bool val) {
  glow::flags::SaturateHost = val;
  return true;
});
DEFINE_bool(
    glow_save_onnxifi_dag, glow::onnxifi::flags::SaveDAG,
    "Whether to serialize the DAG that has been optimized and partitioned.");
DEFINE_validator(glow_save_onnxifi_dag, [](const char *, bool val) {
  glow::onnxifi::flags::SaveDAG = val;
  return true;
});
DEFINE_bool(glow_save_onnxifi_dag_with_constants,
            glow::onnxifi::flags::SaveDAGWithConstants,
            "Whether to serialize constants in the DAG that has been optimized "
            "and partitioned.");
DEFINE_validator(glow_save_onnxifi_dag_with_constants,
                 [](const char *, bool val) {
                   glow::onnxifi::flags::SaveDAGWithConstants = val;
                   return true;
                 });
DEFINE_bool(glow_save_onnxifi_dag_in_zip_mode,
            glow::onnxifi::flags::SaveDAGInZipMode,
            "Whether to serialize the DAG that has been optimized and "
            "partitioned in ZIP mode.");
DEFINE_validator(glow_save_onnxifi_dag_in_zip_mode, [](const char *, bool val) {
  glow::onnxifi::flags::SaveDAGInZipMode = val;
  return true;
});
DEFINE_bool(
    glow_delay_and_record_constant_modification,
    glow::flags::DelayAndRecordConstantModification,
    "Whether to delay and record constant modification for serialization.");
DEFINE_validator(glow_delay_and_record_constant_modification,
                 [](const char *, bool val) {
                   glow::flags::DelayAndRecordConstantModification = val;
                   return true;
                 });
DEFINE_bool(glow_use_tracked_dummy_quant_params,
            glow::flags::UseTrackedDummyQuantParams,
            "Whether to use uniqued dummy quant params when loading the model, "
            "which are then mapped to loaded names for serialization.");
DEFINE_validator(glow_use_tracked_dummy_quant_params,
                 [](const char *, bool val) {
                   glow::flags::UseTrackedDummyQuantParams = val;
                   return true;
                 });
DEFINE_bool(glow_clip_zero_scale_fp16, glow::flags::ClipZeroScaleFP16,
            "Whether to clip qparam scales below 1/65504 to that val.");
DEFINE_validator(glow_clip_zero_scale_fp16, [](const char *, bool val) {
  glow::flags::ClipZeroScaleFP16 = val;
  return true;
});
DEFINE_bool(glow_clip_quant_range_to_fp16, glow::flags::ClipQuantRangeToFP16,
            "Whether to clip quantization parameters inside the fp16 range.");
DEFINE_validator(glow_clip_quant_range_to_fp16, [](const char *, bool val) {
  glow::flags::ClipQuantRangeToFP16 = val;
  return true;
});
DEFINE_int32(glow_max_active_requests, glow::flags::MaxActiveRequests,
             "Max number of active requests before the host manager starts "
             "queuing");
DEFINE_validator(glow_max_active_requests, [](const char *, int32_t val) {
  glow::flags::MaxActiveRequests = val;
  return true;
});
DEFINE_int32(glow_max_active_requests_per_instance,
             glow::flags::MaxActiveRequestsPerInstance,
             "Max number of active requests per instance of a network.");
DEFINE_validator(glow_max_active_requests_per_instance,
                 [](const char *, int32_t val) {
                   glow::flags::MaxActiveRequestsPerInstance = val;
                   return true;
                 });
DEFINE_int32(
    glow_max_queue_size, glow::flags::MaxQueueSize,
    "Max number of pending requests in glow's host manager queue before "
    "rejecting new requests");
DEFINE_validator(glow_max_queue_size, [](const char *, int32_t val) {
  glow::flags::MaxQueueSize = val;
  return true;
});
DEFINE_int32(glow_executor_threads, glow::flags::ExecutorThreads,
             "Number of executor threads for host manager");
DEFINE_validator(glow_executor_threads, [](const char *, int32_t val) {
  glow::flags::ExecutorThreads = val;
  return true;
});
DEFINE_bool(glow_partitioner_enable_load_balance,
            glow::flags::EnableLoadBalancedPartitioning,
            "Enable a partitioner pass to optimize for load balance in "
            "addition to memory capacity constraints");
DEFINE_validator(glow_partitioner_enable_load_balance,
                 [](const char *, bool val) {
                   glow::flags::EnableLoadBalancedPartitioning = val;
                   return true;
                 });
DEFINE_bool(glow_skip_provisioning, glow::flags::SkipProvisioning,
            "Skip provisioning. Used for AOT opts or debugging.");
DEFINE_validator(glow_skip_provisioning, [](const char *, bool val) {
  glow::flags::SkipProvisioning = val;
  return true;
});
DEFINE_bool(glow_sink_tanh_below_concat, glow::flags::SinkTanhBelowConcat,
            "Sink tanh ops below concat.");
DEFINE_validator(glow_sink_tanh_below_concat, [](const char *, bool val) {
  glow::flags::SinkTanhBelowConcat = val;
  return true;
});
DEFINE_bool(glow_save_onnxifi_model, glow::onnxifi::flags::SaveModel,
            "Package the glow function and weights right before lowering");
DEFINE_validator(glow_save_onnxifi_model, [](const char *, bool val) {
  glow::onnxifi::flags::SaveModel = val;
  return true;
});
DEFINE_bool(glow_save_onnxifi_io, glow::onnxifi::flags::SaveIO,
            "Save the input and output result around ONNXIFI boundary");
DEFINE_validator(glow_save_onnxifi_io, [](const char *, bool val) {
  glow::onnxifi::flags::SaveIO = val;
  return true;
});
DEFINE_bool(glow_enable_partial_tensors, glow::flags::EnablePartialTensors,
            "Enable support for partial tensors");
DEFINE_validator(glow_enable_partial_tensors, [](const char *, bool val) {
  glow::flags::EnablePartialTensors = val;
  return true;
});
DEFINE_bool(glow_use_custom_ops_for_export, glow::flags::UseCustomOpsForExport,
            "Use custom ONNX ops when exporting Glow protos.");
DEFINE_validator(glow_use_custom_ops_for_export, [](const char *, bool val) {
  glow::flags::UseCustomOpsForExport = val;
  return true;
});
DEFINE_bool(glow_dump_graph, glow::flags::DumpGraph,
            "Dump the glow Graph into files before compilation");
DEFINE_validator(glow_dump_graph, [](const char *, bool val) {
  glow::flags::DumpGraph = val;
  return true;
});
DEFINE_string(glow_dump_graph_path, glow::flags::DumpGraphPath,
              "Directory path for the dumped graphs.");
DEFINE_validator(glow_dump_graph_path,
                 [](const char *, const std::string &val) {
                   glow::flags::DumpGraphPath = val;
                   return true;
                 });
DEFINE_bool(glow_dump_initial_loaded_graph, glow::flags::DumpInitialLoadedGraph,
            "Dump the glow Graph right after onnxification");
DEFINE_validator(glow_dump_initial_loaded_graph, [](const char *, bool val) {
  glow::flags::DumpInitialLoadedGraph = val;
  return true;
});
DEFINE_bool(glow_use_dag_optimizer, glow::flags::UseDAGOptimizer,
            "Whether to call the DAG optimizer");
DEFINE_validator(glow_use_dag_optimizer, [](const char *, bool val) {
  glow::flags::UseDAGOptimizer = val;
  return true;
});
DEFINE_int32(glow_dag_optimizer_num_parallel_chunks,
             glow::flags::DAGOptimizerNumParallelChunks,
             "Number of parallel chunks for DAGOptimizer parallelization");
DEFINE_validator(glow_dag_optimizer_num_parallel_chunks,
                 [](const char *, int32_t val) {
                   glow::flags::DAGOptimizerNumParallelChunks = val;
                   return true;
                 });
DEFINE_string(glow_dag_optimizer_placement_tagging_algorithm,
              glow::flags::DAGOptimizerPlacementTaggingAlgorithm,
              "Name of placement tagging algorithm to run in DAGOptimizer");
DEFINE_validator(glow_dag_optimizer_placement_tagging_algorithm,
                 [](const char *, const std::string &val) {
                   glow::flags::DAGOptimizerPlacementTaggingAlgorithm = val;
                   return true;
                 });

DEFINE_string(
    glow_dag_optimizer_parallelization_tagging_algorithm,
    glow::flags::DAGOptimizerParallelizationTaggingAlgorithm,
    "Name of parallelization tagging algorithm to run in DAGOptimizer");
DEFINE_validator(glow_dag_optimizer_parallelization_tagging_algorithm,
                 [](const char *, const std::string &val) {
                   glow::flags::DAGOptimizerParallelizationTaggingAlgorithm =
                       val;
                   return true;
                 });
// Defined in glow/lib/Backends/NNPI/NNPI.cpp
DEFINE_bool(glow_use_per_partition_icet_config,
            glow::nnpi::flags::UsePerPartitionIcetConfig,
            "Read an icet_config.json file for each partition");
DEFINE_validator(glow_use_per_partition_icet_config,
                 [](const char *, bool val) {
                   glow::nnpi::flags::UsePerPartitionIcetConfig = val;
                   return true;
                 });
DEFINE_bool(glow_dump_nnpi_compiler_data, glow::nnpi::flags::DumpCompilerData,
            "Dump the NNPI compiler data into files before NNPI compilation");
DEFINE_validator(glow_dump_nnpi_compiler_data, [](const char *, bool val) {
  glow::nnpi::flags::DumpCompilerData = val;
  return true;
});
DEFINE_bool(glow_nnpi_specialize_all_one_sls,
            glow::nnpi::flags::SpecializeAllOneSLS,
            "Whether to import SLS ops with AllOne attribute to NNPI.");
DEFINE_validator(glow_nnpi_specialize_all_one_sls, [](const char *, bool val) {
  glow::nnpi::flags::SpecializeAllOneSLS = val;
  return true;
});
DEFINE_bool(glow_disable_nnpi_transforms, glow::nnpi::flags::DisableTransforms,
            "Disable running NNPIBackend::transformPostLowering().");
DEFINE_validator(glow_disable_nnpi_transforms, [](const char *, bool val) {
  glow::nnpi::flags::DisableTransforms = val;
  return true;
});
DEFINE_bool(glow_enable_nnpi_custom_ia_kernels,
            glow::nnpi::flags::EnableCustomIAKernels,
            "Enable running NNPIBackend::transformPrivate().");
DEFINE_validator(glow_enable_nnpi_custom_ia_kernels,
                 [](const char *, bool val) {
                   glow::nnpi::flags::EnableCustomIAKernels = val;
                   return true;
                 });
DEFINE_bool(glow_enable_nnpi_custom_dsp_kernels,
            glow::nnpi::flags::EnableCustomDSPKernels,
            "Enable running NNPIBackend::transformPrivate().");
DEFINE_validator(glow_enable_nnpi_custom_dsp_kernels,
                 [](const char *, bool val) {
                   glow::nnpi::flags::EnableCustomDSPKernels = val;
                   return true;
                 });

DEFINE_string(glow_injected_ia_op_kernel_path,
              glow::nnpi::flags::InjectedIAOpKernelPath,
              "Path to IA kernels library to use");
DEFINE_validator(glow_injected_ia_op_kernel_path,
                 [](const char *, const std::string &val) {
                   glow::nnpi::flags::InjectedIAOpKernelPath = val;
                   return true;
                 });

DEFINE_bool(glow_dump_custom_kernel_files,
            glow::nnpi::flags::DumpCustomKernelFiles,
            "Enable dumping the compiled custom IA and DSP kernels to file.");
DEFINE_validator(glow_dump_custom_kernel_files, [](const char *, bool val) {
  glow::nnpi::flags::DumpCustomKernelFiles = val;
  return true;
});

DEFINE_bool(glow_nnpi_lower_all_batch_matmul,
            glow::nnpi::flags::LowerAllBatchMatMul,
            "Whether to override default lowering for NNPI and always lower "
            "BatchMatMul to a series of MatMuls.");
DEFINE_validator(glow_nnpi_lower_all_batch_matmul, [](const char *, bool val) {
  glow::nnpi::flags::LowerAllBatchMatMul = val;
  return true;
});
DEFINE_bool(glow_nnpi_accept_unary_sls, glow::nnpi::flags::AcceptUnarySLS,
            "Whether to accept unary SLS ops during ONNXIFI loading.");
DEFINE_validator(glow_nnpi_accept_unary_sls, [](const char *, bool val) {
  glow::nnpi::flags::AcceptUnarySLS = val;
  return true;
});
DEFINE_int32(glow_nnpi_num_parallel_chunks,
             glow::nnpi::flags::NumParallelChunks,
             "Number of parallel chunks for NNPI");
DEFINE_validator(glow_nnpi_num_parallel_chunks, [](const char *, int32_t val) {
  glow::nnpi::flags::NumParallelChunks = val;
  return true;
});
DEFINE_int32(glow_nnpi_model_parallel_split_alignment,
             glow::nnpi::flags::ModelParallelSplitAlignment,
             "Alignment value for model parallel splits");
DEFINE_validator(glow_nnpi_model_parallel_split_alignment,
                 [](const char *, int32_t val) {
                   glow::nnpi::flags::ModelParallelSplitAlignment = val;
                   return true;
                 });
DEFINE_int32(glow_nnpi_memory, glow::runtime::flags::NNPIMemory,
             "Amount of DRAM to allocate per NNPI device in KiB");
DEFINE_validator(glow_nnpi_memory, [](const char *, int32_t val) {
  glow::runtime::flags::NNPIMemory = val;
  return true;
});
DEFINE_int32(glow_nnpi_timeout_ms, glow::runtime::flags::NNPITimeoutMs,
             "Timeout threshold for inference in milliseconds. Default 0 "
             "means infinity");
DEFINE_validator(glow_nnpi_timeout_ms, [](const char *, int32_t val) {
  glow::runtime::flags::NNPITimeoutMs = val;
  return true;
});

DEFINE_bool(glow_interpreter_lower_batch_matmul,
            glow::interpreter::flags::LowerBatchMatMul,
            "Lower batch matmul node.");
DEFINE_validator(glow_interpreter_lower_batch_matmul,
                 [](const char *, bool val) {
                   glow::interpreter::flags::LowerBatchMatMul = val;
                   return true;
                 });
DEFINE_bool(glow_interpreter_lower_layer_normalization,
            glow::interpreter::flags::LowerLayerNormalization,
            "Lower layer normalization node.");
DEFINE_validator(glow_interpreter_lower_layer_normalization,
                 [](const char *, bool val) {
                   glow::interpreter::flags::LowerLayerNormalization = val;
                   return true;
                 });

DEFINE_int32(glow_interpreter_memory, glow::runtime::flags::InterpreterMemory,
             "Amount of DRAM to allocate per Interpreter in KiB");
DEFINE_validator(glow_interpreter_memory, [](const char *, int32_t val) {
  glow::runtime::flags::InterpreterMemory = val;
  return true;
});
DEFINE_int32(glow_cpu_memory, glow::runtime::flags::CPUMemory,
             "Amount of DRAM to allocate per CPU in KiB");
DEFINE_validator(glow_cpu_memory, [](const char *, int32_t val) {
  glow::runtime::flags::CPUMemory = val;
  return true;
});

DEFINE_int32(glow_habana_memory, glow::runtime::flags::HabanaMemory,
             "Amount of DRAM to allocate per Habana device in KiB");
DEFINE_validator(glow_habana_memory, [](const char *, int32_t val) {
  glow::runtime::flags::HabanaMemory = val;
  return true;
});

DEFINE_int32(
    glow_num_compilation_threads, glow::runtime::flags::NumCompilationThreads,
    "Maximum number of threads to spawn per call to Backend::compileFunctions");
DEFINE_validator(glow_num_compilation_threads, [](const char *, int32_t val) {
  glow::runtime::flags::NumCompilationThreads = val;
  return true;
});

DEFINE_bool(glow_log_partition, glow::flags::LogPartition,
            "Enable logging partition info");
DEFINE_validator(glow_log_partition, [](const char *, bool val) {
  glow::flags::LogPartition = val;
  return true;
});
DEFINE_bool(glow_enable_p2p, glow::runtime::flags::EnableP2P,
            "Enable peer to peer support");
DEFINE_validator(glow_enable_p2p, [](const char *, bool val) {
  glow::runtime::flags::EnableP2P = val;
  return true;
});
DEFINE_bool(glow_enable_drt, glow::runtime::flags::EnableDRT,
            "Enable device resident tensor support");
DEFINE_validator(glow_enable_drt, [](const char *, bool val) {
  glow::runtime::flags::EnableDRT = val;
  return true;
});
DEFINE_int32(glow_device_init_timeout_ms,
             glow::runtime::flags::DeviceInitTimeoutMs,
             "Timeout threshold for device initialization in milliseconds. "
             "Default 5000");
DEFINE_validator(glow_device_init_timeout_ms, [](const char *, int32_t val) {
  glow::runtime::flags::DeviceInitTimeoutMs = val;
  return true;
});
DEFINE_uint64(
    glow_partition_big_table_threshold_bytes,
    glow::runtime::flags::BigTableThresholdBytes,
    "Threshold used to determine big tables in the partitioning algorithm. "
    "Default 104857600 (100MB)");
DEFINE_validator(glow_partition_big_table_threshold_bytes,
                 [](const char *, uint64_t val) {
                   glow::runtime::flags::BigTableThresholdBytes = val;
                   return true;
                 });
DEFINE_int32(glow_enable_sanitize_inputs,
             glow::runtime::flags::SanitizeInputsPercent,
             "Sanitize a percentage of inferences");
DEFINE_validator(glow_enable_sanitize_inputs, [](const char *, int32_t val) {
  if (val < 0 || val > 100) {
    return false;
  }

  glow::runtime::flags::SanitizeInputsPercent = val;
  return true;
});

DEFINE_bool(glow_dump_partition, glow::flags::DumpPartition,
            "Enable dumping the graph of each partition");
DEFINE_validator(glow_dump_partition, [](const char *, bool val) {
  glow::flags::DumpPartition = val;
  return true;
});
DEFINE_bool(glow_dump_compilation_log, glow::flags::DumpCompilationLog,
            "Dump the glow compilation log into /tmp during compilation");
DEFINE_validator(glow_dump_compilation_log, [](const char *, bool val) {
  glow::flags::DumpCompilationLog = val;
  return true;
});
DEFINE_bool(glow_dump_backend_specific_ir_json,
            glow::flags::DumpBackendSpecificIRJSON,
            "Dump the backend-specific IR JSON file");
DEFINE_validator(glow_dump_backend_specific_ir_json,
                 [](const char *, bool val) {
                   glow::flags::DumpBackendSpecificIRJSON = val;
                   return true;
                 });
DEFINE_string(glow_backend_specific_opts, glow::flags::BackendSpecificOpts,
              "Glow backend-specific options. Comma-separated list of "
              "key=value pairs, e.g. key1=val1,key2=val2.");
DEFINE_validator(glow_backend_specific_opts,
                 [](const char *, const std::string &val) {
                   glow::flags::BackendSpecificOpts = val;
                   return true;
                 });

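// Splits \p optsStr, the comma-separated list of key=value pairs passed via
// --glow_backend_specific_opts (e.g. "key1=val1,key2=val2"), and inserts each
// pair into \p optsMap. Returns false if an entry has no '=' (or no value
// after it).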
bool glow::flags::processBackendSpecificOpts(
    std::map<std::string, std::string> &optsMap, llvm::StringRef optsStr) {
  if (optsStr.empty()) {
    return true;
  }
  llvm::SmallVector<llvm::StringRef, 4> splitOpts;
  optsStr.split(splitOpts, ',');

  for (const llvm::StringRef &opt : splitOpts) {
    LOG(INFO) << "Adding backend specific option: " << opt.str();
    auto keyValPair = opt.split('=');
    if (keyValPair.second.empty()) {
      LOG(ERROR) << "No '=' found in backend-specific opt " << opt.str();
      return false;
    }
    optsMap.emplace(keyValPair.first, keyValPair.second);
  }
  return true;
}
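
// Illustrative usage (a sketch; this call site is not part of this file):
// parse the current flag value into a map that a backend can then consume.
//
//   std::map<std::string, std::string> opts;
//   if (!glow::flags::processBackendSpecificOpts(
//           opts, glow::flags::BackendSpecificOpts)) {
//     LOG(ERROR) << "Malformed --glow_backend_specific_opts value";
//   }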

namespace {
llvm::cl::OptionCategory flagsLibCat("Glow Flags Lib CmdLine Options");
/// Allows enabling DRT support.
llvm::cl::opt<bool, /* ExternalStorage */ true>
    enableDRT("enable-DRT",
              llvm::cl::desc(
                  "Deprecated. Enables DRT support. Alias to glow_enable_drt."),
              llvm::cl::Optional,
              llvm::cl::location(glow::runtime::flags::EnableDRT),
              llvm::cl::cat(flagsLibCat));

/// Allows enabling P2P support.
llvm::cl::opt<bool, /* ExternalStorage */ true>
    enableP2P("enable-P2P",
              llvm::cl::desc(
                  "Deprecated. Enables P2P support. Alias to glow_enable_p2p."),
              llvm::cl::Optional,
              llvm::cl::location(glow::runtime::flags::EnableP2P),
              llvm::cl::cat(flagsLibCat));
} // namespace