/**
 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "glow/Flags/Flags.h"

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/CommandLine.h"
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <map>

/* Flags should generally go in the most specific namespace that makes sense.
 * That is, if a flag is specific to torch_glow, it should go in the
 * glow::torch_glow::flags namespace. Flags that are generic in nature but are
 * only supported in a specific context should go in that domain's namespace.
 * An example is AcceptUnarySLS living in the glow::nnpi::flags namespace, as
 * that's the only domain for which it is supported. In the same vein, flags
 * should be made as generic as possible.
 */
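/* For illustration, a hypothetical backend-agnostic flag (the name below is
 * made up for this sketch, not a real Glow flag) would be declared here as:
 *
 *   namespace glow {
 *   namespace flags {
 *   bool EnableMyFeature = false;
 *   } // namespace flags
 *   } // namespace glow
 *
 * and then registered with gflags further below via the DEFINE_bool /
 * DEFINE_validator pattern used throughout this file.
 */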
namespace glow {
namespace flags {

// Generic Constants
int32_t NumDevices = 1;
bool ScanDevices = false;
bool SaturateHost = false;
bool EnableQuantParamChanges = true;
size_t MaxActiveRequests = 48;
size_t MaxActiveRequestsPerInstance = 48;
size_t MaxQueueSize = 200;
size_t ExecutorThreads = 10;
bool DelayAndRecordConstantModification = false;
bool UseTrackedDummyQuantParams = false;
bool EnablePartialTensors = true;
bool UseCustomOpsForExport = true;
std::string BackendSpecificOpts = "";
bool EnableLoadBalancedPartitioning = true;
bool SkipProvisioning = false;
bool DisableLayoutVerifying = false;
bool DisableFreeCompilationResource = false;
bool SinkTanhBelowConcat = false;

// FP16 Constants
bool ConvertToFP16 = false;
bool SkipBiasFp32tofp16Convert = false;
bool ConvertPlaceholdersToFP16 = false;
bool ConvertConstantsToFP16 = true;
bool ConvertFusedScaleOffsetToFP16 = false;
bool ClipToFP16 = false;
bool SkipInputsOnClipToFP16 = true;
bool ForceSLSToFP16Accum = true;
bool ClipQuantRangeToFP16 = false;
bool ClipZeroScaleFP16 = false;

// FP32 Constants
bool ConvertFusedScaleOffsetToFP32 = false;

// Debug Constants
int32_t NumDebugTracesPerDump = 100;
bool DumpDebugTraces = false;
bool LogPartition = true;
bool DumpPartition = false;
bool DumpCompilationLog = false;
bool DumpBackendSpecificIRJSON = false;
bool DumpGraph = false;
std::string DumpGraphPath = "./";
bool DumpInitialLoadedGraph = false;

// Sparse NN Partitioning Scheme Constants
int32_t SparseNNPartitioningSchemeNumCards = 1;
int64_t SparseNNPartitioningSchemeSLSTableKBytesPerCard = 1;
int32_t SparseNNPartitioningSchemeNumCoresSLS = 1;
int32_t SparseNNPartitioningSchemeNumCoresOther = 1;
bool UseSparseNNPartitioningScheme = false;
bool SparseNNPartitioningAddSLSConcats = false;
bool SparseNNPartitioningBalancePerfModel = false;
bool SparseNNPartitioningPairLNWithSLS = false;
bool SparseNNPartitioningPairTileWithSLS = false;
std::string SparseNNPartitioningPairSLSWith = "";
int32_t SparseNNPartitioningConcatSplitSize = 1;
bool SparseNNParallelizeReshapeOnBatchDim = true;

// DAG Optimizer Constants
bool UseDAGOptimizer = false;
int32_t DAGOptimizerNumParallelChunks = 1;
std::string DAGOptimizerPlacementTaggingAlgorithm = "None";
std::string DAGOptimizerParallelizationTaggingAlgorithm = "None";

} // namespace flags
} // namespace glow

namespace glow {
namespace nnpi {
namespace flags {
int32_t ModelParallelSplitAlignment = 1;
int32_t NumParallelChunks = 0; // Zero val for an ugly hack in NNPI.cpp
bool LowerAllBatchMatMul = false;
bool AcceptUnarySLS = false;
bool SpecializeAllOneSLS = false;
bool DisableTransforms = false;
bool EnableCustomIAKernels = false;
bool EnableCustomDSPKernels = false;
bool DumpCompilerData = false;
bool UsePerPartitionIcetConfig = false;
std::string InjectedIAOpKernelPath = "";
bool DumpCustomKernelFiles = false;

} // namespace flags
} // namespace nnpi
} // namespace glow

namespace glow {
namespace interpreter {
namespace flags {
bool LowerBatchMatMul = true;
bool LowerLayerNormalization = true;
} // namespace flags
} // namespace interpreter
} // namespace glow

namespace glow {
namespace torch_glow {
namespace flags {
bool ImaginaryFlag = false; // Placeholder flag
} // namespace flags
} // namespace torch_glow
} // namespace glow

namespace glow {
namespace onnxifi {
namespace flags {
std::string BackendName = "";
bool SaveModel = false;
bool SaveIO = false;
bool SaveDAG = false;
bool SaveDAGWithConstants = false;
bool SaveDAGInZipMode = false;
} // namespace flags
} // namespace onnxifi
} // namespace glow

namespace glow {
namespace runtime {
namespace flags {

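// Note: the device memory amounts below are expressed in KiB, so a value of
// 7 << 20 KiB corresponds to 7 GiB (2^20 KiB per GiB).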
unsigned CPUMemory = 0;
unsigned HabanaMemory = 7 << 20;
unsigned NNPIMemory = 16 << 20;
unsigned NNPITimeoutMs = 0;

std::string AvailableDevices = "";
unsigned InterpreterMemory = 0;
bool EnableP2P = false;
bool EnableDRT = false;
unsigned DeviceInitTimeoutMs = 5000;
unsigned SanitizeInputsPercent = 0;
uint64_t BigTableThresholdBytes = 104857600; // 100MB
unsigned NumCompilationThreads = 1;
} // namespace flags
} // namespace runtime
} // namespace glow

/*
 * Note: Validators are used to assign the flag values rather than direct
 * assignment because direct assignment is subject to the static
 * initialization order fiasco.
 */
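/* Each flag below therefore follows the same pattern: the gflags default is
 * read from the namespaced variable above, and the validator copies any
 * parsed value back into that variable. Schematically (glow_some_flag and
 * SomeFlag are placeholder names for this sketch):
 *
 *   DEFINE_bool(glow_some_flag, glow::flags::SomeFlag, "description");
 *   DEFINE_validator(glow_some_flag, [](const char *, bool val) {
 *     glow::flags::SomeFlag = val;
 *     return true;
 *   });
 */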
DEFINE_int32(glow_num_devices, glow::flags::NumDevices,
             "Number of devices for Glow backend");
DEFINE_validator(glow_num_devices, [](const char *, int32_t val) {
  glow::flags::NumDevices = val;
  return true;
});
DEFINE_bool(glow_scan_devices, glow::flags::ScanDevices,
            "Scan available devices for Glow backend");
DEFINE_validator(glow_scan_devices, [](const char *, bool val) {
  glow::flags::ScanDevices = val;
  return true;
});
DEFINE_int32(glow_snn_partitioning_num_cards,
             glow::flags::SparseNNPartitioningSchemeNumCards,
             "Number of devices to distribute tables across in SparseNN "
             "partitioning");
DEFINE_validator(glow_snn_partitioning_num_cards,
                 [](const char *, int32_t val) {
                   glow::flags::SparseNNPartitioningSchemeNumCards = val;
                   return true;
                 });
DEFINE_int32(glow_snn_partitioning_kbytes_per_card,
             glow::flags::SparseNNPartitioningSchemeSLSTableKBytesPerCard,
             "KBytes per card used for SLS tables in SparseNN partitioning");
DEFINE_validator(glow_snn_partitioning_kbytes_per_card, [](const char *,
                                                           int32_t val) {
  glow::flags::SparseNNPartitioningSchemeSLSTableKBytesPerCard = val;
  return true;
});
DEFINE_int32(
    glow_snn_partitioning_num_cores_sls,
    glow::flags::SparseNNPartitioningSchemeNumCoresSLS,
    "Number of cores to assign to SLS partition in SparseNN partitioning");
DEFINE_validator(glow_snn_partitioning_num_cores_sls,
                 [](const char *, int32_t val) {
                   glow::flags::SparseNNPartitioningSchemeNumCoresSLS = val;
                   return true;
                 });
DEFINE_int32(
    glow_snn_partitioning_num_cores_other,
    glow::flags::SparseNNPartitioningSchemeNumCoresOther,
    "Number of cores to assign to non-SLS partition in SparseNN partitioning");
DEFINE_validator(glow_snn_partitioning_num_cores_other,
                 [](const char *, int32_t val) {
                   glow::flags::SparseNNPartitioningSchemeNumCoresOther = val;
                   return true;
                 });
DEFINE_bool(glow_dump_debug_traces, glow::flags::DumpDebugTraces,
            "Dump traces to /tmp");
DEFINE_validator(glow_dump_debug_traces, [](const char *, bool val) {
  glow::flags::DumpDebugTraces = val;
  return true;
});
DEFINE_int32(glow_num_debug_traces_per_dump, glow::flags::NumDebugTracesPerDump,
             "Maximum number of traces in each debug dump.");
DEFINE_validator(glow_num_debug_traces_per_dump, [](const char *, int32_t val) {
  glow::flags::NumDebugTracesPerDump = val;
  return true;
});
DEFINE_string(glow_onnxifi_backend, glow::onnxifi::flags::BackendName,
              "Glow backend used for ONNXIFI");
DEFINE_validator(glow_onnxifi_backend,
                 [](const char *, const std::string &val) {
                   glow::onnxifi::flags::BackendName = val;
                   return true;
                 });
DEFINE_string(
    glow_available_devices, glow::runtime::flags::AvailableDevices,
    "Comma-separated list of devices which should be used, e.g. 2,3,4");
DEFINE_validator(glow_available_devices,
                 [](const char *, const std::string &val) {
                   glow::runtime::flags::AvailableDevices = val;
                   return true;
                 });
DEFINE_bool(glow_global_fp16, glow::flags::ConvertToFP16,
            "Enable fp16 lowering for all ops on the net");
DEFINE_validator(glow_global_fp16, [](const char *, bool val) {
  glow::flags::ConvertToFP16 = val;
  return true;
});
DEFINE_bool(glow_skip_bias_fp32tofp16_convert,
            glow::flags::SkipBiasFp32tofp16Convert,
            "Skip fp32 -> fp16 conversion for Bias in FC");
DEFINE_validator(glow_skip_bias_fp32tofp16_convert, [](const char *, bool val) {
  glow::flags::SkipBiasFp32tofp16Convert = val;
  return true;
});
DEFINE_bool(torch_glow_imaginary_flag, glow::torch_glow::flags::ImaginaryFlag,
            "Placeholder flag for exercising the torch_glow flag plumbing");
DEFINE_validator(torch_glow_imaginary_flag, [](const char *, bool val) {
  glow::torch_glow::flags::ImaginaryFlag = val;
  return true;
});
DEFINE_bool(glow_global_fp16_placeholders,
            glow::flags::ConvertPlaceholdersToFP16,
            "Enable fp16 conversion for Placeholders");
DEFINE_validator(glow_global_fp16_placeholders, [](const char *, bool val) {
  glow::flags::ConvertPlaceholdersToFP16 = val;
  return true;
});
DEFINE_bool(glow_global_fp16_constants, glow::flags::ConvertConstantsToFP16,
            "Enable fp16 conversion for Constants");
DEFINE_validator(glow_global_fp16_constants, [](const char *, bool val) {
  glow::flags::ConvertConstantsToFP16 = val;
  return true;
});
DEFINE_bool(glow_global_fused_scale_offset_fp16,
            glow::flags::ConvertFusedScaleOffsetToFP16,
            "Enable fp16 lowering for all op inputs using fused scale/offset");
DEFINE_validator(glow_global_fused_scale_offset_fp16,
                 [](const char *, bool val) {
                   glow::flags::ConvertFusedScaleOffsetToFP16 = val;
                   return true;
                 });
DEFINE_bool(
    glow_global_fused_scale_offset_fp32,
    glow::flags::ConvertFusedScaleOffsetToFP32,
    "Enable converting scale/offset in SLS input data from fp16 to fp32");
DEFINE_validator(glow_global_fused_scale_offset_fp32,
                 [](const char *, bool val) {
                   glow::flags::ConvertFusedScaleOffsetToFP32 = val;
                   return true;
                 });
DEFINE_bool(
    glow_global_force_sls_fp16_accum, glow::flags::ForceSLSToFP16Accum,
    "Force all SLS/SLWS ops to use FP16 accumulation. True by default.");
DEFINE_validator(glow_global_force_sls_fp16_accum, [](const char *, bool val) {
  glow::flags::ForceSLSToFP16Accum = val;
  return true;
});
DEFINE_bool(glow_enable_quant_param_changes,
            glow::flags::EnableQuantParamChanges,
            "Enable quantization param changes during optimizations");
DEFINE_validator(glow_enable_quant_param_changes, [](const char *, bool val) {
  glow::flags::EnableQuantParamChanges = val;
  return true;
});
DEFINE_bool(glow_use_sparsenn_partitioning_scheme,
            glow::flags::UseSparseNNPartitioningScheme,
            "Force glow to use SparseNN partitioning scheme");
DEFINE_validator(glow_use_sparsenn_partitioning_scheme,
                 [](const char *, bool val) {
                   glow::flags::UseSparseNNPartitioningScheme = val;
                   return true;
                 });
DEFINE_bool(glow_sparsenn_partitioning_add_sls_concats,
            glow::flags::SparseNNPartitioningAddSLSConcats,
            "Add extra concats inside of SLS partitions for more efficient "
            "inter-partition transfers");
DEFINE_validator(glow_sparsenn_partitioning_add_sls_concats,
                 [](const char *, bool val) {
                   glow::flags::SparseNNPartitioningAddSLSConcats = val;
                   return true;
                 });
DEFINE_bool(glow_sparsenn_partitioning_balance_perf_model,
            glow::flags::SparseNNPartitioningBalancePerfModel,
            "Balance SLS tables across cards using a perf model");
DEFINE_validator(glow_sparsenn_partitioning_balance_perf_model,
                 [](const char *, bool val) {
                   glow::flags::SparseNNPartitioningBalancePerfModel = val;
                   return true;
                 });
DEFINE_bool(glow_sparsenn_partitioning_pair_ln_with_sls,
            glow::flags::SparseNNPartitioningPairLNWithSLS,
            "Put layer normalization nodes immediately following SLS into SLS "
            "partitions");
DEFINE_validator(glow_sparsenn_partitioning_pair_ln_with_sls,
                 [](const char *, bool val) {
                   glow::flags::SparseNNPartitioningPairLNWithSLS = val;
                   return true;
                 });
DEFINE_bool(
    glow_sparsenn_partitioning_pair_tile_with_sls,
    glow::flags::SparseNNPartitioningPairTileWithSLS,
    "Put tile nodes immediately following SLS for user embeddings into SLS "
    "partitions");
DEFINE_validator(glow_sparsenn_partitioning_pair_tile_with_sls,
                 [](const char *, bool val) {
                   glow::flags::SparseNNPartitioningPairTileWithSLS = val;
                   return true;
                 });
DEFINE_string(
    glow_sparsenn_partitioning_pair_sls_with,
    glow::flags::SparseNNPartitioningPairSLSWith,
    "Put the specified nodes immediately following SLS into SLS partitions. "
    "Supported for LayerNorm, Tile, Concat, and Tanh nodes. "
    "Comma-separated list of node names, e.g. LayerNorm,Tile.");
DEFINE_validator(glow_sparsenn_partitioning_pair_sls_with,
                 [](const char *, const std::string &val) {
                   glow::flags::SparseNNPartitioningPairSLSWith = val;
                   return true;
                 });
DEFINE_int32(glow_sparsenn_partitioning_concat_split_size,
             glow::flags::SparseNNPartitioningConcatSplitSize,
             "Number of inputs per split when splitting concats that are "
             "moved into SLS partitions");
DEFINE_validator(glow_sparsenn_partitioning_concat_split_size,
                 [](const char *, const int32_t val) {
                   glow::flags::SparseNNPartitioningConcatSplitSize = val;
                   return true;
                 });
DEFINE_bool(glow_sparsenn_parallelize_reshape_on_batch_dim,
            glow::flags::SparseNNParallelizeReshapeOnBatchDim,
            "Force parallelizing the reshape operators on the batch dimension");
DEFINE_validator(glow_sparsenn_parallelize_reshape_on_batch_dim,
                 [](const char *, bool val) {
                   glow::flags::SparseNNParallelizeReshapeOnBatchDim = val;
                   return true;
                 });
DEFINE_bool(glow_clip_fp16, glow::flags::ClipToFP16,
            "Force glow to clip fp16 values to min/max");
DEFINE_validator(glow_clip_fp16, [](const char *, bool val) {
  glow::flags::ClipToFP16 = val;
  return true;
});
DEFINE_bool(glow_clip_fp16_skip_inputs, glow::flags::SkipInputsOnClipToFP16,
            "Force glow to skip clipping fp16 Node inputs to min/max");
DEFINE_validator(glow_clip_fp16_skip_inputs, [](const char *, bool val) {
  glow::flags::SkipInputsOnClipToFP16 = val;
  return true;
});
DEFINE_bool(glow_saturate_host, glow::flags::SaturateHost,
            "Try to use all available devices on the host");
DEFINE_validator(glow_saturate_host, [](const char *, bool val) {
  glow::flags::SaturateHost = val;
  return true;
});
DEFINE_bool(
    glow_save_onnxifi_dag, glow::onnxifi::flags::SaveDAG,
    "Whether to serialize the DAG that has been optimized and partitioned.");
DEFINE_validator(glow_save_onnxifi_dag, [](const char *, bool val) {
  glow::onnxifi::flags::SaveDAG = val;
  return true;
});
DEFINE_bool(glow_save_onnxifi_dag_with_constants,
            glow::onnxifi::flags::SaveDAGWithConstants,
            "Whether to serialize constants in the DAG that has been optimized "
            "and partitioned.");
DEFINE_validator(glow_save_onnxifi_dag_with_constants,
                 [](const char *, bool val) {
                   glow::onnxifi::flags::SaveDAGWithConstants = val;
                   return true;
                 });
DEFINE_bool(glow_save_onnxifi_dag_in_zip_mode,
            glow::onnxifi::flags::SaveDAGInZipMode,
            "Whether to serialize the DAG that has been optimized and "
            "partitioned in ZIP mode.");
DEFINE_validator(glow_save_onnxifi_dag_in_zip_mode, [](const char *, bool val) {
  glow::onnxifi::flags::SaveDAGInZipMode = val;
  return true;
});
DEFINE_bool(
    glow_delay_and_record_constant_modification,
    glow::flags::DelayAndRecordConstantModification,
    "Whether to delay and record constant modification for serialization.");
DEFINE_validator(glow_delay_and_record_constant_modification,
                 [](const char *, bool val) {
                   glow::flags::DelayAndRecordConstantModification = val;
                   return true;
                 });
DEFINE_bool(glow_use_tracked_dummy_quant_params,
            glow::flags::UseTrackedDummyQuantParams,
            "Whether to use uniqued dummy quant params when loading the model, "
            "which are then mapped to loaded names for serialization.");
DEFINE_validator(glow_use_tracked_dummy_quant_params,
                 [](const char *, bool val) {
                   glow::flags::UseTrackedDummyQuantParams = val;
                   return true;
                 });
DEFINE_bool(glow_clip_zero_scale_fp16, glow::flags::ClipZeroScaleFP16,
            "Whether to clip quantization-parameter scales below 1/65504 up "
            "to that value.");
DEFINE_validator(glow_clip_zero_scale_fp16, [](const char *, bool val) {
  glow::flags::ClipZeroScaleFP16 = val;
  return true;
});
DEFINE_bool(glow_clip_quant_range_to_fp16, glow::flags::ClipQuantRangeToFP16,
            "Whether to clip quantization parameters to fit inside the fp16 "
            "range.");
DEFINE_validator(glow_clip_quant_range_to_fp16, [](const char *, bool val) {
  glow::flags::ClipQuantRangeToFP16 = val;
  return true;
});
DEFINE_int32(glow_max_active_requests, glow::flags::MaxActiveRequests,
             "Maximum number of active requests before the host manager "
             "starts queuing");
DEFINE_validator(glow_max_active_requests, [](const char *, int32_t val) {
  glow::flags::MaxActiveRequests = val;
  return true;
});
DEFINE_int32(glow_max_active_requests_per_instance,
             glow::flags::MaxActiveRequestsPerInstance,
             "Maximum number of active requests per instance of a network.");
DEFINE_validator(glow_max_active_requests_per_instance,
                 [](const char *, int32_t val) {
                   glow::flags::MaxActiveRequestsPerInstance = val;
                   return true;
                 });
DEFINE_int32(
    glow_max_queue_size, glow::flags::MaxQueueSize,
    "Maximum number of pending requests in glow's host manager queue before "
    "new requests are rejected");
DEFINE_validator(glow_max_queue_size, [](const char *, int32_t val) {
  glow::flags::MaxQueueSize = val;
  return true;
});
DEFINE_int32(glow_executor_threads, glow::flags::ExecutorThreads,
             "Number of executor threads for host manager");
DEFINE_validator(glow_executor_threads, [](const char *, int32_t val) {
  glow::flags::ExecutorThreads = val;
  return true;
});
DEFINE_bool(glow_partitioner_enable_load_balance,
            glow::flags::EnableLoadBalancedPartitioning,
            "Enable a partitioner pass to optimize for load balance in "
            "addition to memory capacity constraints");
DEFINE_validator(glow_partitioner_enable_load_balance,
                 [](const char *, bool val) {
                   glow::flags::EnableLoadBalancedPartitioning = val;
                   return true;
                 });
DEFINE_bool(glow_skip_provisioning, glow::flags::SkipProvisioning,
            "Skip provisioning. Used for AOT opts or debugging.");
DEFINE_validator(glow_skip_provisioning, [](const char *, bool val) {
  glow::flags::SkipProvisioning = val;
  return true;
});
DEFINE_bool(glow_sink_tanh_below_concat, glow::flags::SinkTanhBelowConcat,
            "Sink tanh ops below concat.");
DEFINE_validator(glow_sink_tanh_below_concat, [](const char *, bool val) {
  glow::flags::SinkTanhBelowConcat = val;
  return true;
});
DEFINE_bool(glow_save_onnxifi_model, glow::onnxifi::flags::SaveModel,
            "Package the glow function and weights right before lowering");
DEFINE_validator(glow_save_onnxifi_model, [](const char *, bool val) {
  glow::onnxifi::flags::SaveModel = val;
  return true;
});
DEFINE_bool(glow_save_onnxifi_io, glow::onnxifi::flags::SaveIO,
            "Save the input and output result around ONNXIFI boundary");
DEFINE_validator(glow_save_onnxifi_io, [](const char *, bool val) {
  glow::onnxifi::flags::SaveIO = val;
  return true;
});
DEFINE_bool(glow_enable_partial_tensors, glow::flags::EnablePartialTensors,
            "Enable support for partial tensors");
DEFINE_validator(glow_enable_partial_tensors, [](const char *, bool val) {
  glow::flags::EnablePartialTensors = val;
  return true;
});
DEFINE_bool(glow_use_custom_ops_for_export, glow::flags::UseCustomOpsForExport,
            "Use custom ONNX ops when exporting Glow protos.");
DEFINE_validator(glow_use_custom_ops_for_export, [](const char *, bool val) {
  glow::flags::UseCustomOpsForExport = val;
  return true;
});
DEFINE_bool(glow_dump_graph, glow::flags::DumpGraph,
            "Dump the glow Graph into files before compilation");
DEFINE_validator(glow_dump_graph, [](const char *, bool val) {
  glow::flags::DumpGraph = val;
  return true;
});
DEFINE_string(glow_dump_graph_path, glow::flags::DumpGraphPath,
              "Directory path for the dumped graphs.");
DEFINE_validator(glow_dump_graph_path,
                 [](const char *, const std::string &val) {
                   glow::flags::DumpGraphPath = val;
                   return true;
                 });
DEFINE_bool(glow_dump_initial_loaded_graph, glow::flags::DumpInitialLoadedGraph,
            "Dump the glow Graph right after onnxification");
DEFINE_validator(glow_dump_initial_loaded_graph, [](const char *, bool val) {
  glow::flags::DumpInitialLoadedGraph = val;
  return true;
});
DEFINE_bool(glow_use_dag_optimizer, glow::flags::UseDAGOptimizer,
            "Whether to call the DAG optimizer");
DEFINE_validator(glow_use_dag_optimizer, [](const char *, bool val) {
  glow::flags::UseDAGOptimizer = val;
  return true;
});
DEFINE_int32(glow_dag_optimizer_num_parallel_chunks,
             glow::flags::DAGOptimizerNumParallelChunks,
             "Number of parallel chunks for DAGOptimizer parallelization");
DEFINE_validator(glow_dag_optimizer_num_parallel_chunks,
                 [](const char *, int32_t val) {
                   glow::flags::DAGOptimizerNumParallelChunks = val;
                   return true;
                 });
DEFINE_string(glow_dag_optimizer_placement_tagging_algorithm,
              glow::flags::DAGOptimizerPlacementTaggingAlgorithm,
              "Name of placement tagging algorithm to run in DAGOptimizer");
DEFINE_validator(glow_dag_optimizer_placement_tagging_algorithm,
                 [](const char *, const std::string &val) {
                   glow::flags::DAGOptimizerPlacementTaggingAlgorithm = val;
                   return true;
                 });

DEFINE_string(
    glow_dag_optimizer_parallelization_tagging_algorithm,
    glow::flags::DAGOptimizerParallelizationTaggingAlgorithm,
    "Name of parallelization tagging algorithm to run in DAGOptimizer");
DEFINE_validator(glow_dag_optimizer_parallelization_tagging_algorithm,
                 [](const char *, const std::string &val) {
                   glow::flags::DAGOptimizerParallelizationTaggingAlgorithm =
                       val;
                   return true;
                 });
// Defined in glow/lib/Backends/NNPI/NNPI.cpp
DEFINE_bool(glow_use_per_partition_icet_config,
            glow::nnpi::flags::UsePerPartitionIcetConfig,
            "Read an icet_config.json file for each partition");
DEFINE_validator(glow_use_per_partition_icet_config,
                 [](const char *, bool val) {
                   glow::nnpi::flags::UsePerPartitionIcetConfig = val;
                   return true;
                 });
DEFINE_bool(glow_dump_nnpi_compiler_data, glow::nnpi::flags::DumpCompilerData,
            "Dump the NNPI compiler data into files before NNPI compilation");
DEFINE_validator(glow_dump_nnpi_compiler_data, [](const char *, bool val) {
  glow::nnpi::flags::DumpCompilerData = val;
  return true;
});
DEFINE_bool(glow_nnpi_specialize_all_one_sls,
            glow::nnpi::flags::SpecializeAllOneSLS,
            "Whether to import SLS ops with AllOne attribute to NNPI.");
DEFINE_validator(glow_nnpi_specialize_all_one_sls, [](const char *, bool val) {
  glow::nnpi::flags::SpecializeAllOneSLS = val;
  return true;
});
DEFINE_bool(glow_disable_nnpi_transforms, glow::nnpi::flags::DisableTransforms,
            "Disable running NNPIBackend::transformPostLowering().");
DEFINE_validator(glow_disable_nnpi_transforms, [](const char *, bool val) {
  glow::nnpi::flags::DisableTransforms = val;
  return true;
});
DEFINE_bool(glow_enable_nnpi_custom_ia_kernels,
            glow::nnpi::flags::EnableCustomIAKernels,
            "Enable running NNPIBackend::transformPrivate().");
DEFINE_validator(glow_enable_nnpi_custom_ia_kernels,
                 [](const char *, bool val) {
                   glow::nnpi::flags::EnableCustomIAKernels = val;
                   return true;
                 });
DEFINE_bool(glow_enable_nnpi_custom_dsp_kernels,
            glow::nnpi::flags::EnableCustomDSPKernels,
            "Enable running NNPIBackend::transformPrivate().");
DEFINE_validator(glow_enable_nnpi_custom_dsp_kernels,
                 [](const char *, bool val) {
                   glow::nnpi::flags::EnableCustomDSPKernels = val;
                   return true;
                 });

DEFINE_string(glow_injected_ia_op_kernel_path,
              glow::nnpi::flags::InjectedIAOpKernelPath,
              "Path to IA kernels library to use");
DEFINE_validator(glow_injected_ia_op_kernel_path,
                 [](const char *, const std::string &val) {
                   glow::nnpi::flags::InjectedIAOpKernelPath = val;
                   return true;
                 });

DEFINE_bool(glow_dump_custom_kernel_files,
            glow::nnpi::flags::DumpCustomKernelFiles,
            "Enable dumping the compiled custom IA and DSP kernels to file.");
DEFINE_validator(glow_dump_custom_kernel_files, [](const char *, bool val) {
  glow::nnpi::flags::DumpCustomKernelFiles = val;
  return true;
});

DEFINE_bool(glow_nnpi_lower_all_batch_matmul,
            glow::nnpi::flags::LowerAllBatchMatMul,
            "Whether to override default lowering for NNPI and always lower "
            "BatchMatMul to a series of MatMuls.");
DEFINE_validator(glow_nnpi_lower_all_batch_matmul, [](const char *, bool val) {
  glow::nnpi::flags::LowerAllBatchMatMul = val;
  return true;
});
DEFINE_bool(glow_nnpi_accept_unary_sls, glow::nnpi::flags::AcceptUnarySLS,
            "Whether to accept unary SLS ops during ONNXIFI loading.");
DEFINE_validator(glow_nnpi_accept_unary_sls, [](const char *, bool val) {
  glow::nnpi::flags::AcceptUnarySLS = val;
  return true;
});
DEFINE_int32(glow_nnpi_num_parallel_chunks,
             glow::nnpi::flags::NumParallelChunks,
             "Number of parallel chunks for NNPI");
DEFINE_validator(glow_nnpi_num_parallel_chunks, [](const char *, int32_t val) {
  glow::nnpi::flags::NumParallelChunks = val;
  return true;
});
DEFINE_int32(glow_nnpi_model_parallel_split_alignment,
             glow::nnpi::flags::ModelParallelSplitAlignment,
             "Alignment value for model parallel splits");
DEFINE_validator(glow_nnpi_model_parallel_split_alignment,
                 [](const char *, int32_t val) {
                   glow::nnpi::flags::ModelParallelSplitAlignment = val;
                   return true;
                 });
DEFINE_int32(glow_nnpi_memory, glow::runtime::flags::NNPIMemory,
             "Amount of DRAM to allocate per NNPI device in KiB");
DEFINE_validator(glow_nnpi_memory, [](const char *, int32_t val) {
  glow::runtime::flags::NNPIMemory = val;
  return true;
});
DEFINE_int32(glow_nnpi_timeout_ms, glow::runtime::flags::NNPITimeoutMs,
             "Timeout threshold for inference in milliseconds. Default 0 "
             "means infinity");
DEFINE_validator(glow_nnpi_timeout_ms, [](const char *, int32_t val) {
  glow::runtime::flags::NNPITimeoutMs = val;
  return true;
});

DEFINE_bool(glow_interpreter_lower_batch_matmul,
            glow::interpreter::flags::LowerBatchMatMul,
            "Lower BatchMatMul nodes.");
DEFINE_validator(glow_interpreter_lower_batch_matmul,
                 [](const char *, bool val) {
                   glow::interpreter::flags::LowerBatchMatMul = val;
                   return true;
                 });
DEFINE_bool(glow_interpreter_lower_layer_normalization,
            glow::interpreter::flags::LowerLayerNormalization,
            "Lower LayerNormalization nodes.");
DEFINE_validator(glow_interpreter_lower_layer_normalization,
                 [](const char *, bool val) {
                   glow::interpreter::flags::LowerLayerNormalization = val;
                   return true;
                 });

DEFINE_int32(glow_interpreter_memory, glow::runtime::flags::InterpreterMemory,
             "Amount of DRAM to allocate per Interpreter in KiB");
DEFINE_validator(glow_interpreter_memory, [](const char *, int32_t val) {
  glow::runtime::flags::InterpreterMemory = val;
  return true;
});
DEFINE_int32(glow_cpu_memory, glow::runtime::flags::CPUMemory,
             "Amount of DRAM to allocate per CPU in KiB");
DEFINE_validator(glow_cpu_memory, [](const char *, int32_t val) {
  glow::runtime::flags::CPUMemory = val;
  return true;
});

DEFINE_int32(glow_habana_memory, glow::runtime::flags::HabanaMemory,
             "Amount of DRAM to allocate per Habana device in KiB");
DEFINE_validator(glow_habana_memory, [](const char *, int32_t val) {
  glow::runtime::flags::HabanaMemory = val;
  return true;
});

DEFINE_int32(
    glow_num_compilation_threads, glow::runtime::flags::NumCompilationThreads,
    "Maximum number of threads to spawn per call to Backend::compileFunctions");
DEFINE_validator(glow_num_compilation_threads, [](const char *, int32_t val) {
  glow::runtime::flags::NumCompilationThreads = val;
  return true;
});

DEFINE_bool(glow_log_partition, glow::flags::LogPartition,
            "Enable logging partition info");
DEFINE_validator(glow_log_partition, [](const char *, bool val) {
  glow::flags::LogPartition = val;
  return true;
});
DEFINE_bool(glow_enable_p2p, glow::runtime::flags::EnableP2P,
            "Enable peer-to-peer support");
DEFINE_validator(glow_enable_p2p, [](const char *, bool val) {
  glow::runtime::flags::EnableP2P = val;
  return true;
});
DEFINE_bool(glow_enable_drt, glow::runtime::flags::EnableDRT,
            "Enable device resident tensor support");
DEFINE_validator(glow_enable_drt, [](const char *, bool val) {
  glow::runtime::flags::EnableDRT = val;
  return true;
});
DEFINE_int32(glow_device_init_timeout_ms,
             glow::runtime::flags::DeviceInitTimeoutMs,
             "Timeout threshold for device initialization in milliseconds. "
             "Default 5000");
DEFINE_validator(glow_device_init_timeout_ms, [](const char *, int32_t val) {
  glow::runtime::flags::DeviceInitTimeoutMs = val;
  return true;
});
DEFINE_uint64(
    glow_partition_big_table_threshold_bytes,
    glow::runtime::flags::BigTableThresholdBytes,
    "Threshold used by the partitioning algorithm to determine big tables. "
    "Default 104857600 (100MB)");
DEFINE_validator(glow_partition_big_table_threshold_bytes,
                 [](const char *, uint64_t val) {
                   glow::runtime::flags::BigTableThresholdBytes = val;
                   return true;
                 });
DEFINE_int32(glow_enable_sanitize_inputs,
             glow::runtime::flags::SanitizeInputsPercent,
             "Percentage of inferences whose inputs should be sanitized "
             "(0-100)");
DEFINE_validator(glow_enable_sanitize_inputs, [](const char *, int32_t val) {
  if (val < 0 || val > 100) {
    return false;
  }

  glow::runtime::flags::SanitizeInputsPercent = val;
  return true;
});

DEFINE_bool(glow_dump_partition, glow::flags::DumpPartition,
            "Enable dumping the graph of each partition");
DEFINE_validator(glow_dump_partition, [](const char *, bool val) {
  glow::flags::DumpPartition = val;
  return true;
});
DEFINE_bool(glow_dump_compilation_log, glow::flags::DumpCompilationLog,
            "Dump the glow compilation log into /tmp during compilation");
DEFINE_validator(glow_dump_compilation_log, [](const char *, bool val) {
  glow::flags::DumpCompilationLog = val;
  return true;
});
DEFINE_bool(glow_dump_backend_specific_ir_json,
            glow::flags::DumpBackendSpecificIRJSON,
            "Dump the backend-specific IR JSON file");
DEFINE_validator(glow_dump_backend_specific_ir_json,
                 [](const char *, bool val) {
                   glow::flags::DumpBackendSpecificIRJSON = val;
                   return true;
                 });
DEFINE_string(glow_backend_specific_opts, glow::flags::BackendSpecificOpts,
              "Glow backend specific options. Comma-separated list of "
              "key=value pairs, e.g. key1=val1,key2=val2.");
DEFINE_validator(glow_backend_specific_opts,
                 [](const char *, const std::string &val) {
                   glow::flags::BackendSpecificOpts = val;
                   return true;
                 });

bool glow::flags::processBackendSpecificOpts(
    std::map<std::string, std::string> &optsMap, llvm::StringRef optsStr) {
  if (optsStr.empty()) {
    return true;
  }
  llvm::SmallVector<llvm::StringRef, 4> splitOpts;
  optsStr.split(splitOpts, ',');

  for (const llvm::StringRef &opt : splitOpts) {
    LOG(INFO) << "Adding backend specific option: " << opt.str();
    auto keyValPair = opt.split('=');
    if (keyValPair.second.empty()) {
      LOG(ERROR) << "No '=' found in backend-specific opt " << opt.str();
      return false;
    }
    optsMap.emplace(keyValPair.first, keyValPair.second);
  }
  return true;
}
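
/* A minimal usage sketch (the caller shown here is hypothetical), assuming a
 * flag value of "key1=val1,key2=val2" was parsed by gflags:
 *
 *   std::map<std::string, std::string> opts;
 *   if (!glow::flags::processBackendSpecificOpts(
 *           opts, glow::flags::BackendSpecificOpts)) {
 *     LOG(ERROR) << "Malformed backend-specific options";
 *   }
 *   // On success, opts maps "key1" -> "val1" and "key2" -> "val2"; any
 *   // entry without an '=' causes the function to return false.
 */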

namespace {
llvm::cl::OptionCategory flagsLibCat("Glow Flags Lib CmdLine Options");
/// Allows enabling DRT support.
llvm::cl::opt<bool, /* ExternalStorage */ true>
    enableDRT("enable-DRT",
              llvm::cl::desc(
                  "Deprecated. Enables DRT support. Alias to glow_enable_drt."),
              llvm::cl::Optional,
              llvm::cl::location(glow::runtime::flags::EnableDRT),
              llvm::cl::cat(flagsLibCat));

/// Allows enabling P2P support.
llvm::cl::opt<bool, /* ExternalStorage */ true>
    enableP2P("enable-P2P",
              llvm::cl::desc(
                  "Deprecated. Enables P2P support. Alias to glow_enable_p2p."),
              llvm::cl::Optional,
              llvm::cl::location(glow::runtime::flags::EnableP2P),
              llvm::cl::cat(flagsLibCat));
} // namespace