1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | #include "glow/PassManager/Pipeline.h" |
17 | |
18 | #include "glow/Optimizer/GraphOptimizer/FunctionPassManager.h" |
19 | #include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h" |
20 | #include "glow/PassManager/PassConfigUtils.h" |
21 | #include "glow/Support/Memory.h" |
22 | |
23 | #include <fstream> |
24 | |
25 | namespace { |
26 | /// A helper class to represent a FunctionPassConfig in a way which can be |
27 | /// easily handled by YAML functions. |
28 | struct FunctionPassConfigHelper { |
29 | std::string passName; |
30 | glow::ConvergenceMode convergenceMode; |
31 | CompilationModes enabledCompilationModes; |
32 | glow::DCERequiredMode dceMode; |
33 | FunctionPassConfigHelper(const glow::FunctionPassConfig &config) |
34 | : passName(config.getNameOfPass()), |
35 | convergenceMode(config.getConvergenceMode()), |
36 | enabledCompilationModes(config.getEnabledCompilationModes()), |
37 | dceMode(config.getDCERequiredMode()) {} |
38 | FunctionPassConfigHelper() = default; |
39 | }; |
40 | } // namespace |
41 | |
42 | namespace llvm { |
43 | namespace yaml { |
44 | /// Define the YAML mapping for PassConfigHelper. |
45 | template <> struct MappingTraits<FunctionPassConfigHelper> { |
46 | static void mapping(IO &io, FunctionPassConfigHelper &config) { |
47 | io.mapRequired("passName" , config.passName); |
48 | io.mapRequired("convergenceMode" , config.convergenceMode); |
49 | io.mapRequired("enabledCompilationModes" , config.enabledCompilationModes); |
50 | io.mapRequired("dceMode" , config.dceMode); |
51 | } |
52 | }; |
53 | } // namespace yaml |
54 | } // namespace llvm |
55 | |
56 | namespace glow { |
57 | |
58 | std::unique_ptr<FunctionPassPipeline> |
59 | createDefaultGraphOptimizationPassPipeline() { |
60 | std::initializer_list<FunctionPassConfig> configs{ |
61 | // Eliminate nodes which do not do anything. Do it as early as |
62 | // possible to prevent such nodes from affecting other optimizations. |
63 | {FunctionPassID::EliminateNoop}, |
64 | |
65 | // Sink transpose operations in an attempt to cancel them out. |
66 | // Perform code sinking until a fixed-point is reached. |
67 | // On big functions, the number of iterations until the fixpoint |
68 | // is usually at most 2 or 3 iterations. |
69 | {FunctionPassID::SinkCode, ConvergenceMode::UntilFixedPoint}, |
70 | |
71 | // ConvTranspose + BiasAdd |
72 | {FunctionPassID::ConvTransposeBiasAddFold}, |
73 | |
74 | // Transposes that don't move data are optimized into Reshapes, which |
75 | // enables further optimizations. |
76 | {FunctionPassID::OptimizeTransposeIntoReshape}, |
77 | |
78 | // Optimize arithmetic nodes based on algebraic identities. |
79 | {FunctionPassID::OptimizeArithmeticNodes}, |
80 | |
81 | // Fold some Arithmetic ops following a LayerNorm into LayerNorm. |
82 | {FunctionPassID::FoldLayerNormArithmetic}, |
83 | |
84 | // Reshapes and transposes can prevent other optimizations from |
85 | // triggering, |
86 | // so try to optimize them out first. |
87 | {FunctionPassID::OptimizeReshape}, |
88 | |
89 | {FunctionPassID::TransposeConstants, |
90 | ConvergenceMode::OnePass, |
91 | {CompilationMode::Infer}}, |
92 | |
93 | // Perform Common Subexpression Elimination. |
94 | {FunctionPassID::CSE}, |
95 | |
96 | // Optimize Pad nodes |
97 | {FunctionPassID::MergePadIntoConvolution}, |
98 | |
99 | // Optimize Convolution nodes with small input tensors. |
100 | {FunctionPassID::OptimizeSmallConv}, |
101 | |
102 | // Merge multiple matmul nodes into a single large matmul. |
103 | {FunctionPassID::MergeMatMulOnLHS}, |
104 | {FunctionPassID::MergeMatMulOnRHS}, |
105 | // Merge multiple batched adds into a larger batched add. |
106 | {FunctionPassID::MergeBatchedAdd}, |
107 | |
108 | // Merge ReduceMean into AveragePool if possible. |
109 | {FunctionPassID::OptimizeReduceMean}, |
110 | |
111 | // Optimize Resize nodes. |
112 | {FunctionPassID::OptimizeResize}, |
113 | |
114 | // Optimize Insert nodes. |
115 | {FunctionPassID::OptimizeInsert}, |
116 | |
117 | // Convert BatchMatMuls with a broadcasted RHS to a single MatMul. |
118 | {FunctionPassID::ConvertBroadcastedBatchMatMul}, |
119 | |
120 | // Eliminate nodes which do not do anything. |
121 | {FunctionPassID::EliminateNoop}, |
122 | |
123 | // Perform Common Subexpression Elimination. |
124 | {FunctionPassID::CSE}, |
125 | |
126 | // Optimize arithmetic nodes based on algebraic identities. |
127 | {FunctionPassID::OptimizeArithmeticNodes}, |
128 | |
129 | // Optimize Splat nodes. |
130 | {FunctionPassID::OptimizeSplat}, |
131 | |
132 | // Optimize Concat nodes. |
133 | {FunctionPassID::OptimizeConcatNodes}, |
134 | |
135 | // Eliminate Concat-Slice patterns which are unnecessary. |
136 | {FunctionPassID::EliminateConcatSlice}, |
137 | |
138 | // Merge Transpose into MatMul/FC. |
139 | {FunctionPassID::MergeTransposeIntoMatMulOrFC}, |
140 | |
141 | // Optimize away intermediate type conversions. |
142 | {FunctionPassID::OptimizeConversions}, |
143 | |
144 | // Eliminate clips outside the FP16 range. This is a specialized pass that |
145 | // is disabled by default. |
146 | {FunctionPassID::EliminateClipsOutsideFP16Range}, |
147 | |
148 | // Look for float Relus that we can fuse up into quantized FCs. |
149 | {FunctionPassID::OptimizeQuantFCFloatRelu}, |
150 | |
151 | // Eliminate clips outside the FP16 range. This is a specialized pass that |
152 | // is disabled by default. |
153 | {FunctionPassID::EliminateClipsOutsideFP16Range}, |
154 | |
155 | // Optimize away intermediate consecutive Clips. |
156 | {FunctionPassID::OptimizeClips}, |
157 | |
158 | // Optimize quantization related operators. |
159 | {FunctionPassID::OptimizeQuantization, ConvergenceMode::UntilFixedPoint}, |
160 | |
161 | // Optimize patterns of concats with quantization/dequantization. |
162 | {FunctionPassID::OptimizeConcatQuantization}, |
163 | |
164 | // Optimize reshapes introduced during above optimizations. |
165 | {FunctionPassID::OptimizeReshape}, |
166 | |
167 | // Run a round of constant folding. |
168 | {FunctionPassID::ConstantFold}, |
169 | |
170 | // Optimize Gather with const scalar index to Slice. |
171 | {FunctionPassID::GatherToSlice}, |
172 | |
173 | // Optimize combinations of Quantized Nodes and Clips. |
174 | {FunctionPassID::OptimizeQuantizeClip}, |
175 | |
176 | // Remove identity Relu and Clip nodes. |
177 | {FunctionPassID::RemoveIdentityRelu}, |
178 | {FunctionPassID::RemoveIdentityClip}, |
179 | |
180 | // Fold a Convolution dilated manually using Transpose, SpaceToDepth and |
181 | // DepthToSpace nodes into a single Convolution node. |
182 | // Run Reshape/Transpose optimizations afterwards to clean up the graph. |
183 | {FunctionPassID::FoldDilatedConv}, |
184 | {FunctionPassID::OptimizeReshape}, |
185 | {FunctionPassID::SinkCode, ConvergenceMode::UntilFixedPoint}, |
186 | |
187 | // Fold Arithmetic chain w/ constants into Batch Norm, when Conv preceeds. |
188 | {FunctionPassID::FoldArithmeticChainUnderConvIntoBN, |
189 | ConvergenceMode::OnePass, |
190 | {CompilationMode::Infer}}, |
191 | |
192 | // Fold Arithmetic chain w/ constants into the preceding Batch Norm. |
193 | {FunctionPassID::FoldBatchNormalizationWithArithmeticChain, |
194 | ConvergenceMode::OnePass, |
195 | {CompilationMode::Infer}}, |
196 | |
197 | // Merge batch normalization operations. |
198 | // Do after transpose constant folding, as weight transposes can prevent |
199 | // the optimization from triggering. |
200 | {FunctionPassID::OptimizeBatchNorm, |
201 | ConvergenceMode::UntilFixedPoint, |
202 | {CompilationMode::Infer}}, |
203 | |
204 | // Try to remove unnecessary Split-Concat operations |
205 | {FunctionPassID::EliminateSliceConcat}, |
206 | |
207 | // Perform Common Subexpression Elimination. |
208 | {FunctionPassID::CSE}, |
209 | |
210 | // Some sinking transformations are harmful for performance if a sunken |
211 | // node does not get optimized out (e.g. sinking of Transpose below Tile). |
212 | // Run code hoisting pass to undo such unsuccessful sinking. |
213 | {FunctionPassID::HoistCode, ConvergenceMode::UntilFixedPoint}, |
214 | |
215 | // Try to eliminate Reshape nodes by sinking them through the graph. |
216 | // Such sinking can create new optimization opportunities as well as |
217 | // prevent some optimizations from happening, so do it at the very end of |
218 | // the pipeline to keep the current iteration unaffected and bear all |
219 | // benefits/consequences on the next pipeline iteration. |
220 | {FunctionPassID::SinkReshapes, ConvergenceMode::UntilFixedPoint}, |
221 | {FunctionPassID::OptimizeReshape}, |
222 | |
223 | // Perform a round of Dead Code Elimination to cleanup the final pass. |
224 | getDCEPassConfig(), |
225 | }; |
226 | return glow::make_unique<FunctionPassPipeline>(configs); |
227 | } |
228 | |
229 | std::unique_ptr<FunctionPassPipeline> |
230 | createFP16GraphOptimizationPassPipeline() { |
231 | std::initializer_list<FunctionPassConfig> configs{ |
232 | // Optimize away intermediate type conversions. |
233 | {FunctionPassID::OptimizeConversions}, |
234 | |
235 | // Eliminate clips outside the FP16 range. This is a specialized pass that |
236 | // is disabled by default. |
237 | {FunctionPassID::EliminateClipsOutsideFP16Range}, |
238 | |
239 | // Look for float Relus that we can fuse up into quantized FCs. |
240 | {FunctionPassID::OptimizeQuantFCFloatRelu}, |
241 | |
242 | // Eliminate clips outside the FP16 range. This is a specialized pass that |
243 | // is disabled by default. |
244 | {FunctionPassID::EliminateClipsOutsideFP16Range}, |
245 | |
246 | // Optimize away intermediate consecutive Clips. |
247 | {FunctionPassID::OptimizeClips}, |
248 | }; |
249 | return glow::make_unique<FunctionPassPipeline>(configs); |
250 | } |
251 | |
252 | std::unique_ptr<FunctionPassPipeline> createDefaultFoldPassPipeline() { |
253 | std::initializer_list<FunctionPassConfig> configs{ |
254 | // Optimize arithmetic nodes based on algebraic identities. |
255 | // In this function, constant operators in communative nodes are moved to |
256 | // the RHS. Some folding functions depend on this. (e.g. FoldMinMaxToClip) |
257 | {FunctionPassID::OptimizeArithmeticNodes}, |
258 | |
259 | // Get Reshape nodes merged into constants to simplify folding. |
260 | {FunctionPassID::OptimizeReshape}, |
261 | |
262 | // Fold sub-graphs corresponding to leakyRelu. |
263 | {FunctionPassID::FoldLeakyRelu}, |
264 | |
265 | // Fold Reshape->Transpose->Reshape into ChannelShuffle when applicable. |
266 | {FunctionPassID::FoldChannelShuffle}, |
267 | |
268 | // Fold MatMul->Add into FullyConnected. |
269 | {FunctionPassID::FoldMatMulAddIntoFullyConnected}, |
270 | |
271 | // Fold Min + Max to Clip |
272 | {FunctionPassID::FoldMinMaxToClip}, |
273 | |
274 | // Fold exp + reduce sum + div into softmax |
275 | {FunctionPassID::FoldExpSumDivIntoSoftmax}, |
276 | |
277 | // Perform Dead Code Elimination. |
278 | getDCEPassConfig(), |
279 | }; |
280 | return glow::make_unique<FunctionPassPipeline>(configs); |
281 | } |
282 | |
283 | FunctionPassConfig getDCEPassConfig() { |
284 | return FunctionPassConfig( |
285 | FunctionPassID::DCE, ConvergenceMode::OnePass, |
286 | std::set<CompilationMode>{CompilationMode::Infer, CompilationMode::Train}, |
287 | DCERequiredMode::None); |
288 | } |
289 | |
290 | llvm::StringRef getNameOfPass(FunctionPassID passID) { |
291 | switch (passID) { |
292 | #define FUN_PASS(PASS_NAME) \ |
293 | case FunctionPassID::PASS_NAME: \ |
294 | return #PASS_NAME; |
295 | #include "glow/Optimizer/GraphOptimizer/FunctionPasses.def" |
296 | } |
297 | llvm_unreachable("Unexpected pass." ); |
298 | } |
299 | |
300 | llvm::StringRef FunctionPassConfig::getNameOfPass() const { |
301 | return glow::getNameOfPass(getPassID()); |
302 | } |
303 | |
304 | #define FUN_PASS(PASS_NAME) {#PASS_NAME, FunctionPassID::PASS_NAME}, |
305 | |
306 | static llvm::StringMap<FunctionPassID> passNameToID{ |
307 | #include "glow/Optimizer/GraphOptimizer/FunctionPasses.def" |
308 | }; |
309 | |
310 | static FunctionPassID getPassID(llvm::StringRef name) { |
311 | CHECK_GT(passNameToID.count(name), 0) << "Unknown pass name: " << name.str(); |
312 | return passNameToID.lookup(name); |
313 | } |
314 | |
315 | template <> |
316 | void FunctionPassPipeline::initFromFile(llvm::StringRef pipelineDefFilename) { |
317 | clear(); |
318 | auto configs = deserializeFromYaml<std::vector<FunctionPassConfigHelper>>( |
319 | pipelineDefFilename); |
320 | for (auto &config : configs) { |
321 | FunctionPassConfig functionPassConfig( |
322 | getPassID(config.passName), config.convergenceMode, |
323 | config.enabledCompilationModes, config.dceMode); |
324 | pushBack(functionPassConfig); |
325 | } |
326 | } |
327 | |
328 | template <> |
329 | void FunctionPassPipeline::dumpToFile(llvm::StringRef pipelineDefFilename) { |
330 | std::vector<FunctionPassConfigHelper> configs; |
331 | for (unsigned idx = 0, e = size(); idx < e; ++idx) { |
332 | const auto &config = at(idx); |
333 | configs.emplace_back(FunctionPassConfigHelper(config)); |
334 | } |
335 | serializeToYaml(pipelineDefFilename, configs); |
336 | } |
337 | |
/// Indentation prefix written in front of the fields printed by
/// FunctionPassConfig::dump().
static constexpr const char *tab = " ";
339 | |
340 | void FunctionPassConfig::dump(llvm::raw_ostream &os, |
341 | llvm::StringRef passName) const { |
342 | PassConfigBase::dump(os, passName); |
343 | |
344 | os << tab << "DCERequiredMode: " ; |
345 | switch (getDCERequiredMode()) { |
346 | case DCERequiredMode::BeforePass: |
347 | os << "BeforePass," ; |
348 | break; |
349 | case DCERequiredMode::None: |
350 | os << "None," ; |
351 | break; |
352 | } |
353 | os << "\n" ; |
354 | } |
355 | |
356 | bool FunctionPassConfig::equals(const PassConfigBase &other) const { |
357 | return dceMode_ == static_cast<const FunctionPassConfig &>(other).dceMode_ && |
358 | PassConfigBase::equals(other); |
359 | } |
360 | |
361 | } // namespace glow |
362 | |