1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | #ifndef GLOW_OPTIMIZER_GRAPHOPTIMIZER_GRAPHOPTIMIZER_H |
17 | #define GLOW_OPTIMIZER_GRAPHOPTIMIZER_GRAPHOPTIMIZER_H |
18 | |
19 | #include "glow/Optimizer/GraphOptimizer/CompilationContext.h" |
20 | #include "glow/Optimizer/GraphOptimizer/FunctionPassManager.h" |
21 | #include "glow/Support/Error.h" |
22 | |
23 | #include "llvm/ADT/ArrayRef.h" |
24 | |
25 | namespace glow { |
26 | |
27 | class Function; |
28 | class Backend; |
29 | class Module; |
30 | class PlaceholderBindings; |
31 | class Placeholder; |
32 | |
33 | namespace runtime { |
34 | struct DeviceInfo; |
35 | } |
36 | |
37 | /// Used to keep a record of what happened during constant folding -- key is the |
38 | /// Constant created during constant folding, and associated value is the |
39 | /// SaveNode from the constant folding partition that saved that Constant when |
40 | /// it was run. |
41 | using ConstantFoldingRecordMap = llvm::DenseMap<Constant *, SaveNode *>; |
42 | |
43 | /// Perform optimizations on the graph representation of \p F. The overloads take a CompilationContext \p cctx, a CompilationMode \p mode, or \p cctx together with a Backend \p B. |
44 | void optimize(Function *F, const CompilationContext &cctx); |
45 | void optimize(Function *F, CompilationMode mode); |
46 | void optimize(Function *F, const CompilationContext &cctx, const Backend &B); |
47 | |
48 | /// Delete unused Constants from the Module \p mod. |
49 | void deleteUnusedConstants(Module &mod); |
50 | |
51 | /// Fold nodes in \p F that were expressed in lowered form in the input model, given \p cctx. \p B is optional and defaults to nullptr. |
52 | void fold(Function *F, const CompilationContext &cctx, |
53 | const Backend *B = nullptr); |
54 | |
55 | /// Performs the actual constant quantization in Function \p F under the compilation context \p cctx. |
56 | void convertQuantizedConstants(Function *F, CompilationContext &cctx); |
57 | |
58 | /// Lower the high-level neural network nodes found in \p F into low-level |
59 | /// linear algebra operators. If \p B is not nullptr then it can prevent |
60 | /// lowering of a node via \ref Backend::shouldLower(); otherwise everything |
61 | /// will be lowered. \p cctx will contain a mapping (loweredMap) from output |
62 | /// names of the nodes found and lowered in \p F to the output names of the |
63 | /// nodes they were lowered from along with the NodeKind. \p doNotLowerKinds is |
64 | /// a set of NodeKinds which should not be lowered; it defaults to an empty set. |
65 | void lower(Function *F, CompilationContext &cctx, const Backend *B = nullptr, |
66 | const KindSet &doNotLowerKinds = {}); |
67 | |
68 | /// Convert placeholders for Function \p F to constants based on the values in \p bindings (NOTE(review): presumably the placeholders used by \p F; confirm against the implementation). |
69 | /// Do not convert any placeholders explicitly listed in \p vars. |
70 | void convertPlaceholdersToConstants(Function *F, |
71 | const PlaceholderBindings &bindings, |
72 | llvm::ArrayRef<Placeholder *> vars); |
73 | |
74 | /// Instrument function \p F by inserting quantization profile nodes for |
75 | /// capturing stats for quantization. The nodes will refer to tensors allocated |
76 | /// in context \p bindings. The instrumentation for profiling will be performed |
77 | /// according to the profiling configuration \p profConfig. |
78 | void profileQuantization( |
79 | PlaceholderBindings &bindings, Function *F, |
80 | const quantization::ProfilingConfiguration &profConfig); |
81 | |
82 | /// Optimize the Function \p F given compilation options \p cctx for Backend \p B. |
83 | /// \returns success if all nodes in the final resulting optimized Function are |
84 | /// supported by \p B; if not, this represents a compiler error. \p devInfo is optional and defaults to nullptr. |
85 | Error optimizeFunction(Function *F, const Backend &B, CompilationContext &cctx, |
86 | const glow::runtime::DeviceInfo *devInfo = nullptr); |
87 | |
88 | /// Optimize the Function \p F given compilation options \p cctx, performing |
89 | /// backend-independent optimizations that can be done before lowering. |
90 | /// \returns success if there were no compiler errors; otherwise the returned |
91 | /// Error represents a compiler error. |
92 | Error optimizeFunctionBeforeLowering(Function *F, CompilationContext &cctx); |
93 | |
94 | /// Helper function that may transform \p F given the preferences of \p cctx and |
95 | /// \p B. The specific transformations are done based on the |
96 | /// PrecisionConfiguration found in \p cctx. This could include quantization, |
97 | /// profiling, and FP16 conversion. |
98 | void transformForPrecisionMode(const Backend &B, Function *F, |
99 | CompilationContext &cctx); |
100 | |
101 | /// Perform a compile-time constant folding of the node \p N. |
102 | /// \returns list of constants which are the result of the constant-folding. |
103 | /// These constants correspond to results of the node. If no constant folding |
104 | /// was possible an empty vector will be returned. If \p foldSingleSplats is true |
105 | /// (it defaults to false) then single splat subgraphs will be forced to fold. |
106 | std::vector<Constant *> constantFold(Node *N, bool foldSingleSplats = false); |
107 | |
108 | /// Perform constant folding for all Nodes in \p F given \p cctx. \returns a |
109 | /// record of which Constants are created by which SaveNodes pointing to |
110 | /// Placeholders that replace them. The Functions and output Placeholders |
111 | /// created for running the constant folding subgraph are not deleted from the |
112 | /// module for this API. |
113 | ConstantFoldingRecordMap constantFoldAndRecord(Function *F, |
114 | const CompilationContext &cctx); |
115 | |
116 | /// Given \p record, remove the constant folding Functions and their associated |
117 | /// output Placeholders from \p mod and \p bindings. \p bindings is optional and defaults to nullptr. |
118 | void cleanupConstantFolding(Module &mod, const ConstantFoldingRecordMap &record, |
119 | PlaceholderBindings *bindings = nullptr); |
120 | |
121 | /// Execute function \p F by the \p backend using the provided \p bindings and |
122 | /// the compilation context \p cctx. If \p enableQuantizeConstFolding is true (it defaults to false) then |
123 | /// QuantizeNodes can be folded as part of a constant chain. |
124 | /// \returns error if function is not a constant function. |
125 | Error executeConstantFunction(Backend &backend, Function &F, |
126 | PlaceholderBindings &bindings, |
127 | CompilationContext &cctx, |
128 | bool enableQuantizeConstFolding = false); |
129 | |
130 | /// Perform a vertical split of FC weights in a given function. |
131 | /// This optimization could facilitate parallel execution of FCs on multiple device |
132 | /// cores. |
133 | /// \returns true if a split took place. |
134 | /// \param[in,out] F function to optimize. |
135 | /// \param[in] numOfChunks number of chunks to split weights and bias into. |
136 | /// \param[in] minKToSplit minimum size of the second dimension of weights |
137 | /// when the split is applied. |
138 | bool executeVerticalFCWeightsSplit(Function *F, unsigned numOfChunks, |
139 | unsigned minKToSplit); |
140 | |
141 | /// Represents what kind of parallelization transformation should be performed |
142 | /// by \ref parallelizeOps(). NOTE(review): \p None is not described here; presumably it requests no transformation -- confirm. |
143 | /// \p Data indicates splitting along batch axis (dim = 0) |
144 | /// \p Model indicates splitting along dim = 1 |
145 | /// \p Model_Axis[n] where \p n is in \p [1-5] indicates splitting along dim = \p n |
146 | enum class ParallelTransformKind { |
147 | None, |
148 | Data, |
149 | Model, |
150 | Model_Axis1, |
151 | Model_Axis2, |
152 | Model_Axis3, |
153 | Model_Axis4, |
154 | Model_Axis5 |
155 | }; |
156 | |
157 | /// A specialized ScopeGuard which prevents constant modification from occurring |
158 | /// by swapping in temporary Placeholders in place of Constants during the |
159 | /// scope of the ConstantModificationPreventer. Automatically replaces the |
160 | /// Constants back once the scope ends. |
161 | class ConstantModificationPreventer : protected ScopeGuard { |
162 | /// Module which contains Constants we want to prevent modification of. |
163 | Module &mod_; |
164 | |
165 | /// CompilationContext under which we're compiling. |
166 | CompilationContext &cctx_; |
167 | |
168 | /// Original setting in \ref cctx_ for whether constant folding was enabled. |
169 | bool origEnableConstantFolding_; |
170 | |
171 | /// Map from temporary Placeholders to the Constants they replaced. |
172 | std::unordered_map<Placeholder *, Constant *> tmpPHToConstMap_; |
173 | |
174 | public: |
175 | /// Ctor taking the Module \p mod and the CompilationContext \p cctx. |
176 | ConstantModificationPreventer(Module &mod, CompilationContext &cctx); |
177 | |
178 | /// Make not copyable. |
179 | ConstantModificationPreventer(const ConstantModificationPreventer &) = delete; |
180 | |
181 | /// Make not assignable. |
182 | ConstantModificationPreventer & |
183 | operator=(const ConstantModificationPreventer &) = delete; |
184 | |
185 | /// \returns the mapping of temporary Placeholders to the Constants they replaced. |
186 | const std::unordered_map<Placeholder *, Constant *> &getMapping() const { |
187 | return tmpPHToConstMap_; |
188 | } |
189 | |
190 | /// Activate the preventer. By default it is deactivated when constructed. |
191 | void activate(); |
192 | |
193 | /// Deactivate the preventer and clean up. This just forwards to |
194 | /// ScopeGuard::runAndDismiss(), which would have otherwise occurred when |
195 | /// falling out of scope. |
196 | void deactivateAndCleanup() { runAndDismiss(); } |
197 | }; |
198 | |
199 | /// Perform data or model parallel transformation of supported Nodes in \p F. |
200 | /// \p numOfChunksMap maps Nodes to how many chunks they should be split into; |
201 | /// if not listed this falls back to \p numOfChunks. \p parOpts represents what |
202 | /// kind of parallelism to use. \p modelParallelSplitAlignment optionally can |
203 | /// increase the size of model parallel splits to a multiple of the given value. |
204 | /// \returns an expected map of Nodes from \p F to the ConcatNode that they were |
205 | /// replaced with. \p numOfChunks and \p modelParallelSplitAlignment both default to 1. |
206 | Expected<std::unordered_map<Node *, ConcatNode *>> |
207 | parallelizeOps(Function *F, |
208 | const llvm::DenseMap<Node *, size_t> &numOfChunksMap, |
209 | const llvm::DenseMap<Node *, ParallelTransformKind> &parOpts, |
210 | size_t numOfChunks = 1, size_t modelParallelSplitAlignment = 1); |
211 | |
212 | /// Update quantized Relu output types found in \p F that have negative min to |
213 | /// have min of zero. This normally happens during graph optimization, but during AOT |
214 | /// the qparams cannot be calculated because all qparams are dummies. |
215 | void updateQuantReluTypes(Function *F); |
216 | |
217 | } // namespace glow |
218 | |
219 | #endif // GLOW_OPTIMIZER_GRAPHOPTIMIZER_GRAPHOPTIMIZER_H |
220 | |