1/**
2 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16#ifndef GLOW_OPTIMIZER_GRAPHOPTIMIZER_GRAPHOPTIMIZER_H
17#define GLOW_OPTIMIZER_GRAPHOPTIMIZER_GRAPHOPTIMIZER_H
18
19#include "glow/Optimizer/GraphOptimizer/CompilationContext.h"
20#include "glow/Optimizer/GraphOptimizer/FunctionPassManager.h"
21#include "glow/Support/Error.h"
22
23#include "llvm/ADT/ArrayRef.h"
24
25namespace glow {
26
// Forward declarations. This header only names these types through pointers
// and references, so their full definitions are not required here.
class Backend;
class Function;
class Module;
class Placeholder;
class PlaceholderBindings;

namespace runtime {
struct DeviceInfo;
} // namespace runtime
36
37/// Use to keep a record what happened during constant folding -- key is the
38/// Constant created during constant folding, and associated value is the
39/// SaveNode from the constant folding partition that saved that Constant when
40/// it was run.
41using ConstantFoldingRecordMap = llvm::DenseMap<Constant *, SaveNode *>;
42
43/// Perform optimizations on the graph representation.
44void optimize(Function *F, const CompilationContext &cctx);
45void optimize(Function *F, CompilationMode mode);
46void optimize(Function *F, const CompilationContext &cctx, const Backend &B);
47
48/// Delete unused Constants from \p mod.
49void deleteUnusedConstants(Module &mod);
50
51/// Fold nodes that were expressed lowered in the input model.
52void fold(Function *F, const CompilationContext &cctx,
53 const Backend *B = nullptr);
54
55/// Performs the actual constant quantization in function \p F.
56void convertQuantizedConstants(Function *F, CompilationContext &cctx);
57
58/// Lower the high-level neural network nodes found in \p F into low-level
59/// linear algebra operators. If \p B is not a nullptr then it can prevent
60/// lowering of a node via \ref Backend::shouldLower(); otherwise everything
61/// will be lowered. \p cctx will contain a mapping of loweredMap from output
62/// names of the nodes found and lowered in \p F to the output names of the
63/// nodes they were lowered from along with the NodeKind. \p doNotLowerKinds is
64/// a set of NodeKinds which represents all Nodes that should not be lowered.
65void lower(Function *F, CompilationContext &cctx, const Backend *B = nullptr,
66 const KindSet &doNotLowerKinds = {});
67
68/// Convert placeholders in Module \p M to constants based on the values in \p
69/// bindings. Do not convert any placeholders explicitly listed in \p vars.
70void convertPlaceholdersToConstants(Function *F,
71 const PlaceholderBindings &bindings,
72 llvm::ArrayRef<Placeholder *> vars);
73
74/// Instrument function \p F by inserting quantization profile nodes for
75/// capturing stats for quantization. The nodes will refer to tensors allocate
76/// in context \p bindings. The instrumentation for profiling will be performed
77/// according to the profiling configuration \p profConfig.
78void profileQuantization(
79 PlaceholderBindings &bindings, Function *F,
80 const quantization::ProfilingConfiguration &profConfig);
81
82/// Optimize the Function \p F given compilation options \p cctx for Backend \B.
83/// \returns success if all nodes in the final resulting optimized Function are
84/// supported by \p B; if not, this represents a compiler error.
85Error optimizeFunction(Function *F, const Backend &B, CompilationContext &cctx,
86 const glow::runtime::DeviceInfo *devInfo = nullptr);
87
88/// Optimize the Function \p F given compilation options \p cctx performing
89/// backend-independent optimizations that can be done before lowering.
90/// \returns success if there were no compiler errors; if not, this represents a
91/// compiler error.
92Error optimizeFunctionBeforeLowering(Function *F, CompilationContext &cctx);
93
94/// Helper function that may transform \p F given preferences of \p cctx and
95/// \p B. The specific transformations are done based on the
96/// PrecisionConfiguration found in \p cctx. This could include quantization,
97/// profiling, and FP16 conversion.
98void transformForPrecisionMode(const Backend &B, Function *F,
99 CompilationContext &cctx);
100
101/// Perform a compile-time constant folding of the node \p N.
102/// \returns list of constants which are the result of the constant-folding.
103/// These constants correspond to results of the node. If no constant folding
104/// was possible an empty vector will be returned. If \p foldSingleSplats then
105/// single splat subgraphs will be forced to fold.
106std::vector<Constant *> constantFold(Node *N, bool foldSingleSplats = false);
107
108/// Perform constant folding for all Nodes in \p F given \p cctx. \returns a
109/// record of what Constants are created by what SaveNodes pointing to
110/// Placeholders that replace them. The Functions and output Placeholders
111/// created for running the constant folding subgraph are not deleted from the
112/// module for this API.
113ConstantFoldingRecordMap constantFoldAndRecord(Function *F,
114 const CompilationContext &cctx);
115
116/// Given \p record, remove the constant folding Functions and their associated
117/// output Placeholder from \p mod and \p bindings.
118void cleanupConstantFolding(Module &mod, const ConstantFoldingRecordMap &record,
119 PlaceholderBindings *bindings = nullptr);
120
121/// Execute function \p F by the \p backend using the provided \p bindings and
122/// the compilation context \p cctx. If \p enableQuantizeConstFolding then
123/// QuantizeNodes can be folded as part of a constant chain.
124/// \returns error if function is not a constant function.
125Error executeConstantFunction(Backend &backend, Function &F,
126 PlaceholderBindings &bindings,
127 CompilationContext &cctx,
128 bool enableQuantizeConstFolding = false);
129
130/// Perform vertical split of FC weights in a given function.
131/// Optimization could facilitate parallel execution of FCs on multiple device
132/// cores.
133/// \returns true in case split took place.
134/// \param[in,out] F function to optimize.
135/// \param[in] numOfChunks number of chunks to split weights and bias into.
136/// \param[in] minKToSplit minimum size of the second dimension of weights
137/// when the split is applied.
138bool executeVerticalFCWeightsSplit(Function *F, unsigned numOfChunks,
139 unsigned minKToSplit);
140
/// Selects which kind of parallelization transformation \ref parallelizeOps()
/// should perform. The numeric values are spelled out explicitly and match the
/// values the enumerators receive implicitly from their declaration order.
enum class ParallelTransformKind {
  /// NOTE(review): presumably "no transformation requested" -- confirm.
  None = 0,
  /// Split along the batch axis (dim = 0).
  Data = 1,
  /// Split along dim = 1.
  Model = 2,
  /// Model_Axis[n], with n in [1-5], splits along dim = n.
  Model_Axis1 = 3,
  Model_Axis2 = 4,
  Model_Axis3 = 5,
  Model_Axis4 = 6,
  Model_Axis5 = 7
};
156
157/// A specialized ScopeGuard which prevents constant modification from occuring
158/// by swappiing in temporary Placeholders in place of Constants during the
159/// scope of the ConstantModificationPreventer. Automatically replaces the
160/// Constants back once the scope ends.
161class ConstantModificationPreventer : protected ScopeGuard {
162 /// Module which contains Constants we want to prevent modification of.
163 Module &mod_;
164
165 /// CompilationContext under which we're compiling.
166 CompilationContext &cctx_;
167
168 /// Original setting in \ref cctx_ for if constant folding was enabled.
169 bool origEnableConstantFolding_;
170
171 /// Map from temporary Placeholders to the Constants they replaced.
172 std::unordered_map<Placeholder *, Constant *> tmpPHToConstMap_;
173
174public:
175 /// Ctor.
176 ConstantModificationPreventer(Module &mod, CompilationContext &cctx);
177
178 /// Make not copyable.
179 ConstantModificationPreventer(const ConstantModificationPreventer &) = delete;
180
181 /// Make not assignable.
182 ConstantModificationPreventer &
183 operator=(const ConstantModificationPreventer &) = delete;
184
185 /// \returns the mapping of tmp PH to Constants.
186 const std::unordered_map<Placeholder *, Constant *> &getMapping() const {
187 return tmpPHToConstMap_;
188 }
189
190 /// Activate the preventer. By default it is deactivated when constructed.
191 void activate();
192
193 /// Deactivate the preventer and cleanup. This just forwards to
194 /// ScopeGuard::runAndDismiss(), which would have otherwise occurred when
195 /// falling out of scope.
196 void deactivateAndCleanup() { runAndDismiss(); }
197};
198
199/// Perform data or model parallel transformation of supported Nodes in \p F.
200/// \p numOfChunksMap maps Nodes to how many chunks they should be split into;
201/// if not listed this falls back to \p numOfChunks. \p parOpts represents what
202/// kind of parallelism to use. \p modelParallelSplitAlignment optionally can
203/// increase the size of model parallel splits to multiple of the given value.
204/// \returns an expected map of Nodes from \p F to the ConcatNode that they were
205/// replaced with.
206Expected<std::unordered_map<Node *, ConcatNode *>>
207parallelizeOps(Function *F,
208 const llvm::DenseMap<Node *, size_t> &numOfChunksMap,
209 const llvm::DenseMap<Node *, ParallelTransformKind> &parOpts,
210 size_t numOfChunks = 1, size_t modelParallelSplitAlignment = 1);
211
212/// Update quantized Relu output types found in \p F that have negative min to
213/// have min of zero. This normally happens during graph optz, but during AOT
214/// the qparams can not be calculated AOT because all qparams are dummies.
215void updateQuantReluTypes(Function *F);
216
217} // namespace glow
218
219#endif // GLOW_OPTIMIZER_GRAPHOPTIMIZER_GRAPHOPTIMIZER_H
220