1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #ifndef GLOW_IMPORTER_PROTOBUFLOADER_H |
18 | #define GLOW_IMPORTER_PROTOBUFLOADER_H |
19 | |
#include "glow/Base/Tensor.h"
#include "glow/ExecutionEngine/ExecutionEngine.h"
#include "glow/Graph/Graph.h"
#include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h"
#include "glow/Support/Error.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"

#include <google/protobuf/text_format.h>

#include <algorithm>
#include <memory>
#include <string>
#include <vector>
37 | |
38 | /// This is the maximum allowed protobuf size (2GB). |
39 | #define MAX_PROTO_SIZE 0x7FFFFFFF |
40 | |
41 | namespace glow { |
42 | |
/// Some model formats (Caffe2, PyTorch, TensorFlowLite) allow defining weights
/// and activations in UINT8 format. Since Glow supports only INT8 weights and
/// activations we do a transformation from UINT8 to INT8 quantized data by
/// subtracting the value 128 from both the quantized values and the offset:
///   val(int8)    = val(uint8) - 128
///   scale(int8)  = scale(uint8)        (scale value is preserved)
///   offset(int8) = offset(uint8) - 128
/// The constant definition below defines the value used for subtraction.
constexpr int32_t UINT8_TO_INT8_SHIFT = 128;

/// Enables or disables constant-folding of Loader Ops with \p flag.
void setConstantFoldLoaderOpsFlag(bool flag);

/// Returns true if constant-folding for loader Ops is enabled.
bool getConstantFoldLoaderOpsFlag();

/// Returns true iff all elements of \p a are the same.
bool isArrayConstant(const llvm::ArrayRef<size_t> a);
61 | |
62 | /// Prints a single serialized protocol buffer node. This method is useful for |
63 | /// debugging the network and printing errors. |
64 | template <typename T> |
65 | std::string unexpectedNodeErrorMessage(const T &node, llvm::StringRef message) { |
66 | std::string str; |
67 | google::protobuf::TextFormat::PrintToString(node, &str); |
68 | return llvm::formatv("{0}\n{1}" , message, str); |
69 | } |
70 | |
/// Reads a single integer attribute. \returns the value of the 'i' field of
/// \p arg, or an Error if \p arg carries no integer value.
template <typename T> static Expected<int> loadInt(const T *arg) {
  RETURN_ERR_IF_NOT(arg->has_i(), "Node has no Int value");
  return arg->i();
}
76 | |
/// Reads a single float attribute. \returns the value of the 'f' field of
/// \p arg, or an Error if \p arg carries no float value.
template <typename T> static Expected<float> loadFloat(const T *arg) {
  RETURN_ERR_IF_NOT(arg->has_f(), "Node has no float value");
  return arg->f();
}
82 | |
/// Reads a single string attribute. \returns the value of the 's' field of
/// \p arg, or an Error if \p arg carries no string value.
template <typename T> static Expected<std::string> loadStr(const T *arg) {
  RETURN_ERR_IF_NOT(arg->has_s(), "Node has no str value");
  return arg->s();
}
88 | |
89 | /// Load the 'shape' record from \p arg into a vector of sizes. \returns the |
90 | /// vector if there is no error, or an error otherwise. If \p allowEmptyShape |
91 | /// then no error will be returned if the vector is empty. |
92 | template <typename ElemTy, typename AttrType> |
93 | Expected<std::vector<ElemTy>> getShape(const AttrType *arg, |
94 | bool allowEmptyShape = false) { |
95 | RETURN_ERR_IF_NOT(arg, "Node has no ints attribute with this name" ); |
96 | if (!allowEmptyShape) { |
97 | RETURN_ERR_IF_NOT(arg->ints_size() > 0, "Node has no ints values" ); |
98 | } |
99 | std::vector<ElemTy> dim; |
100 | for (auto i : arg->ints()) { |
101 | dim.push_back(i); |
102 | } |
103 | return dim; |
104 | } |
105 | |
106 | /// Load a floating record vector from \p arg into a vector. \returns the vector |
107 | /// if there is no error, or an error otherwise. |
108 | template <typename AttrType> |
109 | Expected<std::vector<float>> getFloats(const AttrType *arg) { |
110 | RETURN_ERR_IF_NOT(arg, "Node has no floats attribute with this name" ); |
111 | std::vector<float> dim; |
112 | for (auto i : arg->floats()) { |
113 | dim.push_back(i); |
114 | } |
115 | return dim; |
116 | } |
117 | |
118 | /// Load a string record vector from \p arg into a vector. \returns the vector |
119 | /// if there is no error, or an error otherwise. |
120 | template <typename AttrType> |
121 | Expected<std::vector<std::string>> getStrings(const AttrType *arg) { |
122 | RETURN_ERR_IF_NOT(arg, "Node has no strings attribute with this name" ); |
123 | RETURN_ERR_IF_NOT(arg->strings_size() > 0, "Node has no strings values" ); |
124 | std::vector<std::string> strs; |
125 | for (const auto &s : arg->strings()) { |
126 | strs.push_back(s); |
127 | } |
128 | return strs; |
129 | } |
130 | |
/// Returns canonical name for a given operator: either \p name() from proto,
/// or its type name (suffixed with the first output name when one exists).
template <typename T> std::string loadOperatorName(const T &op) {
  // Prefer the explicit name from the proto when one was provided.
  const auto &protoName = op.name();
  if (!protoName.empty()) {
    return protoName;
  }
  // Otherwise derive a name from the op type and its first output.
  return (op.output_size() > 0) ? (op.op_type() + "_" + op.output(0))
                                : op.op_type();
}
142 | |
143 | /// \returns the positive value (modulo) of \p axis given the \p rank (number |
144 | /// of dimensions) of the tensor it refers to. The proto format allows negative |
145 | /// axis in which case the axis is used for counting dimensions from the back. |
146 | /// Since Glow cannot use a negative axis we use this converter utility. For |
147 | /// example an axis value of -1 for a tensor with 3 dimensions (rank 3) is |
148 | /// converted to 2. A good definition of the axis value requires to be in the |
149 | /// range [rank, rank-1]. |
150 | template <typename T> Expected<T> getPositiveAxis(int axis, int rank) { |
151 | RETURN_ERR_IF_NOT( |
152 | (-rank <= axis) && (axis < rank), |
153 | strFormat("Axis value %d is invalid! Should be in the range [%d, %d]!" , |
154 | axis, -rank, rank - 1)); |
155 | int axisPos = (axis < 0) ? axis + rank : axis; |
156 | return static_cast<T>(axisPos); |
157 | } |
158 | |
159 | /// \returns the positive value of \p axis given the rank of the value \p val. |
160 | template <typename T> Expected<T> getPositiveAxis(int axis, NodeValue val) { |
161 | return getPositiveAxis<T>(axis, val.dims().size()); |
162 | } |
163 | |
164 | /// Reads a single axis parameter which is wrapped if negative using \p rank |
165 | /// based on the logic of \ref getPositiveAxis. |
166 | template <typename ElemTy, typename T> |
167 | static Expected<ElemTy> loadAxis(const T *arg, int rank) { |
168 | int axis; |
169 | ASSIGN_VALUE_OR_RETURN_ERR(axis, loadInt(arg)); |
170 | ASSIGN_VALUE_OR_RETURN_ERR(axis, getPositiveAxis<int>(axis, rank)); |
171 | return static_cast<ElemTy>(axis); |
172 | } |
173 | |
174 | /// Reads multiple axes as an array which are wrapped if negative using \p rank |
175 | /// based on the logic of \ref getPositiveAxis. |
176 | template <typename ElemTy, typename T> |
177 | static Expected<std::vector<ElemTy>> loadAxes(const T *arg, int rank) { |
178 | std::vector<int> axes; |
179 | ASSIGN_VALUE_OR_RETURN_ERR(axes, getShape<int>(arg)); |
180 | std::vector<ElemTy> axesPos; |
181 | for (int axis : axes) { |
182 | int axisPos; |
183 | ASSIGN_VALUE_OR_RETURN_ERR(axisPos, getPositiveAxis<int>(axis, rank)); |
184 | axesPos.push_back(static_cast<ElemTy>(axisPos)); |
185 | } |
186 | return axesPos; |
187 | } |
188 | |
/// Loads model: graph and weights.
class ProtobufLoader {
protected:
  /// The graph that we are constructing.
  Function *G_;
  /// The module containing the graph(s) that we are constructing.
  Module &mod_;
  /// A map from partition names to the Function for each partition.
  llvm::StringMap<Function *> partNameToFun_;
  /// Saves network nodes by name.
  llvm::StringMap<NodeValue> nodeValueByName_;
  /// Saves intermediate PHs by name, i.e. those that are used to communicate
  /// between partitions.
  llvm::StringMap<Placeholder *> intermediatePHsByName_;
  /// A map from names of the external outputs of the network to Variables.
  llvm::StringMap<Placeholder *> outputVarsByName_;
  /// A map from names of the external inputs of the network to Variables.
  llvm::StringMap<Placeholder *> inputVarsByName_;
  /// A vector of input names ordered by their position in the inference
  /// interface.
  std::vector<std::string> positionalInputNames_;
  /// A vector of output names ordered by their position in the inference
  /// interface.
  std::vector<std::string> positionalOutputNames_;
  /// Whether to try constant folding as we load each op from a protobuf.
  bool constFoldInLoader_{true};
  /// Whether to load the proto into the existing module. All Functions and
  /// Storage should already exist for the proto; the Functions should be empty
  /// and will be filled with Nodes from the proto connected to Storage.
  bool loadIntoExistingModule_{false};
  /// Uniqued offset value used when saving the mapping from quant params to
  /// loader names.
  int32_t currUniqueOffset_{0};
  /// An optional mapping from the name of each quantized op/input to the TQP
  /// it was loaded with.
  OriginNameToTQPMap *originNameToTQPMap_{nullptr};
  /// Whether to load uniqued dummy quantization params instead of the actual
  /// quantization params in the model. \ref originNameToTQPMap_ must be
  /// non-null when this is true.
  bool loadUniquedDummyQParams_{false};
  /// New TQP to use, indexed by the unique dummy offset it is mapped to.
  std::vector<TensorQuantizationParams> updatedTQPs_;
  /// Whether to try to replace dummy TQPs found during loading with real
  /// updated ones in \ref updatedTQPs_.
  bool replaceDummyTQPs_{false};
  /// If true, when scales for qparams are loaded, they are clipped to
  /// kMinScaleFP16 if below kMinScaleFP16.
  bool zeroScaleFP16Clip_{false};
  /// Whether to clip the range of any loaded qparams to min/max of FP16.
  bool clipQuantRangeToFP16_{false};

  /// Delete all Constants that have no users. This is useful because some
  /// Constants may have been copied and modified during loading instead of
  /// used directly so they may be unused.
  void deleteUnusedConstants();

  /// Create a new constant that's initialized with \p tensor, and register it
  /// under the name \p name. If an existing Placeholder is already registered
  /// under the same name then the tensor is thrown out and no new Constant
  /// is created. The Constant will have Layout \p layout.
  Error createAndRegisterConstant(llvm::StringRef name, Tensor &&tensor,
                                  const std::string &layout = ANY_LAYOUT);

  /// Create a new Placeholder of type \p T, and register it
  /// under the name \p name. If \p isStatic is true register the Placeholder
  /// as a static placeholder. \p isTrainable and \p layout are set in the
  /// Placeholder accordingly. \returns The newly created placeholder.
  Expected<Placeholder *>
  createAndRegisterPlaceholder(llvm::StringRef name, TypeRef T,
                               bool isStatic = false, bool isTrainable = false,
                               const std::string &layout = ANY_LAYOUT);

  /// \returns the NodeValue that was registered with the name \p name or
  /// a nullptr wrapped in a NodeValue if no node has been registered with this
  /// name. Storage NodeValues are always returned if found. Otherwise, if
  /// \ref G_ is the same as the parent of the NodeValue (or if
  /// \p ignoreSrcFun), then the direct mapping is returned from
  /// \ref nodeValueByName_. Otherwise, a SaveNode will be created and output
  /// to a Placeholder, which will be added to \ref nodeValueByName_ and
  /// returned.
  NodeValue getNodeValueByNameOrNullNodeValue(llvm::StringRef name,
                                              bool ignoreSrcFun = false);

  /// \returns the static Placeholder registered with the given \p name, or
  /// nullptr if no static Placeholder has been registered with this name.
  Placeholder *getStaticPlaceholderByNameOrNull(llvm::StringRef name) const;

  /// \returns the Constant registered with the given \p name and nullptr if
  /// no Constant has been registered with this name.
  Constant *getConstantByNameOrNull(llvm::StringRef name) const;

  /// \returns an Expected of the Constant registered with the given \p
  /// name and returns an Error if no Constant has been registered with this
  /// name.
  Expected<Constant *> getConstantByName(llvm::StringRef name) const;

  /// \returns whether or not a Constant has been registered with the given \p
  /// name.
  bool hasConstantByName(llvm::StringRef name) const;

  /// Sets up a new Loader based on \p tensorNames, \p types, and sets any
  /// error in \p errPtr.
  void setupLoader(llvm::ArrayRef<const char *> tensorNames,
                   llvm::ArrayRef<TypeRef> types, Error *errPtr);

  /// \returns the TQP located at \p uniqueOffsetIdx in \ref updatedTQPs_.
  Expected<TensorQuantizationParams> getUpdatedTQP(int32_t uniqueOffsetIdx);

  /// \returns a quantized type with loader name \p name given \p k, \p dims,
  /// \p scale, and \p offset. If \p shiftUInt8ToInt8 and \p k is Int8QTy, then
  /// the offset is shifted by UINT8_TO_INT8_SHIFT to Int8.
  Expected<TypeRef> loadQuantTy(const std::string &name, ElemKind k,
                                llvm::ArrayRef<dim_t> dims, float scale,
                                int32_t offset, bool shiftUInt8ToInt8 = true,
                                bool skipClipQuantRangeToFP16 = false);

public:
  /// \returns the NodeValue that was registered with the name \p name. If
  /// looking up a NodeValue which is produced from a different Function (and
  /// if not \p ignoreSrcFun) then this returns (and creates if doesn't yet
  /// exist) a new SaveNode and Placeholder to connect the two Nodes from
  /// different Functions.
  /// \pre hasNodeByName(name)
  Expected<NodeValue> getNodeValueByName(llvm::StringRef name,
                                         bool ignoreSrcFun = false);

  /// \returns True if the node that's registered using \p name exists.
  bool hasNodeByName(llvm::StringRef name) const;

  /// Constructs new ProtobufLoader object. It will populate the network into
  /// \p F. The list \p types and \p names are used to initialize the inputs
  /// of the model with specific names and types. If \p errPtr is not null then
  /// if an error occurs it will get assigned there otherwise if an error
  /// occurs it will abort. If \p loadIntoExistingModule then all Functions and
  /// Storage is expected to already exist, so they will be searched for
  /// according to the proto being loaded instead of created as usual.
  ProtobufLoader(llvm::ArrayRef<const char *> tensorNames,
                 llvm::ArrayRef<TypeRef> types, Function *F,
                 Error *errPtr = nullptr, bool loadIntoExistingModule = false,
                 OriginNameToTQPMap *originNameToTQPMap = nullptr,
                 bool loadUniquedDummyQParams = false,
                 bool replaceDummyTQPs = false, bool zeroScaleFP16Clip = false,
                 bool clipQuantRangeToFP16 = false);

  /// Constructs new ProtobufLoader object. It will populate the network into
  /// \p mod. The list \p types and \p names are used to initialize the inputs
  /// of the model with specific names and types. If \p errPtr is not null then
  /// if an error occurs it will get assigned there otherwise if an error
  /// occurs it will abort. If \p loadIntoExistingModule then all Functions and
  /// Storage is expected to already exist, so they will be searched for
  /// according to the proto being loaded instead of created as usual.
  ProtobufLoader(llvm::ArrayRef<const char *> tensorNames,
                 llvm::ArrayRef<TypeRef> types, Module &mod,
                 Error *errPtr = nullptr, bool loadIntoExistingModule = false,
                 OriginNameToTQPMap *originNameToTQPMap = nullptr,
                 bool loadUniquedDummyQParams = false,
                 bool replaceDummyTQPs = false, bool zeroScaleFP16Clip = false,
                 bool clipQuantRangeToFP16 = false);

  // Loaders are not copyable: they own registration state tied to a Module.
  ProtobufLoader(const ProtobufLoader &other) = delete;
  ProtobufLoader &operator=(const ProtobufLoader &) = delete;
  virtual ~ProtobufLoader() = default;

  /// \returns mapping between external names and actual Glow output nodes.
  const llvm::StringMap<Placeholder *> &getOutputVarsMapping() const {
    return outputVarsByName_;
  }

  /// \returns mapping between external names and actual Glow input nodes.
  const llvm::StringMap<Placeholder *> &getInputVarsMapping() const {
    return inputVarsByName_;
  }

  /// \returns vector of primary input names based on their position.
  const std::vector<std::string> &getPositionalInputNames() const {
    return positionalInputNames_;
  }

  /// \returns vector of primary output names based on their position.
  const std::vector<std::string> &getPositionalOutputNames() const {
    return positionalOutputNames_;
  }

  /// \returns the single final output of the network. The function assumes
  /// that there is only one output, returns Error otherwise. For image
  /// classification, this single final output is usually the result of the
  /// last softmax or regression layer.
  Expected<Placeholder *> getSingleOutput() const;

  /// \returns the single input of the network. The function assumes that
  /// there is only one input, returns Error otherwise. For most of the models
  /// the single input is usually an image tensor.
  Expected<Placeholder *> getSingleInput() const;

  /// \returns the Placeholder for the external output with \p name.
  /// \pre outputVarsByName_.find(name) != outputVarsByName_.end()
  Expected<Placeholder *> getOutputByName(llvm::StringRef name) const;

  /// \returns the Placeholder for the external input with \p name.
  /// \pre inputVarsByName_.find(name) != inputVarsByName_.end()
  Expected<Placeholder *> getInputByName(llvm::StringRef name) const;

  /// \returns True if the operator with name \p typeName having input node
  /// list as \p inputs is constant foldable.
  bool isConstantFoldable(llvm::ArrayRef<NodeValue> inputs,
                          std::string typeName) const;
};
391 | |
392 | /// \returns success if the folding of operator \p op in the loader |
393 | /// \p loader is successful. The folding utility uses temporary |
394 | /// loader \p tmpLoader, and associated temporary function \p F. |
395 | template <class LoaderType, class OpType> |
396 | Error constantFoldInLoader(Function *F, LoaderType &tmpLoader, |
397 | LoaderType *loader, const OpType &op) { |
398 | PlaceholderBindings bindings; |
399 | std::vector<Tensor *> outTensors; |
400 | |
401 | // Register the constant inputs to the current op with the constant folding |
402 | // loader. |
403 | for (unsigned i = 0; i < (dim_t)op.input_size(); i++) { |
404 | Constant *tmpConst = loader->getConstantByNameOrNull(op.input(i)); |
405 | RETURN_ERR_IF_NOT(tmpConst, "No constant found" ); |
406 | tmpLoader.nodeValueByName_[op.input(i)] = tmpConst->getOutput(); |
407 | } |
408 | |
409 | // Using the loader to load the current operator. |
410 | RETURN_IF_ERR(tmpLoader.loadOperator(op)); |
411 | |
412 | // To collect the folded outputs allocate and add save nodes to the folding |
413 | // function. |
414 | llvm::SmallVector<Placeholder *, 2> tmpPHs; |
415 | for (int i = 0; i < op.output_size(); i++) { |
416 | const auto &outputName = op.output(i); |
417 | NodeValue r; |
418 | ASSIGN_VALUE_OR_RETURN_ERR(r, tmpLoader.getNodeValueByName(outputName)); |
419 | Placeholder *PH = F->getParent()->createPlaceholder( |
420 | r.getType(), "__CONSTFOLD__TMP__" + outputName, false); |
421 | SaveNode *SN = F->createSave("save_" + PH->getName().str(), r, PH); |
422 | auto *result = bindings.allocate(SN->getPlaceholder()); |
423 | outTensors.push_back(result); |
424 | tmpPHs.push_back(PH); |
425 | } |
426 | |
427 | // Cleanup to remove the temporary Placeholders we created. |
428 | ScopeGuard cleanup([&]() { |
429 | auto &mod = *F->getParent(); |
430 | auto &modPHs = mod.getPlaceholders(); |
431 | for (Placeholder *tmpPH : tmpPHs) { |
432 | mod.erasePlaceholder(std::find(modPHs.begin(), modPHs.end(), tmpPH)); |
433 | } |
434 | }); |
435 | |
436 | // Evaluate the constant outputs using interpreter backend. |
437 | std::unique_ptr<Backend> backend(createBackend("Interpreter" )); |
438 | CompilationContext cctx; |
439 | cctx.compMode = CompilationMode::Infer; |
440 | cctx.optimizationOpts.enableConstantFolding = false; |
441 | cctx.optimizationOpts.enableConstantDeduplication = false; |
442 | cctx.backendOpts.collectConstants = true; |
443 | // Do not print out compilation errors encountered, as constant folding is a |
444 | // best effort; simply silently give up and continue with compilation. |
445 | cctx.verboseCompile = false; |
446 | RETURN_IF_ERR(executeConstantFunction(*backend, *F, bindings, cctx)); |
447 | |
448 | // Using the graph output, place constant nodes in the original graph. |
449 | for (int i = 0; i < op.output_size(); i++) { |
450 | RETURN_IF_ERR(loader->createAndRegisterConstant(op.output(i), |
451 | std::move(*outTensors[i]))); |
452 | } |
453 | |
454 | return Error::success(); |
455 | } |
456 | |
457 | } // namespace glow |
458 | |
459 | #endif // GLOW_IMPORTER_PROTOBUFLOADER_H |
460 | |