/**
 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef GLOW_IMPORTER_PROTOBUFLOADER_H
#define GLOW_IMPORTER_PROTOBUFLOADER_H

#include "glow/Base/Tensor.h"
#include "glow/ExecutionEngine/ExecutionEngine.h"
#include "glow/Graph/Graph.h"
#include "glow/Optimizer/GraphOptimizer/GraphOptimizer.h"
#include "glow/Support/Error.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"

#include <google/protobuf/text_format.h>

#include <memory>
#include <string>
#include <vector>

/// This is the maximum allowed protobuf size (2GB).
#define MAX_PROTO_SIZE 0x7FFFFFFF

namespace glow {

/// Some model formats (Caffe2, PyTorch, TensorFlowLite) allow defining weights
/// and activations in UINT8 format. Since Glow supports only INT8 weights and
/// activations we do a transformation from UINT8 to INT8 quantized data by
/// subtracting the value 128 from both the quantized values and the offset:
/// val(int8) = val(uint8) - 128
/// scale(int8) = scale(uint8) (scale value is preserved)
/// offset(int8) = offset(uint8) - 128
/// The constant definition below defines the value used for subtraction.
constexpr int32_t UINT8_TO_INT8_SHIFT = 128;
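
/// A minimal worked example of the transformation above; the concrete values
/// are illustrative, not taken from any particular model:
/// \code
///   // uint8 encoding: val = 200, scale = 0.5, offset = 128
///   // int8 encoding:  val    = 200 - UINT8_TO_INT8_SHIFT = 72
///   //                 offset = 128 - UINT8_TO_INT8_SHIFT = 0
///   // Both encodings decode to the same real value:
///   //   0.5 * (200 - 128) == 0.5 * (72 - 0) == 36.0
/// \endcode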

/// Enables or disables constant-folding of Loader Ops with \p flag.
void setConstantFoldLoaderOpsFlag(bool flag);

/// Returns true if constant-folding for loader Ops is enabled.
bool getConstantFoldLoaderOpsFlag();

/// Returns true iff all elements of \p a are the same.
bool isArrayConstant(const llvm::ArrayRef<size_t> a);

/// Prints a single serialized protocol buffer node. This method is useful for
/// debugging the network and printing errors.
template <typename T>
std::string unexpectedNodeErrorMessage(const T &node, llvm::StringRef message) {
  std::string str;
  google::protobuf::TextFormat::PrintToString(node, &str);
  return llvm::formatv("{0}\n{1}", message, str);
}
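
/// For example, a loader would typically use this helper when it encounters an
/// operator it cannot handle (RETURN_ERR comes from glow/Support/Error.h):
/// \code
///   RETURN_ERR(unexpectedNodeErrorMessage(op, "Unsupported operator"));
/// \endcode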

/// Reads a single integer.
template <typename T> static Expected<int> loadInt(const T *arg) {
  RETURN_ERR_IF_NOT(arg->has_i(), "Node has no Int value");
  return arg->i();
}

/// Reads a single float.
template <typename T> static Expected<float> loadFloat(const T *arg) {
  RETURN_ERR_IF_NOT(arg->has_f(), "Node has no float value");
  return arg->f();
}

/// Reads a single string.
template <typename T> static Expected<std::string> loadStr(const T *arg) {
  RETURN_ERR_IF_NOT(arg->has_s(), "Node has no str value");
  return arg->s();
}

/// Load the 'shape' record from \p arg into a vector of sizes. \returns the
/// vector if there is no error, or an error otherwise. If \p allowEmptyShape
/// then no error will be returned if the vector is empty.
template <typename ElemTy, typename AttrType>
Expected<std::vector<ElemTy>> getShape(const AttrType *arg,
                                       bool allowEmptyShape = false) {
  RETURN_ERR_IF_NOT(arg, "Node has no ints attribute with this name");
  if (!allowEmptyShape) {
    RETURN_ERR_IF_NOT(arg->ints_size() > 0, "Node has no ints values");
  }
  std::vector<ElemTy> dim;
  for (auto i : arg->ints()) {
    dim.push_back(i);
  }
  return dim;
}
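
/// A minimal usage sketch for \ref getShape, assuming an ONNX-style attribute
/// proto \c attr that carries a repeated "ints" field (the sketch must run
/// inside a function that returns Error or Expected):
/// \code
///   std::vector<dim_t> dims;
///   ASSIGN_VALUE_OR_RETURN_ERR(dims, getShape<dim_t>(attr));
/// \endcode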

/// Load a floating record vector from \p arg into a vector. \returns the vector
/// if there is no error, or an error otherwise.
template <typename AttrType>
Expected<std::vector<float>> getFloats(const AttrType *arg) {
  RETURN_ERR_IF_NOT(arg, "Node has no floats attribute with this name");
  std::vector<float> dim;
  for (auto i : arg->floats()) {
    dim.push_back(i);
  }
  return dim;
}

/// Load a string record vector from \p arg into a vector. \returns the vector
/// if there is no error, or an error otherwise.
template <typename AttrType>
Expected<std::vector<std::string>> getStrings(const AttrType *arg) {
  RETURN_ERR_IF_NOT(arg, "Node has no strings attribute with this name");
  RETURN_ERR_IF_NOT(arg->strings_size() > 0, "Node has no strings values");
  std::vector<std::string> strs;
  for (const auto &s : arg->strings()) {
    strs.push_back(s);
  }
  return strs;
}

/// \returns a canonical name for the operator \p op: its \p name() from the
/// proto if present; otherwise a name derived from its type and first output,
/// or the bare type name if the operator has no outputs.
template <typename T> std::string loadOperatorName(const T &op) {
  if (op.name().length()) {
    return op.name();
  }
  if (op.output_size() > 0) {
    return op.op_type() + "_" + op.output(0);
  }
  return op.op_type();
}
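
/// For example, an unnamed operator of type "Conv" whose first output is named
/// "conv1_out" gets the canonical name "Conv_conv1_out".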

/// \returns the positive value (modulo) of \p axis given the \p rank (number
/// of dimensions) of the tensor it refers to. The proto format allows a
/// negative axis, in which case the axis is used for counting dimensions from
/// the back. Since Glow cannot use a negative axis we use this converter
/// utility. For example an axis value of -1 for a tensor with 3 dimensions
/// (rank 3) is converted to 2. A valid axis value must be in the range
/// [-rank, rank-1].
template <typename T> Expected<T> getPositiveAxis(int axis, int rank) {
  RETURN_ERR_IF_NOT(
      (-rank <= axis) && (axis < rank),
      strFormat("Axis value %d is invalid! Should be in the range [%d, %d]!",
                axis, -rank, rank - 1));
  int axisPos = (axis < 0) ? axis + rank : axis;
  return static_cast<T>(axisPos);
}
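
/// A minimal usage sketch for \ref getPositiveAxis; the axis and rank values
/// are illustrative:
/// \code
///   size_t axis;
///   ASSIGN_VALUE_OR_RETURN_ERR(axis, getPositiveAxis<size_t>(-1, 3));
///   // axis == 2
/// \endcode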

/// \returns the positive value of \p axis given the rank of the value \p val.
template <typename T> Expected<T> getPositiveAxis(int axis, NodeValue val) {
  return getPositiveAxis<T>(axis, val.dims().size());
}

/// Reads a single axis parameter which is wrapped if negative using \p rank
/// based on the logic of \ref getPositiveAxis.
template <typename ElemTy, typename T>
static Expected<ElemTy> loadAxis(const T *arg, int rank) {
  int axis;
  ASSIGN_VALUE_OR_RETURN_ERR(axis, loadInt(arg));
  ASSIGN_VALUE_OR_RETURN_ERR(axis, getPositiveAxis<int>(axis, rank));
  return static_cast<ElemTy>(axis);
}

/// Reads multiple axes as an array which are wrapped if negative using \p rank
/// based on the logic of \ref getPositiveAxis.
template <typename ElemTy, typename T>
static Expected<std::vector<ElemTy>> loadAxes(const T *arg, int rank) {
  std::vector<int> axes;
  ASSIGN_VALUE_OR_RETURN_ERR(axes, getShape<int>(arg));
  std::vector<ElemTy> axesPos;
  for (int axis : axes) {
    int axisPos;
    ASSIGN_VALUE_OR_RETURN_ERR(axisPos, getPositiveAxis<int>(axis, rank));
    axesPos.push_back(static_cast<ElemTy>(axisPos));
  }
  return axesPos;
}

/// Loads model: graph and weights.
class ProtobufLoader {
protected:
  /// The graph that we are constructing.
  Function *G_;
  /// The module containing the graph(s) that we are constructing.
  Module &mod_;
  /// A map from partition names to their Functions.
  llvm::StringMap<Function *> partNameToFun_;
  /// Saves network nodes by name.
  llvm::StringMap<NodeValue> nodeValueByName_;
  /// Saves intermediate PHs by name, i.e. those that are used to communicate
  /// between partitions.
  llvm::StringMap<Placeholder *> intermediatePHsByName_;
  /// A map from names of the external outputs of the network to Placeholders.
  llvm::StringMap<Placeholder *> outputVarsByName_;
  /// A map from names of the external inputs of the network to Placeholders.
  llvm::StringMap<Placeholder *> inputVarsByName_;
  /// A vector of input names ordered by their position in the inference
  /// interface.
  std::vector<std::string> positionalInputNames_;
  /// A vector of output names ordered by their position in the inference
  /// interface.
  std::vector<std::string> positionalOutputNames_;
  /// Whether to try constant folding as we load each op from a protobuf.
  bool constFoldInLoader_{true};
  /// Whether to load the proto into the existing module. All Functions and
  /// Storage should already exist for the proto; the Functions should be empty
  /// and will be filled with Nodes from the proto connected to Storage.
  bool loadIntoExistingModule_{false};
  /// Uniqued offset value used when saving the mapping from quant params to
  /// loader names.
  int32_t currUniqueOffset_{0};
  /// An optional mapping from the names of quantized ops and inputs to the
  /// TQP they were loaded with.
  OriginNameToTQPMap *originNameToTQPMap_{nullptr};
  /// Whether to load uniqued dummy quantization params instead of the actual
  /// quantization params in the model. \ref originNameToTQPMap_ must be
  /// non-null when this is true.
  bool loadUniquedDummyQParams_{false};
  /// New TQPs to use, indexed by the unique dummy offset each is mapped to.
  std::vector<TensorQuantizationParams> updatedTQPs_;
  /// Whether to try to replace dummy TQPs found during loading with real
  /// updated ones in \ref updatedTQPs_.
  bool replaceDummyTQPs_{false};
  /// If true, when scales for qparams are loaded, they are clipped to
  /// kMinScaleFP16 if below kMinScaleFP16.
  bool zeroScaleFP16Clip_{false};
  /// Whether to clip the range of any loaded qparams to the min/max of FP16.
  bool clipQuantRangeToFP16_{false};

  /// Delete all Constants that have no users. This is useful because some
  /// Constants may have been copied and modified during loading instead of
  /// used directly, so they may be unused.
  void deleteUnusedConstants();

  /// Create a new constant that's initialized with \p tensor, and register it
  /// under the name \p name. If an existing Placeholder is already registered
  /// under the same name then the tensor is thrown out and no new Constant
  /// is created. The Constant will have Layout \p layout.
  Error createAndRegisterConstant(llvm::StringRef name, Tensor &&tensor,
                                  const std::string &layout = ANY_LAYOUT);

  /// Create a new Placeholder of type \p T, and register it
  /// under the name \p name. If \p isStatic is true register the Placeholder
  /// as a static placeholder. \p isTrainable and \p layout are set in the
  /// Placeholder accordingly. \returns The newly created placeholder.
  Expected<Placeholder *>
  createAndRegisterPlaceholder(llvm::StringRef name, TypeRef T,
                               bool isStatic = false, bool isTrainable = false,
                               const std::string &layout = ANY_LAYOUT);

  /// \returns the NodeValue that was registered with the name \p name or
  /// a nullptr wrapped in a NodeValue if no node has been registered with this
  /// name. Storage NodeValues are always returned if found. Otherwise, if
  /// \ref G_ is the same as the parent of the NodeValue (or if
  /// \p ignoreSrcFun), then the direct mapping is returned from
  /// \ref nodeValueByName_. Otherwise, a SaveNode will be created and output to
  /// a Placeholder, which will be added to \ref nodeValueByName_ and returned.
  NodeValue getNodeValueByNameOrNullNodeValue(llvm::StringRef name,
                                              bool ignoreSrcFun = false);

  /// \returns the static Placeholder registered with the given \p name, or
  /// nullptr if no static Placeholder has been registered with this name.
  Placeholder *getStaticPlaceholderByNameOrNull(llvm::StringRef name) const;

  /// \returns the Constant registered with the given \p name and nullptr if
  /// no Constant has been registered with this name.
  Constant *getConstantByNameOrNull(llvm::StringRef name) const;

  /// \returns an Expected of the Constant registered with the given \p
  /// name and returns an Error if no Constant has been registered with this
  /// name.
  Expected<Constant *> getConstantByName(llvm::StringRef name) const;

  /// \returns whether or not a Constant has been registered with the given \p
  /// name.
  bool hasConstantByName(llvm::StringRef name) const;

  /// Sets up a new Loader based on \p tensorNames, \p types, and sets any error
  /// in \p errPtr.
  void setupLoader(llvm::ArrayRef<const char *> tensorNames,
                   llvm::ArrayRef<TypeRef> types, Error *errPtr);

  /// \returns the TQP located at \p uniqueOffsetIdx in \ref updatedTQPs_.
  Expected<TensorQuantizationParams> getUpdatedTQP(int32_t uniqueOffsetIdx);

  /// \returns a quantized type with loader name \p name given \p k, \p dims,
  /// \p scale, and \p offset. If \p shiftUInt8ToInt8 and \p k is Int8QTy, then
  /// the offset is shifted by UINT8_TO_INT8_SHIFT to Int8.
  Expected<TypeRef> loadQuantTy(const std::string &name, ElemKind k,
                                llvm::ArrayRef<dim_t> dims, float scale,
                                int32_t offset, bool shiftUInt8ToInt8 = true,
                                bool skipClipQuantRangeToFP16 = false);

public:
  /// \returns the NodeValue that was registered with the name \p name. If
  /// looking up a NodeValue which is produced from a different Function (and if
  /// not \p ignoreSrcFun) then this returns (and creates if it doesn't yet
  /// exist) a new SaveNode and Placeholder to connect the two Nodes from
  /// different Functions.
  /// \pre hasNodeByName(name)
  Expected<NodeValue> getNodeValueByName(llvm::StringRef name,
                                         bool ignoreSrcFun = false);

  /// \returns True if the node that's registered using \p name exists.
  bool hasNodeByName(llvm::StringRef name) const;

  /// Constructs a new ProtobufLoader object. It will populate the network into
  /// \p F. The lists \p types and \p tensorNames are used to initialize the
  /// inputs of the model with specific names and types. If \p errPtr is not
  /// null then any error that occurs during loading will be assigned to it;
  /// otherwise an error will abort. If \p loadIntoExistingModule then all
  /// Functions and Storage are expected to already exist, so they will be
  /// searched for according to the proto being loaded instead of created as
  /// usual.
  ProtobufLoader(llvm::ArrayRef<const char *> tensorNames,
                 llvm::ArrayRef<TypeRef> types, Function *F,
                 Error *errPtr = nullptr, bool loadIntoExistingModule = false,
                 OriginNameToTQPMap *originNameToTQPMap = nullptr,
                 bool loadUniquedDummyQParams = false,
                 bool replaceDummyTQPs = false, bool zeroScaleFP16Clip = false,
                 bool clipQuantRangeToFP16 = false);

  /// Constructs a new ProtobufLoader object. It will populate the network into
  /// \p mod. The lists \p types and \p tensorNames are used to initialize the
  /// inputs of the model with specific names and types. If \p errPtr is not
  /// null then any error that occurs during loading will be assigned to it;
  /// otherwise an error will abort. If \p loadIntoExistingModule then all
  /// Functions and Storage are expected to already exist, so they will be
  /// searched for according to the proto being loaded instead of created as
  /// usual.
  ProtobufLoader(llvm::ArrayRef<const char *> tensorNames,
                 llvm::ArrayRef<TypeRef> types, Module &mod,
                 Error *errPtr = nullptr, bool loadIntoExistingModule = false,
                 OriginNameToTQPMap *originNameToTQPMap = nullptr,
                 bool loadUniquedDummyQParams = false,
                 bool replaceDummyTQPs = false, bool zeroScaleFP16Clip = false,
                 bool clipQuantRangeToFP16 = false);

  ProtobufLoader(const ProtobufLoader &other) = delete;
  ProtobufLoader &operator=(const ProtobufLoader &) = delete;
  virtual ~ProtobufLoader() = default;

  /// \returns mapping between external names and actual Glow output nodes.
  const llvm::StringMap<Placeholder *> &getOutputVarsMapping() const {
    return outputVarsByName_;
  }

  /// \returns mapping between external names and actual Glow input nodes.
  const llvm::StringMap<Placeholder *> &getInputVarsMapping() const {
    return inputVarsByName_;
  }

  /// \returns vector of primary input names based on their position.
  const std::vector<std::string> &getPositionalInputNames() const {
    return positionalInputNames_;
  }

  /// \returns vector of primary output names based on their position.
  const std::vector<std::string> &getPositionalOutputNames() const {
    return positionalOutputNames_;
  }

  /// \returns the single final output of the network. The function assumes
  /// that there is only one output, returns Error otherwise. For image
  /// classification, this single final output is usually the result of the
  /// last softmax or regression layer.
  Expected<Placeholder *> getSingleOutput() const;

  /// \returns the single input of the network. The function assumes that there
  /// is only one input, returns Error otherwise. For most of the models the
  /// single input is usually an image tensor.
  Expected<Placeholder *> getSingleInput() const;

  /// \returns the Placeholder for the external output with \p name.
  /// \pre outputVarsByName_.find(name) != outputVarsByName_.end()
  Expected<Placeholder *> getOutputByName(llvm::StringRef name) const;

  /// \returns the Placeholder for the external input with \p name.
  /// \pre inputVarsByName_.find(name) != inputVarsByName_.end()
  Expected<Placeholder *> getInputByName(llvm::StringRef name) const;

  /// \returns True if the operator with name \p typeName having input node
  /// list as \p inputs is constant foldable.
  bool isConstantFoldable(llvm::ArrayRef<NodeValue> inputs,
                          std::string typeName) const;
};
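
/// A minimal sketch of driving a loader built on this class. ONNXModelLoader
/// is declared in a separate header; its exact constructor signature and the
/// input name "input" are assumed here for illustration only:
/// \code
///   ExecutionEngine EE;
///   Function *F = EE.getModule().createFunction("main");
///   // Passing a null errPtr makes the loader abort if loading fails.
///   ONNXModelLoader loader("model.onnx", {"input"}, {inputType}, *F);
///   Placeholder *input = EXIT_ON_ERR(loader.getInputByName("input"));
///   Placeholder *output = EXIT_ON_ERR(loader.getSingleOutput());
/// \endcode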

/// \returns success if operator \p op from loader \p loader is folded
/// successfully. The folding utility uses the temporary loader \p tmpLoader
/// and its associated temporary function \p F.
template <class LoaderType, class OpType>
Error constantFoldInLoader(Function *F, LoaderType &tmpLoader,
                           LoaderType *loader, const OpType &op) {
  PlaceholderBindings bindings;
  std::vector<Tensor *> outTensors;

  // Register the constant inputs to the current op with the constant folding
  // loader.
  for (unsigned i = 0; i < (dim_t)op.input_size(); i++) {
    Constant *tmpConst = loader->getConstantByNameOrNull(op.input(i));
    RETURN_ERR_IF_NOT(tmpConst, "No constant found");
    tmpLoader.nodeValueByName_[op.input(i)] = tmpConst->getOutput();
  }

  // Use the temporary loader to load the current operator.
  RETURN_IF_ERR(tmpLoader.loadOperator(op));

  // To collect the folded outputs allocate and add save nodes to the folding
  // function.
  llvm::SmallVector<Placeholder *, 2> tmpPHs;
  for (int i = 0; i < op.output_size(); i++) {
    const auto &outputName = op.output(i);
    NodeValue r;
    ASSIGN_VALUE_OR_RETURN_ERR(r, tmpLoader.getNodeValueByName(outputName));
    Placeholder *PH = F->getParent()->createPlaceholder(
        r.getType(), "__CONSTFOLD__TMP__" + outputName, false);
    SaveNode *SN = F->createSave("save_" + PH->getName().str(), r, PH);
    auto *result = bindings.allocate(SN->getPlaceholder());
    outTensors.push_back(result);
    tmpPHs.push_back(PH);
  }

  // Cleanup to remove the temporary Placeholders we created.
  ScopeGuard cleanup([&]() {
    auto &mod = *F->getParent();
    auto &modPHs = mod.getPlaceholders();
    for (Placeholder *tmpPH : tmpPHs) {
      mod.erasePlaceholder(std::find(modPHs.begin(), modPHs.end(), tmpPH));
    }
  });

  // Evaluate the constant outputs using the interpreter backend.
  std::unique_ptr<Backend> backend(createBackend("Interpreter"));
  CompilationContext cctx;
  cctx.compMode = CompilationMode::Infer;
  cctx.optimizationOpts.enableConstantFolding = false;
  cctx.optimizationOpts.enableConstantDeduplication = false;
  cctx.backendOpts.collectConstants = true;
  // Do not print out compilation errors encountered, as constant folding is a
  // best effort; simply silently give up and continue with compilation.
  cctx.verboseCompile = false;
  RETURN_IF_ERR(executeConstantFunction(*backend, *F, bindings, cctx));

  // Using the graph output, place constant nodes in the original graph.
  for (int i = 0; i < op.output_size(); i++) {
    RETURN_IF_ERR(loader->createAndRegisterConstant(op.output(i),
                                                    std::move(*outTensors[i])));
  }

  return Error::success();
}
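
/// A minimal sketch of how a derived loader might invoke the folding utility
/// above from within its op-loading routine. The temporary-function setup and
/// the CustomLoader type are illustrative only; concrete loaders may organize
/// this differently:
/// \code
///   Module *mod = F->getParent();
///   Function *tmpF = mod->createFunction("eval_const_fold");
///   CustomLoader tmpLoader(*tmpF); // hypothetical ctor for this sketch
///   Error err = constantFoldInLoader<CustomLoader, OpType>(tmpF, tmpLoader,
///                                                          this, op);
///   mod->eraseFunction(tmpF);
/// \endcode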

} // namespace glow

#endif // GLOW_IMPORTER_PROTOBUFLOADER_H