/**
 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "glow/Importer/ProtobufLoader.h"
#include "llvm/Support/CommandLine.h"

#include <algorithm>
#include <string>
#include <unordered_set>
#include <vector>

namespace glow {

llvm::cl::OptionCategory loaderOptCat("Model Loader Options");

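// Controls whether the ONNX and Caffe loaders constant-fold operators as they
// are imported. The default (true) can be overridden on the command line
// (with llvm::cl a boolean option like this is typically passed as
// -const-fold-ops=false) or at runtime via setConstantFoldLoaderOpsFlag()
// below.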
static llvm::cl::opt<bool> isConstFoldLoaderOps(
    "const-fold-ops",
    llvm::cl::desc(
        "Performs constant folding on ONNX and Caffe Operators while loading."),
    llvm::cl::init(true), llvm::cl::cat(loaderOptCat));

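/// \returns true if every element of \p a is equal to the first element, i.e.
/// the array holds a single repeated value. Empty and single-element arrays
/// are trivially constant.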
bool isArrayConstant(llvm::ArrayRef<size_t> a) {
  for (size_t i = 1; i < a.size(); i++)
    if (a[0] != a[i])
      return false;
  return true;
}

void setConstantFoldLoaderOpsFlag(bool flag) { isConstFoldLoaderOps = flag; }

bool getConstantFoldLoaderOpsFlag() { return isConstFoldLoaderOps; }

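/// \returns true if the operator \p typeName with operands \p inputs can be
/// constant-folded at load time: folding must be enabled, the operator type
/// must not be in the unsupported list below, and every input must already be
/// a Constant.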
bool ProtobufLoader::isConstantFoldable(llvm::ArrayRef<NodeValue> inputs,
                                        std::string typeName) const {
  int numInputs = inputs.size();
  if (!getConstantFoldLoaderOpsFlag()) {
    return false;
  }
  // foldUnsupportedTypes: list of operator type names unsupported for folding.
  std::string foldUnsupportedTypes[] = {"Constant", "Loop", "If"};
  std::string *findType = std::find(std::begin(foldUnsupportedTypes),
                                    std::end(foldUnsupportedTypes), typeName);
  // Early exit if folding is not supported for the current operator.
  if (findType != std::end(foldUnsupportedTypes)) {
    return false;
  }

  // If all of the inputs to the operator are Constants, this op can be folded.
  for (int i = 0; i < numInputs; i++) {
    if (inputs[i].getNode()->getKind() != Kinded::Kind::ConstantKind) {
      return false;
    }
  }
  return true;
}

Placeholder *
ProtobufLoader::getStaticPlaceholderByNameOrNull(llvm::StringRef name) const {
  auto it = nodeValueByName_.find(name);
  if (it == nodeValueByName_.end()) {
    return nullptr;
  }
  auto *res = llvm::dyn_cast<Placeholder>(it->second.getNode());
  return (res && res->isStatic()) ? res : nullptr;
}

Constant *ProtobufLoader::getConstantByNameOrNull(llvm::StringRef name) const {
  auto it = nodeValueByName_.find(name);
  if (it == nodeValueByName_.end()) {
    return nullptr;
  }
  auto *res = llvm::dyn_cast<Constant>(it->second.getNode());
  return res ? res : nullptr;
}

Expected<Constant *>
ProtobufLoader::getConstantByName(llvm::StringRef name) const {
  auto *ptr = getConstantByNameOrNull(name);
  RETURN_ERR_IF_NOT(
      ptr, strFormat("could not find constant with name %s", name.data()));
  return ptr;
}

bool ProtobufLoader::hasConstantByName(llvm::StringRef name) const {
  return getConstantByNameOrNull(name) != nullptr;
}

Expected<Placeholder *> ProtobufLoader::getSingleOutput() const {
  RETURN_ERR_IF_NOT(outputVarsByName_.size() == 1,
                    "There must be only one output.");
  return outputVarsByName_.begin()->second;
}

Expected<Placeholder *> ProtobufLoader::getSingleInput() const {
  RETURN_ERR_IF_NOT(inputVarsByName_.size() == 1,
                    "There must be only one input.");
  return inputVarsByName_.begin()->second;
}

Expected<Placeholder *>
ProtobufLoader::getOutputByName(llvm::StringRef name) const {
  auto it = outputVarsByName_.find(name);
  RETURN_ERR_IF_NOT(
      it != outputVarsByName_.end(),
      llvm::Twine("No external output Variable was registered with name ", name)
          .str());
  return it->second;
}

Expected<Placeholder *>
ProtobufLoader::getInputByName(llvm::StringRef name) const {
  auto it = inputVarsByName_.find(name);
  RETURN_ERR_IF_NOT(
      it != inputVarsByName_.end(),
      llvm::Twine("No external input Variable was registered with name ", name)
          .str());
  return it->second;
}

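/// \returns the NodeValue registered under \p name, or a null NodeValue if no
/// such mapping exists. When the value belongs to a different Function than
/// the one currently being loaded into (as happens with pre-partitioned
/// models) and \p ignoreSrcFun is false, an intermediate Placeholder is
/// created (or reused) to bridge the two Functions.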
NodeValue
ProtobufLoader::getNodeValueByNameOrNullNodeValue(llvm::StringRef name,
                                                  bool ignoreSrcFun) {
  auto it = nodeValueByName_.find(name);
  if (it == nodeValueByName_.end()) {
    return NodeValue(nullptr);
  }

  // Always return the NV of a storage Node since Storage lives in the Module
  // and is accessible to any Node.
  NodeValue NV = it->second;
  if (llvm::isa<Storage>(NV)) {
    return NV;
  }

  // Check if the current Function G_ we are loading into is the same as the
  // Function of the NV we found; if so then return it.
  Function *srcF = NV.getNode()->getParent();
  if (srcF == G_ || ignoreSrcFun) {
    return NV;
  }

  // Otherwise we must be looking up a NV from a different Function in the
  // Module, so look for an intermediate Placeholder linking the two if it
  // exists, or otherwise create one and remember it.
  assert(partNameToFun_.size() > 0 &&
         "Must be loading a pre-partitioned model.");
  auto itPH = intermediatePHsByName_.find(name);
  Placeholder *intermedPH = nullptr;
  // Create the intermediate PH and SaveNode if it does not yet exist. Note that
  // we store these intermediate PHs separately from nodeValueByName_ because we
  // want future users from the same Function as the NV to still use the Node
  // directly through nodeValueByName_.
  if (itPH == intermediatePHsByName_.end()) {
    auto *save = srcF->createSave("tmp_" + NV.getNode()->getName().str(), NV);
    intermedPH = save->getPlaceholder();
    intermediatePHsByName_[name] = intermedPH;
  } else {
    intermedPH = itPH->second;
  }
  return intermedPH->getOutput();
}

Expected<NodeValue> ProtobufLoader::getNodeValueByName(llvm::StringRef name,
                                                       bool ignoreSrcFun) {
  RETURN_ERR_IF_NOT(hasNodeByName(name),
                    llvm::Twine("No node under name ", name).str());
  auto node = getNodeValueByNameOrNullNodeValue(name, ignoreSrcFun);
  RETURN_ERR_IF_NOT(node.getNode(),
                    "A null NodeValue was registered under that name");
  return node;
}

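/// Creates a Constant in the Module from \p tensor and registers it under
/// \p name, unless a Placeholder has already been registered under that name,
/// in which case the Placeholder takes precedence and the tensor is dropped.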
Error ProtobufLoader::createAndRegisterConstant(llvm::StringRef name,
                                                Tensor &&tensor,
                                                const std::string &layout) {
  auto it = nodeValueByName_.find(name);
  if (it != nodeValueByName_.end()) {
    if (llvm::dyn_cast<Placeholder>(it->second.getNode())) {
      // Placeholders take precedence over Constants.
      return Error::success();
    }
  }
  // Note: We do not support training from models loaded from protos, so
  // trainable is always set to false here.
  Constant *node = mod_.createConstant(name, std::move(tensor), layout);
  nodeValueByName_[name] = node->getOutput();
  return Error::success();
}

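/// Removes from the Module every registered Constant that ended up with no
/// users after loading, and erases the corresponding name-to-NodeValue
/// mappings.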
void ProtobufLoader::deleteUnusedConstants() {
  std::vector<std::string> nodeValuesToRemove;
  // Note that it's possible a Constant is referred to by more than one name
  // (e.g., via the Identity operator). Therefore, we maintain a set of
  // Constants to erase separately from the list of names.
  std::unordered_set<Constant *> constantToRemove;

  for (auto &kv : nodeValueByName_) {
    auto *node = kv.second.getNode();
    if (auto *c = llvm::dyn_cast<Constant>(node)) {
      if (!c->hasUsers()) {
        nodeValuesToRemove.push_back(kv.getKey().str());
        constantToRemove.insert(c);
      }
    }
  }

  for (auto &name : nodeValuesToRemove) {
    auto it = nodeValueByName_.find(name);
    DCHECK(llvm::isa<Constant>(it->second.getNode()))
        << "NodeValue with name " << name
        << " was expected to have been a Constant";
    nodeValueByName_.erase(it);
  }

  for (auto *c : constantToRemove) {
    G_->getParent()->eraseConstant(c);
  }
}

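/// Creates a Placeholder of type \p T in the Module, marks it static and
/// trainable according to \p isStatic / \p isTrainable, registers it under
/// \p name, and \returns it. Fails if a node or a storage object with that
/// name already exists.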
Expected<Placeholder *>
ProtobufLoader::createAndRegisterPlaceholder(llvm::StringRef name, TypeRef T,
                                             bool isStatic, bool isTrainable,
                                             const std::string &layout) {
  RETURN_ERR_IF_NOT(
      !hasNodeByName(name),
      llvm::Twine("Creating an already existing node ", name).str());
  RETURN_ERR_IF_NOT(!mod_.hasStorageName(name),
                    strFormat("A Placeholder was already registered by name %s",
                              name.data()));

  Placeholder *node = mod_.createPlaceholder(T, name, isTrainable, layout);
  node->setStatic(isStatic);
  nodeValueByName_[name] = node->getOutput();
  return node;
}

bool ProtobufLoader::hasNodeByName(llvm::StringRef name) const {
  return nodeValueByName_.find(name) != nodeValueByName_.end();
}

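// The two constructors differ only in how the target is specified: the first
// loads into a Module with no particular Function selected (G_ stays null),
// while the second loads into a specific Function and derives the Module from
// it. Both delegate the shared setup to setupLoader().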
ProtobufLoader::ProtobufLoader(llvm::ArrayRef<const char *> tensorNames,
                               llvm::ArrayRef<TypeRef> types, Module &mod,
                               Error *errPtr, bool loadIntoExistingModule,
                               OriginNameToTQPMap *originNameToTQPMap,
                               bool loadUniquedDummyQParams,
                               bool replaceDummyTQPs, bool zeroScaleFP16Clip,
                               bool clipQuantRangeToFP16)
    : G_(nullptr), mod_(mod), loadIntoExistingModule_(loadIntoExistingModule),
      originNameToTQPMap_(originNameToTQPMap),
      loadUniquedDummyQParams_(loadUniquedDummyQParams),
      replaceDummyTQPs_(replaceDummyTQPs),
      zeroScaleFP16Clip_(zeroScaleFP16Clip),
      clipQuantRangeToFP16_(clipQuantRangeToFP16) {
  setupLoader(tensorNames, types, errPtr);
}

ProtobufLoader::ProtobufLoader(llvm::ArrayRef<const char *> tensorNames,
                               llvm::ArrayRef<TypeRef> types, Function *F,
                               Error *errPtr, bool loadIntoExistingModule,
                               OriginNameToTQPMap *originNameToTQPMap,
                               bool loadUniquedDummyQParams,
                               bool replaceDummyTQPs, bool zeroScaleFP16Clip,
                               bool clipQuantRangeToFP16)
    : G_(F), mod_(*F->getParent()),
      loadIntoExistingModule_(loadIntoExistingModule),
      originNameToTQPMap_(originNameToTQPMap),
      loadUniquedDummyQParams_(loadUniquedDummyQParams),
      replaceDummyTQPs_(replaceDummyTQPs),
      zeroScaleFP16Clip_(zeroScaleFP16Clip),
      clipQuantRangeToFP16_(clipQuantRangeToFP16) {
  setupLoader(tensorNames, types, errPtr);
}

void ProtobufLoader::setupLoader(llvm::ArrayRef<const char *> tensorNames,
                                 llvm::ArrayRef<TypeRef> types, Error *errPtr) {
  // Verify that the version of the library that we linked against is
  // compatible with the version of the headers we compiled against.
  GOOGLE_PROTOBUF_VERIFY_VERSION;

  // If errPtr already contains an error then don't continue with construction.
  if (errPtr && *errPtr) {
    return;
  }

  // Use the global flag as the default. This may be overridden by
  // instantiations of the loader later on.
  constFoldInLoader_ = getConstantFoldLoaderOpsFlag();

  // Lambda to setup the ProtobufLoader and return any Errors that were
  // raised.
  auto setup = [&]() -> Error {
    RETURN_ERR_IF_NOT(tensorNames.size() == types.size(),
                      "Invalid initialization list");
    for (size_t i = 0, e = tensorNames.size(); i < e; i++) {
      RETURN_ERR_IF_NOT(!hasNodeByName(tensorNames[i]),
                        "Input names have duplicates");
      TypeRef T = types[i];
      if (T->isQuantizedType() && !T->isFusedQuantizedType()) {
        RETURN_ERR_IF_NOT(!clipQuantRangeToFP16_,
                          strFormat("Do not support clipQuantRangeToFP16 with "
                                    "unfused quantized input Placeholders: %s",
                                    tensorNames[i]));
        // Note: Never shift here, because these are the types that were already
        // imported/defined based on Glow.
        ASSIGN_VALUE_OR_RETURN_ERR(
            T, loadQuantTy(tensorNames[i], T->getElementType(), T->dims(),
                           T->getScale(), T->getOffset(),
                           /* shiftUInt8ToInt8 */ false));
      }
      Placeholder *placeholder;
      ASSIGN_VALUE_OR_RETURN_ERR(
          placeholder, createAndRegisterPlaceholder(tensorNames[i], T));
      inputVarsByName_.try_emplace(tensorNames[i], placeholder);
    }
    return Error::success();
  };

  if (errPtr) {
    *errPtr = setup();
  } else {
    EXIT_ON_ERR(setup());
  }
}

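/// \returns the real TensorQuantizationParams that replaced the dummy pair
/// identified by \p uniqueOffsetIdx. Only valid when replaceDummyTQPs_ is
/// enabled and the index is within the range of updatedTQPs_.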
Expected<TensorQuantizationParams>
ProtobufLoader::getUpdatedTQP(int32_t uniqueOffsetIdx) {
  RETURN_ERR_IF_NOT(replaceDummyTQPs_, "replaceDummyTQPs_ was not enabled");
  RETURN_ERR_IF_NOT(
      uniqueOffsetIdx < int32_t(updatedTQPs_.size()),
      strFormat("Unexpected size of updated TQPs %lu vs. dummy offset %d",
                updatedTQPs_.size(), uniqueOffsetIdx));
  return updatedTQPs_[uniqueOffsetIdx];
}

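/// Builds the quantized Glow type for the tensor named \p name with element
/// kind \p k, shape \p dims, and loaded \p scale / \p offset. Depending on the
/// loader flags this may shift UInt8 offsets to Int8, clip the quantization
/// parameters to the FP16 range, clip tiny scales up to kMinScaleFP16, or
/// replace the real parameters with uniqued dummy ones tracked in
/// originNameToTQPMap_.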
Expected<TypeRef> ProtobufLoader::loadQuantTy(const std::string &name,
                                              ElemKind k,
                                              llvm::ArrayRef<dim_t> dims,
                                              float scale, int32_t offset,
                                              bool shiftUInt8ToInt8,
                                              bool skipClipQuantRangeToFP16) {
  // If we have Int8QTy, we may have loaded as UInt8, and so will need to shift
  // to align to Glow's Int8QTy.
  if (k == ElemKind::Int8QTy && shiftUInt8ToInt8) {
    offset -= UINT8_TO_INT8_SHIFT;
  }

  // If we don't have a map to track dummy unique offsets to loader names, then
  // just load as normal with the actual scale/offset we loaded.
  if (!loadUniquedDummyQParams_) {
    // If clipping qparams to the fp16 range, then do so here.
    if (clipQuantRangeToFP16_ && !skipClipQuantRangeToFP16) {
      const auto qMinMax = getQuantizedValueRange(scale, offset, k);
      const float newMin = std::max(qMinMax.first, kMinFP16);
      const float newMax = std::min(qMinMax.second, kMaxFP16);
      const TensorQuantizationParams newQParams = chooseQuantizationParams(
          {newMin, newMax}, quantization::Asymmetric, k);
      scale = newQParams.scale;
      offset = newQParams.offset;
    }
    // If we are clipping qparam scales below the kMinScaleFP16 threshold up to
    // kMinScaleFP16, then do so here.
    if (zeroScaleFP16Clip_ && scale < kMinScaleFP16) {
      scale = kMinScaleFP16;
    }

    if (originNameToTQPMap_) {
      bool inserted =
          originNameToTQPMap_
              ->emplace(name, TensorQuantizationParams{scale, offset})
              .second;
      RETURN_ERR_IF_NOT(inserted, "Already inserted TQP for " + name);
    }
    return mod_.uniqueType(k, dims, scale, offset);
  }

  RETURN_ERR_IF_NOT(originNameToTQPMap_,
                    "Must have valid originNameToTQPMap_ when loading "
                    "uniqued dummy qparams.");

  // We use dummyScale to represent a dummy scale/offset pair. Make sure the
  // original model did not have dummyScale, since we will use it later on to
  // verify all qparams are now dummies.
  RETURN_ERR_IF_NOT(scale != dummyScale, "Found dummy scale for " + name);

  // For uniqued scale/offset, ignore the actual loaded values. Instead use
  // dummyScale to signal that these quant params are dummies, and a uniqued,
  // incremented offset to represent this unique quant param pair. Save the name
  // of the C2 edge that we loaded to use these quant params in the cctx so we
  // can use it in the future. The index the name is at represents which unique
  // index it is mapped to.
  RETURN_ERR_IF_NOT(int32_t(originNameToTQPMap_->size()) == currUniqueOffset_,
                    "Unexpected size encountered for qparam origin tracking");
  const int32_t thisUniqueOffset = currUniqueOffset_++;
  bool inserted =
      originNameToTQPMap_
          ->emplace(name,
                    TensorQuantizationParams{dummyScale, thisUniqueOffset})
          .second;
  RETURN_ERR_IF_NOT(inserted, "Already inserted TQP for " + name);
  return mod_.uniqueType(k, dims, dummyScale, thisUniqueOffset);
}

} // namespace glow