1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include "glow/Importer/ProtobufLoader.h" |
18 | #include "llvm/Support/CommandLine.h" |
19 | #include <string> |
20 | |
21 | namespace glow { |
22 | |
llvm::cl::OptionCategory loaderOptCat("Model Loader Options");
24 | |
static llvm::cl::opt<bool> isConstFoldLoaderOps(
    "const-fold-ops",
    llvm::cl::desc(
        "Performs constant folding on ONNX and Caffe Operators while loading."),
    llvm::cl::init(true), llvm::cl::cat(loaderOptCat));
30 | |
/// \returns true if all elements of \p a are equal (i.e. the array holds a
/// single repeated value). Vacuously true for empty and single-element arrays.
bool isArrayConstant(llvm::ArrayRef<size_t> a) {
  for (size_t i = 1; i < a.size(); i++) {
    if (a[0] != a[i]) {
      return false;
    }
  }
  return true;
}
37 | |
38 | void setConstantFoldLoaderOpsFlag(bool flag) { isConstFoldLoaderOps = flag; } |
39 | |
40 | bool getConstantFoldLoaderOpsFlag() { return isConstFoldLoaderOps; } |
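
// Illustrative usage sketch (not part of this file's API): callers can
// temporarily override the command-line default around a load. The elided
// loader construction is hypothetical.
//
//   const bool saved = getConstantFoldLoaderOpsFlag();
//   setConstantFoldLoaderOpsFlag(false); // Disable folding for this load.
//   // ... construct and run a model loader here ...
//   setConstantFoldLoaderOpsFlag(saved); // Restore the previous setting.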
41 | |
bool ProtobufLoader::isConstantFoldable(llvm::ArrayRef<NodeValue> inputs,
                                        std::string typeName) const {
  if (!getConstantFoldLoaderOpsFlag()) {
    return false;
  }
  // Operator typenames for which folding is unsupported.
  static const std::string foldUnsupportedTypes[] = {"Constant", "Loop", "If"};
  // Early exit if folding is not supported for the current operator.
  if (std::find(std::begin(foldUnsupportedTypes),
                std::end(foldUnsupportedTypes),
                typeName) != std::end(foldUnsupportedTypes)) {
    return false;
  }

  // The op can be folded only if all of its inputs are Constants.
  for (const auto &input : inputs) {
    if (input.getNode()->getKind() != Kinded::Kind::ConstantKind) {
      return false;
    }
  }
  return true;
}
65 | |
66 | Placeholder * |
67 | ProtobufLoader::getStaticPlaceholderByNameOrNull(llvm::StringRef name) const { |
68 | auto it = nodeValueByName_.find(name); |
69 | if (it == nodeValueByName_.end()) { |
70 | return nullptr; |
71 | } |
72 | auto *res = llvm::dyn_cast<Placeholder>(it->second.getNode()); |
73 | return (res && res->isStatic()) ? res : nullptr; |
74 | } |
75 | |
76 | Constant *ProtobufLoader::getConstantByNameOrNull(llvm::StringRef name) const { |
77 | auto it = nodeValueByName_.find(name); |
78 | if (it == nodeValueByName_.end()) { |
79 | return nullptr; |
80 | } |
  return llvm::dyn_cast<Constant>(it->second.getNode());
83 | } |
84 | |
85 | Expected<Constant *> |
86 | ProtobufLoader::getConstantByName(llvm::StringRef name) const { |
87 | auto *ptr = getConstantByNameOrNull(name); |
  RETURN_ERR_IF_NOT(
      ptr, strFormat("could not find constant with name %s", name.data()));
90 | return ptr; |
91 | } |
92 | |
93 | bool ProtobufLoader::hasConstantByName(llvm::StringRef name) const { |
94 | return getConstantByNameOrNull(name) != nullptr; |
95 | } |
96 | |
97 | Expected<Placeholder *> ProtobufLoader::getSingleOutput() const { |
  RETURN_ERR_IF_NOT(outputVarsByName_.size() == 1,
                    "There must be only one output.");
100 | return outputVarsByName_.begin()->second; |
101 | } |
102 | |
103 | Expected<Placeholder *> ProtobufLoader::getSingleInput() const { |
  RETURN_ERR_IF_NOT(inputVarsByName_.size() == 1,
                    "There must be only one input.");
106 | return inputVarsByName_.begin()->second; |
107 | } |
108 | |
109 | Expected<Placeholder *> |
110 | ProtobufLoader::getOutputByName(llvm::StringRef name) const { |
111 | auto it = outputVarsByName_.find(name); |
112 | RETURN_ERR_IF_NOT( |
113 | it != outputVarsByName_.end(), |
114 | llvm::Twine("No external output Variable was registered with name " , name) |
115 | .str()); |
116 | return it->second; |
117 | } |
118 | |
119 | Expected<Placeholder *> |
120 | ProtobufLoader::getInputByName(llvm::StringRef name) const { |
121 | auto it = inputVarsByName_.find(name); |
122 | RETURN_ERR_IF_NOT( |
123 | it != inputVarsByName_.end(), |
124 | llvm::Twine("No external input Variable was registered with name " , name) |
125 | .str()); |
126 | return it->second; |
127 | } |
128 | |
129 | NodeValue |
130 | ProtobufLoader::getNodeValueByNameOrNullNodeValue(llvm::StringRef name, |
131 | bool ignoreSrcFun) { |
132 | auto it = nodeValueByName_.find(name); |
133 | if (it == nodeValueByName_.end()) { |
134 | return NodeValue(nullptr); |
135 | } |
136 | |
137 | // Always return the NV of a storage Node since Storage lives in the Module |
138 | // and is accessible to any Node. |
139 | NodeValue NV = it->second; |
140 | if (llvm::isa<Storage>(NV)) { |
141 | return NV; |
142 | } |
143 | |
144 | // Check if the current Function G_ we are loading into is the same as the |
145 | // Function of the NV we found; if so then return it. |
146 | Function *srcF = NV.getNode()->getParent(); |
147 | if (srcF == G_ || ignoreSrcFun) { |
148 | return NV; |
149 | } |
150 | |
151 | // Otherwise we must be looking up a NV from a different Function in the |
152 | // Module, so look for an intermediate Placeholder linking the two if it |
153 | // exists, or otherwise create one and remember it. |
  assert(!partNameToFun_.empty() &&
         "Must be loading a pre-partitioned model.");
156 | auto itPH = intermediatePHsByName_.find(name); |
157 | Placeholder *intermedPH = nullptr; |
158 | // Create the intermediate PH and SaveNode if it does not yet exist. Note that |
159 | // we store these intermediate PHs separately from nodeValueByName_ because we |
160 | // want future users from the same Function as the NV to still use the Node |
161 | // directly through nodeValueByName_. |
162 | if (itPH == intermediatePHsByName_.end()) { |
163 | auto *save = srcF->createSave("tmp_" + NV.getNode()->getName().str(), NV); |
164 | intermedPH = save->getPlaceholder(); |
165 | intermediatePHsByName_[name] = intermedPH; |
166 | } else { |
167 | intermedPH = itPH->second; |
168 | } |
169 | return intermedPH->getOutput(); |
170 | } |
171 | |
172 | Expected<NodeValue> ProtobufLoader::getNodeValueByName(llvm::StringRef name, |
173 | bool ignoreSrcFun) { |
  RETURN_ERR_IF_NOT(hasNodeByName(name),
                    llvm::Twine("No node under name ", name).str());
  auto node = getNodeValueByNameOrNullNodeValue(name, ignoreSrcFun);
  RETURN_ERR_IF_NOT(
      node.getNode(),
      llvm::Twine("Null NodeValue registered under name ", name).str());
178 | return node; |
179 | } |
180 | |
181 | Error ProtobufLoader::createAndRegisterConstant(llvm::StringRef name, |
182 | Tensor &&tensor, |
183 | const std::string &layout) { |
184 | auto it = nodeValueByName_.find(name); |
185 | if (it != nodeValueByName_.end()) { |
    if (llvm::isa<Placeholder>(it->second.getNode())) {
187 | // Placeholders take precedence over Constants. |
188 | return Error::success(); |
189 | } |
190 | } |
  // Note: We do not support training from models loaded from protos, so the
  // value is registered as a Constant rather than a trainable Placeholder.
193 | Constant *node = mod_.createConstant(name, std::move(tensor), layout); |
194 | nodeValueByName_[name] = node->getOutput(); |
195 | return Error::success(); |
196 | } |
197 | |
198 | void ProtobufLoader::deleteUnusedConstants() { |
199 | std::vector<std::string> nodeValuesToRemove; |
  // Note that it's possible that a constant is referred to by more than one
  // name (e.g., via the Identity operator). Therefore, we maintain a set of
  // constants to erase separately from the list of names.
203 | std::unordered_set<Constant *> constantToRemove; |
204 | |
205 | for (auto &kv : nodeValueByName_) { |
206 | auto *node = kv.second.getNode(); |
207 | if (auto *c = llvm::dyn_cast<Constant>(node)) { |
208 | if (!c->hasUsers()) { |
209 | nodeValuesToRemove.push_back(kv.getKey().str()); |
210 | constantToRemove.insert(c); |
211 | } |
212 | } |
213 | } |
214 | |
215 | for (auto &name : nodeValuesToRemove) { |
216 | auto it = nodeValueByName_.find(name); |
217 | DCHECK(llvm::isa<Constant>(it->second.getNode())) |
218 | << "NodeValue with name " << name |
219 | << " was expected to have been a Constant" ; |
220 | nodeValueByName_.erase(it); |
221 | } |
222 | |
223 | for (auto *c : constantToRemove) { |
224 | G_->getParent()->eraseConstant(c); |
225 | } |
226 | } |
227 | |
228 | Expected<Placeholder *> |
229 | ProtobufLoader::createAndRegisterPlaceholder(llvm::StringRef name, TypeRef T, |
230 | bool isStatic, bool isTrainable, |
231 | const std::string &layout) { |
232 | RETURN_ERR_IF_NOT( |
233 | !hasNodeByName(name), |
234 | llvm::Twine("Creating an already existing node " , name).str()); |
235 | RETURN_ERR_IF_NOT(!mod_.hasStorageName(name), |
236 | strFormat("A Placeholder was already registered by name %s" , |
237 | name.data())); |
238 | |
239 | Placeholder *node = mod_.createPlaceholder(T, name, isTrainable, layout); |
240 | node->setStatic(isStatic); |
241 | nodeValueByName_[name] = node->getOutput(); |
242 | return node; |
243 | } |
244 | |
245 | bool ProtobufLoader::hasNodeByName(llvm::StringRef name) const { |
246 | return nodeValueByName_.find(name) != nodeValueByName_.end(); |
247 | } |
248 | |
249 | ProtobufLoader::ProtobufLoader(llvm::ArrayRef<const char *> tensorNames, |
250 | llvm::ArrayRef<TypeRef> types, Module &mod, |
251 | Error *errPtr, bool loadIntoExistingModule, |
252 | OriginNameToTQPMap *originNameToTQPMap, |
253 | bool loadUniquedDummyQParams, |
254 | bool replaceDummyTQPs, bool zeroScaleFP16Clip, |
255 | bool clipQuantRangeToFP16) |
256 | : G_(nullptr), mod_(mod), loadIntoExistingModule_(loadIntoExistingModule), |
257 | originNameToTQPMap_(originNameToTQPMap), |
258 | loadUniquedDummyQParams_(loadUniquedDummyQParams), |
259 | replaceDummyTQPs_(replaceDummyTQPs), |
260 | zeroScaleFP16Clip_(zeroScaleFP16Clip), |
261 | clipQuantRangeToFP16_(clipQuantRangeToFP16) { |
262 | setupLoader(tensorNames, types, errPtr); |
263 | } |
264 | |
265 | ProtobufLoader::ProtobufLoader(llvm::ArrayRef<const char *> tensorNames, |
266 | llvm::ArrayRef<TypeRef> types, Function *F, |
267 | Error *errPtr, bool loadIntoExistingModule, |
268 | OriginNameToTQPMap *originNameToTQPMap, |
269 | bool loadUniquedDummyQParams, |
270 | bool replaceDummyTQPs, bool zeroScaleFP16Clip, |
271 | bool clipQuantRangeToFP16) |
272 | : G_(F), mod_(*F->getParent()), |
273 | loadIntoExistingModule_(loadIntoExistingModule), |
274 | originNameToTQPMap_(originNameToTQPMap), |
275 | loadUniquedDummyQParams_(loadUniquedDummyQParams), |
276 | replaceDummyTQPs_(replaceDummyTQPs), |
277 | zeroScaleFP16Clip_(zeroScaleFP16Clip), |
278 | clipQuantRangeToFP16_(clipQuantRangeToFP16) { |
279 | setupLoader(tensorNames, types, errPtr); |
280 | } |
281 | |
282 | void ProtobufLoader::setupLoader(llvm::ArrayRef<const char *> tensorNames, |
283 | llvm::ArrayRef<TypeRef> types, Error *errPtr) { |
284 | // Verify that the version of the library that we linked against is |
285 | // compatible with the version of the headers we compiled against. |
286 | GOOGLE_PROTOBUF_VERIFY_VERSION; |
287 | |
  // If errPtr already contains an error, then don't continue with setup.
289 | if (errPtr && *errPtr) { |
290 | return; |
291 | } |
292 | |
  // Use the global flag as the default; instantiations of the loader may
  // override it later on.
295 | constFoldInLoader_ = getConstantFoldLoaderOpsFlag(); |
296 | |
  // Lambda to set up the ProtobufLoader and return any Errors that were
  // raised.
299 | auto setup = [&]() -> Error { |
    RETURN_ERR_IF_NOT(tensorNames.size() == types.size(),
                      "tensorNames and types must have the same size");
302 | for (size_t i = 0, e = tensorNames.size(); i < e; i++) { |
303 | RETURN_ERR_IF_NOT(!hasNodeByName(tensorNames[i]), |
304 | "Input names have duplicate" ); |
305 | TypeRef T = types[i]; |
306 | if (T->isQuantizedType() && !T->isFusedQuantizedType()) { |
307 | RETURN_ERR_IF_NOT(!clipQuantRangeToFP16_, |
308 | strFormat("Do not support clipQuantRangeToFP16 with " |
309 | "unfused quantized input Placeholders: %s" , |
310 | tensorNames[i])); |
      // Note: Never shift here, because these types were already
      // imported/defined in terms of Glow's conventions.
313 | ASSIGN_VALUE_OR_RETURN_ERR( |
314 | T, loadQuantTy(tensorNames[i], T->getElementType(), T->dims(), |
315 | T->getScale(), T->getOffset(), |
316 | /* shiftUInt8ToInt8 */ false)); |
317 | } |
318 | Placeholder *placeholder; |
319 | ASSIGN_VALUE_OR_RETURN_ERR( |
320 | placeholder, createAndRegisterPlaceholder(tensorNames[i], T)); |
321 | inputVarsByName_.try_emplace(tensorNames[i], placeholder); |
322 | } |
323 | return Error::success(); |
324 | }; |
325 | |
326 | if (errPtr) { |
327 | *errPtr = setup(); |
328 | } else { |
329 | EXIT_ON_ERR(setup()); |
330 | } |
331 | } |
332 | |
333 | Expected<TensorQuantizationParams> |
334 | ProtobufLoader::getUpdatedTQP(int32_t uniqueOffsetIdx) { |
  RETURN_ERR_IF_NOT(replaceDummyTQPs_, "replaceDummyTQPs_ was not enabled");
  RETURN_ERR_IF_NOT(
      uniqueOffsetIdx < int32_t(updatedTQPs_.size()),
      strFormat("Unexpected size of updated TQPs %zu vs. dummy offset %d",
                updatedTQPs_.size(), uniqueOffsetIdx));
340 | return updatedTQPs_[uniqueOffsetIdx]; |
341 | } |
342 | |
343 | Expected<TypeRef> ProtobufLoader::loadQuantTy(const std::string &name, |
344 | ElemKind k, |
345 | llvm::ArrayRef<dim_t> dims, |
346 | float scale, int32_t offset, |
347 | bool shiftUInt8ToInt8, |
348 | bool skipClipQuantRangeToFP16) { |
349 | // If we have Int8QTy, we may have loaded as UInt8, and so will need to shift |
350 | // to align to Glow's Int8QTy. |
351 | if (k == ElemKind::Int8QTy && shiftUInt8ToInt8) { |
352 | offset -= UINT8_TO_INT8_SHIFT; |
353 | } |
354 | |
355 | // If we don't have a map to track dummy unique offsets to loader names, then |
356 | // just load as normal with the actual scale/offset we loaded. |
357 | if (!loadUniquedDummyQParams_) { |
358 | // If clipping qparams to fp16 range then do so here. |
359 | if (clipQuantRangeToFP16_ && !skipClipQuantRangeToFP16) { |
360 | const auto qMinMax = getQuantizedValueRange(scale, offset, k); |
361 | const float newMin = std::max(qMinMax.first, kMinFP16); |
362 | const float newMax = std::min(qMinMax.second, kMaxFP16); |
363 | const TensorQuantizationParams newQParams = chooseQuantizationParams( |
364 | {newMin, newMax}, quantization::Asymmetric, k); |
365 | scale = newQParams.scale; |
366 | offset = newQParams.offset; |
367 | } |
368 | // If we are clipping qparam scales below the kMinScaleFP16 threshold to |
369 | // kMinScaleFP16 then do so here. |
370 | if (zeroScaleFP16Clip_ && scale < kMinScaleFP16) { |
371 | scale = kMinScaleFP16; |
372 | } |
373 | |
374 | if (originNameToTQPMap_) { |
375 | bool inserted = |
376 | originNameToTQPMap_ |
377 | ->emplace(name, TensorQuantizationParams{scale, offset}) |
378 | .second; |
379 | RETURN_ERR_IF_NOT(inserted, "Already inserted TQP for " + name); |
380 | } |
381 | return mod_.uniqueType(k, dims, scale, offset); |
382 | } |
383 | |
384 | RETURN_ERR_IF_NOT(originNameToTQPMap_, |
385 | "Must have valid originNameToTQPMap_ when loading " |
386 | "uniqued dummy qparams." ); |
387 | |
388 | // We use dummyScale to represent a dummy scale/offset pair. Make sure the |
389 | // original model did not have dummyScale, since we will use it later on to |
390 | // verify all qparams are now dummies. |
391 | RETURN_ERR_IF_NOT(scale != dummyScale, "Found dummy scale for " + name); |
392 | |
  // For uniqued scale/offset, ignore the actual loaded values. Instead use
  // dummyScale to signal that these quant params are dummies, along with a
  // uniqued, incremented offset to represent this unique quant param pair.
  // Save the name of the C2 edge whose quant params we loaded in the cctx so
  // we can use it in the future; the index at which the name is stored is the
  // unique offset it maps to.
  RETURN_ERR_IF_NOT(int32_t(originNameToTQPMap_->size()) == currUniqueOffset_,
                    "Unexpected size encountered for qparam origin tracking");
401 | const int32_t thisUniqueOffset = currUniqueOffset_++; |
402 | bool inserted = |
403 | originNameToTQPMap_ |
404 | ->emplace(name, |
405 | TensorQuantizationParams{dummyScale, thisUniqueOffset}) |
406 | .second; |
407 | RETURN_ERR_IF_NOT(inserted, "Already inserted TQP for " + name); |
408 | return mod_.uniqueType(k, dims, dummyScale, thisUniqueOffset); |
409 | } |
410 | |
} // namespace glow
412 | |