1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
#include "glow/Importer/Caffe2ModelLoader.h"
#include "glow/Base/Tensor.h"
#include "glow/Graph/Graph.h"
#include "glow/Graph/Nodes.h"
#include "glow/Runtime/RuntimeTypes.h"
#include "glow/Support/Error.h"

#include "llvm/Support/Casting.h"

#include "caffe2/proto/caffe2.pb.h"
#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/text_format.h>

#include <cstddef>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>
36 | |
37 | using namespace glow; |
38 | using llvm::cast; |
39 | |
40 | using ArgumentDictionaryTy = |
41 | std::unordered_map<std::string, const caffe2::Argument *>; |
42 | |
43 | namespace glow { |
44 | /// Template specialization of loadOperatorName for caffe2. |
45 | template <> |
46 | std::string |
47 | loadOperatorName<caffe2::OperatorDef>(const caffe2::OperatorDef &op) { |
48 | if (op.name().length()) { |
49 | return op.name(); |
50 | } |
51 | if (op.output_size() > 0) { |
52 | return op.output(0); |
53 | } |
54 | return op.type(); |
55 | } |
56 | |
57 | // FIXME: this is a temporary solution for the case when NonZero returns |
58 | // -2^31 as the boundary for the returned indices. For examples, currently |
59 | // we get this NonZero([0, 1, 1, 0, 0]) -> [1, 2, -2^31, 0, 0], because the |
60 | // shapes are static. This function makes sure that the output looks like |
61 | // [1, 2, -1, -1, -1] which is more convenient for now. |
62 | // The logic is we get [1, 2, -2^31, 0, 0], then we convert to [0, 0, 1, 0, 0] |
63 | // by finding negative element, then do cumsum so we get [0, 0, 1, 1, 1], |
64 | // then whenever we see 0, we use original value and when we see 1 we use -1, |
65 | // so it becomes [1, 2, -1, -1, -1]. |
66 | Node *fixNonZero(Function *F, Module &mod, const std::string opName, |
67 | NodeValue node) { |
68 | auto zeroes = F->createSplat(opName + ".fixNZ.zeroes" , node.getType(), 0); |
69 | auto floatTy = mod.uniqueType(ElemKind::Float16Ty, node.dims()); |
70 | auto minusOnesFloat = |
71 | F->createSplat(opName + ".fixNZ.minusOnesFloat" , floatTy, -1); |
72 | auto zeroesFloat = F->createSplat(opName + ".fixNZ.zeroesFloat" , floatTy, 0); |
73 | auto onesFloat = F->createSplat(opName + ".fixNZ.onesFloat" , floatTy, 1); |
74 | auto nodeFloat = F->createConvertTo(opName + ".fixNZ.float" , node, floatTy); |
75 | |
76 | // If there is a boundary, it will be marked as true. |
77 | auto isNegBool = F->createCmpLT(opName + ".fixNZ.isNegBool" , node, zeroes); |
78 | auto isNegFloat = F->createSelect(opName + ".fixNZ.isNegFloat" , isNegBool, |
79 | onesFloat, zeroesFloat); |
80 | auto isNegInt = F->createConvertTo(opName + ".fixNZ.isNegInt" , isNegFloat, |
81 | node.getType()); |
82 | |
83 | // After applying cumsum every element before boundary will be 0 |
84 | // and starting from boundary will be 1. |
85 | auto cumSum = F->createCumSum(opName + ".fixNZ.cumSum" , isNegInt, 0); |
86 | |
87 | auto isAfterBoundary = |
88 | F->createCmpGT(opName + ".fixNZ.isAfterBoundary" , cumSum, zeroes); |
89 | |
90 | auto withMinusOnesFloat = |
91 | F->createSelect(opName + ".fixNZ.withMinusOnesFloat" , isAfterBoundary, |
92 | minusOnesFloat, nodeFloat); |
93 | |
94 | auto withMinusOnesInt = F->createConvertTo( |
95 | opName + ".fixNZ.withMinusOnesInt" , withMinusOnesFloat, node.getType()); |
96 | |
97 | return withMinusOnesInt; |
98 | } |
99 | }; // namespace glow |
100 | |
/// Legacy padding modes supported in caffe2. These are used by MaxPool
/// operators, and are defined in caffe2_legacy.proto in the caffe2 source
/// tree.
/// NOTSET marks the absence of a legacy mode; N_MODES is a sentinel count,
/// not a real mode.
enum LegacyPaddingMode { NOTSET, VALID, SAME, CAFFE_LEGACY_POOLING, N_MODES };
105 | |
106 | /// Creates tensor \p T from the input \p in. Note, there is no data associated |
107 | /// with the Tensor. This method makes sure that the tensor is created with the |
108 | /// proper shape and element type. |
109 | Expected<LoadWeightResult> |
110 | Caffe2ModelLoader::createAndSetTensorType(const caffe2::TensorProto &in) { |
111 | std::vector<dim_t> dim; |
112 | for (auto d : in.dims()) { |
113 | if (d == 0) { |
114 | return MAKE_ERR("0 dimension is not supported" ); |
115 | } |
116 | dim.push_back(d); |
117 | } |
118 | |
119 | LoadWeightResult result; |
120 | result.t = glow::make_unique<Tensor>(); |
121 | |
122 | if (in.data_type() == caffe2::TensorProto::FLOAT) { |
123 | result.t->reset(ElemKind::FloatTy, dim); |
124 | } else if (in.data_type() == caffe2::TensorProto::FLOAT16) { |
125 | result.t->reset(ElemKind::Float16Ty, dim); |
126 | } else if (in.data_type() == caffe2::TensorProto::INT32) { |
127 | result.t->reset(ElemKind::Int32ITy, dim); |
128 | } else if (in.data_type() == caffe2::TensorProto::INT64) { |
129 | result.t->reset(ElemKind::Int64ITy, dim); |
130 | } else if (in.data_type() == caffe2::TensorProto::UINT8) { |
131 | result.t->reset(ElemKind::UInt8QTy, dim, 1.0, 0); |
132 | } else if (in.data_type() == caffe2::TensorProto::INT8) { |
133 | result.t->reset(ElemKind::Int8QTy, dim, 1.0, 0); |
134 | } else { |
135 | return MAKE_ERR( |
136 | strFormat("FP32/16, Int32/64, Int8/Uint8 are supported. Got type" |
137 | " %s for tensor %s." , |
138 | caffe2::TensorProto_DataType_Name(in.data_type()).c_str(), |
139 | in.name().c_str())); |
140 | } |
141 | |
142 | return Expected<LoadWeightResult>(std::move(result)); |
143 | } |
144 | |
/// Creates a tensor for the quantized proto \p in with the proper shape,
/// element type and quantization parameters. No data is associated with the
/// returned tensor. When \p in carries multiple scale/bias pairs
/// (channelwise quantization), they are returned in separate
/// result.scales/result.offsets tensors and the main tensor keeps default
/// qparams.
Expected<LoadWeightResult>
Caffe2ModelLoader::createAndSetTensorType(const caffe2::QTensorProto &in) {
  std::vector<dim_t> dim;
  for (auto d : in.dims()) {
    if (d == 0) {
      return MAKE_ERR("0 dimension qtensor is not supported" );
    }
    dim.push_back(d);
  }

  // Only quantization along axis 1 is supported.
  if (in.axis() != 1) {
    return MAKE_ERR("axis must be 1" );
  }

  dim_t qparams = static_cast<dim_t>(in.scales().size());

  RETURN_ERR_IF_NOT(qparams > 0, "No qparams found" );

  // Scales and biases (offsets) must come in matched pairs.
  RETURN_ERR_IF_NOT(in.biases().size() == in.scales().size(),
                    "Found a different number of biases and scales" );

  LoadWeightResult result;
  result.t = glow::make_unique<Tensor>();

  float scale = 1.0;
  int32_t offset = 0;

  // If only one set of qparams is present then use them, otherwise load the
  // multiple sets of qparams as separate tensors and use the default qparams
  // for the main tensor result.t.
  // TODO: should we check is_multiparam?
  if (qparams == 1) {
    scale = in.scales(0);
    offset = in.biases(0);
  } else {
    // Uniqued qparams (originNameToTQPMap_) cannot represent a vector of
    // per-channel scales/biases, so reject that combination.
    RETURN_ERR_IF_NOT(!originNameToTQPMap_,
                      "Unsupported loading of uniqued qparams for vector of "
                      "scales/biases for " +
                          in.name());
    result.scales = glow::make_unique<Tensor>(ElemKind::FloatTy,
                                              llvm::makeArrayRef({qparams}));
    result.offsets = glow::make_unique<Tensor>(ElemKind::Int32ITy,
                                               llvm::makeArrayRef({qparams}));

    // Copy each per-channel scale/offset pair into the side tensors.
    auto scalesH = result.scales->getHandle<float>();
    auto offsetsH = result.offsets->getHandle<int32_t>();
    for (size_t i = 0; i < qparams; ++i) {
      scalesH.raw(i) = in.scales(i);
      offsetsH.raw(i) = in.biases(i);
    }
  }

  if (in.data_type() == caffe2::TensorProto::INT8) {
    TypeRef outTy;
    ASSIGN_VALUE_OR_RETURN_ERR(
        outTy, ProtobufLoader::loadQuantTy(in.name(), ElemKind::Int8QTy, dim,
                                           scale, offset,
                                           /* shiftUInt8ToInt8 */ false));
    result.t->reset(*outTy);
  } else if (in.data_type() == caffe2::TensorProto::UINT8) {
    // NOTE(review): UINT8 is loaded as Int8QTy relying on loadQuantTy's
    // default shiftUInt8ToInt8 behavior — presumably shifting values into
    // the int8 range; confirm against loadQuantTy's declaration.
    TypeRef outTy;
    ASSIGN_VALUE_OR_RETURN_ERR(
        outTy, ProtobufLoader::loadQuantTy(in.name(), ElemKind::Int8QTy, dim,
                                           scale, offset));
    result.t->reset(*outTy);
  } else if (in.data_type() == caffe2::TensorProto::INT32) {
    TypeRef outTy;
    ASSIGN_VALUE_OR_RETURN_ERR(
        outTy, ProtobufLoader::loadQuantTy(in.name(), ElemKind::Int32QTy, dim,
                                           scale, offset));
    result.t->reset(*outTy);
  } else {
    return MAKE_ERR("Only int8, uint8, and int32 qtensors are supported" );
  }

  return Expected<LoadWeightResult>(std::move(result));
}
222 | |
223 | /// Translates the protocol buffer node \p op into a random access map. |
224 | template <typename T> static ArgumentDictionaryTy loadArgumentMap(const T &t) { |
225 | ArgumentDictionaryTy dict; |
226 | for (auto &arg : t.arg()) { |
227 | dict[arg.name()] = &arg; |
228 | } |
229 | return dict; |
230 | } |
231 | |
232 | static Expected<std::vector<unsigned_t>> getPads(ArgumentDictionaryTy &dict) { |
233 | if (dict.count("pad" )) { |
234 | int pad; |
235 | ASSIGN_VALUE_OR_RETURN_ERR(pad, loadInt(dict.at("pad" ))); |
236 | std::vector<unsigned_t> pads(4, pad); |
237 | return pads; |
238 | } |
239 | if (dict.count("pad_t" )) { |
240 | std::vector<unsigned_t> pads(4); |
241 | ASSIGN_VALUE_OR_RETURN_ERR(pads[0], loadInt(dict.at("pad_t" ))); |
242 | RETURN_ERR_IF_NOT(dict.count("pad_l" ), "missing pad_l" ); |
243 | ASSIGN_VALUE_OR_RETURN_ERR(pads[1], loadInt(dict.at("pad_l" ))); |
244 | RETURN_ERR_IF_NOT(dict.count("pad_b" ), "missing pad_b" ); |
245 | ASSIGN_VALUE_OR_RETURN_ERR(pads[2], loadInt(dict.at("pad_b" ))); |
246 | RETURN_ERR_IF_NOT(dict.count("pad_r" ), "missing pad_r" ); |
247 | ASSIGN_VALUE_OR_RETURN_ERR(pads[3], loadInt(dict.at("pad_r" ))); |
248 | return pads; |
249 | } |
250 | if (dict.count("pads" )) { |
251 | std::vector<unsigned_t> shape; |
252 | ASSIGN_VALUE_OR_RETURN_ERR(shape, getShape<unsigned_t>(dict["pads" ])); |
253 | return shape; |
254 | } |
255 | // Return default value 0 for pads. |
256 | return std::vector<unsigned_t>{0, 0, 0, 0}; |
257 | } |
258 | |
259 | /// Translates the "order" field of dictionary \p dict into a channel number. |
260 | static Expected<unsigned_t> getChannel(ArgumentDictionaryTy &dict) { |
261 | std::string order = "NCHW" ; // default |
262 | auto orderIt = dict.find("order" ); |
263 | if (orderIt != dict.end()) { |
264 | ASSIGN_VALUE_OR_RETURN_ERR(order, loadStr(orderIt->second)); |
265 | } |
266 | if (order == "NHWC" ) { |
267 | return 3; |
268 | } else if (order == "NCHW" ) { |
269 | return 1; |
270 | } |
271 | return MAKE_ERR("Invalid order field" ); |
272 | } |
273 | |
274 | static Expected<std::vector<unsigned_t>> getSizeHW(ArgumentDictionaryTy &dict, |
275 | const std::string &name, |
276 | unsigned_t defaultValue) { |
277 | if (dict.count(name)) { |
278 | int value; |
279 | ASSIGN_VALUE_OR_RETURN_ERR(value, loadInt(dict[name])); |
280 | std::vector<unsigned_t> result(2, value); |
281 | return result; |
282 | } |
283 | if (dict.count(name + "_h" ) && dict.count(name + "_w" )) { |
284 | std::vector<unsigned_t> result(2); |
285 | ASSIGN_VALUE_OR_RETURN_ERR(result[0], loadInt(dict[name + "_h" ])); |
286 | ASSIGN_VALUE_OR_RETURN_ERR(result[1], loadInt(dict[name + "_w" ])); |
287 | return result; |
288 | } |
289 | if (dict.count(name + "s" )) { |
290 | return getShape<unsigned_t>(dict[name + "s" ]); |
291 | } |
292 | return std::vector<unsigned_t>{defaultValue, defaultValue}; |
293 | } |
294 | |
/// Parses a caffe2 NetDef from the file \p filename. Filenames containing
/// ".pbtxt" are parsed as text-format protos; everything else is parsed as
/// a binary proto with the size limit raised to MAX_PROTO_SIZE.
Expected<caffe2::NetDef>
Caffe2ModelLoader::loadProtoFile(const std::string &filename) {
  std::ifstream ff(filename, std::ios::in | std::ios::binary);
  RETURN_ERR_IF_NOT(ff,
                    strFormat("Can't find the model or network files for %s" ,
                              filename.c_str()));
  caffe2::NetDef net;

  bool parseNet = false;
  if (filename.find(".pbtxt" ) != std::string::npos) {
    // Text-format proto: slurp the whole file into a string and parse it.
    std::string str((std::istreambuf_iterator<char>(ff)),
                    std::istreambuf_iterator<char>());
    parseNet = google::protobuf::TextFormat::ParseFromString(str, &net);
  } else {
    // Construct and configure a Coded Input Stream
    google::protobuf::io::IstreamInputStream filestr(&ff);
    google::protobuf::io::CodedInputStream codedstr(&filestr);
    // Don't warn about large file sizes.
#if GOOGLE_PROTOBUF_VERSION >= 3002000
    codedstr.SetTotalBytesLimit(MAX_PROTO_SIZE);
#else
    codedstr.SetTotalBytesLimit(MAX_PROTO_SIZE, MAX_PROTO_SIZE);
#endif
    parseNet = net.ParseFromCodedStream(&codedstr);
  }

  RETURN_ERR_IF_NOT(parseNet, "Failed to parse the network descriptor." );
  return net;
}
324 | |
325 | Expected<caffe2::NetDef> Caffe2ModelLoader::loadProto(const void *c2Model, |
326 | size_t c2ModelSize) { |
327 | google::protobuf::io::ArrayInputStream arrayStream(c2Model, c2ModelSize); |
328 | // Construct and configure a Coded Input Stream |
329 | google::protobuf::io::CodedInputStream codedStream(&arrayStream); |
330 | |
331 | // Don't warn about large file sizes. |
332 | #if GOOGLE_PROTOBUF_VERSION >= 3002000 |
333 | codedStream.SetTotalBytesLimit(MAX_PROTO_SIZE); |
334 | #else |
335 | codedStream.SetTotalBytesLimit(MAX_PROTO_SIZE, MAX_PROTO_SIZE); |
336 | #endif |
337 | caffe2::NetDef MP; |
338 | bool parseNet = MP.ParseFromCodedStream(&codedStream); |
339 | RETURN_ERR_IF_NOT(parseNet, "Failed to parse NetDef" ); |
340 | return MP; |
341 | } |
342 | |
343 | Expected<bool> Caffe2ModelLoader::getBroadcast(ArgumentDictionaryTy &dict) { |
344 | if (!dict.count("broadcast" )) { |
345 | return false; |
346 | } |
347 | int broadcast; |
348 | ASSIGN_VALUE_OR_RETURN_ERR(broadcast, loadInt(dict.at("broadcast" ))); |
349 | return broadcast == 1; |
350 | } |
351 | |
352 | bool Caffe2ModelLoader::hasMultidirectionalBroadcast( |
353 | const llvm::StringRef typeName) { |
354 | (void)typeName; |
355 | return false; |
356 | } |
357 | |
358 | const std::string Caffe2ModelLoader::opErrMsg(const caffe2::OperatorDef &op, |
359 | const std::string &errMsg) { |
360 | const std::string &opName = loadOperatorName(op); |
361 | return strFormat(" [Operator-'%s'] : %s " , opName.c_str(), errMsg.c_str()); |
362 | } |
363 | |
364 | // Caffe2 PRelu |
365 | // https://github.com/pytorch/pytorch/blob/master/caffe2/operators/prelu_op.cc |
366 | Error Caffe2ModelLoader::loadPRelu(const caffe2::OperatorDef &op, |
367 | ArgumentDictionaryTy &dict) { |
368 | const std::string &opName = loadOperatorName(op); |
369 | |
370 | NodeValue in; |
371 | ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0))); |
372 | |
373 | NodeValue slope; |
374 | ASSIGN_VALUE_OR_RETURN_ERR(slope, getNodeValueByName(op.input(1))); |
375 | |
376 | // Do broadcasting. |
377 | auto targetDim = in.dims(); |
378 | // Set the axis assuming i/p is of NCHW format. |
379 | int axis = 1; |
380 | auto *finalSlope = G_->createBroadcast(opName, slope, targetDim, axis); |
381 | auto *R = G_->createPRELU(opName, in, finalSlope); |
382 | RETURN_IF_ERR(addNodeAsOutput(op, R)); |
383 | return Error::success(); |
384 | } |
385 | |
386 | Error Caffe2ModelLoader::loadSoftmax(const caffe2::OperatorDef &op, |
387 | ArgumentDictionaryTy &dict) { |
388 | const std::string &opName = loadOperatorName(op); |
389 | |
390 | NodeValue in; |
391 | ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0))); |
392 | |
393 | RETURN_ERR_IF_NOT( |
394 | in.dims().size() >= 2, |
395 | opErrMsg(op, |
396 | strFormat( |
397 | "SoftMax input dims must be >= 2, but found input dims %zu " , |
398 | in.dims().size()))); |
399 | |
400 | // Create a constant to store labels to be used in SoftMaxGradNode. |
401 | auto *selected = G_->createSplat( |
402 | opName + ".selected" , |
403 | mod_.uniqueType(ElemKind::Int64ITy, {in.dims()[0], 1}), 0.f); |
404 | |
405 | int axis = 1; |
406 | if (dict.count("axis" )) { |
407 | ASSIGN_VALUE_OR_RETURN_ERR(axis, |
408 | loadAxis<int>(dict["axis" ], in.dims().size())); |
409 | } |
410 | |
411 | auto *FN = G_->createFlatten(opName + ".reshapeInput" , in, axis); |
412 | auto *SM = G_->createSoftMax(opName, FN, selected); |
413 | |
414 | // The output should have the same shape as the original input. |
415 | auto origInDims = in.getType()->dims(); |
416 | auto *RN = G_->createReshape(opName + ".reshapeOutput" , SM, origInDims); |
417 | RETURN_IF_ERR(addNodeAsOutput(op, RN)); |
418 | return Error::success(); |
419 | } |
420 | |
/// Loads a "Conv" or "ConvRelu" operator. Caffe2 stores conv weights as
/// CKRS; they are transposed here to Glow's CRSK (NHWC) layout. NCHW inputs
/// are transposed to NHWC before the conv and back afterwards. "ConvRelu"
/// additionally fuses a RELU after the conv.
Error Caffe2ModelLoader::loadConv(const caffe2::OperatorDef &op,
                                  ArgumentDictionaryTy &dict) {
  const std::string &opName = loadOperatorName(op);

  // Load the inputs:
  std::vector<unsigned_t> strides;
  ASSIGN_VALUE_OR_RETURN_ERR(strides, getSizeHW(dict, "stride" , 1));
  std::vector<unsigned_t> pads;
  ASSIGN_VALUE_OR_RETURN_ERR(pads, getPads(dict));
  std::vector<unsigned_t> kernels;
  ASSIGN_VALUE_OR_RETURN_ERR(kernels, getSizeHW(dict, "kernel" , 0));
  unsigned_t group = 1;
  if (dict.count("group" )) {
    ASSIGN_VALUE_OR_RETURN_ERR(group, loadInt(dict["group" ]));
  }
  std::string order = "NCHW" ;
  if (dict.count("order" )) {
    ASSIGN_VALUE_OR_RETURN_ERR(order, loadStr(dict["order" ]));
  }
  std::vector<unsigned_t> dilations;
  ASSIGN_VALUE_OR_RETURN_ERR(dilations,
                             getDilations(dict, std::vector<unsigned_t>{1, 1}));

  NodeValue in;
  ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));

  // The weight must already be a loaded constant.
  NodeValue w;
  ASSIGN_VALUE_OR_RETURN_ERR(w, getConstantByName(op.input(1)));

  // Transpose the weights to the right format. Glow expects to read the
  // weights in the format CRSK.
  // C - output_depth, R - filter_height, S - filter_width, K - input_depth.
  // Caffe2 "Conv" op always stores the weight as CKRS.
  w = G_->createTranspose(w.getNode()->getName().str() + "_NHWC" , w, NCHW2NHWC,
                          "NHWC" );

  // The structure of the conv weights is: CRSK. We take the C, which is the
  // number of filters. We use this value to calculate the size of the bias
  // if it is not specified.
  dim_t depth = w.dims()[0];

  // We expect the input to be NHWC.
  NodeValue finalIn;
  if (order == "NCHW" ) {
    finalIn = G_->createTranspose(opName, in, NCHW2NHWC)->getResult();
  } else {
    finalIn = in;
  }

  TypeRef finalInType = finalIn.getType();

  // Calculate the size and allocate the output buffer.
  ShapeNHWC idim = ShapeNHWC(finalInType->dims());
  auto outSz = calculateConvPoolOutputDims(idim.h, idim.w, kernels, strides,
                                           pads, dilations);
  std::array<dim_t, 4> outDims = {{idim.n, outSz.first, outSz.second, depth}};

  // Try to find a loaded bias constant.
  NodeValue bias(nullptr);
  if (op.input_size() > 2) {
    const auto &biasName = op.input(2);
    bias = getConstantByNameOrNull(biasName);
  }
  // Construct the bias constant if one wasn't found.
  if (!bias.getNode()) {
    // Default bias: all zeroes, one per output filter.
    TypeRef bTy = mod_.uniqueType(ElemKind::FloatTy, {depth});
    bias = G_->createSplat(opName + ".bias" , bTy, 0.f);
  }

  TypeRef outTy = mod_.uniqueType(ElemKind::FloatTy, outDims);

  Node *node = G_->createConv(opName, finalIn, w, bias, outTy, kernels, strides,
                              pads, group, dilations);
  // "ConvRelu" fuses a RELU onto the conv output.
  if (op.type() == "ConvRelu" ) {
    node = G_->createRELU(opName + ".relu" , node);
  }
  if (order == "NCHW" ) {
    // Transpose the output back.
    node = G_->createTranspose(opName, node, NHWC2NCHW);
  }
  RETURN_IF_ERR(addNodeAsOutput(op, node));
  return Error::success();
}
504 | |
/// Loads an "Int8Conv" or "Int8ConvRelu" operator. Handles both tensor-wise
/// and groupwise (channelwise) quantization; in the groupwise case the
/// per-group scales/offsets were stored as separate constants when the
/// weight was loaded. The output quantization parameters come from the
/// required "Y_scale"/"Y_zero_point" arguments.
Error Caffe2ModelLoader::loadConvQuantized(const caffe2::OperatorDef &op,
                                           ArgumentDictionaryTy &dict) {
  const std::string &opName = loadOperatorName(op);

  // Load the inputs:
  std::vector<unsigned_t> strides;
  ASSIGN_VALUE_OR_RETURN_ERR(strides, getSizeHW(dict, "stride" , 1));
  std::vector<unsigned_t> pads;
  ASSIGN_VALUE_OR_RETURN_ERR(pads, getPads(dict));
  std::vector<unsigned_t> kernels;
  ASSIGN_VALUE_OR_RETURN_ERR(kernels, getSizeHW(dict, "kernel" , 0));
  unsigned_t group = 1;
  if (dict.count("group" )) {
    ASSIGN_VALUE_OR_RETURN_ERR(group, loadInt(dict["group" ]));
  }
  std::string order = "NCHW" ;
  if (dict.count("order" )) {
    ASSIGN_VALUE_OR_RETURN_ERR(order, loadStr(dict["order" ]));
  }
  bool quantizeGroupwise = false;
  if (dict.count("quantize_groupwise" )) {
    ASSIGN_VALUE_OR_RETURN_ERR(quantizeGroupwise,
                               loadInt(dict["quantize_groupwise" ]));
  }
  std::vector<unsigned_t> dilations;
  ASSIGN_VALUE_OR_RETURN_ERR(dilations,
                             getDilations(dict, std::vector<unsigned_t>{1, 1}));

  // Group quantization only applies if there is more than one group.
  quantizeGroupwise &= group > 1;

  NodeValue in;
  ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));

  NodeValue w;
  ASSIGN_VALUE_OR_RETURN_ERR(w, getConstantByName(op.input(1)));

  // Transpose the weights to the right format. Glow expects to read the
  // weights in the format CRSK.
  // C - output_depth, R - filter_height, S - filter_width, K - input_depth.
  // For Caffe2 "Int8Conv" and "Int8ConvRelu", the weights always follows the
  // "order" arg.
  if (order != "NHWC" ) {
    w = G_->createTranspose(w.getNode()->getName().str() + "_NHWC" , w,
                            NCHW2NHWC, "NHWC" );
  }

  // The structure of the conv weights is: CRSK. We take the C, which is the
  // number of filters. We use this value to calculate the size of the bias
  // if it is not specified.
  dim_t depth = w.dims()[0];

  // We expect the input to be NHWC.
  NodeValue finalIn;
  if (order == "NCHW" ) {
    finalIn = G_->createTranspose(opName, in, NCHW2NHWC)->getResult();
  } else {
    finalIn = in;
  }

  TypeRef finalInType = finalIn.getType();

  // Calculate the size and allocate the output buffer.
  ShapeNHWC idim = ShapeNHWC(finalInType->dims());
  auto outSz = calculateConvPoolOutputDims(idim.h, idim.w, kernels, strides,
                                           pads, dilations);
  std::array<dim_t, 4> outDims = {{idim.n, outSz.first, outSz.second, depth}};

  TypeRef outTy;

  // The quantized output type requires explicit qparams on the op.
  RETURN_ERR_IF_NOT(dict.count("Y_zero_point" ),
                    opErrMsg(op,
                             "ConvQuantized "
                             "missing zero point for quantized output type" ));
  RETURN_ERR_IF_NOT(dict.count("Y_scale" ),
                    opErrMsg(op, "ConvQuantized "
                                 "missing Y_scale for quantized output type" ));

  // Try to find a loaded bias constant.
  NodeValue bias(nullptr);
  if (op.input_size() > 2) {
    const auto &biasName = op.input(2);
    bias = getConstantByNameOrNull(biasName);
  }
  // Construct the bias constant if one wasn't found.
  if (!bias.getNode()) {
    // Default zero bias with dummy qparams (scale 1.0, offset 0).
    TypeRef bTy = mod_.uniqueType(ElemKind::Int32QTy, {depth}, 1.0, 0);
    bias = G_->createSplat(opName + "_conv.bias" , bTy, 0.f);
  }

  RETURN_ERR_IF_NOT(
      bias.getType()->size() == depth,
      opErrMsg(op, strFormat("Loaded bias tensor of incorrect size %d " ,
                             int(bias.getType()->size()))));

  // Construct output type
  ASSIGN_VALUE_OR_RETURN_ERR(
      outTy, loadQuantTy(opName, ElemKind::Int8QTy, outDims, dict));

  Node *node;

  if (quantizeGroupwise) {
    // Per-group scales/offsets were stored as side constants when the
    // weight was loaded; look them up by the derived names.
    auto wScalesName = strFormat("%s_loaded_scales" , op.input(1).c_str());
    auto wOffsetsName = strFormat("%s_loaded_offsets" , op.input(1).c_str());
    Constant *wScales;
    Constant *wOffsets;
    ASSIGN_VALUE_OR_RETURN_ERR(wScales, getConstantByName(wScalesName));
    ASSIGN_VALUE_OR_RETURN_ERR(wOffsets, getConstantByName(wOffsetsName));

    // Quantize the filter automatically (only if it is float). The bias is NOT
    // quantized automatically and is left at the disposal of each Backend to
    // quantize it later using custom logic.
    node = G_->createChannelwiseQuantizedConv(
        opName, finalIn, w, bias, wScales, wOffsets, /* biasScales */ nullptr,
        /* biasOffsets */ nullptr, outTy, kernels, strides, pads, group,
        dilations, /* quantizeFilter */ true, /* quantizeBias */ false);
  } else {
    // If the bias isn't quantized for a non group quantized conv, quantize it.
    if (bias.getElementType() == ElemKind::FloatTy) {
      // Bias scale is the product of the input and weight scales, the usual
      // convention for int32 conv bias.
      int32_t biasOffset = 0;
      float biasScale = finalInType->getScale() * w.getType()->getScale();

      auto biasTy = mod_.uniqueType(ElemKind::Int32QTy, bias.dims(), biasScale,
                                    biasOffset);
      bias = G_->createQuantize(opName + "_conv.bias" , bias, biasTy);
    }

    node = G_->createConv(opName, finalIn, w, bias, outTy, kernels, strides,
                          pads, group, dilations);
  }

  // "Int8ConvRelu" fuses a RELU onto the conv output.
  if (op.type() == "Int8ConvRelu" ) {
    node = G_->createRELU(opName + ".relu" , node);
  }

  if (order == "NCHW" ) {
    // Transpose the output back.
    node = G_->createTranspose(opName, node, NHWC2NCHW);
  }
  RETURN_IF_ERR(addNodeAsOutput(op, node));
  return Error::success();
}
647 | |
/// Loads a LayerNorm operator. Normalization happens over the dims from
/// "axis" (default 1, negative values normalized against rank) to the end.
/// Optional weight/bias inputs must both be present and match the feature
/// shape; otherwise defaults of weight=1, bias=0 are created.
Error Caffe2ModelLoader::loadLayerNorm(const caffe2::OperatorDef &op,
                                       ArgumentDictionaryTy &dict) {
  const std::string &opName = loadOperatorName(op);

  NodeValue in;
  ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));

  unsigned_t axis = 1; // Caffe2 default.
  if (dict.count("axis" )) {
    ASSIGN_VALUE_OR_RETURN_ERR(axis, loadInt(dict["axis" ]));
    // Normalize a possibly-negative axis against the input rank.
    ASSIGN_VALUE_OR_RETURN_ERR(axis,
                               getPositiveAxis<int>(axis, in.dims().size()));
  }

  // Feature shape is based on the input dims, from the axis to the end.
  ShapeVector featDims;
  for (dim_t i = axis, e = in.dims().size(); i < e; ++i) {
    featDims.push_back(in.dims()[i]);
  }
  TypeRef featTy = mod_.uniqueTypeWithNewShape(in.getType(), featDims);

  NodeValue weight, bias;
  if (op.input_size() > 1) {
    RETURN_ERR_IF_NOT(op.input_size() == 3,
                      opErrMsg(op, "Must have both weight and bias" ));

    // Types are uniqued in the module, so pointer equality compares the
    // full type (shape and element kind).
    ASSIGN_VALUE_OR_RETURN_ERR(weight, getNodeValueByName(op.input(1)));
    RETURN_ERR_IF_NOT(weight.getType() == featTy,
                      opErrMsg(op, "Invalid weight shape" ));

    ASSIGN_VALUE_OR_RETURN_ERR(bias, getNodeValueByName(op.input(2)));
    RETURN_ERR_IF_NOT(bias.getType() == featTy,
                      opErrMsg(op, "Invalid bias shape" ));
  } else {
    // Caffe2 default to use weight 1 and bias 0.
    weight = G_->createSplat(opName + "_weight_ones" , featTy, 1.0)->getResult();
    bias = G_->createSplat(opName + "_bias_zeros" , featTy, 0.0)->getResult();
  }

  float eps = 0.001; // Caffe2 default.
  if (dict.count("epsilon" )) {
    ASSIGN_VALUE_OR_RETURN_ERR(eps, loadFloat(dict["epsilon" ]));
  }

  LayerNormalizationNode *node =
      G_->createLayerNormalization(opName, in.getType(), in, weight, bias, eps);

  // We only support one output for LayoutNorm. Ignoring the
  // rest of the outputs.
  RETURN_IF_ERR(addNodeAsOutput(op, node, /* numOutputs */ 1));

  return Error::success();
}
701 | |
/// Attempts to constant-fold operator \p op by evaluating it on a temporary
/// function with the Interpreter backend. Returns true if the fold succeeded
/// (the op's outputs are then registered as constants), false if the op is
/// not foldable or folding failed.
Expected<bool> Caffe2ModelLoader::foldOperator(const caffe2::OperatorDef &op) {
  const unsigned numInputs = op.input_size();
  const std::string &typeName = op.type();
  llvm::SmallVector<NodeValue, 4> inputs;
  inputs.reserve(numInputs);
  for (unsigned i = 0; i < numInputs; i++) {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(i)));
    inputs.push_back(in);
  }

  // Foldable means every input is itself constant and the op type is
  // supported by the folding machinery.
  if (!isConstantFoldable(inputs, typeName)) {
    return false;
  }

  // Create a temporary lightweight loader to construct function representing
  // current Op, and then constant fold the function using Interp backend.
  Function *tmpF = mod_.createFunction("eval_const_fold__" );
  Caffe2ModelLoader tmpLoader(*tmpF, nullptr);
  // A folding failure is reported as foldStatus == false, not as an Error;
  // the caller can then fall back to loading the op normally.
  bool foldStatus =
      !ERR_TO_BOOL(constantFoldInLoader<Caffe2ModelLoader, caffe2::OperatorDef>(
                       tmpF, tmpLoader, this, op),
                   /* log */ false);
  mod_.eraseFunction(tmpF);
  return foldStatus;
}
728 | |
/// Loads a "ConvTranspose" operator. Caffe2 stores the weights as KCRS;
/// they are transposed here (CNHW2NHWC) to Glow's CRSK layout. NCHW inputs
/// are transposed to NHWC around the op, mirroring loadConv.
Error Caffe2ModelLoader::loadConvTranspose(const caffe2::OperatorDef &op,
                                           ArgumentDictionaryTy &dict) {
  const std::string &opName = loadOperatorName(op);

  // Load the inputs:
  std::vector<unsigned_t> strides;
  ASSIGN_VALUE_OR_RETURN_ERR(strides, getSizeHW(dict, "stride" , 1));
  std::vector<unsigned_t> pads;
  ASSIGN_VALUE_OR_RETURN_ERR(pads, getPads(dict));
  std::vector<unsigned_t> kernels;
  ASSIGN_VALUE_OR_RETURN_ERR(kernels, getSizeHW(dict, "kernel" , 0));
  unsigned_t group = 1;
  if (dict.count("group" )) {
    ASSIGN_VALUE_OR_RETURN_ERR(group, loadInt(dict["group" ]));
  }
  std::string order = "NCHW" ;
  if (dict.count("order" )) {
    ASSIGN_VALUE_OR_RETURN_ERR(order, loadStr(dict["order" ]));
  }
  std::vector<unsigned_t> dilations;
  ASSIGN_VALUE_OR_RETURN_ERR(dilations,
                             getDilations(dict, std::vector<unsigned_t>{1, 1}));

  NodeValue in;
  ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));

  NodeValue weight;
  ASSIGN_VALUE_OR_RETURN_ERR(weight, getConstantByName(op.input(1)));

  // Transpose the weights to the right format. Glow expects to read the
  // weights in the format CRSK.
  // C - output_depth, R - filter_height, S - filter_width, K - input_depth.
  // Caffe2 "ConvTranspose" op always stores the weight as KCRS.
  weight = G_->createTranspose(weight.getNode()->getName().str() + "_NHWC" ,
                               weight, CNHW2NHWC, "NHWC" );

  // The structure of the conv weights is: CRSK. We take the C, which is the
  // number of filters. We use this value to calculate the size of the bias
  // if it is not specified.
  dim_t depth = weight.dims()[0];

  // We expect the input to be NHWC.
  NodeValue finalIn;
  if (order == "NCHW" ) {
    finalIn = G_->createTranspose(opName, in, NCHW2NHWC)->getResult();
  } else {
    finalIn = in;
  }

  TypeRef finalInType = finalIn.getType();

  // Calculate the size and allocate the output buffer.
  ShapeNHWC idim = ShapeNHWC(finalInType->dims());
  auto outSz = calculateConvTransposeOutputDims(idim.h, idim.w, kernels,
                                                strides, pads, dilations);
  std::array<dim_t, 4> outDims = {{idim.n, outSz.first, outSz.second, depth}};

  // Try to find a loaded bias constant.
  NodeValue bias(nullptr);
  if (op.input_size() > 2) {
    const auto &biasName = op.input(2);
    bias = getConstantByNameOrNull(biasName);
  }
  // Construct the bias constant if one wasn't found.
  if (!bias.getNode()) {
    // Default bias: all zeroes, one per output filter.
    TypeRef bTy = mod_.uniqueType(ElemKind::FloatTy, {depth});
    bias = G_->createSplat(opName + "_conv.bias" , bTy, 0.f);
  }

  TypeRef outTy = mod_.uniqueType(ElemKind::FloatTy, outDims);

  Node *node =
      G_->createConvTranspose(opName, finalIn, weight, bias, outTy, kernels,
                              strides, pads, group, dilations);

  if (order == "NCHW" ) {
    // Transpose the output back.
    node = G_->createTranspose(opName, node, NHWC2NCHW);
  }
  RETURN_IF_ERR(addNodeAsOutput(op, node));
  return Error::success();
}
811 | |
/// Loads a single Caffe2 operator \p op into the function being built.
/// First defers to CommonOperatorLoader::tryLoadCommonOperator; the long
/// typeName dispatch chain below handles the Caffe2-specific operators.
Error Caffe2ModelLoader::loadOperator(const caffe2::OperatorDef &op) {
  ArgumentDictionaryTy dict = loadArgumentMap(op);
  const std::string &typeName = op.type();
  // Record the op's original (pre-import) name in the module for later
  // lookup/bookkeeping.
  mod_.registerOriginalName(op.name());

  // Check if operator is supported in parent class, CommonOperatorLoader.
  bool loadCommonOperatorSuccess;
  ASSIGN_VALUE_OR_RETURN_ERR(loadCommonOperatorSuccess,
                             tryLoadCommonOperator(typeName, op, dict));
  if (loadCommonOperatorSuccess) {
    return Error::success();
  }
  // Name used for all Glow nodes created for this op (see loadOperatorName).
  const std::string &opName = loadOperatorName(op);
826 | if (typeName == "Gelu" ) { |
827 | NodeValue in; |
828 | ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0))); |
829 | Node *node = G_->createGelu(opName, in); |
830 | |
831 | RETURN_IF_ERR(addNodeAsOutput(op, node)); |
832 | return Error::success(); |
833 | } |
834 | |
  // These operators have dedicated loader methods; dispatch to them.
  // Note: "ConvRelu" shares loadConv (the relu fusion is handled there).
  if (typeName == "Conv" || typeName == "ConvRelu") {
    return loadConv(op, dict);
  }

  if (typeName == "Softmax") {
    return loadSoftmax(op, dict);
  }

  if (typeName == "PRelu") {
    return loadPRelu(op, dict);
  }

  if (typeName == "ConvTranspose") {
    return loadConvTranspose(op, dict);
  }

  // Quantized convolutions (with or without fused relu).
  if (typeName == "Int8Conv" || typeName == "Int8ConvRelu") {
    return loadConvQuantized(op, dict);
  }

  if (typeName == "LayerNorm") {
    return loadLayerNorm(op, dict);
  }
858 | |
859 | if (typeName == "Int8SumRelu" ) { |
860 | RETURN_ERR_IF_NOT(op.input_size() == 2, |
861 | opErrMsg(op, "Only Sum of 2 inputs is supported." )); |
862 | RETURN_ERR_IF_NOT( |
863 | dict.count("Y_zero_point" ), |
864 | opErrMsg(op, "missing zero point for quantized outout type" )); |
865 | RETURN_ERR_IF_NOT( |
866 | dict.count("Y_scale" ), |
867 | opErrMsg(op, "missing Y_scale for quantized output type" )); |
868 | NodeValue in0; |
869 | ASSIGN_VALUE_OR_RETURN_ERR(in0, getNodeValueByName(op.input(0))); |
870 | NodeValue in1; |
871 | ASSIGN_VALUE_OR_RETURN_ERR(in1, getNodeValueByName(op.input(1))); |
872 | auto outDims = in0.getType()->dims(); |
873 | TypeRef outTy; |
874 | ASSIGN_VALUE_OR_RETURN_ERR( |
875 | outTy, loadQuantTy(opName, ElemKind::Int8QTy, outDims, dict)); |
876 | auto *add = G_->createAdd(opName + ".sum" , outTy, in0, in1); |
877 | auto *relu = G_->createRELU(opName + ".relu" , add); |
878 | RETURN_IF_ERR(addNodeAsOutput(op, relu)); |
879 | return Error::success(); |
880 | } |
881 | |
882 | if (typeName == "Int8Relu" ) { |
883 | RETURN_ERR_IF_NOT(op.input_size() == 1, |
884 | opErrMsg(op, "Only one input is supported." )); |
885 | RETURN_ERR_IF_NOT( |
886 | dict.count("Y_zero_point" ), |
887 | opErrMsg(op, "missing zero point for quantized outout type" )); |
888 | RETURN_ERR_IF_NOT( |
889 | dict.count("Y_scale" ), |
890 | opErrMsg(op, "missing Y_scale for quantized output type" )); |
891 | NodeValue in; |
892 | ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0))); |
893 | auto outDims = in.getType()->dims(); |
894 | TypeRef outTy; |
895 | ASSIGN_VALUE_OR_RETURN_ERR( |
896 | outTy, loadQuantTy(opName, ElemKind::Int8QTy, outDims, dict)); |
897 | auto *relu = G_->createRELU(opName, in, outTy); |
898 | RETURN_IF_ERR(addNodeAsOutput(op, relu)); |
899 | return Error::success(); |
900 | } |
901 | |
  if (typeName == "Int8Quantize") {
    // Quantize float -> int8 using the op's Y_scale/Y_zero_point arguments.
    RETURN_ERR_IF_NOT(
        op.input_size() == 1,
        opErrMsg(op, "Glow only supports Int8Quantize with 1 input"));
    RETURN_ERR_IF_NOT(
        dict.count("Y_zero_point"),
        opErrMsg(op, "missing zero point for quantized output type"));
    RETURN_ERR_IF_NOT(
        dict.count("Y_scale"),
        opErrMsg(op, "missing Y_scale for quantized output type"));
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    // Output keeps the input's shape; only the element type changes.
    auto outDims = in.getType()->dims();
    TypeRef outTy;
    ASSIGN_VALUE_OR_RETURN_ERR(
        outTy, loadQuantTy(opName, ElemKind::Int8QTy, outDims, dict));
    Node *N = G_->createQuantize(opName, in, outTy);
    RETURN_IF_ERR(addNodeAsOutput(op, N));
    return Error::success();
  }

  if (typeName == "Int8Dequantize") {
    // Dequantize back to float; the scale/offset come from the input's type,
    // so no arguments are required here.
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    auto *node = G_->createDequantize(opName, in, ElemKind::FloatTy);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }
930 | |
  if (typeName == "MaxPool" || typeName == "AveragePool" ||
      typeName == "Int8MaxPool" || typeName == "Int8AveragePool") {
    // Shared loader for the float and int8 pooling ops. Glow pools in NHWC,
    // so an NCHW model gets wrapped in a transpose pair.
    // Load the inputs:
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    std::vector<unsigned_t> strides;
    ASSIGN_VALUE_OR_RETURN_ERR(strides, getSizeHW(dict, "stride", 1));
    std::vector<unsigned_t> kernels;
    ASSIGN_VALUE_OR_RETURN_ERR(kernels, getSizeHW(dict, "kernel", 0));
    std::vector<unsigned_t> pads;
    ASSIGN_VALUE_OR_RETURN_ERR(pads, getPads(dict));
    bool countIncludePads;
    ASSIGN_VALUE_OR_RETURN_ERR(
        countIncludePads, getCountIncludePads(dict, /* defaultValue */ true));
    std::string order = "NCHW";
    if (dict.count("order")) {
      ASSIGN_VALUE_OR_RETURN_ERR(order, loadStr(dict["order"]));
    }
    // We expect the input to be NHWC.
    NodeValue finalIn;
    if (order == "NCHW") {
      finalIn = G_->createTranspose(opName, in, NCHW2NHWC)->getResult();
    } else {
      finalIn = in;
    }

    // If 'global_pooling' is set then the operation will pool over the size
    // of the input by doing: kernels = {height, width}.
    // NOTE(review): dims()[2]/dims()[3] are read from the ORIGINAL input
    // ('in', not 'finalIn'), which are H/W only for NCHW models. For an NHWC
    // model they would be W and C instead -- confirm global_pooling is only
    // ever used together with order == "NCHW".
    if (dict.count("global_pooling")) {
      auto Ty = in.getType();
      kernels[0] = Ty->dims()[2];
      kernels[1] = Ty->dims()[3];
    }

    // Check the padding style.
    if (dict.count("legacy_pad")) {
      int mode;
      ASSIGN_VALUE_OR_RETURN_ERR(mode, loadInt(dict["legacy_pad"]));
      // Caffe1 (legacy) rounded-up and Caffe2 rounds down.
      // This style is deprecated according to caffe2's caffe2_legacy.proto
      // definition.
      if (static_cast<LegacyPaddingMode>(mode) ==
          LegacyPaddingMode::CAFFE_LEGACY_POOLING) {
        return MAKE_ERR(opErrMsg(op,
                                 "MaxPool nodes with legacy caffe padding are "
                                 "deprecated and not supported."));
      }
    }

    Node *node = nullptr;

    if (typeName == "Int8MaxPool" || typeName == "Int8AveragePool") {
      // Create the node with quantized type.
      RETURN_ERR_IF_NOT(
          dict.count("Y_zero_point"),
          opErrMsg(op, "missing zero point for quantized output type"));
      RETURN_ERR_IF_NOT(
          dict.count("Y_scale"),
          opErrMsg(op, "missing Y_scale for quantized output type"));

      TypeRef finalInType = finalIn.getType();
      ShapeNHWC idim = ShapeNHWC(finalInType->dims());
      auto outSz =
          calculateConvPoolOutputDims(idim.h, idim.w, kernels, strides, pads);
      std::array<dim_t, 4> outDims = {
          {idim.n, outSz.first, outSz.second, idim.c}};
      if (typeName == "Int8MaxPool") {
        // Int8Maxpool output quantization should be same as the input, so
        // just ignore the given params.
        node = G_->createMaxPool(opName, finalIn, kernels, strides, pads);
      } else {
        TypeRef outTy;
        ASSIGN_VALUE_OR_RETURN_ERR(
            outTy, loadQuantTy(opName, ElemKind::Int8QTy, outDims, dict));
        node = G_->createAvgPool(opName, finalIn, outTy, kernels, strides, pads,
                                 NHWC, countIncludePads);
      }
    } else if (typeName == "MaxPool") {
      node = G_->createMaxPool(opName, finalIn, kernels, strides, pads);
    } else {
      node = G_->createAvgPool(opName, finalIn, kernels, strides, pads, NHWC,
                               countIncludePads);
    }
    if (order == "NCHW") {
      // Pool nodes can have multiple results; pick the primary Result index
      // for the variant we created before transposing it back.
      unsigned resIdx = 0;
      if (llvm::isa<MaxPoolNode>(node)) {
        resIdx = MaxPoolNode::ResultIdx;
      } else if (llvm::isa<AvgPoolNode>(node)) {
        resIdx = AvgPoolNode::ResultIdx;
      } else {
        return MAKE_ERR("Expected either Max or Avg Pool.");
      }
      // Transpose the output back.
      node = G_->createTranspose(opName, node->getNthResult(resIdx), NHWC2NCHW);
    }
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }
1029 | |
  if (typeName == "SpatialBN") {
    // Batch normalization (inference form). Inputs 1-4 (scale, bias, mean,
    // variance) must be constants; "epsilon" defaults to Caffe2's 1e-5.
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    Constant *scale;
    ASSIGN_VALUE_OR_RETURN_ERR(scale, getConstantByName(op.input(1)));
    Constant *bias;
    ASSIGN_VALUE_OR_RETURN_ERR(bias, getConstantByName(op.input(2)));
    Constant *mean;
    ASSIGN_VALUE_OR_RETURN_ERR(mean, getConstantByName(op.input(3)));
    Constant *var;
    ASSIGN_VALUE_OR_RETURN_ERR(var, getConstantByName(op.input(4)));
    float epsilon = 1e-5f; // default
    auto epsilonIt = dict.find("epsilon");
    if (epsilonIt != dict.end()) {
      ASSIGN_VALUE_OR_RETURN_ERR(epsilon, loadFloat(epsilonIt->second));
    }

    // The channel index depends on the model's layout; getChannel derives it
    // from the argument dictionary.
    unsigned_t channel;
    ASSIGN_VALUE_OR_RETURN_ERR(channel, getChannel(dict));
    auto *node = G_->createBatchNormalization(
        opName, in.getType(), in, bias, scale, mean, var, channel, epsilon);

    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }
1055 | |
1056 | if (typeName == "Bucketize" ) { |
1057 | NodeValue in; |
1058 | ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0))); |
1059 | RETURN_ERR_IF_NOT( |
1060 | dict.count("boundaries" ), |
1061 | opErrMsg(op, "Bucketize: Expected a boundaries member vector" )); |
1062 | std::vector<float> boundaries; |
1063 | ASSIGN_VALUE_OR_RETURN_ERR(boundaries, getFloats(dict["boundaries" ])); |
1064 | auto *node = G_->createBucketizeNode(opName, in, boundaries); |
1065 | RETURN_IF_ERR(addNodeAsOutput(op, node)); |
1066 | return Error::success(); |
1067 | } |
1068 | |
  if (typeName == "ResizeNearest") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));

    std::string order = "NCHW";
    if (dict.count("order")) {
      ASSIGN_VALUE_OR_RETURN_ERR(order, loadStr(dict["order"]));
    }
    // We expect the input to be NHWC.
    // NOTE(review): unlike the pooling handler above, the result is NOT
    // transposed back to NCHW afterwards -- confirm consumers expect the
    // NHWC output in that case.
    NodeValue finalIn;
    if (order == "NCHW") {
      finalIn = G_->createTranspose(opName, in, NCHW2NHWC)->getResult();
    } else {
      finalIn = in;
    }

    // NOTE(review): height_scale/width_scale are read without a
    // dict.count() guard; dict[...] inserts a null Argument* when the key is
    // absent -- confirm loadFloat reports a clean error for that case.
    float heightScale;
    ASSIGN_VALUE_OR_RETURN_ERR(heightScale, loadFloat(dict["height_scale"]));
    float widthScale;
    ASSIGN_VALUE_OR_RETURN_ERR(widthScale, loadFloat(dict["width_scale"]));

    // Per-dimension scales in NHWC order: batch and channel are unscaled.
    std::vector<float> scales;
    scales.push_back(1.0f);
    scales.push_back(heightScale);
    scales.push_back(widthScale);
    scales.push_back(1.0f);

    auto *node = G_->createResizeNearest(opName, finalIn, scales);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }
1100 | |
  if (typeName == "Concat") {
    // Concatenate all inputs along one dimension. With "add_axis" set, the
    // inputs are instead stacked along a brand-new dimension inserted at the
    // chosen position (handled via concat + reshape below).
    const unsigned numInputs = op.input_size();
    llvm::SmallVector<NodeValue, 4> inputs;
    inputs.reserve(numInputs);
    for (unsigned i = 0; i < numInputs; i++) {
      NodeValue in;
      ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(i)));
      inputs.push_back(std::move(in));
    }

    // If axis exists it takes priority over channel.
    unsigned_t channel;
    if (dict.count("axis")) {
      ASSIGN_VALUE_OR_RETURN_ERR(channel, loadInt(dict["axis"]));
    } else {
      ASSIGN_VALUE_OR_RETURN_ERR(channel, getChannel(dict));
    }

    unsigned_t addAxis = 0;
    if (dict.count("add_axis")) {
      ASSIGN_VALUE_OR_RETURN_ERR(addAxis, loadInt(dict["add_axis"]));
    }

    Node *node{nullptr};

    if (addAxis) {
      // When add axis is used, this means we have to add a new dimension
      // before the axis, instead of merging on the axis.
      std::vector<dim_t> outputDims = inputs[0].dims();

      if (channel < outputDims.size()) {
        // New dim inserted before an existing one: all inputs must agree on
        // the size of the 'channel' dim; concat there, then reshape to carve
        // out the new dimension.
        unsigned i = 0;
        for (const auto &input : inputs) {
          RETURN_ERR_IF_NOT(
              outputDims[channel] == input.dims()[channel],
              opErrMsg(op,
                       strFormat("inputs need all to have the same dims for "
                                 "concat with add_axis: input 0 (%s) vs "
                                 "input %u (%s), %u vs %u, channel = %u",
                                 op.input(0).c_str(), i, op.input(i).c_str(),
                                 static_cast<unsigned>(outputDims[channel]),
                                 static_cast<unsigned>(input.dims()[channel]),
                                 channel)));
          ++i;
        }
        outputDims.insert(outputDims.begin() + channel, numInputs);
        node = G_->createConcat(opName, inputs, channel);
        node = G_->createReshape(opName, node, outputDims);
      } else if (channel == outputDims.size()) {
        // We convert inputs into 2D arrays with single columns, thus the
        // number of rows will be equal to the product of all original dims.
        // Every converted input will look like a vertical line of numbers.
        const auto flatVerticalShape = flattenCdr(inputs[0].dims(), channel);
        llvm::SmallVector<NodeValue, 4> verticalInputs;
        for (auto &input : inputs) {
          verticalInputs.push_back(G_->createReshape(
              opName, input,
              {flatVerticalShape.first, flatVerticalShape.second}));
        }

        // We glue together the vertical lines, so, the number of columns
        // becomes equal to the number of original inputs.
        node = G_->createConcat(opName, verticalInputs, 1);

        // Reshape to convert to desired shape.
        outputDims.push_back(numInputs);
        node = G_->createReshape(opName, node, outputDims);
      } else {
        return MAKE_ERR(opErrMsg(
            op, strFormat("Invalid input: channel (=%u) > number of dims (=%u)",
                          channel, static_cast<unsigned>(outputDims.size()))));
      }
    } else {
      // In normal case (i.e. when we are not adding a new dimension)
      // plain createConcat() would suffice.
      node = G_->createConcat(opName, inputs, channel);
    }

    // If we add the axis then node is a Reshape, otherwise it should be
    // Concat.
    RETURN_ERR_IF_NOT(
        llvm::isa<ConcatNode>(node) || llvm::isa<ReshapeNode>(node),
        opErrMsg(op,
                 "Internal error: Node should either be a Concat or Reshape."));
    NodeValue finalNode = llvm::isa<ConcatNode>(node)
                              ? NodeValue(node, ConcatNode::ResultIdx)
                              : NodeValue(node, ReshapeNode::ResultIdx);
    nodeValueByName_[op.output(0)] = finalNode;
    // Concat may have a second output in Caffe2 (split_info), but we don't
    // use it for inference
    return Error::success();
  }
1193 | |
1194 | if (typeName == "FC" || typeName == "FCTransposed" || typeName == "Int8FC" || |
1195 | typeName == "FbFCPacked" ) { |
1196 | RETURN_ERR_IF_NOT(op.input_size() == 3, |
1197 | "Glow only suports FC with 3 inputs" ); |
1198 | // Load the inputs: |
1199 | NodeValue in; |
1200 | ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0))); |
1201 | |
1202 | auto originalInputDims = in.getType()->dims(); |
1203 | |
1204 | size_t axis = 1; |
1205 | if (dict.count("axis" )) { |
1206 | ASSIGN_VALUE_OR_RETURN_ERR(axis, loadInt(dict["axis" ])); |
1207 | } |
1208 | |
1209 | // Load weights. |
1210 | unsigned_t axis_w = 1; |
1211 | if (dict.count("axis_w" )) { |
1212 | ASSIGN_VALUE_OR_RETURN_ERR(axis_w, loadInt(dict["axis_w" ])); |
1213 | } |
1214 | |
1215 | NodeValue W; |
1216 | if (hasConstantByName(op.input(1))) { |
1217 | ASSIGN_VALUE_OR_RETURN_ERR(W, getConstantByName(op.input(1))); |
1218 | } else { |
1219 | ASSIGN_VALUE_OR_RETURN_ERR(W, getNodeValueByName(op.input(1))); |
1220 | } |
1221 | |
1222 | // Caffe2 stores the transposed W matrix. In here we first coerce W to a |
1223 | // 2D matrix size if necessary and then transpose it back. |
1224 | auto wDims = flattenCdr(W.dims(), axis_w); |
1225 | if (W.dims().size() > 2) { |
1226 | W = G_->createReshape(W.getNode()->getName(), W, |
1227 | {wDims.first, wDims.second}); |
1228 | } |
1229 | |
1230 | if (typeName == "FC" || typeName == "Int8FC" || typeName == "FbFCPacked" ) { |
1231 | W = G_->createTranspose(W.getNode()->getName(), W, {1, 0}); |
1232 | } |
1233 | |
1234 | NodeValue B; |
1235 | if (hasConstantByName(op.input(2))) { |
1236 | ASSIGN_VALUE_OR_RETURN_ERR(B, getConstantByName(op.input(2))); |
1237 | } else { |
1238 | ASSIGN_VALUE_OR_RETURN_ERR(B, getNodeValueByName(op.input(2))); |
1239 | } |
1240 | |
1241 | Node *node = nullptr; |
1242 | if (typeName == "Int8FC" ) { |
1243 | // Create a node with quantized type. |
1244 | auto outputDims = flattenCdr(in.dims(), axis); |
1245 | TypeRef outTy; |
1246 | ASSIGN_VALUE_OR_RETURN_ERR( |
1247 | outTy, loadQuantTy(opName, ElemKind::Int8QTy, |
1248 | {outputDims.first, B.dims()[0]}, dict)); |
1249 | int dequantizeOutput = 0; |
1250 | if (dict.count("dequantize_output" )) { |
1251 | ASSIGN_VALUE_OR_RETURN_ERR(dequantizeOutput, |
1252 | loadInt(dict["dequantize_output" ])); |
1253 | } |
1254 | if (dequantizeOutput == 1) { |
1255 | node = G_->createDynamicQuantizedFullyConnected(opName, in, W, B); |
1256 | } else { |
1257 | node = G_->createFullyConnected(opName, in, W, B, outTy, axis); |
1258 | } |
1259 | } else if (typeName == "FbFCPacked" ) { |
1260 | RETURN_ERR_IF_NOT(W.getElementType() == ElemKind::Float16Ty, |
1261 | opErrMsg(op, "Expected float16 weights." )); |
1262 | auto fp16InputType = |
1263 | mod_.uniqueType(ElemKind::Float16Ty, in.getType()->dims()); |
1264 | in = G_->createConvertTo(opName + ".ConvertInput" , in, fp16InputType); |
1265 | |
1266 | auto fp16BiasType = mod_.uniqueType(ElemKind::Float16Ty, B.dims()); |
1267 | auto *fp16Bias = |
1268 | G_->createConvertTo(opName + ".ConvertBias" , B, fp16BiasType); |
1269 | |
1270 | auto outputDims = flattenCdr(in.dims(), axis); |
1271 | TypeRef OT = |
1272 | mod_.uniqueType(ElemKind::Float16Ty, {outputDims.first, B.dims()[0]}); |
1273 | auto fc = G_->createFullyConnected(opName, in, W, fp16Bias, OT, axis); |
1274 | auto outputType = |
1275 | mod_.uniqueType(ElemKind::FloatTy, fc->getResult().dims()); |
1276 | node = G_->createConvertTo(opName + ".ConvertOutput" , fc, outputType); |
1277 | } else { |
1278 | auto outputDims = flattenCdr(in.dims(), axis); |
1279 | TypeRef outputType = |
1280 | mod_.uniqueType(ElemKind::FloatTy, {outputDims.first, B.dims()[0]}); |
1281 | node = G_->createFullyConnected(opName, in, W, B, outputType, axis); |
1282 | } |
1283 | |
1284 | // If number of original input dims is greater than 2, expand the output |
1285 | // dims back with the same axis. |
1286 | if (axis != 1) { |
1287 | llvm::SmallVector<dim_t, max_tensor_dimensions> reshapeDims; |
1288 | size_t totalReshapeSize = 1; |
1289 | for (size_t i = 0; i < axis; ++i) { |
1290 | auto d = originalInputDims[i]; |
1291 | reshapeDims.push_back(d); |
1292 | totalReshapeSize *= static_cast<dim_t>(d); |
1293 | } |
1294 | |
1295 | size_t finalDim = typeName == "FCTransposed" ? wDims.second : wDims.first; |
1296 | |
1297 | reshapeDims.push_back(finalDim); |
1298 | totalReshapeSize *= finalDim; |
1299 | |
1300 | size_t totalOriginalOutputSize = node->getNthResult(0).getType()->size(); |
1301 | RETURN_ERR_IF_NOT( |
1302 | totalReshapeSize == totalOriginalOutputSize, |
1303 | opErrMsg(op, strFormat("Cannot reshape from size %lu to size %lu" , |
1304 | totalOriginalOutputSize, totalReshapeSize))); |
1305 | |
1306 | node = G_->createReshape(opName + ".fc.out" , node, reshapeDims); |
1307 | } |
1308 | |
1309 | // Save the outputs: |
1310 | RETURN_IF_ERR(addNodeAsOutput(op, node)); |
1311 | return Error::success(); |
1312 | } |
1313 | |
1314 | if (typeName == "ChannelShuffle" ) { |
1315 | NodeValue in; |
1316 | ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0))); |
1317 | |
1318 | size_t group; |
1319 | ASSIGN_VALUE_OR_RETURN_ERR(group, loadInt(dict["group" ])); |
1320 | size_t kernel; |
1321 | ASSIGN_VALUE_OR_RETURN_ERR(kernel, loadInt(dict["kernel" ])); |
1322 | |
1323 | Node *node = G_->createChannelShuffle(opName, in, group, kernel); |
1324 | RETURN_IF_ERR(addNodeAsOutput(op, node)); |
1325 | return Error::success(); |
1326 | } |
1327 | |
1328 | if (typeName == "Squeeze" ) { |
1329 | NodeValue in; |
1330 | ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0))); |
1331 | std::vector<dim_t> dims; |
1332 | ASSIGN_VALUE_OR_RETURN_ERR(dims, getShape<dim_t>(dict["dims" ])); |
1333 | Node *node = G_->createSqueeze(opName, in, dims); |
1334 | RETURN_IF_ERR(addNodeAsOutput(op, node)); |
1335 | return Error::success(); |
1336 | } |
1337 | |
1338 | if (typeName == "Log" ) { |
1339 | // Load the inputs: |
1340 | NodeValue in; |
1341 | ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0))); |
1342 | // Create the log: |
1343 | auto *R = G_->createLog(opName, in); |
1344 | RETURN_IF_ERR(addNodeAsOutput(op, R)); |
1345 | return Error::success(); |
1346 | } |
1347 | |
1348 | if (typeName == "Swish" ) { |
1349 | NodeValue in; |
1350 | ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0))); |
1351 | auto *S = G_->createSwish(opName, in); |
1352 | RETURN_IF_ERR(addNodeAsOutput(op, S)); |
1353 | return Error::success(); |
1354 | } |
1355 | |
1356 | if (typeName == "Logit" ) { |
1357 | // Load the input and (optional) epsilon clamping value: |
1358 | NodeValue input; |
1359 | ASSIGN_VALUE_OR_RETURN_ERR(input, getNodeValueByName(op.input(0))); |
1360 | auto epsIt = dict.find("eps" ); |
1361 | // default: 1e-6 (as in Caffe2) |
1362 | float eps = 1E-6f; |
1363 | if (epsIt != dict.end()) { |
1364 | ASSIGN_VALUE_OR_RETURN_ERR(eps, loadFloat(epsIt->second)); |
1365 | } |
1366 | |
1367 | auto *node = G_->createLogit(opName, input, eps); |
1368 | // Save the outputs: |
1369 | RETURN_IF_ERR(addNodeAsOutput(op, node)); |
1370 | return Error::success(); |
1371 | } |
1372 | |
1373 | if (typeName == "EQ" ) { |
1374 | NodeValue in0; |
1375 | ASSIGN_VALUE_OR_RETURN_ERR(in0, getNodeValueByName(op.input(0))); |
1376 | NodeValue in1; |
1377 | ASSIGN_VALUE_OR_RETURN_ERR(in1, getNodeValueByName(op.input(1))); |
1378 | auto *node = G_->createCmpEQ(opName, in0, in1); |
1379 | RETURN_IF_ERR(addNodeAsOutput(op, node)); |
1380 | return Error::success(); |
1381 | } |
1382 | |
1383 | if (typeName == "Tile" ) { |
1384 | NodeValue in; |
1385 | ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0))); |
1386 | unsigned_t tiles; |
1387 | ASSIGN_VALUE_OR_RETURN_ERR(tiles, loadInt(dict["tiles" ])); |
1388 | unsigned_t axis; |
1389 | ASSIGN_VALUE_OR_RETURN_ERR(axis, loadInt(dict["axis" ])); |
1390 | |
1391 | auto *node = G_->createTile(opName, in, tiles, axis); |
1392 | RETURN_IF_ERR(addNodeAsOutput(op, node)); |
1393 | return Error::success(); |
1394 | } |
1395 | |
  if (typeName == "Free") {
    // Glow frees memory automatically.
    return Error::success();
  }
  if (typeName == "StopGradient" || typeName == "ScaleGradient") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    // Currently Caffe2 importer only supports inference.
    // Gradient-related ops therefore pass their input straight through.
    RETURN_IF_ERR(addNodeAsOutput(op, in));
    return Error::success();
  }

  if (typeName == "Transpose") {
    // "axes" is the name of the permutation argument in Caffe2's Transpose.
    RETURN_IF_ERR(loadTranspose(op, dict, "axes"));
    return Error::success();
  }

  if (typeName == "NCHW2NHWC") {
    // Explicit layout-conversion op: emit the corresponding transpose.
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    auto *node = G_->createTranspose(opName, in, NCHW2NHWC);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }
1420 | |
  if (typeName == "CopyCPUToMKL" || typeName == "CopyMKLToCPU" ||
      typeName == "Copy" || typeName == "EnsureCPUOutput" ||
      typeName == "EnsureDense" || typeName == "Dropout") {
    // Glow does not support any of these ops now, so implement them as
    // no-ops. Note: Implement this as a no-op reshape because these ops may
    // have partition information, and we need a node to maintain the parent
    // Function partition it specified. This reshape will get eliminated later
    // on during graph optimizations.
    // (Dropout is a no-op here since the importer only targets inference.)
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    ReshapeNode *RN = G_->createReshape(in.getNode()->getName(), in, in.dims());
    RETURN_IF_ERR(addNodeAsOutput(op, RN));
    return Error::success();
  }
1435 | |
1436 | if (typeName == "Slice" ) { |
1437 | NodeValue data; |
1438 | ASSIGN_VALUE_OR_RETURN_ERR(data, getNodeValueByName(op.input(0))); |
1439 | |
1440 | std::vector<ssize_t> starts; |
1441 | ASSIGN_VALUE_OR_RETURN_ERR(starts, getShape<ssize_t>(dict["starts" ])); |
1442 | std::vector<ssize_t> ends; |
1443 | ASSIGN_VALUE_OR_RETURN_ERR(ends, getShape<ssize_t>(dict["ends" ])); |
1444 | |
1445 | std::vector<dim_t> newStarts, newEnds; |
1446 | RETURN_ERR_IF_NOT( |
1447 | starts.size() == ends.size(), |
1448 | opErrMsg(op, strFormat( |
1449 | "Slice starts %lu and %lu ends must be the same size." , |
1450 | starts.size(), ends.size()))); |
1451 | for (size_t i = 0; i < starts.size(); i++) { |
1452 | ssize_t newStart = starts[i]; |
1453 | if (newStart == -1) { |
1454 | newStart = data.dims()[i]; |
1455 | } |
1456 | RETURN_ERR_IF_NOT( |
1457 | newStart >= 0, |
1458 | opErrMsg(op, |
1459 | strFormat("Indices should never be negative, but found %lu " , |
1460 | newStart))); |
1461 | newStarts.push_back(newStart); |
1462 | |
1463 | ssize_t newEnd = ends[i]; |
1464 | if (newEnd == -1) { |
1465 | newEnd = data.dims()[i]; |
1466 | } |
1467 | RETURN_ERR_IF_NOT( |
1468 | newEnd >= 0, |
1469 | opErrMsg(op, |
1470 | strFormat("Indices should never be negative, but found %lu " , |
1471 | newEnd))); |
1472 | newEnds.push_back(newEnd); |
1473 | } |
1474 | |
1475 | Node *SN = G_->createSlice(opName, data, newStarts, newEnds); |
1476 | RETURN_IF_ERR(addNodeAsOutput(op, SN)); |
1477 | return Error::success(); |
1478 | } |
1479 | |
1480 | if (typeName == "Clip" ) { |
1481 | NodeValue in; |
1482 | ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0))); |
1483 | float cmin = std::numeric_limits<float>::lowest(); |
1484 | if (dict.count("min" )) { |
1485 | ASSIGN_VALUE_OR_RETURN_ERR(cmin, loadFloat(dict.find("min" )->second)); |
1486 | } |
1487 | |
1488 | float cmax = std::numeric_limits<float>::max(); |
1489 | if (dict.count("max" )) { |
1490 | ASSIGN_VALUE_OR_RETURN_ERR(cmax, loadFloat(dict.find("max" )->second)); |
1491 | } |
1492 | |
1493 | auto *node = G_->createClip(loadOperatorName(op), in, cmin, cmax); |
1494 | RETURN_IF_ERR(addNodeAsOutput(op, node)); |
1495 | return Error::success(); |
1496 | } |
1497 | |
  if (typeName == "MatMul") {
    RETURN_IF_ERR(loadMatMul(op, dict));
    return Error::success();
  }

  if (typeName == "Cast") {
    // Only identity casts are accepted: the input must already carry the
    // requested element type, and it is forwarded through unchanged.
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    int to;
    ASSIGN_VALUE_OR_RETURN_ERR(to, loadInt(dict["to"]));

    switch (to) {
    case caffe2::TensorProto_DataType_FLOAT: {
      RETURN_ERR_IF_NOT(in.getElementType() == ElemKind::FloatTy,
                        opErrMsg(op, "Can only cast float to float."));
      break;
    }
    case caffe2::TensorProto_DataType_INT32: {
      RETURN_ERR_IF_NOT(in.getElementType() == ElemKind::Int32ITy,
                        opErrMsg(op, "Can only cast int32 to int32."));
      break;
    }
    case caffe2::TensorProto_DataType_INT64: {
      RETURN_ERR_IF_NOT(in.getElementType() == ElemKind::Int64ITy,
                        opErrMsg(op, "Can only cast int64 to int64."));
      break;
    }
    default:
      return MAKE_ERR(opErrMsg(op, "Unsupported Cast type."));
    }

    // No conversion node is created -- the input is registered directly as
    // this op's output.
    RETURN_IF_ERR(addNodeAsOutput(op, in));
    return Error::success();
  }
1532 | |
1533 | if (typeName == "HalfToFloat" ) { |
1534 | NodeValue in; |
1535 | ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0))); |
1536 | auto convertedType = |
1537 | mod_.uniqueType(ElemKind::FloatTy, in.getType()->dims()); |
1538 | auto *R = G_->createConvertTo(opName + ".ConvertInput" , in, convertedType); |
1539 | RETURN_IF_ERR(addNodeAsOutput(op, R)); |
1540 | return Error::success(); |
1541 | } |
1542 | |
1543 | if (typeName == "ScatterAssign" ) { |
1544 | NodeValue data; |
1545 | ASSIGN_VALUE_OR_RETURN_ERR(data, getNodeValueByName(op.input(0))); |
1546 | NodeValue indices; |
1547 | ASSIGN_VALUE_OR_RETURN_ERR(indices, getNodeValueByName(op.input(1))); |
1548 | NodeValue slices; |
1549 | ASSIGN_VALUE_OR_RETURN_ERR(slices, getNodeValueByName(op.input(2))); |
1550 | |
1551 | assert(indices.dims().size() == 1 && "Indices should be 1-dimensional!" ); |
1552 | NodeValue indices2D = G_->createReshape(opName + ".indices.2d" , indices, |
1553 | {indices.dims()[0], 1}); |
1554 | Node *SAN = G_->createScatterData(opName, data, indices2D, slices); |
1555 | RETURN_IF_ERR(addNodeAsOutput(op, SAN)); |
1556 | return Error::success(); |
1557 | } |
1558 | |
  // Tensor-fill ops are materialized as weights (constants) via loadWeight.
  if (typeName == "ConstantFill" || typeName == "GivenTensorIntFill" ||
      typeName == "GivenTensorInt64Fill" || typeName == "GaussianFill" ||
      typeName == "UniformFill") {
    RETURN_IF_ERR(loadWeight(op));
    return Error::success();
  }

  if (typeName == "SigmoidCrossEntropyWithLogits") {
    // Inputs: (0) logits, (1) targets.
    NodeValue logits;
    ASSIGN_VALUE_OR_RETURN_ERR(logits, getNodeValueByName(op.input(0)));
    NodeValue targets;
    ASSIGN_VALUE_OR_RETURN_ERR(targets, getNodeValueByName(op.input(1)));
    Node *SCEL =
        G_->createSigmoidCrossEntropyWithLogits(opName, logits, targets);
    RETURN_IF_ERR(addNodeAsOutput(op, SCEL));
    return Error::success();
  }
1576 | |
  // ElementwiseLinear: out = X * w + b, with w/b broadcast along `axis`.
  if (typeName == "ElementwiseLinear" ) {
    NodeValue X, w, b;

    // If the axis argument does not exist in the protobuf, the default
    // value should be 1.
    unsigned axis = 1;

    ASSIGN_VALUE_OR_RETURN_ERR(X, getNodeValueByName(op.input(0)));
    ASSIGN_VALUE_OR_RETURN_ERR(w, getNodeValueByName(op.input(1)));
    ASSIGN_VALUE_OR_RETURN_ERR(b, getNodeValueByName(op.input(2)));

    if (dict.count("axis" )) {
      ASSIGN_VALUE_OR_RETURN_ERR(axis, loadInt(dict["axis" ]));
    }

    Node *EL = G_->createElementwiseLinear(opName, X, w, b, axis);
    RETURN_IF_ERR(addNodeAsOutput(op, EL));
    return Error::success();
  }

  // AveragedLoss: mean-reduce the loss input over dimension 0.
  if (typeName == "AveragedLoss" ) {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    auto *node = G_->createBatchedReduceMean(opName, in, 0);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }
1604 | |
1605 | if (typeName == "Mod" ) { |
1606 | NodeValue in; |
1607 | ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0))); |
1608 | int64_t divisor; |
1609 | ASSIGN_VALUE_OR_RETURN_ERR(divisor, loadInt(dict["divisor" ])); |
1610 | |
1611 | RETURN_ERR_IF_NOT( |
1612 | divisor >= 1, |
1613 | opErrMsg(op, |
1614 | strFormat("Divisor must not be less than 1, but found %ld " , |
1615 | divisor))); |
1616 | |
1617 | bool signFollowDivisor = false; |
1618 | if (dict.count("sign_follow_divisor" )) { |
1619 | ASSIGN_VALUE_OR_RETURN_ERR(signFollowDivisor, |
1620 | loadInt(dict["sign_follow_divisor" ])); |
1621 | } |
1622 | |
1623 | auto *node = G_->createModulo(opName, in, divisor, signFollowDivisor); |
1624 | RETURN_IF_ERR(addNodeAsOutput(op, node)); |
1625 | |
1626 | return Error::success(); |
1627 | } |
1628 | |
  // Scale: multiply the input by a scalar constant (default 1.0), lowered as
  // a Splat of the scalar followed by an elementwise Mul.
  if (typeName == "Scale" ) {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    float scale = 1.0;
    if (dict.count("scale" )) {
      ASSIGN_VALUE_OR_RETURN_ERR(scale, loadFloat(dict["scale" ]));
    }
    // NOTE(review): the splat type is always FloatTy regardless of the
    // input's element type; presumably Scale inputs are always float here —
    // confirm before relying on this for non-float inputs.
    auto scaleType = mod_.uniqueType(ElemKind::FloatTy, {in.dims()});
    auto scales = G_->createSplat(opName + ".scales" , scaleType, scale);
    Node *node = G_->createMul(opName, in, scales);

    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }
1643 | |
1644 | if (typeName == "SparseLengthsWeightedSum8BitsRowwise" || |
1645 | typeName == "SparseLengthsSum8BitsRowwise" || |
1646 | typeName == "SparseLengthsWeightedSumFused8BitRowwise" || |
1647 | typeName == "SparseLengthsSumFused8BitRowwise" || |
1648 | typeName == "SparseLengthsWeightedSumFused4BitRowwise" || |
1649 | typeName == "SparseLengthsSumFused4BitRowwise" ) { |
1650 | const bool isWeighted = |
1651 | typeName == "SparseLengthsWeightedSum8BitsRowwise" || |
1652 | typeName == "SparseLengthsWeightedSumFused8BitRowwise" || |
1653 | typeName == "SparseLengthsWeightedSumFused4BitRowwise" ; |
1654 | const bool isFused = |
1655 | typeName == "SparseLengthsWeightedSumFused8BitRowwise" || |
1656 | typeName == "SparseLengthsSumFused8BitRowwise" || |
1657 | typeName == "SparseLengthsWeightedSumFused4BitRowwise" || |
1658 | typeName == "SparseLengthsSumFused4BitRowwise" ; |
1659 | const bool is4Bit = |
1660 | typeName == "SparseLengthsWeightedSumFused4BitRowwise" || |
1661 | typeName == "SparseLengthsSumFused4BitRowwise" ; |
1662 | // If weighted, then the weights are the second input and so we need to |
1663 | // shift indices/lengths/scalesBiases. |
1664 | size_t indicesIdx = 1; |
1665 | size_t lengthsIdx = 2; |
1666 | size_t scalesBiasesIdx = 3; |
1667 | if (isWeighted) { |
1668 | indicesIdx++; |
1669 | lengthsIdx++; |
1670 | scalesBiasesIdx++; |
1671 | } |
1672 | |
1673 | NodeValue data; |
1674 | ASSIGN_VALUE_OR_RETURN_ERR(data, getNodeValueByName(op.input(0))); |
1675 | NodeValue weights; |
1676 | if (isWeighted) { |
1677 | ASSIGN_VALUE_OR_RETURN_ERR(weights, getNodeValueByName(op.input(1))); |
1678 | } |
1679 | NodeValue indices; |
1680 | ASSIGN_VALUE_OR_RETURN_ERR(indices, |
1681 | getNodeValueByName(op.input(indicesIdx))); |
1682 | NodeValue lengths; |
1683 | ASSIGN_VALUE_OR_RETURN_ERR(lengths, |
1684 | getNodeValueByName(op.input(lengthsIdx))); |
1685 | Storage *dataS = llvm::dyn_cast<Storage>(data); |
1686 | |
1687 | const dim_t numRows = data.dims()[0]; |
1688 | |
1689 | // Make sure all the shapes make sense. |
1690 | RETURN_ERR_IF_NOT(lengths.dims().size() == 1, |
1691 | opErrMsg(op, "lengths must be a vector." )); |
1692 | RETURN_ERR_IF_NOT(indices.dims().size() == 1, |
1693 | opErrMsg(op, "indices must be a vector." )); |
1694 | |
1695 | LengthsMode lengthsMode; |
1696 | ASSIGN_VALUE_OR_RETURN_ERR(lengthsMode, getLengthsMode(dict)); |
1697 | |
1698 | float avgLength; |
1699 | ASSIGN_VALUE_OR_RETURN_ERR(avgLength, getAvgLength(dict)); |
1700 | |
1701 | Node *node; |
1702 | if (isFused) { |
1703 | RETURN_IF_ERR(setFusedTy(dataS, is4Bit ? ElemKind::UInt4FusedFP16QTy |
1704 | : ElemKind::UInt8FusedQTy)); |
1705 | |
1706 | // No other work to do, since the data is already loaded fused, so just |
1707 | // create the new node with its inputs. |
1708 | if (isWeighted) { |
1709 | node = G_->createFusedRowwiseQuantizedSparseLengthsWeightedSum( |
1710 | opName, dataS, weights, indices, lengths, |
1711 | /* useFP16Accumulation */ false, lengthsMode, avgLength); |
1712 | } else { |
1713 | node = G_->createFusedRowwiseQuantizedSparseLengthsSum( |
1714 | opName, dataS, indices, lengths, /* useFP16Accumulation */ false, |
1715 | lengthsMode, avgLength); |
1716 | } |
1717 | |
1718 | if (is4Bit) { |
1719 | node = G_->createConvertTo(opName, node, ElemKind::FloatTy); |
1720 | } |
1721 | } else { |
1722 | NodeValue scalesBiases; |
1723 | ASSIGN_VALUE_OR_RETURN_ERR(scalesBiases, |
1724 | getNodeValueByName(op.input(scalesBiasesIdx))); |
1725 | |
1726 | Constant *scalesBiasesC = llvm::dyn_cast<Constant>(scalesBiases); |
1727 | RETURN_ERR_IF_NOT(scalesBiasesC, |
1728 | opErrMsg(op, "scales_biases must be Constant." )); |
1729 | RETURN_ERR_IF_NOT(scalesBiases.dims().size() == 2, |
1730 | opErrMsg(op, "scale_bias has to be a matrix." )); |
1731 | RETURN_ERR_IF_NOT( |
1732 | scalesBiases.dims()[0] == numRows, |
1733 | opErrMsg( |
1734 | op, |
1735 | strFormat("scale_bias must have the same number of rows as data, " |
1736 | "but found scale_bias %d and rows %d " , |
1737 | int(scalesBiases.dims()[0]), int(numRows)))); |
1738 | RETURN_ERR_IF_NOT( |
1739 | scalesBiases.dims()[1] == 2, |
1740 | opErrMsg(op, |
1741 | strFormat("Second dim of scale_bias has to be equal to 2 " |
1742 | "but found %d " , |
1743 | int(scalesBiases.dims()[1])))); |
1744 | |
1745 | // Now strip out the scales and biases into their own tensors. |
1746 | NodeValue sliceScales = |
1747 | G_->createSlice(scalesBiasesC->getName().str() + "_scale" , |
1748 | scalesBiasesC, {0, 0}, {numRows, 1}); |
1749 | NodeValue sliceBiases = |
1750 | G_->createSlice(scalesBiasesC->getName().str() + "_bias" , |
1751 | scalesBiasesC, {0, 1}, {numRows, 2}); |
1752 | sliceScales = |
1753 | G_->createReshape(sliceScales.getNode()->getName().str() + "_1D" , |
1754 | sliceScales, {numRows}); |
1755 | sliceBiases = |
1756 | G_->createReshape(sliceBiases.getNode()->getName().str() + "_1D" , |
1757 | sliceBiases, {numRows}); |
1758 | |
1759 | // Now create the actual node. |
1760 | if (isWeighted) { |
1761 | node = G_->createRowwiseQuantizedSparseLengthsWeightedSum( |
1762 | opName, dataS, sliceScales, sliceBiases, weights, indices, lengths, |
1763 | /* precision */ ElemKind::FloatTy, |
1764 | /* useFP16Accumulation */ false, lengthsMode, avgLength); |
1765 | } else { |
1766 | node = G_->createRowwiseQuantizedSparseLengthsSum( |
1767 | opName, dataS, sliceScales, sliceBiases, indices, lengths, |
1768 | /* precision */ ElemKind::FloatTy, |
1769 | /* useFP16Accumulation */ false, lengthsMode, avgLength); |
1770 | } |
1771 | } |
1772 | |
1773 | RETURN_IF_ERR(addNodeAsOutput(op, node)); |
1774 | return Error::success(); |
1775 | } |
1776 | |
  // LengthsRangeFill: for lengths [l0, l1, ...] produce the concatenation of
  // ranges [0..l0), [0..l1), ... Glow needs a static upper bound on the
  // output size, passed via the required "maxOutputSize" argument.
  if (typeName == "LengthsRangeFill" ) {
    NodeValue lengths;
    ASSIGN_VALUE_OR_RETURN_ERR(lengths, getNodeValueByName(op.input(0)));
    RETURN_ERR_IF_NOT(lengths.dims().size() == 1,
                      opErrMsg(op, "lengths must be a 1D vector." ));

    auto maxOutputSizeIt = dict.find("maxOutputSize" );
    RETURN_ERR_IF_NOT(
        maxOutputSizeIt != dict.end(),
        opErrMsg(op, "Require maxOutputSize when loading LengthsRangeFill." ));
    unsigned_t maxOutputSize;
    ASSIGN_VALUE_OR_RETURN_ERR(maxOutputSize, loadInt(maxOutputSizeIt->second));

    auto *LRF = G_->createLengthsRangeFill(opName, lengths, maxOutputSize);
    RETURN_IF_ERR(addNodeAsOutput(op, LRF));

    return Error::success();
  }

  // TODO: add checks for number of inputs and argument values
  // ReduceBackSum: sum-reduce over the innermost (last) dimension.
  if (typeName == "ReduceBackSum" ) {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    RETURN_ERR_IF_NOT(in.dims().size() >= 2,
                      opErrMsg(op, "Input should be at least 2D." ));
    Node *node = G_->createBatchedReduceAdd(opName, in, in.dims().size() - 1);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }
1806 | |
  // RMSNorm(X, gamma, beta): root-mean-square normalization over a 2D input.
  if (typeName == "RMSNorm" ) {
    NodeValue X, gamma, beta;
    ASSIGN_VALUE_OR_RETURN_ERR(X, getNodeValueByName(op.input(0)));
    RETURN_ERR_IF_NOT(X.dims().size() == 2,
                      opErrMsg(op, "X should be a 2D tensor." ));
    ASSIGN_VALUE_OR_RETURN_ERR(gamma, getNodeValueByName(op.input(1)));
    RETURN_ERR_IF_NOT(gamma.dims().size() == 1,
                      opErrMsg(op, "gamma should be a 1D tensor." ));
    ASSIGN_VALUE_OR_RETURN_ERR(beta, getNodeValueByName(op.input(2)));
    RETURN_ERR_IF_NOT(beta.dims().size() == 1,
                      opErrMsg(op, "beta should be a 1D tensor." ));

    // Optional numerical-stability epsilon; defaults to 0.
    float epsilon = .0f;
    if (dict.count("eps" )) {
      ASSIGN_VALUE_OR_RETURN_ERR(epsilon, loadFloat(dict["eps" ]));
    }

    // createRMSNorm yields multiple results; the first two are registered as
    // this op's outputs. NOTE(review): op.output(1) is accessed
    // unconditionally — assumes the model always declares two outputs here;
    // confirm against the operator schema.
    auto nodes = G_->createRMSNorm(opName, X, gamma, beta, epsilon);
    nodeValueByName_[op.output(0)] = nodes[0];
    nodeValueByName_[op.output(1)] = nodes[1];
    return Error::success();
  }
1829 | |
  // Mean: elementwise average of N equally-shaped inputs.
  if (typeName == "Mean" ) {
    const unsigned numInputs = op.input_size();
    RETURN_ERR_IF_NOT(numInputs > 0,
                      opErrMsg(op, "Expect at least one input." ));

    std::vector<NodeValue> inputs;
    inputs.reserve(numInputs);
    for (unsigned i = 0; i < numInputs; i++) {
      NodeValue in;
      ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(i)));
      inputs.push_back(std::move(in));
    }

    // Check that all inputs have the same shape
    const auto shape = inputs[0].dims();
    for (unsigned i = 1; i < numInputs; i++) {
      RETURN_ERR_IF_NOT(
          shape == inputs[i].dims(),
          opErrMsg(op,
                   "All inputs should have the same shape, violating input " +
                       op.input(i)));
    }

    // Mean of a single input is the input itself — pass it through.
    if (numInputs == 1) {
      RETURN_IF_ERR(addNodeAsOutput(op, inputs[0]));
      return Error::success();
    }

    // Stack the inputs on a new leading axis, then average over it:
    // concat -> reshape to {numInputs, shape...} -> reduce-mean on dim 0.
    Node *node = G_->createConcat(opName + ".concat" , inputs, 0);

    std::vector<dim_t> newShape{numInputs};
    newShape.insert(newShape.end(), shape.begin(), shape.end());
    node = G_->createReshape(opName + ".reshape" , node, newShape);

    node = G_->createBatchedReduceMean(opName + ".reduceMean" , node, 0);

    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  // Negative: elementwise negation, delegated to the shared loader helper.
  if (typeName == "Negative" ) {
    RETURN_IF_ERR(loadNeg(op, dict));
    return Error::success();
  }
1874 | |
  // LpNorm: reduces the whole input to one value — sum(|x|) for p=1 or
  // sum(x^2) for p=2 (no square root is taken for p=2).
  if (typeName == "LpNorm" ) {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));

    // p defaults to 2; only the L1/L2 forms are supported.
    int p = 2;
    if (dict.count("p" )) {
      ASSIGN_VALUE_OR_RETURN_ERR(p, loadInt(dict["p" ]));
      RETURN_ERR_IF_NOT(p == 1 || p == 2,
                        opErrMsg(op, "p should be either 1 or 2." ));
    }
    bool average = false;
    if (dict.count("average" )) {
      ASSIGN_VALUE_OR_RETURN_ERR(average, loadInt(dict["average" ]));
    }
    RETURN_ERR_IF_NOT(!average, opErrMsg(op, "average is not supported." ));

    Node *node = nullptr;
    if (p == 1) {
      node = G_->createAbs(opName, in);
    } else {
      node = G_->createPow(opName, in, 2);
    }

    // Collapse all dims into one, then reduce-add down to a 1-element result.
    // NOTE(review): flattenCdr is called with the full rank as the split
    // point — presumably .first is then the total element count; confirm.
    const auto dims1D = flattenCdr(in.dims(), in.dims().size());
    node = G_->createReshape(opName + ".reshape1D" , node, dims1D.first);

    auto outputType = mod_.uniqueType(in.getElementType(), {1});
    node = G_->createBatchedReduceAdd(opName + ".sum" , outputType, node, 0);

    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }
1907 | |
  // ArgMin: index of the minimum along `axis` (default 0), optionally
  // keeping the reduced dimension (default true).
  if (typeName == "ArgMin" ) {
    NodeValue input;
    ASSIGN_VALUE_OR_RETURN_ERR(input, getNodeValueByName(op.input(0)));
    int axis = 0;
    if (dict.count("axis" )) {
      ASSIGN_VALUE_OR_RETURN_ERR(axis, loadInt(dict["axis" ]));
    }
    bool keepDims = true;
    if (dict.count("keepdims" )) {
      ASSIGN_VALUE_OR_RETURN_ERR(keepDims, loadInt(dict.at("keepdims" )));
    }

    auto node = G_->createArgMin(opName, input, axis, keepDims);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  // Sign: maps each element to -1 / 0 / +1, built from two comparisons and
  // two selects against splatted constants.
  if (typeName == "Sign" ) {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));

    Node *zeroes = G_->createSplat(opName + ".zeroes" , in.getType(), 0.f);

    // isPos: 0 < x; isNeg: x < 0.
    Node *isPos = G_->createCmpLT(opName + ".isPos" , zeroes, in);
    Node *isNeg = G_->createCmpLT(opName + ".isNeg" , in, zeroes);

    Node *posOnes = G_->createSplat(opName + ".posOnes" , in.getType(), 1);
    Node *negOnes = G_->createSplat(opName + ".negOnes" , in.getType(), -1);

    // Start from zeros, write +1 where positive, then -1 where negative.
    Node *node = G_->createSelect(opName + ".fillPos" , isPos, posOnes, zeroes);
    node = G_->createSelect(opName + ".fillNeg" , isNeg, negOnes, node);

    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }
1943 | |
1944 | if (typeName == "Softplus" ) { |
1945 | NodeValue in; |
1946 | ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0))); |
1947 | |
1948 | Node *node = G_->createSoftPlus(opName, in); |
1949 | |
1950 | RETURN_IF_ERR(addNodeAsOutput(op, node)); |
1951 | return Error::success(); |
1952 | } |
1953 | |
1954 | if (typeName == "TopK" ) { |
1955 | NodeValue in; |
1956 | ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0))); |
1957 | RETURN_ERR_IF_NOT( |
1958 | op.input_size() <= 2, |
1959 | opErrMsg( |
1960 | op, |
1961 | strFormat( |
1962 | "TopK: Maximum number of inputs is 2, but found input size %d " , |
1963 | op.input_size()))); |
1964 | unsigned_t k = 0; |
1965 | if (op.input_size() > 1) { |
1966 | Constant *kConst = getConstantByNameOrNull(op.input(1)); |
1967 | RETURN_ERR_IF_NOT( |
1968 | kConst, |
1969 | opErrMsg(op, "TopK: Non-constant k is not supported by Glow." )); |
1970 | RETURN_ERR_IF_NOT( |
1971 | kConst->getElementType() == ElemKind::Int64ITy, |
1972 | opErrMsg(op, strFormat( |
1973 | "TopK: k input must be of type Int64, but found " |
1974 | "input type '%s' " , |
1975 | kConst->getType()->getElementName().str().c_str()))); |
1976 | auto constH = kConst->getPayload().getHandle<int64_t>(); |
1977 | k = constH.at({0}); |
1978 | } else { |
1979 | ASSIGN_VALUE_OR_RETURN_ERR(k, loadInt(dict["k" ])); |
1980 | } |
1981 | |
1982 | int lastDim = in.dims().size() - 1; |
1983 | int axis = lastDim; |
1984 | if (dict.count("axis" )) { |
1985 | ASSIGN_VALUE_OR_RETURN_ERR(axis, |
1986 | loadAxis<int>(dict["axis" ], in.dims().size())); |
1987 | } |
1988 | |
1989 | RETURN_ERR_IF_NOT( |
1990 | axis == lastDim, |
1991 | opErrMsg( |
1992 | op, |
1993 | strFormat( |
1994 | "TopK: Currently only support axis %d being last dimension %d " , |
1995 | axis, lastDim))); |
1996 | |
1997 | TopKNode *R = G_->createTopK(opName, in, k, ElemKind::Int32ITy); |
1998 | RETURN_IF_ERR(addNodeAsOutput(op, R)); |
1999 | return Error::success(); |
2000 | } |
2001 | |
  // FillExamplesWithIndicator(data, indicator): scatters the rows of `data`
  // into a zero-filled output whose first dimension matches `indicator`,
  // placing row j of data at the position of the j-th nonzero indicator.
  if (typeName == "FillExamplesWithIndicator" ) {
    // Support FillExamplesWithIndicator
    NodeValue data;
    ASSIGN_VALUE_OR_RETURN_ERR(data, getNodeValueByName(op.input(0)));
    NodeValue indicator;
    ASSIGN_VALUE_OR_RETURN_ERR(indicator, getNodeValueByName(op.input(1)));
    // Validating input types and shapes
    RETURN_ERR_IF_NOT(
        indicator.getElementType() == ElemKind::Int32ITy ||
            indicator.getElementType() == ElemKind::Int64ITy,
        opErrMsg(op, "Indicator should be of int32 or int64 type." ));
    RETURN_ERR_IF_NOT(indicator.dims().size() == 1,
                      opErrMsg(op, "Indicator should be 1D tensor." ));
    // Work in 2D: collapse data's trailing dims, scatter, then restore the
    // full output shape {indicator_len, data_dims[1:]...}.
    dim_t dataReshapeDim = flattenCdr(data.dims()).second;
    ShapeVector outDims{indicator.dims()[0]};
    outDims.insert(outDims.end(), data.dims().begin() + 1, data.dims().end());
    auto outTy2D = mod_.uniqueTypeWithNewShape(
        data.getType(), {indicator.dims()[0], dataReshapeDim});

    auto data2D = G_->createReshape(opName + ".data2D" , data,
                                    {data.dims()[0], dataReshapeDim});
    if (indicator.getElementType() == ElemKind::Int64ITy) {
      indicator = G_->createConvertTo(opName + ".int64ToInt32" , indicator,
                                      ElemKind::Int32ITy);
    }
    // Select only takes boolean indicators, and converting from int to bool
    // must go from int -> float -> bool. Due to fp16 clipping, since only
    // int32 -> fp16 conversions are available, there is an initial conversion
    // from int64 to int32 if necessary.
    auto indicatorFloat = G_->createConvertTo(opName + ".intToFloat" , indicator,
                                              ElemKind::FloatTy);
    auto indicatorBool = G_->createConvertTo(opName + ".floatToBool" ,
                                             indicatorFloat, ElemKind::BoolTy);
    auto nzIndices = G_->createNonZero(opName + ".nonzero" , indicatorBool);

    // fixNonZero works around the NonZero boundary-sentinel issue (see the
    // FIXME at the top of this file).
    auto nzIndicesFixed = fixNonZero(G_, mod_, opName, nzIndices);
    auto nonZeroCount = data.dims()[0];
    RETURN_ERR_IF_NOT(nonZeroCount <= nzIndicesFixed->getNthResult(0).dims()[0],
                      opErrMsg(op,
                               "The number of "
                               "non-zero elements in the indicator must be at "
                               "least that of the first dimension of data" ));

    // Keep exactly one target row index per data row.
    auto indices = G_->createSlice(opName + ".indices" , nzIndicesFixed, {0, 0},
                                   {data.dims()[0], 1});

    auto zeros = G_->createSplat(opName + ".zeros" , outTy2D, 0);

    auto res2D = G_->createScatterData(opName + ".scatterData" , zeros, indices,
                                       data2D, true);
    auto node = G_->createReshape(opName + ".result" , res2D, outDims);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }
2056 | |
  // BatchSparseToDense(lengths, indices, values): densifies a batched sparse
  // representation. Restricted here to dense_last_dim == 1, i.e. an output
  // of shape {num_batches, 1} where each nonempty batch contributes one
  // value and empty batches get `default_value`.
  if (typeName == "BatchSparseToDense" ) {
    // Support BatchSparseToDense for output second dim = 1 only
    NodeValue lengths;
    ASSIGN_VALUE_OR_RETURN_ERR(lengths, getNodeValueByName(op.input(0)));
    NodeValue indices;
    ASSIGN_VALUE_OR_RETURN_ERR(indices, getNodeValueByName(op.input(1)));
    NodeValue values;
    ASSIGN_VALUE_OR_RETURN_ERR(values, getNodeValueByName(op.input(2)));

    dim_t denseLastDim = 1;
    if (dict.count("dense_last_dim" )) {
      ASSIGN_VALUE_OR_RETURN_ERR(denseLastDim,
                                 loadInt(dict.at("dense_last_dim" )));
    }

    RETURN_ERR_IF_NOT(
        denseLastDim == 1,
        opErrMsg(op, "Only output second dimension = 1 supported" ));
    // Validating input types and shapes
    RETURN_ERR_IF_NOT(
        lengths.getElementType() == ElemKind::Int32ITy ||
            lengths.getElementType() == ElemKind::Int64ITy,
        opErrMsg(op, "Lengths should be of int32 or int64 type." ));
    RETURN_ERR_IF_NOT(lengths.dims().size() == 1,
                      opErrMsg(op, "Lengths should be 1D tensor." ));
    RETURN_ERR_IF_NOT(
        indices.getElementType() == ElemKind::Int32ITy ||
            indices.getElementType() == ElemKind::Int64ITy,
        opErrMsg(op, "Indices should be of int32 or int64 type." ));
    RETURN_ERR_IF_NOT(indices.dims().size() == 1,
                      opErrMsg(op, "Indices should be 1D tensor." ));
    RETURN_ERR_IF_NOT(values.getElementType() == ElemKind::FloatTy,
                      opErrMsg(op, "Values should be of float type." ));
    RETURN_ERR_IF_NOT(
        indices.dims()[0] == values.dims()[0],
        opErrMsg(op, "There should be the same number of values as indices." ));

    float defaultValue = 0.0;
    if (dict.count("default_value" )) {
      ASSIGN_VALUE_OR_RETURN_ERR(defaultValue,
                                 loadFloat(dict.at("default_value" )));
    }
    // Select only takes boolean indicators, and converting from int to bool
    // must go from int -> float -> bool. Due to fp16 clipping, since only
    // int32 -> fp16 conversions are available, there is an initial conversion
    // from int64 to int32 if necessary.
    if (lengths.getElementType() == ElemKind::Int64ITy) {
      lengths = G_->createConvertTo(opName + ".int64ToInt32" , lengths,
                                    ElemKind::Int32ITy);
    }
    auto lengthsIntToFloat =
        G_->createConvertTo(opName + ".intToFloat" , lengths, ElemKind::FloatTy);
    auto lengthsFloatToBool = G_->createConvertTo(
        opName + ".floatToBool" , lengthsIntToFloat, ElemKind::BoolTy);
    // Positions of batches that have at least one value.
    auto nonZeroIndices =
        G_->createNonZero(opName + ".nonzero" , lengthsFloatToBool);
    // fixNonZero works around the NonZero boundary-sentinel issue (see the
    // FIXME at the top of this file).
    auto nonZeroIndicesFixed = fixNonZero(G_, mod_, opName, nonZeroIndices);
    auto numIndices = indices.dims()[0];
    auto indicesSliced = G_->createSlice(
        opName + ".indicesSlice" , nonZeroIndicesFixed, {0, 0}, {numIndices, 1});

    // Start from a default-filled {num_batches, 1} tensor and scatter the
    // sparse values into it at the nonempty batch positions.
    ShapeVector outDims{lengths.dims()[0], 1};
    auto dataTy = mod_.uniqueTypeWithNewShape(values.getType(), outDims);
    auto data = G_->createSplat(opName + ".data" , dataTy, defaultValue);
    auto values2D =
        G_->createReshape(opName + ".reshape" , values, {numIndices, 1});
    auto scatterData = G_->createScatterData(opName + ".scatterData" , data,
                                             indicesSliced, values2D, true);

    RETURN_IF_ERR(addNodeAsOutput(op, scatterData));
    return Error::success();
  }
2129 | |
  // SparseLabelSplit(lengths, indices, values): partitions the sparse values
  // by label into num_labels (values, example-id) output pairs, optionally
  // followed by a gradient-offset map as the last output.
  if (typeName == "SparseLabelSplit" ) {
    NodeValue lengths;
    ASSIGN_VALUE_OR_RETURN_ERR(lengths, getNodeValueByName(op.input(0)));
    NodeValue indices;
    ASSIGN_VALUE_OR_RETURN_ERR(indices, getNodeValueByName(op.input(1)));
    NodeValue values;
    ASSIGN_VALUE_OR_RETURN_ERR(values, getNodeValueByName(op.input(2)));

    // num_labels is required — it determines how many outputs this op has.
    dim_t numLabels = 0;
    RETURN_ERR_IF_NOT(dict.count("num_labels" ),
                      opErrMsg(op, "num_labels was not provided." ));
    ASSIGN_VALUE_OR_RETURN_ERR(numLabels, loadInt(dict.at("num_labels" )));

    bool keepGradientOffsetMap = false;
    if (dict.count("keep_gradient_offset_map" )) {
      ASSIGN_VALUE_OR_RETURN_ERR(keepGradientOffsetMap,
                                 loadInt(dict.at("keep_gradient_offset_map" )));
    }

    // Validating input types and shapes
    RETURN_ERR_IF_NOT(lengths.getElementType() == ElemKind::Int32ITy,
                      opErrMsg(op, "Lengths should be of int32 type." ));
    RETURN_ERR_IF_NOT(lengths.dims().size() == 1 || lengths.dims().size() == 2,
                      opErrMsg(op, "Lengths should be 1D or 2D tensor." ));
    RETURN_ERR_IF_NOT(indices.getElementType() == ElemKind::Int64ITy,
                      opErrMsg(op, "Indices should be of int64 type." ));
    RETURN_ERR_IF_NOT(indices.dims().size() == 1 || indices.dims().size() == 2,
                      opErrMsg(op, "Indices should be 1D or 2D tensor." ));
    RETURN_ERR_IF_NOT(values.getElementType() == ElemKind::FloatTy,
                      opErrMsg(op, "Values should be of float type." ));
    RETURN_ERR_IF_NOT(values.dims().size() == 1 || values.dims().size() == 2,
                      opErrMsg(op, "Values should be 1D or 2D tensor." ));
    RETURN_ERR_IF_NOT(
        indices.dims() == values.dims(),
        opErrMsg(op, "Indices and values should have the same shape." ));

    // Optional conversion from 2D to 1D inputs
    // (a trailing unit dimension {N, 1} is squeezed to {N}).
    if (lengths.dims().size() == 2) {
      RETURN_ERR_IF_NOT(
          lengths.dims()[1] == 1,
          opErrMsg(op, "Second dimension should be 1 in lengths." ));
      lengths = G_->createReshape(opName + ".lengths1D" , lengths,
                                  {lengths.dims()[0]});
    }
    if (indices.dims().size() == 2) {
      RETURN_ERR_IF_NOT(
          indices.dims()[1] == 1,
          opErrMsg(op, "Second dimension should be 1 in indices." ));
      indices = G_->createReshape(opName + ".indices1D" , indices,
                                  {indices.dims()[0]});
    }
    if (values.dims().size() == 2) {
      RETURN_ERR_IF_NOT(
          values.dims()[1] == 1,
          opErrMsg(op, "Second dimension should be 1 in values." ));
      values =
          G_->createReshape(opName + ".values1D" , values, {values.dims()[0]});
    }

    SparseLabelSplitNode *node =
        G_->createSparseLabelSplit(opName, lengths, indices, values, numLabels);

    // Split the packed per-label results into one slice per label.
    std::vector<SliceNode *> labelValueSlices;
    G_->createSplit(opName + ".splitLabelValues" ,
                    node->getNthResult(SparseLabelSplitNode::LabelValuesIdx),
                    numLabels, 0, {}, labelValueSlices);

    std::vector<SliceNode *> exampleIdSlices;
    G_->createSplit(opName + ".splitExampleIds" ,
                    node->getNthResult(SparseLabelSplitNode::ExampleIdsIdx),
                    numLabels, 0, {}, exampleIdSlices);

    // Each label receives the same number of items.
    const auto numItems = indices.dims()[0] / numLabels;

    std::vector<Node *> labelValues;
    for (auto slice : labelValueSlices) {
      labelValues.push_back(
          G_->createReshape(opName + ".reshapeLabelValue" , slice, {numItems}));
    }

    std::vector<Node *> exampleIds;
    for (auto slice : exampleIdSlices) {
      exampleIds.push_back(
          G_->createReshape(opName + ".reshapeExamplId" , slice, {numItems}));
    }

    // Output layout: numLabels value tensors, then numLabels example-id
    // tensors, then (optionally) the gradient offset map.
    for (dim_t i = 0; i < numLabels; ++i) {
      nodeValueByName_[op.output(i)] = labelValues[i];
    }
    for (dim_t i = 0; i < numLabels; ++i) {
      nodeValueByName_[op.output(numLabels + i)] = exampleIds[i];
    }
    if (keepGradientOffsetMap) {
      nodeValueByName_[op.output(2 * numLabels)] =
          node->getNthResult(SparseLabelSplitNode::GradientOffsetMapIdx);
    }
    return Error::success();
  }
2228 | |
2229 | if (typeName == "Log1p" ) { |
2230 | NodeValue in; |
2231 | ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0))); |
2232 | |
2233 | Node *ones = G_->createSplat(opName + ".ones" , in.getType(), 1.0f); |
2234 | Node *add = G_->createAdd(opName + ".add" , in, ones); |
2235 | Node *node = G_->createLog(opName + ".log" , add); |
2236 | |
2237 | RETURN_IF_ERR(addNodeAsOutput(op, node)); |
2238 | return Error::success(); |
2239 | } |
2240 | |
  // ReduceBackMean: mean-reduce over the innermost (last) dimension; only a
  // single reduced dimension is supported.
  if (typeName == "ReduceBackMean" ) {
    const unsigned numInputs = op.input_size();
    RETURN_ERR_IF_NOT(numInputs == 1,
                      opErrMsg(op, "Only single input is supported." ));

    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    RETURN_ERR_IF_NOT(in.dims().size() >= 2,
                      opErrMsg(op, "Input should be at least 2D." ));

    int numReduceDim = 1;
    if (dict.count("num_reduce_dim" )) {
      ASSIGN_VALUE_OR_RETURN_ERR(numReduceDim, loadInt(dict["num_reduce_dim" ]));
    }
    // TODO: check maybe we can support more dimensions to be reduced
    RETURN_ERR_IF_NOT(numReduceDim == 1,
                      opErrMsg(op, "Supporting reducing only one dimension." ));

    Node *node = G_->createBatchedReduceMean(opName, in, in.dims().size() - 1);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  // No handler matched this operator type — surface a loader error.
  return MAKE_ERR(unexpectedNodeErrorMessage(op, "Unsupported operator." ));
2265 | } |
2266 | |
/// Registers the tensor described by proto \p in as a graph input: a
/// Placeholder when its name is NOT in \p initializers (a true network
/// input), or a Constant otherwise. Tensors carrying separately-stored
/// quantization parameters additionally get companion
/// "<name>_loaded_offsets" / "<name>_loaded_scales" storage. Instantiated
/// for both caffe2::TensorProto and caffe2::QTensorProto.
template <class TensorProtoType>
Error Caffe2ModelLoader::loadInputsWithTensorProtoType(
    const caffe2::NetDef &net,
    const std::unordered_set<std::string> &initializers,
    const TensorProtoType &in) {
  // Skip static weights
  if (getConstantByNameOrNull(in.name())) {
    return Error::success();
  }

  // Likewise skip names already registered as static placeholders.
  if (getStaticPlaceholderByNameOrNull(in.name())) {
    return Error::success();
  }

  // Build the Glow type (and any side tensors) from the proto; propagate a
  // type-construction failure as this function's error.
  LoadWeightResult loadRes;
  if (auto resOrErr = createAndSetTensorType(in)) {
    loadRes = std::move(*resOrErr);
  } else {
    RETURN_ERR(resOrErr.takeError());
  }

  // scales/offsets must come as a pair: having only one is malformed input.
  bool multiQParamsLoaded = loadRes.scales || loadRes.offsets;
  RETURN_ERR_IF_NOT(
      (!multiQParamsLoaded || (loadRes.scales && loadRes.offsets)),
      "For tensors with separate qparams, both scales and offsets must be "
      "loaded" );

  bool isInput = !initializers.count(in.name());
  if (isInput) {
    // True network input: register a Placeholder the caller feeds at runtime.
    RETURN_ERR_IF_NOT(!clipQuantRangeToFP16_ ||
                          !loadRes.t->getType().isQuantizedType() ||
                          loadRes.t->getType().isFusedQuantizedType(),
                      "Do not support clipQuantRangeToFP16 with unfused "
                      "quantized input Placeholders: " +
                          in.name());
    Placeholder *placeholder;
    ASSIGN_VALUE_OR_RETURN_ERR(
        placeholder,
        createAndRegisterPlaceholder(in.name(), &loadRes.t->getType()));

    inputVarsByName_.try_emplace(in.name(), placeholder);

    if (multiQParamsLoaded) {
      // Separate qparams become their own input Placeholders, named after
      // the tensor they belong to.
      auto offsetsName = strFormat("%s_loaded_offsets" , in.name().c_str());
      auto scalesName = strFormat("%s_loaded_scales" , in.name().c_str());
      Placeholder *offsetsPlaceholder;
      Placeholder *scalesPlaceholder;

      ASSIGN_VALUE_OR_RETURN_ERR(offsetsPlaceholder,
                                 createAndRegisterPlaceholder(
                                     offsetsName, &loadRes.offsets->getType()));
      inputVarsByName_.try_emplace(offsetsName, offsetsPlaceholder);

      ASSIGN_VALUE_OR_RETURN_ERR(
          scalesPlaceholder,
          createAndRegisterPlaceholder(scalesName, &loadRes.scales->getType()));
      inputVarsByName_.try_emplace(scalesName, scalesPlaceholder);
    }
  } else {
    // Initializer: bake the loaded tensor in as a Constant.
    RETURN_IF_ERR(createAndRegisterConstant(in.name(), std::move(*loadRes.t)));

    if (multiQParamsLoaded) {
      auto offsetsName = strFormat("%s_loaded_offsets" , in.name().c_str());
      auto scalesName = strFormat("%s_loaded_scales" , in.name().c_str());
      RETURN_IF_ERR(
          createAndRegisterConstant(offsetsName, std::move(*loadRes.offsets)));
      RETURN_IF_ERR(
          createAndRegisterConstant(scalesName, std::move(*loadRes.scales)));
    }
  }
  return Error::success();
}
2339 | |
2340 | Error Caffe2ModelLoader::loadInputs( |
2341 | const caffe2::NetDef &net, |
2342 | const std::unordered_set<std::string> &initializers) { |
2343 | const caffe2::Argument *arg = nullptr, *qarg = nullptr; |
2344 | for (auto i = 0, e = net.arg_size(); i < e && (!arg || !qarg); ++i) { |
2345 | if (net.arg(i).name() == "input_shape_info" ) { |
2346 | arg = &net.arg(i); |
2347 | } else if (net.arg(i).name() == "input_qshape_info" ) { |
2348 | qarg = &net.arg(i); |
2349 | } |
2350 | } |
2351 | |
2352 | // Load all regular tensor input |
2353 | if (arg) { |
2354 | for (const auto &in : arg->tensors()) { |
2355 | RETURN_IF_ERR(loadInputsWithTensorProtoType<caffe2::TensorProto>( |
2356 | net, initializers, in)); |
2357 | } |
2358 | } |
2359 | |
2360 | // Load all quantized tensor input |
2361 | if (qarg) { |
2362 | for (const auto &in : qarg->qtensors()) { |
2363 | RETURN_IF_ERR(loadInputsWithTensorProtoType<caffe2::QTensorProto>( |
2364 | net, initializers, in)); |
2365 | } |
2366 | } |
2367 | |
2368 | return Error::success(); |
2369 | } |
2370 | |
/// Loads every operator of \p net into the current target Function(s), then
/// creates a SaveNode plus an output Placeholder for each external output.
/// Output Placeholder names are claimed up front so no intermediate node can
/// take them during operator loading.
Error Caffe2ModelLoader::loadNetwork(caffe2::NetDef &net) {
  // Make a claim on the unique name of all output Placeholders.
  for (int i = 0; i < net.external_output_size(); i++) {
    auto &outputName = net.external_output(i);
    mod_.registerStorageName(legalizeName(outputName));
  }

  /// Load the network operators:
  for (int i = 0; i < net.op_size(); i++) {
    auto &op = net.op(i);

    // Set up current partition to load into if relevant. When the model is
    // pre-partitioned, each op names its partition via device_option, and
    // G_ is switched to that partition's Function before loading the op.
    if (partNameToFun_.size()) {
      auto &pName = op.device_option().node_name();
      auto it = partNameToFun_.find(pName);
      RETURN_ERR_IF_NOT(
          it != partNameToFun_.end(),
          strFormat("Did not find partition with name %s", pName.c_str()));
      G_ = it->second;
    }
    RETURN_ERR_IF_NOT(G_, "Internal Glow error; Graph was not valid.");

    // Try folding the op to a Constant first; any folding Error is only
    // logged and the op falls through to normal loading below.
    if (constFoldInLoader_) {
      auto tryFold = foldOperator(op);
      if (!tryFold) {
        // Error during constant folding; load the op normally below.
        const std::string errStr = ERR_TO_STRING(tryFold.takeError());
        VLOG(1) << "Error while trying to ConstantFold " << loadOperatorName(op)
                << ": " << errStr;
      } else if (tryFold.get()) {
        // Folded successfully, so skip loading the op below.
        continue;
      }
    }
    RETURN_IF_ERR(loadOperator(op));
  }

  RETURN_ERR_IF_NOT(net.external_output_size(),
                    "Network needs external outputs defined.");

  for (int i = 0; i < net.external_output_size(); i++) {
    auto &outputName = net.external_output(i);
    NodeValue r;
    // We want to create the save node in the same Function as the original
    // NodeValue. Thus here we ignore the source function when getting the NV,
    // which avoids copying the NV to whatever G_ currently is via an
    // intermediate Placeholder.
    ASSIGN_VALUE_OR_RETURN_ERR(
        r, getNodeValueByName(outputName, /* ignoreSrcFun */ true));

    PlaceholderList &PHList = mod_.getPlaceholders();
    // Create a Placeholder with the previously claimed name. It is inserted
    // directly into the Module's Placeholder list, since the legalized name
    // was already registered at the top of this function.
    auto *PH =
        new Placeholder(legalizeName(outputName), mod_.uniqueType(*r.getType()),
                        false, ANY_LAYOUT);
    PHList.push_back(PH);
    // If r is storage then just use the current last Function to save, since
    // we're just saving directly from a Storage node anyway.
    Function *F = llvm::isa<Storage>(r) ? G_ : r.getNode()->getParent();
    assert(F && "F must be valid here.");
    auto *SN = F->createSave(outputName, r, PH);
    outputVarsByName_[outputName] = SN->getPlaceholder();
  }
  return Error::success();
}
2436 | |
2437 | /// Fills \p T with data from \p values. |
2438 | template <typename ElemTy, typename RangeTy> |
2439 | static Error fillTensor(Tensor &T, ElemKind kind, llvm::ArrayRef<dim_t> dim, |
2440 | RangeTy values) { |
2441 | T.reset(kind, dim); |
2442 | auto TH = T.getHandle<ElemTy>(); |
2443 | RETURN_ERR_IF_NOT((size_t)values.size() == T.size(), |
2444 | llvm::formatv("Wrong number of values for GivenTensorFill " |
2445 | "({0} given, {1} expected)" , |
2446 | values.size(), T.size()) |
2447 | .str()); |
2448 | size_t i = 0; |
2449 | for (auto num : values) { |
2450 | TH.raw(i++) = num; |
2451 | } |
2452 | return Error::success(); |
2453 | } |
2454 | |
2455 | Error Caffe2ModelLoader::loadWeight(const caffe2::OperatorDef &op) { |
2456 | ArgumentDictionaryTy dict = loadArgumentMap(op); |
2457 | const std::string &typeName = op.type(); |
2458 | const std::string &opName = loadOperatorName(op); |
2459 | // Load tensors with values: |
2460 | if (typeName == "GivenTensorFill" || typeName == "GivenTensorFp16Fill" || |
2461 | typeName == "GivenTensorIntFill" || typeName == "GivenTensorInt64Fill" ) { |
2462 | /* |
2463 | * op { |
2464 | * output: "conv1_w" |
2465 | * name: "" |
2466 | * type: "GivenTensorFill" |
2467 | * arg { |
2468 | * name: "shape" |
2469 | * ints: 96 |
2470 | * ints: 3 |
2471 | * ints: 11 |
2472 | * ints: 11 |
2473 | * } |
2474 | * arg { |
2475 | * name: "values" |
2476 | * floats: -0.028315347 |
2477 | * ... |
2478 | * } |
2479 | * } |
2480 | */ |
2481 | |
2482 | // Note: Explicitly allow for an empty dim here, representing a scalar value |
2483 | // will be loaded below. |
2484 | std::vector<dim_t> dim; |
2485 | ASSIGN_VALUE_OR_RETURN_ERR( |
2486 | dim, getShape<dim_t>(dict["shape" ], /* allowEmptyShape */ true)); |
2487 | auto const &values = dict["values" ]; |
2488 | RETURN_ERR_IF_NOT( |
2489 | op.output_size() == 1, |
2490 | opErrMsg( |
2491 | op, strFormat( |
2492 | "GivenTensorFill must have exactly 1 output, but found %d " , |
2493 | op.output_size()))); |
2494 | Tensor T; |
2495 | if (typeName == "GivenTensorFill" ) { |
2496 | RETURN_IF_ERR( |
2497 | fillTensor<float>(T, ElemKind::FloatTy, dim, values->floats())); |
2498 | } else if (typeName == "GivenTensorFp16Fill" ) { |
2499 | RETURN_IF_ERR( |
2500 | fillTensor<float16_t>(T, ElemKind::Float16Ty, dim, values->floats())); |
2501 | } else if (typeName == "GivenTensorIntFill" ) { |
2502 | RETURN_IF_ERR( |
2503 | fillTensor<int32_t>(T, ElemKind::Int32ITy, dim, values->ints())); |
2504 | } else if (typeName == "GivenTensorInt64Fill" ) { |
2505 | RETURN_IF_ERR( |
2506 | fillTensor<int64_t>(T, ElemKind::Int64ITy, dim, values->ints())); |
2507 | } else { |
2508 | return MAKE_ERR( |
2509 | strFormat("Unhandled tensor fill type: %s" , typeName.c_str())); |
2510 | } |
2511 | RETURN_IF_ERR(createAndRegisterConstant(op.output().Get(0), std::move(T))); |
2512 | return Error::success(); |
2513 | } |
2514 | |
2515 | if (typeName == "GivenTensorByteStringToUInt8Fill" ) { |
2516 | /* |
2517 | output: "data" |
2518 | type: "GivenTensorByteStringToUInt8Fill" |
2519 | arg { |
2520 | name: "shape" |
2521 | ints: 3 |
2522 | ints: 10 |
2523 | } |
2524 | arg { |
2525 | name: "values" |
2526 | s: |
2527 | "\000\377\152\232\115\072\000\000\200\077\000\377\050\132\215\073\063\063\023\100\000\377\314\063\232\073\000\000\220\100" |
2528 | } |
2529 | */ |
2530 | |
2531 | for (auto &o : op.output()) { |
2532 | Tensor T; |
2533 | if (getConstantByNameOrNull(o)) { |
2534 | continue; |
2535 | } |
2536 | std::vector<dim_t> dim; |
2537 | ASSIGN_VALUE_OR_RETURN_ERR(dim, getShape<dim_t>(dict["shape" ])); |
2538 | T.reset(ElemKind::UInt8QTy, dim, 0.0, 0); |
2539 | auto TH = T.getHandle<uint8_t>(); |
2540 | RETURN_ERR_IF_NOT( |
2541 | dict["values" ]->strings().size() == 1, |
2542 | "Expect single string input for GivenTensorByteStringToUInt8Fill" ); |
2543 | const std::string &str = dict["values" ]->strings().Get(0); |
2544 | |
2545 | size_t pos; |
2546 | for (pos = 0; pos < str.size(); pos++) { |
2547 | TH.raw(pos) = (uint8_t)str[pos]; |
2548 | } |
2549 | |
2550 | RETURN_ERR_IF_NOT( |
2551 | pos == T.size(), |
2552 | strFormat("The number of serialized values (%li) does not " |
2553 | "match the size of the tensor (%li)." , |
2554 | pos, (size_t)T.size())); |
2555 | RETURN_IF_ERR(createAndRegisterConstant(o, std::move(T))); |
2556 | } |
2557 | return Error::success(); |
2558 | } |
2559 | |
2560 | // Load quantized tensors: |
2561 | if (typeName == "Int8GivenTensorFill" || |
2562 | typeName == "Int8GivenIntTensorFill" ) { |
2563 | /* |
2564 | output: "conv1_w" |
2565 | name: "" |
2566 | type: "Int8GivenTensorFill" |
2567 | arg { |
2568 | name: "shape" |
2569 | ints: 96 |
2570 | ints: 3 |
2571 | ints: 11 |
2572 | ints: 11 |
2573 | } |
2574 | arg { |
2575 | name: "values" |
2576 | s: "\x7f\x80\x80\x7" |
2577 | } |
2578 | arg { |
2579 | name: "Y_scale" |
2580 | f: 0.00044428 |
2581 | } |
2582 | arg { |
2583 | name: "Y_zero_point" |
2584 | i: 127 |
2585 | } |
2586 | */ |
2587 | for (auto &o : op.output()) { |
2588 | Tensor T; |
2589 | if (getConstantByNameOrNull(o)) { |
2590 | continue; |
2591 | } |
2592 | |
2593 | std::vector<dim_t> dim; |
2594 | ASSIGN_VALUE_OR_RETURN_ERR(dim, getShape<dim_t>(dict["shape" ])); |
2595 | |
2596 | RETURN_ERR_IF_NOT(dict.count("Y_zero_point" ), |
2597 | ("missing zero point for quantized output type" )); |
2598 | RETURN_ERR_IF_NOT(dict.count("Y_scale" ), |
2599 | ("missing Y_scale for quantized output type" )); |
2600 | |
2601 | float scale; |
2602 | ASSIGN_VALUE_OR_RETURN_ERR(scale, loadFloat(dict["Y_scale" ])); |
2603 | (void)scale; |
2604 | int32_t offset; |
2605 | ASSIGN_VALUE_OR_RETURN_ERR(offset, loadInt(dict["Y_zero_point" ])); |
2606 | (void)offset; |
2607 | size_t i = 0; |
2608 | if (typeName == "Int8GivenTensorFill" ) { |
2609 | // Although in Caffe2 quantized model, the weights is int8 quantized, |
2610 | // the weights is stored in uint8_t format due to that Caffe2 requires |
2611 | // the type of input and weights must be the same. Therefore, we need |
2612 | // to convert it to int8 by subtracting 128. |
2613 | TypeRef ty; |
2614 | ASSIGN_VALUE_OR_RETURN_ERR( |
2615 | ty, loadQuantTy(o, ElemKind::Int8QTy, dim, dict, |
2616 | /* skipClipQuantRangeToFP16 */ true)); |
2617 | T.reset(*ty); |
2618 | auto TH = T.getHandle<int8_t>(); |
2619 | std::string str = dict["values" ]->s(); |
2620 | for (; i < str.size(); i++) { |
2621 | TH.raw(i) = ((uint8_t)(str.c_str()[i]) - UINT8_TO_INT8_SHIFT); |
2622 | } |
2623 | } else { |
2624 | TypeRef ty; |
2625 | ASSIGN_VALUE_OR_RETURN_ERR( |
2626 | ty, loadQuantTy(o, ElemKind::Int32QTy, dim, dict, |
2627 | /* skipClipQuantRangeToFP16 */ true)); |
2628 | T.reset(*ty); |
2629 | auto TH = T.getHandle<int32_t>(); |
2630 | for (auto num : dict["values" ]->ints()) { |
2631 | TH.raw(i++) = num; |
2632 | } |
2633 | } |
2634 | |
2635 | // If we're clipping quantized ranges tp FP16, then we need to rescale the |
2636 | // Tensor and update its type. |
2637 | if (clipQuantRangeToFP16_) { |
2638 | const ElemKind k = T.getType().getElementType(); |
2639 | const auto qMinMax = getQuantizedValueRange(T.getType().getScale(), |
2640 | T.getType().getOffset(), k); |
2641 | const float newMin = std::max(qMinMax.first, kMinFP16); |
2642 | const float newMax = std::min(qMinMax.second, kMaxFP16); |
2643 | if (newMin != qMinMax.first || newMax != qMinMax.second) { |
2644 | auto rescaledT = glow::make_unique<Tensor>(); |
2645 | dispatchQuantizedImpl(rescaleQTensor, k, T, *rescaledT, newMin, |
2646 | newMax); |
2647 | T = std::move(*rescaledT); |
2648 | } |
2649 | } |
2650 | |
2651 | RETURN_ERR_IF_NOT( |
2652 | i == T.size(), |
2653 | strFormat("The number of serialized values (%li) does not " |
2654 | "match the size of the tensor (%li)." , |
2655 | i, (size_t)T.size())); |
2656 | |
2657 | RETURN_IF_ERR(createAndRegisterConstant(o, std::move(T))); |
2658 | } |
2659 | |
2660 | return Error::success(); |
2661 | } |
2662 | |
2663 | // Load tensors with constant fill: |
2664 | if (typeName == "ConstantFill" ) { |
2665 | /* |
2666 | output: "data" |
2667 | name: "" |
2668 | type: "ConstantFill" |
2669 | arg { |
2670 | name: "shape" |
2671 | ints: 1 |
2672 | } |
2673 | */ |
2674 | |
2675 | const auto &name = op.output(0); |
2676 | // If the tensor is pre-populated by the user of this class then we don't |
2677 | // need to allocate a new tensor. |
2678 | if (getConstantByNameOrNull(name)) { |
2679 | return Error::success(); |
2680 | } |
2681 | |
2682 | // The shape is set either the shape argument, or from another input |
2683 | // tensor. Shape takes priority over input. |
2684 | std::vector<dim_t> dims; |
2685 | if (dict.count("shape" )) { |
2686 | ASSIGN_VALUE_OR_RETURN_ERR(dims, getShape<dim_t>(dict["shape" ])); |
2687 | } else { |
2688 | RETURN_ERR_IF_NOT(op.input_size() > 0, |
2689 | "If no shape provided, must have input shape." ); |
2690 | |
2691 | bool inputAsShape = false; |
2692 | if (dict.count("input_as_shape" )) { |
2693 | ASSIGN_VALUE_OR_RETURN_ERR(inputAsShape, |
2694 | loadInt(dict["input_as_shape" ])); |
2695 | } |
2696 | |
2697 | if (inputAsShape) { |
2698 | // It must be registered as a Constant because it must be statically set |
2699 | // already, as shapes must be statically known. |
2700 | Constant *in; |
2701 | ASSIGN_VALUE_OR_RETURN_ERR(in, getConstantByName(op.input(0))); |
2702 | RETURN_ERR_IF_NOT(in->dims().size() == 1, |
2703 | opErrMsg(op, "Input must be 1D tensor." )); |
2704 | RETURN_ERR_IF_NOT(in->getElementType() == ElemKind::Int64ITy, |
2705 | opErrMsg(op, "Input must be of int64 type." )); |
2706 | const auto handle = in->getHandle<int64_t>(); |
2707 | dims.reserve(in->dims().size()); |
2708 | for (auto dim : handle) { |
2709 | dims.push_back(dim); |
2710 | } |
2711 | } else { |
2712 | NodeValue in; |
2713 | ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0))); |
2714 | dims = in.dims(); |
2715 | } |
2716 | } |
2717 | |
2718 | int to = caffe2::TensorProto_DataType_FLOAT; |
2719 | if (dict.count("dtype" )) { |
2720 | ASSIGN_VALUE_OR_RETURN_ERR(to, loadInt(dict["dtype" ])); |
2721 | } |
2722 | |
2723 | SplatNode *splatNode{nullptr}; |
2724 | |
2725 | switch (to) { |
2726 | case caffe2::TensorProto_DataType_FLOAT: { |
2727 | float f = 0.0f; |
2728 | if ((dict.count("value" ) && dict["value" ]->has_f())) { |
2729 | ASSIGN_VALUE_OR_RETURN_ERR(f, loadFloat(dict["value" ])); |
2730 | } |
2731 | splatNode = |
2732 | G_->createSplat(opName, mod_.uniqueType(ElemKind::FloatTy, dims), f); |
2733 | break; |
2734 | } |
2735 | case caffe2::TensorProto_DataType_INT32: { |
2736 | int i = 0; |
2737 | if ((dict.count("value" ) && dict["value" ]->has_i())) { |
2738 | ASSIGN_VALUE_OR_RETURN_ERR(i, loadInt(dict["value" ])); |
2739 | } |
2740 | splatNode = |
2741 | G_->createSplat(opName, mod_.uniqueType(ElemKind::Int32ITy, dims), i); |
2742 | break; |
2743 | } |
2744 | case caffe2::TensorProto_DataType_INT64: |
2745 | case caffe2::TensorProto_DataType_BOOL: { |
2746 | int i = 0; |
2747 | if ((dict.count("value" ) && dict["value" ]->has_i())) { |
2748 | ASSIGN_VALUE_OR_RETURN_ERR(i, loadInt(dict["value" ])); |
2749 | } |
2750 | splatNode = |
2751 | G_->createSplat(opName, mod_.uniqueType(ElemKind::Int64ITy, dims), i); |
2752 | break; |
2753 | } |
2754 | default: |
2755 | return MAKE_ERR("Unsupported datatype for ConstantFill." ); |
2756 | } |
2757 | |
2758 | RETURN_IF_ERR(addNodeAsOutput(op, splatNode)); |
2759 | |
2760 | return Error::success(); |
2761 | } |
2762 | |
2763 | if (typeName == "UniformFill" ) { |
2764 | /* |
2765 | output: "fc/w" |
2766 | name: "" |
2767 | type: "UniformFill" |
2768 | arg { |
2769 | name: "max" |
2770 | f: 0.25 |
2771 | } |
2772 | arg { |
2773 | name: "shape" |
2774 | ints: 1 |
2775 | ints: 16 |
2776 | } |
2777 | arg { |
2778 | name: "min" |
2779 | f: -0.25 |
2780 | } |
2781 | */ |
2782 | const auto &name = op.output(0); |
2783 | Tensor T; |
2784 | std::vector<dim_t> dim; |
2785 | if (dict.count("shape" )) { |
2786 | ASSIGN_VALUE_OR_RETURN_ERR(dim, getShape<dim_t>(dict["shape" ])); |
2787 | } else { |
2788 | RETURN_ERR_IF_NOT(op.input_size() > 0, |
2789 | "If no shape provided, must have input shape." ); |
2790 | |
2791 | bool inputAsShape = false; |
2792 | if (dict.count("input_as_shape" )) { |
2793 | ASSIGN_VALUE_OR_RETURN_ERR(inputAsShape, |
2794 | loadInt(dict["input_as_shape" ])); |
2795 | } |
2796 | |
2797 | if (inputAsShape) { |
2798 | Constant *in; |
2799 | ASSIGN_VALUE_OR_RETURN_ERR(in, getConstantByName(op.input(0))); |
2800 | RETURN_ERR_IF_NOT(in->dims().size() == 1, |
2801 | opErrMsg(op, "Input must be 1D tensor." )); |
2802 | RETURN_ERR_IF_NOT(in->getElementType() == ElemKind::Int64ITy, |
2803 | opErrMsg(op, "Input must be of int64 type." )); |
2804 | const auto handle = in->getHandle<int64_t>(); |
2805 | dim.reserve(in->dims().size()); |
2806 | for (auto d : handle) { |
2807 | dim.push_back(d); |
2808 | } |
2809 | } else { |
2810 | NodeValue input; |
2811 | ASSIGN_VALUE_OR_RETURN_ERR(input, getNodeValueByName(op.input(0))); |
2812 | dim = input.dims(); |
2813 | } |
2814 | } |
2815 | T.reset(ElemKind::FloatTy, dim); |
2816 | auto TH = T.getHandle<>(); |
2817 | float tensorMin; |
2818 | ASSIGN_VALUE_OR_RETURN_ERR(tensorMin, loadFloat(dict["min" ])); |
2819 | float tensorMax; |
2820 | ASSIGN_VALUE_OR_RETURN_ERR(tensorMax, loadFloat(dict["max" ])); |
2821 | |
2822 | DLOG(INFO) |
2823 | << "The model contains UniformFill operator, which generates random " |
2824 | "numbers. This could be source of discrepancy." ; |
2825 | |
2826 | // Uniformly generate random numbers in [tensorMin; tensorMax). |
2827 | for (auto &elem : TH) { |
2828 | elem = mod_.getPRNG().nextRandReal(tensorMin, tensorMax); |
2829 | } |
2830 | |
2831 | RETURN_IF_ERR(createAndRegisterConstant(name, std::move(T))); |
2832 | |
2833 | return Error::success(); |
2834 | } |
2835 | |
2836 | // Load tensors with constant fill: |
2837 | if (typeName == "GaussianFill" ) { |
2838 | /* |
2839 | output: "data" |
2840 | name: "" |
2841 | type: "GaussianFill" |
2842 | arg { |
2843 | name: "mean" |
2844 | f: 0.0 |
2845 | } |
2846 | arg { |
2847 | name: "std" |
2848 | f: 1.0 |
2849 | } |
2850 | arg { |
2851 | name: "shape" |
2852 | ints: 1 |
2853 | ints: 16 |
2854 | } |
2855 | */ |
2856 | |
2857 | const auto &name = op.output(0); |
2858 | if (getConstantByNameOrNull(name)) { |
2859 | return Error::success(); |
2860 | } |
2861 | |
2862 | // The shape of the output is set by shape, if provided. Otherwise, it is |
2863 | // set by the shape of the input or the shape indicated by input if |
2864 | // input_as_shape is true |
2865 | NodeValue input; |
2866 | std::vector<dim_t> dims; |
2867 | if (dict.count("shape" )) { |
2868 | ASSIGN_VALUE_OR_RETURN_ERR(dims, getShape<dim_t>(dict["shape" ])); |
2869 | } else { |
2870 | RETURN_ERR_IF_NOT(op.input_size() > 0, |
2871 | "If no shape provided, must have input shape." ); |
2872 | |
2873 | bool inputAsShape = false; |
2874 | if (dict.count("input_as_shape" )) { |
2875 | ASSIGN_VALUE_OR_RETURN_ERR(inputAsShape, |
2876 | loadInt(dict["input_as_shape" ])); |
2877 | } |
2878 | |
2879 | if (inputAsShape) { |
2880 | Constant *in; |
2881 | ASSIGN_VALUE_OR_RETURN_ERR(in, getConstantByName(op.input(0))); |
2882 | RETURN_ERR_IF_NOT(in->dims().size() == 1, |
2883 | opErrMsg(op, "Input must be 1D tensor." )); |
2884 | RETURN_ERR_IF_NOT(in->getElementType() == ElemKind::Int64ITy, |
2885 | opErrMsg(op, "Input must be of int64 type." )); |
2886 | const auto handle = in->getHandle<int64_t>(); |
2887 | dims.reserve(in->dims().size()); |
2888 | for (auto dim : handle) { |
2889 | dims.push_back(dim); |
2890 | } |
2891 | } else { |
2892 | ASSIGN_VALUE_OR_RETURN_ERR(input, getNodeValueByName(op.input(0))); |
2893 | dims = input.dims(); |
2894 | } |
2895 | |
2896 | if (dict.count("extra_shape" )) { |
2897 | std::vector<dim_t> ; |
2898 | ASSIGN_VALUE_OR_RETURN_ERR(extra_shape, |
2899 | getShape<dim_t>(dict["extra_shape" ])); |
2900 | dims.insert(dims.end(), extra_shape.begin(), extra_shape.end()); |
2901 | } |
2902 | } |
2903 | if ((!input && !dims.empty()) || input.dims().vec() != dims) { |
2904 | input = |
2905 | G_->createSplat("in" , mod_.uniqueType(ElemKind::FloatTy, dims), 0.); |
2906 | } |
2907 | float mean; |
2908 | ASSIGN_VALUE_OR_RETURN_ERR(mean, loadFloat(dict["mean" ])); |
2909 | float scale; |
2910 | ASSIGN_VALUE_OR_RETURN_ERR(scale, loadFloat(dict["std" ])); |
2911 | |
2912 | auto GF = G_->createGaussianFill(opName, input, mean, scale, |
2913 | std::random_device{}()); |
2914 | auto outputType = |
2915 | mod_.uniqueType(ElemKind::FloatTy, GF->getResult().dims()); |
2916 | auto node = G_->createConvertTo(opName + ".ConvertOutput" , GF, outputType); |
2917 | RETURN_IF_ERR(addNodeAsOutput(op, node)); |
2918 | |
2919 | return Error::success(); |
2920 | } |
2921 | |
2922 | return MAKE_ERR(unexpectedNodeErrorMessage(op, "Unsupported weight kind" )); |
2923 | } |
2924 | |
2925 | Error Caffe2ModelLoader::loadWeightsFromNet(caffe2::NetDef &net) { |
2926 | for (auto &op : net.op()) { |
2927 | RETURN_IF_ERR(loadWeight(op)); |
2928 | } |
2929 | return Error::success(); |
2930 | } |
2931 | |
/// Constructs a loader over \p F without reading any proto; only unused
/// Constants are pruned from the Module. Errors from the base class are
/// reported through \p errPtr.
Caffe2ModelLoader::Caffe2ModelLoader(Function &F, Error *errPtr)
    : CommonOperatorLoader({}, {}, &F, errPtr) {
  deleteUnusedConstants();
}
2936 | |
2937 | Caffe2ModelLoader::Caffe2ModelLoader( |
2938 | const std::string &netDescFilename, const std::string &netWeightFilename, |
2939 | llvm::ArrayRef<const char *> names, llvm::ArrayRef<TypeRef> types, |
2940 | Function &F, Error *errPtr, OriginNameToTQPMap *originNameToTQPMap, |
2941 | bool loadUniquedDummyQParams, bool zeroScaleFP16Clip, |
2942 | bool clipQuantRangeToFP16) |
2943 | : CommonOperatorLoader(names, types, &F, errPtr, |
2944 | /* loadIntoExistingModule */ false, |
2945 | originNameToTQPMap, loadUniquedDummyQParams, |
2946 | zeroScaleFP16Clip, clipQuantRangeToFP16) { |
2947 | // if errPtr already contains an error then don't continue with constructor |
2948 | if (errPtr && *errPtr) { |
2949 | return; |
2950 | } |
2951 | |
2952 | // Lambda to setup the Caffe2ModelLoader and return any Errors that |
2953 | // were raised. |
2954 | auto setup = [&]() -> Error { |
2955 | // The caffe2 network descriptor that we are deserializing. |
2956 | caffe2::NetDef networkDef; |
2957 | ASSIGN_VALUE_OR_RETURN_ERR(networkDef, loadProtoFile(netDescFilename)); |
2958 | |
2959 | // The caffe2 weights that we are deserializing. |
2960 | caffe2::NetDef weightsDef; |
2961 | ASSIGN_VALUE_OR_RETURN_ERR(weightsDef, loadProtoFile(netWeightFilename)); |
2962 | |
2963 | RETURN_IF_ERR(loadWeightsFromNet(weightsDef)); |
2964 | RETURN_IF_ERR(loadNetwork(networkDef)); |
2965 | |
2966 | // This is to ensure that the same processing done with |
2967 | // the same network, even if order of operators is different. |
2968 | F.orderNodes(); |
2969 | RETURN_ERR_IF_NOT(F.verify(), "Function verification failed." ); |
2970 | |
2971 | deleteUnusedConstants(); |
2972 | |
2973 | RETURN_IF_ERR(verifyDummyQParams()); |
2974 | |
2975 | return Error::success(); |
2976 | }; |
2977 | |
2978 | if (errPtr) { |
2979 | *errPtr = setup(); |
2980 | } else { |
2981 | EXIT_ON_ERR(setup()); |
2982 | } |
2983 | } |
2984 | |
/// Creates the Function(s) for \p networkDef inside the Module — one per
/// partition_info entry (named funNamePrefix + "_" + partition name), or a
/// single Function named \p funNamePrefix when the net is unpartitioned —
/// then loads the network and, for partitioned nets, fills \p PPC with the
/// per-partition metadata (device IDs, backend options, defaults for
/// replication and hints).
Error Caffe2ModelLoader::initWithModule(caffe2::NetDef &networkDef,
                                        llvm::StringRef funNamePrefix,
                                        runtime::PrePartitionedConfig *PPC) {
  // Look for any partitions that will be needed. If there is no
  // partition_info then we create a single Function to load into. Otherwise
  // we create multiple Functions and switch between them as we load each
  // operator.
  std::unordered_map<Function *, std::vector<runtime::DeviceIDTy>> funToIDs;
  std::unordered_map<Function *, BackendSpecificOptions> funToOpts;
  if (networkDef.partition_info_size() == 0) {
    G_ = mod_.createFunction(funNamePrefix);
  } else {
    for (int i = 0; i < networkDef.partition_info_size(); i++) {
      const std::string &pName = networkDef.partition_info(i).name();
      const std::string funName = funNamePrefix.str() + "_" + pName;
      Function *PF = mod_.createFunction(funName);
      partNameToFun_[pName] = PF;
      // Record the logical device IDs this partition should run on.
      for (auto id : networkDef.partition_info(i).device_id()) {
        funToIDs[PF].push_back(id);
      }

      // Now set up device options for this partition. Options are keyed as
      // "<backendName>_<optionKey>".
      auto &optsMap = funToOpts[PF];
      for (auto &backendOpts : networkDef.partition_info(i).backend_options()) {
        const std::string &backendName = backendOpts.backend_name();
        for (auto &keyVal : backendOpts.option()) {
          optsMap[backendName + "_" + keyVal.key()] = keyVal.val();
        }
      }
    }
  }

  RETURN_IF_ERR(loadNetwork(networkDef));

  // Now setup the pre-partitioned config if relevant.
  if (partNameToFun_.size()) {
    RETURN_ERR_IF_NOT(
        PPC, "Partitioned model but no config to store meta information in.");
    PPC->funcName = funNamePrefix.str();

    PPC->funcs.reserve(partNameToFun_.size());
    PPC->logicalIDs.reserve(partNameToFun_.size());
    // The PPC vectors below are parallel: entry j of each describes the
    // same partition Function.
    for (auto &SF : partNameToFun_) {
      Function *F = SF.getValue();
      // Remove unused Functions from the module and skip them.
      if (F->getNodes().size() == 0) {
        mod_.eraseFunction(SF.getValue());
        continue;
      }
      // This is to ensure that the same processing done with
      // the same network, even if order of operators is different.
      F->orderNodes();
      PPC->funcs.push_back(F);
      PPC->logicalIDs.emplace_back(funToIDs[F]);
      PPC->backendSpecificOpts.emplace_back(funToOpts[F]);
      // Replication counts not currently loaded through C2, so default to 1.
      PPC->replicationCounts.emplace_back(1);
      // Backend hints not currently loaded through C2, so use default.
      PPC->backendHints.emplace_back();
      RETURN_ERR_IF_NOT(F->verify(), "Function verification failed.");
    }
  }

  deleteUnusedConstants();

  RETURN_IF_ERR(verifyDummyQParams());

  return Error::success();
}
3054 | |
3055 | Caffe2ModelLoader::Caffe2ModelLoader(const std::string &netDescFilename, |
3056 | const std::string &netWeightFilename, |
3057 | llvm::ArrayRef<const char *> names, |
3058 | llvm::ArrayRef<TypeRef> types, Module &mod, |
3059 | llvm::StringRef funNamePrefix, |
3060 | runtime::PrePartitionedConfig *PPC, |
3061 | Error *errPtr) |
3062 | : CommonOperatorLoader(names, types, mod, errPtr) { |
3063 | // if errPtr already contains an error then don't continue with constructor |
3064 | if (errPtr && *errPtr) { |
3065 | return; |
3066 | } |
3067 | |
3068 | // Lambda to setup the Caffe2ModelLoader and return any Errors that |
3069 | // were raised. |
3070 | auto setup = [&]() -> Error { |
3071 | // The caffe2 network descriptor that we are deserializing. |
3072 | caffe2::NetDef networkDef; |
3073 | ASSIGN_VALUE_OR_RETURN_ERR(networkDef, loadProtoFile(netDescFilename)); |
3074 | |
3075 | // The caffe2 weights that we are deserializing. |
3076 | caffe2::NetDef weightsDef; |
3077 | ASSIGN_VALUE_OR_RETURN_ERR(weightsDef, loadProtoFile(netWeightFilename)); |
3078 | |
3079 | RETURN_IF_ERR(loadWeightsFromNet(weightsDef)); |
3080 | |
3081 | return initWithModule(networkDef, funNamePrefix, PPC); |
3082 | }; |
3083 | |
3084 | if (errPtr) { |
3085 | *errPtr = setup(); |
3086 | } else { |
3087 | EXIT_ON_ERR(setup()); |
3088 | } |
3089 | } |
3090 | |
3091 | Caffe2ModelLoader::Caffe2ModelLoader( |
3092 | const std::string &modelStr, uint32_t weightsCount, |
3093 | const onnxTensorDescriptorV1 *weightDescriptors, Module &dummyMod, |
3094 | Error *errPtr, OriginNameToTQPMap *originNameToTQPMap, |
3095 | bool clipQuantRangeToFP16) |
3096 | : CommonOperatorLoader( |
3097 | {}, {}, dummyMod, errPtr, |
3098 | /* loadIntoExistingModule */ false, originNameToTQPMap, |
3099 | /* loadUniquedDummyQParams */ false, /* replaceDummyTQPs */ false, |
3100 | /* zeroScaleFP16Clip */ false, clipQuantRangeToFP16) { |
3101 | if (errPtr && *errPtr) { |
3102 | return; |
3103 | } |
3104 | |
3105 | constFoldInLoader_ = false; |
3106 | |
3107 | // Lambda to setup the Caffe2ModelLoader and return any Errors that were |
3108 | // raised. |
3109 | auto setup = [&]() -> Error { |
3110 | caffe2::NetDef networkDef; |
3111 | RETURN_ERR_IF_NOT( |
3112 | google::protobuf::TextFormat::ParseFromString(modelStr, &networkDef), |
3113 | "Error loading model from string" ); |
3114 | |
3115 | ArgumentDictionaryTy dict = loadArgumentMap(networkDef); |
3116 | |
3117 | std::unordered_set<std::string> initializers; |
3118 | if (dict.count("initializers" )) { |
3119 | const auto &strings = dict.at("initializers" )->strings(); |
3120 | for (const auto &s : strings) { |
3121 | initializers.insert(s); |
3122 | } |
3123 | } |
3124 | |
3125 | RETURN_IF_ERR(loadWeights(weightsCount, weightDescriptors)); |
3126 | |
3127 | RETURN_IF_ERR(loadInputs(networkDef, initializers)); |
3128 | |
3129 | // Identify primary input sequence |
3130 | std::unordered_set<std::string> weights; |
3131 | for (uint32_t i = 0; i < weightsCount; ++i) { |
3132 | weights.emplace(weightDescriptors[i].name); |
3133 | } |
3134 | |
3135 | runtime::PrePartitionedConfig dummyPPC; |
3136 | return initWithModule(networkDef, "dummy" , &dummyPPC); |
3137 | }; |
3138 | |
3139 | *errPtr = setup(); |
3140 | } |
3141 | |
3142 | Caffe2ModelLoader::Caffe2ModelLoader( |
3143 | const void *model, uint32_t modelSize, uint32_t weightsCount, |
3144 | const onnxTensorDescriptorV1 *weightDescriptors, Module &mod, |
3145 | llvm::StringRef funNamePrefix, runtime::PrePartitionedConfig *PPC, |
3146 | Error *errPtr, bool constFoldInLoader, |
3147 | OriginNameToTQPMap *originNameToTQPMap, bool loadUniquedDummyQParams, |
3148 | bool zeroScaleFP16Clip, bool clipQuantRangeToFP16) |
3149 | : CommonOperatorLoader({}, {}, mod, errPtr, |
3150 | /* loadIntoExistingModule */ false, |
3151 | originNameToTQPMap, loadUniquedDummyQParams, |
3152 | /* replaceDummyTQPs */ false, zeroScaleFP16Clip, |
3153 | clipQuantRangeToFP16) { |
3154 | // if errPtr already contains an error then don't continue with constructor |
3155 | if (errPtr && *errPtr) { |
3156 | return; |
3157 | } |
3158 | |
3159 | // Always override the default for folding in this constructor. |
3160 | constFoldInLoader_ = constFoldInLoader; |
3161 | |
3162 | // Lambda to setup the Caffe2ModelLoader and return any Errors that were |
3163 | // raised. |
3164 | auto setup = [&]() -> Error { |
3165 | caffe2::NetDef networkDef; |
3166 | ASSIGN_VALUE_OR_RETURN_ERR(networkDef, loadProto(model, modelSize)); |
3167 | |
3168 | ArgumentDictionaryTy dict = loadArgumentMap(networkDef); |
3169 | |
3170 | std::unordered_set<std::string> initializers; |
3171 | if (dict.count("initializers" )) { |
3172 | const auto &strings = dict.at("initializers" )->strings(); |
3173 | for (const auto &s : strings) { |
3174 | initializers.insert(s); |
3175 | } |
3176 | } |
3177 | |
3178 | RETURN_IF_ERR(loadWeights(weightsCount, weightDescriptors)); |
3179 | |
3180 | RETURN_IF_ERR(loadInputs(networkDef, initializers)); |
3181 | |
3182 | // Identify primary input sequence |
3183 | std::unordered_set<std::string> weights; |
3184 | for (uint32_t i = 0; i < weightsCount; ++i) { |
3185 | weights.emplace(weightDescriptors[i].name); |
3186 | } |
3187 | for (const auto &input : networkDef.external_input()) { |
3188 | if (!weights.count(input)) { |
3189 | positionalInputNames_.emplace_back(input); |
3190 | } |
3191 | } |
3192 | for (const auto &output : networkDef.external_output()) { |
3193 | positionalOutputNames_.emplace_back(output); |
3194 | } |
3195 | |
3196 | return initWithModule(networkDef, funNamePrefix, PPC); |
3197 | }; |
3198 | |
3199 | if (errPtr) { |
3200 | *errPtr = setup(); |
3201 | } else { |
3202 | EXIT_ON_ERR(setup()); |
3203 | } |
3204 | } |
3205 | |