/**
 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "glow/Importer/Caffe2ModelLoader.h"
#include "glow/Base/Tensor.h"
#include "glow/Graph/Graph.h"
#include "glow/Graph/Nodes.h"
#include "glow/Runtime/RuntimeTypes.h"
#include "glow/Support/Error.h"

#include "llvm/Support/Casting.h"

#include "caffe2/proto/caffe2.pb.h"
#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/text_format.h>

#include <cstddef>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

using namespace glow;
using llvm::cast;

using ArgumentDictionaryTy =
    std::unordered_map<std::string, const caffe2::Argument *>;

namespace glow {
/// Template specialization of loadOperatorName for caffe2.
template <>
std::string
loadOperatorName<caffe2::OperatorDef>(const caffe2::OperatorDef &op) {
  if (op.name().length()) {
    return op.name();
  }
  if (op.output_size() > 0) {
    return op.output(0);
  }
  return op.type();
}
// FIXME: this is a temporary solution for the case when NonZero returns
// -2^31 as the boundary for the returned indices. For example, currently
// we get NonZero([0, 1, 1, 0, 0]) -> [1, 2, -2^31, 0, 0], because the
// shapes are static. This function makes sure that the output looks like
// [1, 2, -1, -1, -1], which is more convenient for now.
// The logic: given [1, 2, -2^31, 0, 0], we first convert it to
// [0, 0, 1, 0, 0] by finding the negative element, then take a cumulative
// sum to get [0, 0, 1, 1, 1]; wherever that is 0 we keep the original
// value, and wherever it is 1 we emit -1, yielding [1, 2, -1, -1, -1].
Node *fixNonZero(Function *F, Module &mod, const std::string opName,
                 NodeValue node) {
  auto zeroes = F->createSplat(opName + ".fixNZ.zeroes", node.getType(), 0);
  auto floatTy = mod.uniqueType(ElemKind::Float16Ty, node.dims());
  auto minusOnesFloat =
      F->createSplat(opName + ".fixNZ.minusOnesFloat", floatTy, -1);
  auto zeroesFloat = F->createSplat(opName + ".fixNZ.zeroesFloat", floatTy, 0);
  auto onesFloat = F->createSplat(opName + ".fixNZ.onesFloat", floatTy, 1);
  auto nodeFloat = F->createConvertTo(opName + ".fixNZ.float", node, floatTy);

  // If there is a boundary, it will be marked as true.
  auto isNegBool = F->createCmpLT(opName + ".fixNZ.isNegBool", node, zeroes);
  auto isNegFloat = F->createSelect(opName + ".fixNZ.isNegFloat", isNegBool,
                                    onesFloat, zeroesFloat);
  auto isNegInt = F->createConvertTo(opName + ".fixNZ.isNegInt", isNegFloat,
                                     node.getType());

  // After applying cumsum, every element before the boundary will be 0,
  // and every element starting from the boundary will be 1.
  auto cumSum = F->createCumSum(opName + ".fixNZ.cumSum", isNegInt, 0);

  auto isAfterBoundary =
      F->createCmpGT(opName + ".fixNZ.isAfterBoundary", cumSum, zeroes);

  auto withMinusOnesFloat =
      F->createSelect(opName + ".fixNZ.withMinusOnesFloat", isAfterBoundary,
                      minusOnesFloat, nodeFloat);

  auto withMinusOnesInt = F->createConvertTo(
      opName + ".fixNZ.withMinusOnesInt", withMinusOnesFloat, node.getType());

  return withMinusOnesInt;
}
} // namespace glow

/// Legacy padding modes supported in caffe2. These are used by MaxPool
/// operators, and are defined in caffe2_legacy.proto in the caffe2 source
/// tree.
enum LegacyPaddingMode { NOTSET, VALID, SAME, CAFFE_LEGACY_POOLING, N_MODES };

/// Creates a tensor from the input \p in. Note, there is no data associated
/// with the Tensor. This method makes sure that the tensor is created with
/// the proper shape and element type.
Expected<LoadWeightResult>
Caffe2ModelLoader::createAndSetTensorType(const caffe2::TensorProto &in) {
  std::vector<dim_t> dim;
  for (auto d : in.dims()) {
    if (d == 0) {
      return MAKE_ERR("0 dimension is not supported");
    }
    dim.push_back(d);
  }

  LoadWeightResult result;
  result.t = glow::make_unique<Tensor>();

  if (in.data_type() == caffe2::TensorProto::FLOAT) {
    result.t->reset(ElemKind::FloatTy, dim);
  } else if (in.data_type() == caffe2::TensorProto::FLOAT16) {
    result.t->reset(ElemKind::Float16Ty, dim);
  } else if (in.data_type() == caffe2::TensorProto::INT32) {
    result.t->reset(ElemKind::Int32ITy, dim);
  } else if (in.data_type() == caffe2::TensorProto::INT64) {
    result.t->reset(ElemKind::Int64ITy, dim);
  } else if (in.data_type() == caffe2::TensorProto::UINT8) {
    result.t->reset(ElemKind::UInt8QTy, dim, 1.0, 0);
  } else if (in.data_type() == caffe2::TensorProto::INT8) {
    result.t->reset(ElemKind::Int8QTy, dim, 1.0, 0);
  } else {
    return MAKE_ERR(
        strFormat("FP32/16, Int32/64, Int8/Uint8 are supported. Got type"
                  " %s for tensor %s.",
                  caffe2::TensorProto_DataType_Name(in.data_type()).c_str(),
                  in.name().c_str()));
  }

  return Expected<LoadWeightResult>(std::move(result));
}

Expected<LoadWeightResult>
Caffe2ModelLoader::createAndSetTensorType(const caffe2::QTensorProto &in) {
  std::vector<dim_t> dim;
  for (auto d : in.dims()) {
    if (d == 0) {
      return MAKE_ERR("0 dimension qtensor is not supported");
    }
    dim.push_back(d);
  }

  if (in.axis() != 1) {
    return MAKE_ERR("axis must be 1");
  }

  dim_t qparams = static_cast<dim_t>(in.scales().size());

  RETURN_ERR_IF_NOT(qparams > 0, "No qparams found");

  RETURN_ERR_IF_NOT(in.biases().size() == in.scales().size(),
                    "Found a different number of biases and scales");

  LoadWeightResult result;
  result.t = glow::make_unique<Tensor>();

  float scale = 1.0;
  int32_t offset = 0;

  // If only one set of qparams is present then use them, otherwise load the
  // multiple sets of qparams as separate tensors and use the default qparams
  // for the main tensor result.t.
  // TODO: should we check is_multiparam?
  if (qparams == 1) {
    scale = in.scales(0);
    offset = in.biases(0);
  } else {
    RETURN_ERR_IF_NOT(!originNameToTQPMap_,
                      "Unsupported loading of uniqued qparams for vector of "
                      "scales/biases for " +
                          in.name());
    result.scales = glow::make_unique<Tensor>(ElemKind::FloatTy,
                                              llvm::makeArrayRef({qparams}));
    result.offsets = glow::make_unique<Tensor>(ElemKind::Int32ITy,
                                               llvm::makeArrayRef({qparams}));

    auto scalesH = result.scales->getHandle<float>();
    auto offsetsH = result.offsets->getHandle<int32_t>();
    for (size_t i = 0; i < qparams; ++i) {
      scalesH.raw(i) = in.scales(i);
      offsetsH.raw(i) = in.biases(i);
    }
  }

  if (in.data_type() == caffe2::TensorProto::INT8) {
    TypeRef outTy;
    ASSIGN_VALUE_OR_RETURN_ERR(
        outTy, ProtobufLoader::loadQuantTy(in.name(), ElemKind::Int8QTy, dim,
                                           scale, offset,
                                           /* shiftUInt8ToInt8 */ false));
    result.t->reset(*outTy);
  } else if (in.data_type() == caffe2::TensorProto::UINT8) {
    TypeRef outTy;
    ASSIGN_VALUE_OR_RETURN_ERR(
        outTy, ProtobufLoader::loadQuantTy(in.name(), ElemKind::Int8QTy, dim,
                                           scale, offset));
    result.t->reset(*outTy);
  } else if (in.data_type() == caffe2::TensorProto::INT32) {
    TypeRef outTy;
    ASSIGN_VALUE_OR_RETURN_ERR(
        outTy, ProtobufLoader::loadQuantTy(in.name(), ElemKind::Int32QTy, dim,
                                           scale, offset));
    result.t->reset(*outTy);
  } else {
    return MAKE_ERR("Only int8, uint8, and int32 qtensors are supported");
  }

  return Expected<LoadWeightResult>(std::move(result));
}
/// Translates the protocol buffer node \p t into a random access map.
template <typename T> static ArgumentDictionaryTy loadArgumentMap(const T &t) {
  ArgumentDictionaryTy dict;
  for (auto &arg : t.arg()) {
    dict[arg.name()] = &arg;
  }
  return dict;
}

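// An illustrative example (not from any particular model): for a typical
// Conv operator, loadArgumentMap() produces a dictionary along the lines of
//   {"stride" -> i:2, "pad" -> i:1, "kernel" -> i:3, "order" -> s:"NCHW"},
// which the helpers below then consume by argument name.

/// Translates the padding arguments of \p dict into four pad values in
/// {top, left, bottom, right} order. Accepts a single "pad" value, the four
/// explicit "pad_t"/"pad_l"/"pad_b"/"pad_r" values, or a "pads" list, and
/// defaults to all zeros when no padding argument is present.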
static Expected<std::vector<unsigned_t>> getPads(ArgumentDictionaryTy &dict) {
  if (dict.count("pad")) {
    int pad;
    ASSIGN_VALUE_OR_RETURN_ERR(pad, loadInt(dict.at("pad")));
    std::vector<unsigned_t> pads(4, pad);
    return pads;
  }
  if (dict.count("pad_t")) {
    std::vector<unsigned_t> pads(4);
    ASSIGN_VALUE_OR_RETURN_ERR(pads[0], loadInt(dict.at("pad_t")));
    RETURN_ERR_IF_NOT(dict.count("pad_l"), "missing pad_l");
    ASSIGN_VALUE_OR_RETURN_ERR(pads[1], loadInt(dict.at("pad_l")));
    RETURN_ERR_IF_NOT(dict.count("pad_b"), "missing pad_b");
    ASSIGN_VALUE_OR_RETURN_ERR(pads[2], loadInt(dict.at("pad_b")));
    RETURN_ERR_IF_NOT(dict.count("pad_r"), "missing pad_r");
    ASSIGN_VALUE_OR_RETURN_ERR(pads[3], loadInt(dict.at("pad_r")));
    return pads;
  }
  if (dict.count("pads")) {
    std::vector<unsigned_t> shape;
    ASSIGN_VALUE_OR_RETURN_ERR(shape, getShape<unsigned_t>(dict["pads"]));
    return shape;
  }
  // Return default value 0 for pads.
  return std::vector<unsigned_t>{0, 0, 0, 0};
}

/// Translates the "order" field of dictionary \p dict into a channel number.
static Expected<unsigned_t> getChannel(ArgumentDictionaryTy &dict) {
  std::string order = "NCHW"; // default
  auto orderIt = dict.find("order");
  if (orderIt != dict.end()) {
    ASSIGN_VALUE_OR_RETURN_ERR(order, loadStr(orderIt->second));
  }
  if (order == "NHWC") {
    return 3;
  } else if (order == "NCHW") {
    return 1;
  }
  return MAKE_ERR("Invalid order field");
}

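/// Reads a height/width pair named \p name from \p dict. Accepts a single
/// scalar (e.g. "kernel" -> {k, k}), an explicit pair ("kernel_h" and
/// "kernel_w" -> {h, w}), or a list ("kernels"), and falls back to
/// {\p defaultValue, \p defaultValue} when none is present.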
static Expected<std::vector<unsigned_t>> getSizeHW(ArgumentDictionaryTy &dict,
                                                   const std::string &name,
                                                   unsigned_t defaultValue) {
  if (dict.count(name)) {
    int value;
    ASSIGN_VALUE_OR_RETURN_ERR(value, loadInt(dict[name]));
    std::vector<unsigned_t> result(2, value);
    return result;
  }
  if (dict.count(name + "_h") && dict.count(name + "_w")) {
    std::vector<unsigned_t> result(2);
    ASSIGN_VALUE_OR_RETURN_ERR(result[0], loadInt(dict[name + "_h"]));
    ASSIGN_VALUE_OR_RETURN_ERR(result[1], loadInt(dict[name + "_w"]));
    return result;
  }
  if (dict.count(name + "s")) {
    return getShape<unsigned_t>(dict[name + "s"]);
  }
  return std::vector<unsigned_t>{defaultValue, defaultValue};
}

Expected<caffe2::NetDef>
Caffe2ModelLoader::loadProtoFile(const std::string &filename) {
  std::ifstream ff(filename, std::ios::in | std::ios::binary);
  RETURN_ERR_IF_NOT(ff,
                    strFormat("Can't find the model or network files for %s",
                              filename.c_str()));
  caffe2::NetDef net;

  bool parseNet = false;
  if (filename.find(".pbtxt") != std::string::npos) {
    std::string str((std::istreambuf_iterator<char>(ff)),
                    std::istreambuf_iterator<char>());
    parseNet = google::protobuf::TextFormat::ParseFromString(str, &net);
  } else {
    // Construct and configure a Coded Input Stream.
    google::protobuf::io::IstreamInputStream filestr(&ff);
    google::protobuf::io::CodedInputStream codedstr(&filestr);
    // Don't warn about large file sizes.
#if GOOGLE_PROTOBUF_VERSION >= 3002000
    codedstr.SetTotalBytesLimit(MAX_PROTO_SIZE);
#else
    codedstr.SetTotalBytesLimit(MAX_PROTO_SIZE, MAX_PROTO_SIZE);
#endif
    parseNet = net.ParseFromCodedStream(&codedstr);
  }

  RETURN_ERR_IF_NOT(parseNet, "Failed to parse the network descriptor.");
  return net;
}

Expected<caffe2::NetDef> Caffe2ModelLoader::loadProto(const void *c2Model,
                                                      size_t c2ModelSize) {
  google::protobuf::io::ArrayInputStream arrayStream(c2Model, c2ModelSize);
  // Construct and configure a Coded Input Stream.
  google::protobuf::io::CodedInputStream codedStream(&arrayStream);

  // Don't warn about large file sizes.
#if GOOGLE_PROTOBUF_VERSION >= 3002000
  codedStream.SetTotalBytesLimit(MAX_PROTO_SIZE);
#else
  codedStream.SetTotalBytesLimit(MAX_PROTO_SIZE, MAX_PROTO_SIZE);
#endif
  caffe2::NetDef MP;
  bool parseNet = MP.ParseFromCodedStream(&codedStream);
  RETURN_ERR_IF_NOT(parseNet, "Failed to parse NetDef");
  return MP;
}

Expected<bool> Caffe2ModelLoader::getBroadcast(ArgumentDictionaryTy &dict) {
  if (!dict.count("broadcast")) {
    return false;
  }
  int broadcast;
  ASSIGN_VALUE_OR_RETURN_ERR(broadcast, loadInt(dict.at("broadcast")));
  return broadcast == 1;
}

bool Caffe2ModelLoader::hasMultidirectionalBroadcast(
    const llvm::StringRef typeName) {
  (void)typeName;
  return false;
}

const std::string Caffe2ModelLoader::opErrMsg(const caffe2::OperatorDef &op,
                                              const std::string &errMsg) {
  const std::string &opName = loadOperatorName(op);
  return strFormat(" [Operator-'%s'] : %s ", opName.c_str(), errMsg.c_str());
}

// Caffe2 PRelu
// https://github.com/pytorch/pytorch/blob/master/caffe2/operators/prelu_op.cc
Error Caffe2ModelLoader::loadPRelu(const caffe2::OperatorDef &op,
                                   ArgumentDictionaryTy &dict) {
  const std::string &opName = loadOperatorName(op);

  NodeValue in;
  ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));

  NodeValue slope;
  ASSIGN_VALUE_OR_RETURN_ERR(slope, getNodeValueByName(op.input(1)));

  // Do broadcasting.
  auto targetDim = in.dims();
  // Set the axis assuming the input is in NCHW format.
  int axis = 1;
  auto *finalSlope = G_->createBroadcast(opName, slope, targetDim, axis);
  auto *R = G_->createPRELU(opName, in, finalSlope);
  RETURN_IF_ERR(addNodeAsOutput(op, R));
  return Error::success();
}

Error Caffe2ModelLoader::loadSoftmax(const caffe2::OperatorDef &op,
                                     ArgumentDictionaryTy &dict) {
  const std::string &opName = loadOperatorName(op);

  NodeValue in;
  ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));

  RETURN_ERR_IF_NOT(
      in.dims().size() >= 2,
      opErrMsg(op,
               strFormat(
                   "SoftMax input dims must be >= 2, but found input dims %zu ",
                   in.dims().size())));

  // Create a constant to store labels to be used in SoftMaxGradNode.
  auto *selected = G_->createSplat(
      opName + ".selected",
      mod_.uniqueType(ElemKind::Int64ITy, {in.dims()[0], 1}), 0.f);

  int axis = 1;
  if (dict.count("axis")) {
    ASSIGN_VALUE_OR_RETURN_ERR(axis,
                               loadAxis<int>(dict["axis"], in.dims().size()));
  }

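  // Glow's SoftMax expects a 2D input, so flatten around the axis and reshape
  // back afterwards. For example (illustrative): an input of dims {2, 3, 4}
  // with axis = 1 is flattened to {2, 12}, SoftMax is applied row-wise, and
  // the result is reshaped back to {2, 3, 4}.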
  auto *FN = G_->createFlatten(opName + ".reshapeInput", in, axis);
  auto *SM = G_->createSoftMax(opName, FN, selected);

  // The output should have the same shape as the original input.
  auto origInDims = in.getType()->dims();
  auto *RN = G_->createReshape(opName + ".reshapeOutput", SM, origInDims);
  RETURN_IF_ERR(addNodeAsOutput(op, RN));
  return Error::success();
}

Error Caffe2ModelLoader::loadConv(const caffe2::OperatorDef &op,
                                  ArgumentDictionaryTy &dict) {
  const std::string &opName = loadOperatorName(op);

  // Load the inputs:
  std::vector<unsigned_t> strides;
  ASSIGN_VALUE_OR_RETURN_ERR(strides, getSizeHW(dict, "stride", 1));
  std::vector<unsigned_t> pads;
  ASSIGN_VALUE_OR_RETURN_ERR(pads, getPads(dict));
  std::vector<unsigned_t> kernels;
  ASSIGN_VALUE_OR_RETURN_ERR(kernels, getSizeHW(dict, "kernel", 0));
  unsigned_t group = 1;
  if (dict.count("group")) {
    ASSIGN_VALUE_OR_RETURN_ERR(group, loadInt(dict["group"]));
  }
  std::string order = "NCHW";
  if (dict.count("order")) {
    ASSIGN_VALUE_OR_RETURN_ERR(order, loadStr(dict["order"]));
  }
  std::vector<unsigned_t> dilations;
  ASSIGN_VALUE_OR_RETURN_ERR(dilations,
                             getDilations(dict, std::vector<unsigned_t>{1, 1}));

  NodeValue in;
  ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));

  NodeValue w;
  ASSIGN_VALUE_OR_RETURN_ERR(w, getConstantByName(op.input(1)));

  // Transpose the weights to the right format. Glow expects to read the
  // weights in the format CRSK.
  // C - output_depth, R - filter_height, S - filter_width, K - input_depth.
  // Caffe2 "Conv" op always stores the weight as CKRS.
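  // For example (illustrative): a {64, 3, 7, 7} CKRS filter becomes a
  // {64, 7, 7, 3} CRSK filter under the NCHW2NHWC shuffle {0, 2, 3, 1}.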
  w = G_->createTranspose(w.getNode()->getName().str() + "_NHWC", w, NCHW2NHWC,
                          "NHWC");

  // The structure of the conv weights is: CRSK. We take the C, which is the
  // number of filters. We use this value to calculate the size of the bias
  // if it is not specified.
  dim_t depth = w.dims()[0];

  // We expect the input to be NHWC.
  NodeValue finalIn;
  if (order == "NCHW") {
    finalIn = G_->createTranspose(opName, in, NCHW2NHWC)->getResult();
  } else {
    finalIn = in;
  }

  TypeRef finalInType = finalIn.getType();

  // Calculate the size and allocate the output buffer.
  ShapeNHWC idim = ShapeNHWC(finalInType->dims());
  auto outSz = calculateConvPoolOutputDims(idim.h, idim.w, kernels, strides,
                                           pads, dilations);
  std::array<dim_t, 4> outDims = {{idim.n, outSz.first, outSz.second, depth}};

  // Try to find a loaded bias constant.
  NodeValue bias(nullptr);
  if (op.input_size() > 2) {
    const auto &biasName = op.input(2);
    bias = getConstantByNameOrNull(biasName);
  }
  // Construct the bias constant if one wasn't found.
  if (!bias.getNode()) {
    TypeRef bTy = mod_.uniqueType(ElemKind::FloatTy, {depth});
    bias = G_->createSplat(opName + ".bias", bTy, 0.f);
  }

  TypeRef outTy = mod_.uniqueType(ElemKind::FloatTy, outDims);

  Node *node = G_->createConv(opName, finalIn, w, bias, outTy, kernels, strides,
                              pads, group, dilations);
  if (op.type() == "ConvRelu") {
    node = G_->createRELU(opName + ".relu", node);
  }
  if (order == "NCHW") {
    // Transpose the output back.
    node = G_->createTranspose(opName, node, NHWC2NCHW);
  }
  RETURN_IF_ERR(addNodeAsOutput(op, node));
  return Error::success();
}

Error Caffe2ModelLoader::loadConvQuantized(const caffe2::OperatorDef &op,
                                           ArgumentDictionaryTy &dict) {
  const std::string &opName = loadOperatorName(op);

  // Load the inputs:
  std::vector<unsigned_t> strides;
  ASSIGN_VALUE_OR_RETURN_ERR(strides, getSizeHW(dict, "stride", 1));
  std::vector<unsigned_t> pads;
  ASSIGN_VALUE_OR_RETURN_ERR(pads, getPads(dict));
  std::vector<unsigned_t> kernels;
  ASSIGN_VALUE_OR_RETURN_ERR(kernels, getSizeHW(dict, "kernel", 0));
  unsigned_t group = 1;
  if (dict.count("group")) {
    ASSIGN_VALUE_OR_RETURN_ERR(group, loadInt(dict["group"]));
  }
  std::string order = "NCHW";
  if (dict.count("order")) {
    ASSIGN_VALUE_OR_RETURN_ERR(order, loadStr(dict["order"]));
  }
  bool quantizeGroupwise = false;
  if (dict.count("quantize_groupwise")) {
    ASSIGN_VALUE_OR_RETURN_ERR(quantizeGroupwise,
                               loadInt(dict["quantize_groupwise"]));
  }
  std::vector<unsigned_t> dilations;
  ASSIGN_VALUE_OR_RETURN_ERR(dilations,
                             getDilations(dict, std::vector<unsigned_t>{1, 1}));

  // Group quantization only applies if there is more than one group.
  quantizeGroupwise &= group > 1;

  NodeValue in;
  ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));

  NodeValue w;
  ASSIGN_VALUE_OR_RETURN_ERR(w, getConstantByName(op.input(1)));

  // Transpose the weights to the right format. Glow expects to read the
  // weights in the format CRSK.
  // C - output_depth, R - filter_height, S - filter_width, K - input_depth.
  // For Caffe2 "Int8Conv" and "Int8ConvRelu", the weights always follow the
  // "order" arg.
  if (order != "NHWC") {
    w = G_->createTranspose(w.getNode()->getName().str() + "_NHWC", w,
                            NCHW2NHWC, "NHWC");
  }

  // The structure of the conv weights is: CRSK. We take the C, which is the
  // number of filters. We use this value to calculate the size of the bias
  // if it is not specified.
  dim_t depth = w.dims()[0];

  // We expect the input to be NHWC.
  NodeValue finalIn;
  if (order == "NCHW") {
    finalIn = G_->createTranspose(opName, in, NCHW2NHWC)->getResult();
  } else {
    finalIn = in;
  }

  TypeRef finalInType = finalIn.getType();

  // Calculate the size and allocate the output buffer.
  ShapeNHWC idim = ShapeNHWC(finalInType->dims());
  auto outSz = calculateConvPoolOutputDims(idim.h, idim.w, kernels, strides,
                                           pads, dilations);
  std::array<dim_t, 4> outDims = {{idim.n, outSz.first, outSz.second, depth}};

  TypeRef outTy;

  RETURN_ERR_IF_NOT(dict.count("Y_zero_point"),
                    opErrMsg(op,
                             "ConvQuantized "
                             "missing zero point for quantized output type"));
  RETURN_ERR_IF_NOT(dict.count("Y_scale"),
                    opErrMsg(op, "ConvQuantized "
                                 "missing Y_scale for quantized output type"));

  // Try to find a loaded bias constant.
  NodeValue bias(nullptr);
  if (op.input_size() > 2) {
    const auto &biasName = op.input(2);
    bias = getConstantByNameOrNull(biasName);
  }
  // Construct the bias constant if one wasn't found.
  if (!bias.getNode()) {
    TypeRef bTy = mod_.uniqueType(ElemKind::Int32QTy, {depth}, 1.0, 0);
    bias = G_->createSplat(opName + "_conv.bias", bTy, 0.f);
  }

  RETURN_ERR_IF_NOT(
      bias.getType()->size() == depth,
      opErrMsg(op, strFormat("Loaded bias tensor of incorrect size %d ",
                             int(bias.getType()->size()))));

  // Construct output type
  ASSIGN_VALUE_OR_RETURN_ERR(
      outTy, loadQuantTy(opName, ElemKind::Int8QTy, outDims, dict));

  Node *node;

  if (quantizeGroupwise) {
    auto wScalesName = strFormat("%s_loaded_scales", op.input(1).c_str());
    auto wOffsetsName = strFormat("%s_loaded_offsets", op.input(1).c_str());
    Constant *wScales;
    Constant *wOffsets;
    ASSIGN_VALUE_OR_RETURN_ERR(wScales, getConstantByName(wScalesName));
    ASSIGN_VALUE_OR_RETURN_ERR(wOffsets, getConstantByName(wOffsetsName));

    // Quantize the filter automatically (only if it is float). The bias is NOT
    // quantized automatically and is left at the disposal of each Backend to
    // quantize it later using custom logic.
    node = G_->createChannelwiseQuantizedConv(
        opName, finalIn, w, bias, wScales, wOffsets, /* biasScales */ nullptr,
        /* biasOffsets */ nullptr, outTy, kernels, strides, pads, group,
        dilations, /* quantizeFilter */ true, /* quantizeBias */ false);
  } else {
    // If the bias isn't quantized for a non group quantized conv, quantize it.
    if (bias.getElementType() == ElemKind::FloatTy) {
      int32_t biasOffset = 0;
      float biasScale = finalInType->getScale() * w.getType()->getScale();

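      // This follows the standard convention for quantized convolution: with
      // the bias scale set to inScale * wScale (and a zero offset), the
      // quantized bias, bias_q = round(bias_f / (inScale * wScale)), adds
      // directly into the int32 accumulator of the conv.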
      auto biasTy = mod_.uniqueType(ElemKind::Int32QTy, bias.dims(), biasScale,
                                    biasOffset);
      bias = G_->createQuantize(opName + "_conv.bias", bias, biasTy);
    }

    node = G_->createConv(opName, finalIn, w, bias, outTy, kernels, strides,
                          pads, group, dilations);
  }

  if (op.type() == "Int8ConvRelu") {
    node = G_->createRELU(opName + ".relu", node);
  }

  if (order == "NCHW") {
    // Transpose the output back.
    node = G_->createTranspose(opName, node, NHWC2NCHW);
  }
  RETURN_IF_ERR(addNodeAsOutput(op, node));
  return Error::success();
}

Error Caffe2ModelLoader::loadLayerNorm(const caffe2::OperatorDef &op,
                                       ArgumentDictionaryTy &dict) {
  const std::string &opName = loadOperatorName(op);

  NodeValue in;
  ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));

  unsigned_t axis = 1; // Caffe2 default.
  if (dict.count("axis")) {
    ASSIGN_VALUE_OR_RETURN_ERR(axis, loadInt(dict["axis"]));
    ASSIGN_VALUE_OR_RETURN_ERR(axis,
                               getPositiveAxis<int>(axis, in.dims().size()));
  }

  // Feature shape is based on the input dims, from the axis to the end.
  ShapeVector featDims;
  for (dim_t i = axis, e = in.dims().size(); i < e; ++i) {
    featDims.push_back(in.dims()[i]);
  }
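  // For example (illustrative): an input of dims {2, 3, 4} with axis = 1
  // yields a feature shape of {3, 4} for the weight and the bias.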
  TypeRef featTy = mod_.uniqueTypeWithNewShape(in.getType(), featDims);

  NodeValue weight, bias;
  if (op.input_size() > 1) {
    RETURN_ERR_IF_NOT(op.input_size() == 3,
                      opErrMsg(op, "Must have both weight and bias"));

    ASSIGN_VALUE_OR_RETURN_ERR(weight, getNodeValueByName(op.input(1)));
    RETURN_ERR_IF_NOT(weight.getType() == featTy,
                      opErrMsg(op, "Invalid weight shape"));

    ASSIGN_VALUE_OR_RETURN_ERR(bias, getNodeValueByName(op.input(2)));
    RETURN_ERR_IF_NOT(bias.getType() == featTy,
                      opErrMsg(op, "Invalid bias shape"));
  } else {
    // Caffe2 defaults to a weight of 1 and a bias of 0.
    weight = G_->createSplat(opName + "_weight_ones", featTy, 1.0)->getResult();
    bias = G_->createSplat(opName + "_bias_zeros", featTy, 0.0)->getResult();
  }

  float eps = 0.001; // Caffe2 default.
  if (dict.count("epsilon")) {
    ASSIGN_VALUE_OR_RETURN_ERR(eps, loadFloat(dict["epsilon"]));
  }

  LayerNormalizationNode *node =
      G_->createLayerNormalization(opName, in.getType(), in, weight, bias, eps);

  // We only support one output for LayerNorm; the rest of the outputs are
  // ignored.
  RETURN_IF_ERR(addNodeAsOutput(op, node, /* numOutputs */ 1));

  return Error::success();
}

Expected<bool> Caffe2ModelLoader::foldOperator(const caffe2::OperatorDef &op) {
  const unsigned numInputs = op.input_size();
  const std::string &typeName = op.type();
  llvm::SmallVector<NodeValue, 4> inputs;
  inputs.reserve(numInputs);
  for (unsigned i = 0; i < numInputs; i++) {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(i)));
    inputs.push_back(in);
  }

  if (!isConstantFoldable(inputs, typeName)) {
    return false;
  }

  // Create a temporary lightweight loader to construct a function
  // representing the current op, and then constant fold the function using
  // the Interp backend.
  Function *tmpF = mod_.createFunction("eval_const_fold__");
  Caffe2ModelLoader tmpLoader(*tmpF, nullptr);
  bool foldStatus =
      !ERR_TO_BOOL(constantFoldInLoader<Caffe2ModelLoader, caffe2::OperatorDef>(
                       tmpF, tmpLoader, this, op),
                   /* log */ false);
  mod_.eraseFunction(tmpF);
  return foldStatus;
}

Error Caffe2ModelLoader::loadConvTranspose(const caffe2::OperatorDef &op,
                                           ArgumentDictionaryTy &dict) {
  const std::string &opName = loadOperatorName(op);

  // Load the inputs:
  std::vector<unsigned_t> strides;
  ASSIGN_VALUE_OR_RETURN_ERR(strides, getSizeHW(dict, "stride", 1));
  std::vector<unsigned_t> pads;
  ASSIGN_VALUE_OR_RETURN_ERR(pads, getPads(dict));
  std::vector<unsigned_t> kernels;
  ASSIGN_VALUE_OR_RETURN_ERR(kernels, getSizeHW(dict, "kernel", 0));
  unsigned_t group = 1;
  if (dict.count("group")) {
    ASSIGN_VALUE_OR_RETURN_ERR(group, loadInt(dict["group"]));
  }
  std::string order = "NCHW";
  if (dict.count("order")) {
    ASSIGN_VALUE_OR_RETURN_ERR(order, loadStr(dict["order"]));
  }
  std::vector<unsigned_t> dilations;
  ASSIGN_VALUE_OR_RETURN_ERR(dilations,
                             getDilations(dict, std::vector<unsigned_t>{1, 1}));

  NodeValue in;
  ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));

  NodeValue weight;
  ASSIGN_VALUE_OR_RETURN_ERR(weight, getConstantByName(op.input(1)));

  // Transpose the weights to the right format. Glow expects to read the
  // weights in the format CRSK.
  // C - output_depth, R - filter_height, S - filter_width, K - input_depth.
  // Caffe2 "ConvTranspose" op always stores the weight as KCRS.
  weight = G_->createTranspose(weight.getNode()->getName().str() + "_NHWC",
                               weight, CNHW2NHWC, "NHWC");

  // The structure of the conv weights is: CRSK. We take the C, which is the
  // number of filters. We use this value to calculate the size of the bias
  // if it is not specified.
  dim_t depth = weight.dims()[0];

  // We expect the input to be NHWC.
  NodeValue finalIn;
  if (order == "NCHW") {
    finalIn = G_->createTranspose(opName, in, NCHW2NHWC)->getResult();
  } else {
    finalIn = in;
  }

  TypeRef finalInType = finalIn.getType();

  // Calculate the size and allocate the output buffer.
  ShapeNHWC idim = ShapeNHWC(finalInType->dims());
  auto outSz = calculateConvTransposeOutputDims(idim.h, idim.w, kernels,
                                                strides, pads, dilations);
  std::array<dim_t, 4> outDims = {{idim.n, outSz.first, outSz.second, depth}};

  // Try to find a loaded bias constant.
  NodeValue bias(nullptr);
  if (op.input_size() > 2) {
    const auto &biasName = op.input(2);
    bias = getConstantByNameOrNull(biasName);
  }
  // Construct the bias constant if one wasn't found.
  if (!bias.getNode()) {
    TypeRef bTy = mod_.uniqueType(ElemKind::FloatTy, {depth});
    bias = G_->createSplat(opName + "_conv.bias", bTy, 0.f);
  }

  TypeRef outTy = mod_.uniqueType(ElemKind::FloatTy, outDims);

  Node *node =
      G_->createConvTranspose(opName, finalIn, weight, bias, outTy, kernels,
                              strides, pads, group, dilations);

  if (order == "NCHW") {
    // Transpose the output back.
    node = G_->createTranspose(opName, node, NHWC2NCHW);
  }
  RETURN_IF_ERR(addNodeAsOutput(op, node));
  return Error::success();
}

Error Caffe2ModelLoader::loadOperator(const caffe2::OperatorDef &op) {
  ArgumentDictionaryTy dict = loadArgumentMap(op);
  const std::string &typeName = op.type();
  mod_.registerOriginalName(op.name());

  // Check if the operator is supported in the parent class,
  // CommonOperatorLoader.
  bool loadCommonOperatorSuccess;
  ASSIGN_VALUE_OR_RETURN_ERR(loadCommonOperatorSuccess,
                             tryLoadCommonOperator(typeName, op, dict));
  if (loadCommonOperatorSuccess) {
    return Error::success();
  }
  const std::string &opName = loadOperatorName(op);

  if (typeName == "Gelu") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    Node *node = G_->createGelu(opName, in);

    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "Conv" || typeName == "ConvRelu") {
    return loadConv(op, dict);
  }

  if (typeName == "Softmax") {
    return loadSoftmax(op, dict);
  }

  if (typeName == "PRelu") {
    return loadPRelu(op, dict);
  }

  if (typeName == "ConvTranspose") {
    return loadConvTranspose(op, dict);
  }

  if (typeName == "Int8Conv" || typeName == "Int8ConvRelu") {
    return loadConvQuantized(op, dict);
  }

  if (typeName == "LayerNorm") {
    return loadLayerNorm(op, dict);
  }

  if (typeName == "Int8SumRelu") {
    RETURN_ERR_IF_NOT(op.input_size() == 2,
                      opErrMsg(op, "Only Sum of 2 inputs is supported."));
    RETURN_ERR_IF_NOT(
        dict.count("Y_zero_point"),
        opErrMsg(op, "missing zero point for quantized output type"));
    RETURN_ERR_IF_NOT(
        dict.count("Y_scale"),
        opErrMsg(op, "missing Y_scale for quantized output type"));
    NodeValue in0;
    ASSIGN_VALUE_OR_RETURN_ERR(in0, getNodeValueByName(op.input(0)));
    NodeValue in1;
    ASSIGN_VALUE_OR_RETURN_ERR(in1, getNodeValueByName(op.input(1)));
    auto outDims = in0.getType()->dims();
    TypeRef outTy;
    ASSIGN_VALUE_OR_RETURN_ERR(
        outTy, loadQuantTy(opName, ElemKind::Int8QTy, outDims, dict));
    auto *add = G_->createAdd(opName + ".sum", outTy, in0, in1);
    auto *relu = G_->createRELU(opName + ".relu", add);
    RETURN_IF_ERR(addNodeAsOutput(op, relu));
    return Error::success();
  }

  if (typeName == "Int8Relu") {
    RETURN_ERR_IF_NOT(op.input_size() == 1,
                      opErrMsg(op, "Only one input is supported."));
    RETURN_ERR_IF_NOT(
        dict.count("Y_zero_point"),
        opErrMsg(op, "missing zero point for quantized output type"));
    RETURN_ERR_IF_NOT(
        dict.count("Y_scale"),
        opErrMsg(op, "missing Y_scale for quantized output type"));
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    auto outDims = in.getType()->dims();
    TypeRef outTy;
    ASSIGN_VALUE_OR_RETURN_ERR(
        outTy, loadQuantTy(opName, ElemKind::Int8QTy, outDims, dict));
    auto *relu = G_->createRELU(opName, in, outTy);
    RETURN_IF_ERR(addNodeAsOutput(op, relu));
    return Error::success();
  }

  if (typeName == "Int8Quantize") {
    RETURN_ERR_IF_NOT(
        op.input_size() == 1,
        opErrMsg(op, "Glow only supports Int8Quantize with 1 input"));
    RETURN_ERR_IF_NOT(
        dict.count("Y_zero_point"),
        opErrMsg(op, "missing zero point for quantized output type"));
    RETURN_ERR_IF_NOT(
        dict.count("Y_scale"),
        opErrMsg(op, "missing Y_scale for quantized output type"));
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    auto outDims = in.getType()->dims();
    TypeRef outTy;
    ASSIGN_VALUE_OR_RETURN_ERR(
        outTy, loadQuantTy(opName, ElemKind::Int8QTy, outDims, dict));
    Node *N = G_->createQuantize(opName, in, outTy);
    RETURN_IF_ERR(addNodeAsOutput(op, N));
    return Error::success();
  }

  if (typeName == "Int8Dequantize") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    auto *node = G_->createDequantize(opName, in, ElemKind::FloatTy);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "MaxPool" || typeName == "AveragePool" ||
      typeName == "Int8MaxPool" || typeName == "Int8AveragePool") {
    // Load the inputs:
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    std::vector<unsigned_t> strides;
    ASSIGN_VALUE_OR_RETURN_ERR(strides, getSizeHW(dict, "stride", 1));
    std::vector<unsigned_t> kernels;
    ASSIGN_VALUE_OR_RETURN_ERR(kernels, getSizeHW(dict, "kernel", 0));
    std::vector<unsigned_t> pads;
    ASSIGN_VALUE_OR_RETURN_ERR(pads, getPads(dict));
    bool countIncludePads;
    ASSIGN_VALUE_OR_RETURN_ERR(
        countIncludePads, getCountIncludePads(dict, /* defaultValue */ true));
    std::string order = "NCHW";
    if (dict.count("order")) {
      ASSIGN_VALUE_OR_RETURN_ERR(order, loadStr(dict["order"]));
    }
    // We expect the input to be NHWC.
    NodeValue finalIn;
    if (order == "NCHW") {
      finalIn = G_->createTranspose(opName, in, NCHW2NHWC)->getResult();
    } else {
      finalIn = in;
    }

    // If 'global_pooling' is set then the operation will pool over the size
    // of the input by doing: kernels = {height, width}.
    if (dict.count("global_pooling")) {
      auto Ty = in.getType();
      kernels[0] = Ty->dims()[2];
      kernels[1] = Ty->dims()[3];
    }

    // Check the padding style.
    if (dict.count("legacy_pad")) {
      int mode;
      ASSIGN_VALUE_OR_RETURN_ERR(mode, loadInt(dict["legacy_pad"]));
      // Caffe1 (legacy) rounded up, while Caffe2 rounds down.
      // This style is deprecated according to caffe2's caffe2_legacy.proto
      // definition.
      if (static_cast<LegacyPaddingMode>(mode) ==
          LegacyPaddingMode::CAFFE_LEGACY_POOLING) {
        return MAKE_ERR(opErrMsg(op,
                                 "MaxPool nodes with legacy caffe padding are "
                                 "deprecated and not supported."));
      }
    }

    Node *node = nullptr;

    if (typeName == "Int8MaxPool" || typeName == "Int8AveragePool") {
      // Create the node with quantized type.
      RETURN_ERR_IF_NOT(
          dict.count("Y_zero_point"),
          opErrMsg(op, "missing zero point for quantized output type"));
      RETURN_ERR_IF_NOT(
          dict.count("Y_scale"),
          opErrMsg(op, "missing Y_scale for quantized output type"));

      TypeRef finalInType = finalIn.getType();
      ShapeNHWC idim = ShapeNHWC(finalInType->dims());
      auto outSz =
          calculateConvPoolOutputDims(idim.h, idim.w, kernels, strides, pads);
      std::array<dim_t, 4> outDims = {
          {idim.n, outSz.first, outSz.second, idim.c}};
      if (typeName == "Int8MaxPool") {
        // Int8MaxPool output quantization should be the same as the input,
        // so just ignore the given params.
        node = G_->createMaxPool(opName, finalIn, kernels, strides, pads);
      } else {
        TypeRef outTy;
        ASSIGN_VALUE_OR_RETURN_ERR(
            outTy, loadQuantTy(opName, ElemKind::Int8QTy, outDims, dict));
        node = G_->createAvgPool(opName, finalIn, outTy, kernels, strides, pads,
                                 NHWC, countIncludePads);
      }
    } else if (typeName == "MaxPool") {
      node = G_->createMaxPool(opName, finalIn, kernels, strides, pads);
    } else {
      node = G_->createAvgPool(opName, finalIn, kernels, strides, pads, NHWC,
                               countIncludePads);
    }
    if (order == "NCHW") {
      unsigned resIdx = 0;
      if (llvm::isa<MaxPoolNode>(node)) {
        resIdx = MaxPoolNode::ResultIdx;
      } else if (llvm::isa<AvgPoolNode>(node)) {
        resIdx = AvgPoolNode::ResultIdx;
      } else {
        return MAKE_ERR("Expected either Max or Avg Pool.");
      }
      // Transpose the output back.
      node = G_->createTranspose(opName, node->getNthResult(resIdx), NHWC2NCHW);
    }
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "SpatialBN") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    Constant *scale;
    ASSIGN_VALUE_OR_RETURN_ERR(scale, getConstantByName(op.input(1)));
    Constant *bias;
    ASSIGN_VALUE_OR_RETURN_ERR(bias, getConstantByName(op.input(2)));
    Constant *mean;
    ASSIGN_VALUE_OR_RETURN_ERR(mean, getConstantByName(op.input(3)));
    Constant *var;
    ASSIGN_VALUE_OR_RETURN_ERR(var, getConstantByName(op.input(4)));
    float epsilon = 1e-5f; // default
    auto epsilonIt = dict.find("epsilon");
    if (epsilonIt != dict.end()) {
      ASSIGN_VALUE_OR_RETURN_ERR(epsilon, loadFloat(epsilonIt->second));
    }

    unsigned_t channel;
    ASSIGN_VALUE_OR_RETURN_ERR(channel, getChannel(dict));
    auto *node = G_->createBatchNormalization(
        opName, in.getType(), in, bias, scale, mean, var, channel, epsilon);

    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "Bucketize") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    RETURN_ERR_IF_NOT(
        dict.count("boundaries"),
        opErrMsg(op, "Bucketize: Expected a boundaries member vector"));
    std::vector<float> boundaries;
    ASSIGN_VALUE_OR_RETURN_ERR(boundaries, getFloats(dict["boundaries"]));
    auto *node = G_->createBucketizeNode(opName, in, boundaries);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "ResizeNearest") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));

    std::string order = "NCHW";
    if (dict.count("order")) {
      ASSIGN_VALUE_OR_RETURN_ERR(order, loadStr(dict["order"]));
    }
    // We expect the input to be NHWC.
    NodeValue finalIn;
    if (order == "NCHW") {
      finalIn = G_->createTranspose(opName, in, NCHW2NHWC)->getResult();
    } else {
      finalIn = in;
    }

    float heightScale;
    ASSIGN_VALUE_OR_RETURN_ERR(heightScale, loadFloat(dict["height_scale"]));
    float widthScale;
    ASSIGN_VALUE_OR_RETURN_ERR(widthScale, loadFloat(dict["width_scale"]));

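    // The scale vector is in NHWC order; the batch and channel dimensions
    // keep a scale of 1.0 so that only the spatial dimensions are resized.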
    std::vector<float> scales;
    scales.push_back(1.0f);
    scales.push_back(heightScale);
    scales.push_back(widthScale);
    scales.push_back(1.0f);

    auto *node = G_->createResizeNearest(opName, finalIn, scales);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "Concat") {
    const unsigned numInputs = op.input_size();
    llvm::SmallVector<NodeValue, 4> inputs;
    inputs.reserve(numInputs);
    for (unsigned i = 0; i < numInputs; i++) {
      NodeValue in;
      ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(i)));
      inputs.push_back(std::move(in));
    }

    // If axis exists it takes priority over channel.
    unsigned_t channel;
    if (dict.count("axis")) {
      ASSIGN_VALUE_OR_RETURN_ERR(channel, loadInt(dict["axis"]));
    } else {
      ASSIGN_VALUE_OR_RETURN_ERR(channel, getChannel(dict));
    }

    unsigned_t addAxis = 0;
    if (dict.count("add_axis")) {
      ASSIGN_VALUE_OR_RETURN_ERR(addAxis, loadInt(dict["add_axis"]));
    }

    Node *node{nullptr};

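    // A worked example of the add_axis path below (illustrative): three
    // inputs of dims {2, 4} with channel = 1 are concatenated to {2, 12}
    // and then reshaped to {2, 3, 4}, i.e. a new dimension of size
    // numInputs is inserted at the channel position.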
    if (addAxis) {
      // When add_axis is used, we have to add a new dimension before the
      // axis, instead of merging on the axis.
      std::vector<dim_t> outputDims = inputs[0].dims();

      if (channel < outputDims.size()) {
        unsigned i = 0;
        for (const auto &input : inputs) {
          RETURN_ERR_IF_NOT(
              outputDims[channel] == input.dims()[channel],
              opErrMsg(op,
                       strFormat("inputs must all have the same dims for "
                                 "concat with add_axis: input 0 (%s) vs "
                                 "input %u (%s), %u vs %u, channel = %u",
                                 op.input(0).c_str(), i, op.input(i).c_str(),
                                 static_cast<unsigned>(outputDims[channel]),
                                 static_cast<unsigned>(input.dims()[channel]),
                                 channel)));
          ++i;
        }
        outputDims.insert(outputDims.begin() + channel, numInputs);
        node = G_->createConcat(opName, inputs, channel);
        node = G_->createReshape(opName, node, outputDims);
      } else if (channel == outputDims.size()) {
        // We convert inputs into 2D arrays with single columns, thus the
        // number of rows will be equal to the product of all original dims.
        // Every converted input will look like a vertical line of numbers.
        const auto flatVerticalShape = flattenCdr(inputs[0].dims(), channel);
        llvm::SmallVector<NodeValue, 4> verticalInputs;
        for (auto &input : inputs) {
          verticalInputs.push_back(G_->createReshape(
              opName, input,
              {flatVerticalShape.first, flatVerticalShape.second}));
        }

        // We glue together the vertical lines, so the number of columns
        // becomes equal to the number of original inputs.
        node = G_->createConcat(opName, verticalInputs, 1);

        // Reshape to convert to the desired shape.
        outputDims.push_back(numInputs);
        node = G_->createReshape(opName, node, outputDims);
      } else {
        return MAKE_ERR(opErrMsg(
            op, strFormat("Invalid input: channel (=%u) > number of dims (=%u)",
                          channel, static_cast<unsigned>(outputDims.size()))));
      }
    } else {
      // In the normal case (i.e. when we are not adding a new dimension),
      // plain createConcat() suffices.
      node = G_->createConcat(opName, inputs, channel);
    }

    // If we added the axis then node is a Reshape, otherwise it should be a
    // Concat.
    RETURN_ERR_IF_NOT(
        llvm::isa<ConcatNode>(node) || llvm::isa<ReshapeNode>(node),
        opErrMsg(op,
                 "Internal error: Node should either be a Concat or Reshape."));
    NodeValue finalNode = llvm::isa<ConcatNode>(node)
                              ? NodeValue(node, ConcatNode::ResultIdx)
                              : NodeValue(node, ReshapeNode::ResultIdx);
    nodeValueByName_[op.output(0)] = finalNode;
    // Concat may have a second output in Caffe2 (split_info), but we don't
    // use it for inference.
    return Error::success();
  }

  if (typeName == "FC" || typeName == "FCTransposed" || typeName == "Int8FC" ||
      typeName == "FbFCPacked") {
    RETURN_ERR_IF_NOT(op.input_size() == 3,
                      "Glow only supports FC with 3 inputs");
    // Load the inputs:
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));

    auto originalInputDims = in.getType()->dims();

    size_t axis = 1;
    if (dict.count("axis")) {
      ASSIGN_VALUE_OR_RETURN_ERR(axis, loadInt(dict["axis"]));
    }

    // Load the weights.
    unsigned_t axis_w = 1;
    if (dict.count("axis_w")) {
      ASSIGN_VALUE_OR_RETURN_ERR(axis_w, loadInt(dict["axis_w"]));
    }

    NodeValue W;
    if (hasConstantByName(op.input(1))) {
      ASSIGN_VALUE_OR_RETURN_ERR(W, getConstantByName(op.input(1)));
    } else {
      ASSIGN_VALUE_OR_RETURN_ERR(W, getNodeValueByName(op.input(1)));
    }

    // Caffe2 stores the transposed W matrix. Here we first coerce W to a
    // 2D matrix size if necessary and then transpose it back.
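    // For example (illustrative): a W of dims {4, 2, 3} with axis_w = 1 is
    // flattened by flattenCdr() to the 2D pair (4, 6) before being
    // transposed.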
    auto wDims = flattenCdr(W.dims(), axis_w);
    if (W.dims().size() > 2) {
      W = G_->createReshape(W.getNode()->getName(), W,
                            {wDims.first, wDims.second});
    }

    if (typeName == "FC" || typeName == "Int8FC" || typeName == "FbFCPacked") {
      W = G_->createTranspose(W.getNode()->getName(), W, {1, 0});
    }

    NodeValue B;
    if (hasConstantByName(op.input(2))) {
      ASSIGN_VALUE_OR_RETURN_ERR(B, getConstantByName(op.input(2)));
    } else {
      ASSIGN_VALUE_OR_RETURN_ERR(B, getNodeValueByName(op.input(2)));
    }

    Node *node = nullptr;
    if (typeName == "Int8FC") {
      // Create a node with quantized type.
      auto outputDims = flattenCdr(in.dims(), axis);
      TypeRef outTy;
      ASSIGN_VALUE_OR_RETURN_ERR(
          outTy, loadQuantTy(opName, ElemKind::Int8QTy,
                             {outputDims.first, B.dims()[0]}, dict));
      int dequantizeOutput = 0;
      if (dict.count("dequantize_output")) {
        ASSIGN_VALUE_OR_RETURN_ERR(dequantizeOutput,
                                   loadInt(dict["dequantize_output"]));
      }
      if (dequantizeOutput == 1) {
        node = G_->createDynamicQuantizedFullyConnected(opName, in, W, B);
      } else {
        node = G_->createFullyConnected(opName, in, W, B, outTy, axis);
      }
    } else if (typeName == "FbFCPacked") {
      RETURN_ERR_IF_NOT(W.getElementType() == ElemKind::Float16Ty,
                        opErrMsg(op, "Expected float16 weights."));
      auto fp16InputType =
          mod_.uniqueType(ElemKind::Float16Ty, in.getType()->dims());
      in = G_->createConvertTo(opName + ".ConvertInput", in, fp16InputType);

      auto fp16BiasType = mod_.uniqueType(ElemKind::Float16Ty, B.dims());
      auto *fp16Bias =
          G_->createConvertTo(opName + ".ConvertBias", B, fp16BiasType);

      auto outputDims = flattenCdr(in.dims(), axis);
      TypeRef OT =
          mod_.uniqueType(ElemKind::Float16Ty, {outputDims.first, B.dims()[0]});
      auto fc = G_->createFullyConnected(opName, in, W, fp16Bias, OT, axis);
      auto outputType =
          mod_.uniqueType(ElemKind::FloatTy, fc->getResult().dims());
      node = G_->createConvertTo(opName + ".ConvertOutput", fc, outputType);
    } else {
      auto outputDims = flattenCdr(in.dims(), axis);
      TypeRef outputType =
          mod_.uniqueType(ElemKind::FloatTy, {outputDims.first, B.dims()[0]});
      node = G_->createFullyConnected(opName, in, W, B, outputType, axis);
    }

    // If the number of original input dims is greater than 2, expand the
    // output dims back with the same axis.
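    // For example (illustrative): an input of dims {2, 3, 16} with axis = 2
    // is seen by the FC as {6, 16}, and its {6, outDim} result is reshaped
    // back here to {2, 3, outDim}.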
    if (axis != 1) {
      llvm::SmallVector<dim_t, max_tensor_dimensions> reshapeDims;
      size_t totalReshapeSize = 1;
      for (size_t i = 0; i < axis; ++i) {
        auto d = originalInputDims[i];
        reshapeDims.push_back(d);
        totalReshapeSize *= static_cast<dim_t>(d);
      }

      size_t finalDim = typeName == "FCTransposed" ? wDims.second : wDims.first;

      reshapeDims.push_back(finalDim);
      totalReshapeSize *= finalDim;

      size_t totalOriginalOutputSize = node->getNthResult(0).getType()->size();
      RETURN_ERR_IF_NOT(
          totalReshapeSize == totalOriginalOutputSize,
          opErrMsg(op, strFormat("Cannot reshape from size %lu to size %lu",
                                 totalOriginalOutputSize, totalReshapeSize)));

      node = G_->createReshape(opName + ".fc.out", node, reshapeDims);
    }

    // Save the outputs:
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "ChannelShuffle") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));

    size_t group;
    ASSIGN_VALUE_OR_RETURN_ERR(group, loadInt(dict["group"]));
    size_t kernel;
    ASSIGN_VALUE_OR_RETURN_ERR(kernel, loadInt(dict["kernel"]));

    Node *node = G_->createChannelShuffle(opName, in, group, kernel);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "Squeeze") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    std::vector<dim_t> dims;
    ASSIGN_VALUE_OR_RETURN_ERR(dims, getShape<dim_t>(dict["dims"]));
    Node *node = G_->createSqueeze(opName, in, dims);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "Log") {
    // Load the inputs:
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    // Create the log:
    auto *R = G_->createLog(opName, in);
    RETURN_IF_ERR(addNodeAsOutput(op, R));
    return Error::success();
  }

  if (typeName == "Swish") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    auto *S = G_->createSwish(opName, in);
    RETURN_IF_ERR(addNodeAsOutput(op, S));
    return Error::success();
  }

  if (typeName == "Logit") {
    // Load the input and (optional) epsilon clamping value:
    NodeValue input;
    ASSIGN_VALUE_OR_RETURN_ERR(input, getNodeValueByName(op.input(0)));
    auto epsIt = dict.find("eps");
    // Default: 1e-6 (as in Caffe2).
    float eps = 1E-6f;
    if (epsIt != dict.end()) {
      ASSIGN_VALUE_OR_RETURN_ERR(eps, loadFloat(epsIt->second));
    }

    auto *node = G_->createLogit(opName, input, eps);
    // Save the outputs:
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "EQ") {
    NodeValue in0;
    ASSIGN_VALUE_OR_RETURN_ERR(in0, getNodeValueByName(op.input(0)));
    NodeValue in1;
    ASSIGN_VALUE_OR_RETURN_ERR(in1, getNodeValueByName(op.input(1)));
    auto *node = G_->createCmpEQ(opName, in0, in1);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "Tile") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    unsigned_t tiles;
    ASSIGN_VALUE_OR_RETURN_ERR(tiles, loadInt(dict["tiles"]));
    unsigned_t axis;
    ASSIGN_VALUE_OR_RETURN_ERR(axis, loadInt(dict["axis"]));

    auto *node = G_->createTile(opName, in, tiles, axis);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "Free") {
    // Glow frees memory automatically.
    return Error::success();
  }
  if (typeName == "StopGradient" || typeName == "ScaleGradient") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    // Currently the Caffe2 importer only supports inference.
    RETURN_IF_ERR(addNodeAsOutput(op, in));
    return Error::success();
  }

  if (typeName == "Transpose") {
    RETURN_IF_ERR(loadTranspose(op, dict, "axes"));
    return Error::success();
  }

  if (typeName == "NCHW2NHWC") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    auto *node = G_->createTranspose(opName, in, NCHW2NHWC);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "CopyCPUToMKL" || typeName == "CopyMKLToCPU" ||
      typeName == "Copy" || typeName == "EnsureCPUOutput" ||
      typeName == "EnsureDense" || typeName == "Dropout") {
    // Glow does not support any of these ops now, so implement them as
    // no-ops. Note: Implement this as a no-op reshape because these ops may
    // have partition information, and we need a node to maintain the parent
    // Function partition it specified. This reshape will get eliminated later
    // on during graph optimizations.
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    ReshapeNode *RN = G_->createReshape(in.getNode()->getName(), in, in.dims());
    RETURN_IF_ERR(addNodeAsOutput(op, RN));
    return Error::success();
  }

  if (typeName == "Slice") {
    NodeValue data;
    ASSIGN_VALUE_OR_RETURN_ERR(data, getNodeValueByName(op.input(0)));

    std::vector<ssize_t> starts;
    ASSIGN_VALUE_OR_RETURN_ERR(starts, getShape<ssize_t>(dict["starts"]));
    std::vector<ssize_t> ends;
    ASSIGN_VALUE_OR_RETURN_ERR(ends, getShape<ssize_t>(dict["ends"]));

    std::vector<dim_t> newStarts, newEnds;
    RETURN_ERR_IF_NOT(
        starts.size() == ends.size(),
        opErrMsg(op,
                 strFormat("Slice starts (%lu) and ends (%lu) must have the "
                           "same size.",
                           starts.size(), ends.size())));
    for (size_t i = 0; i < starts.size(); i++) {
      ssize_t newStart = starts[i];
      if (newStart == -1) {
        newStart = data.dims()[i];
      }
      RETURN_ERR_IF_NOT(
          newStart >= 0,
          opErrMsg(op,
                   strFormat("Indices should never be negative, but found %ld ",
                             newStart)));
      newStarts.push_back(newStart);

      ssize_t newEnd = ends[i];
      if (newEnd == -1) {
        newEnd = data.dims()[i];
      }
      RETURN_ERR_IF_NOT(
          newEnd >= 0,
          opErrMsg(op,
                   strFormat("Indices should never be negative, but found %ld ",
                             newEnd)));
      newEnds.push_back(newEnd);
    }
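    // For example (illustrative): data of dims {4, 5} with starts = {1, 0}
    // and ends = {3, -1} selects rows [1, 3) and all five columns, since an
    // end (or start) of -1 is replaced by the corresponding dimension size.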

    Node *SN = G_->createSlice(opName, data, newStarts, newEnds);
    RETURN_IF_ERR(addNodeAsOutput(op, SN));
    return Error::success();
  }

  if (typeName == "Clip") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    float cmin = std::numeric_limits<float>::lowest();
    if (dict.count("min")) {
      ASSIGN_VALUE_OR_RETURN_ERR(cmin, loadFloat(dict.find("min")->second));
    }

    float cmax = std::numeric_limits<float>::max();
    if (dict.count("max")) {
      ASSIGN_VALUE_OR_RETURN_ERR(cmax, loadFloat(dict.find("max")->second));
    }

    auto *node = G_->createClip(loadOperatorName(op), in, cmin, cmax);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "MatMul") {
    RETURN_IF_ERR(loadMatMul(op, dict));
    return Error::success();
  }

  if (typeName == "Cast") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    int to;
    ASSIGN_VALUE_OR_RETURN_ERR(to, loadInt(dict["to"]));

    switch (to) {
    case caffe2::TensorProto_DataType_FLOAT: {
      RETURN_ERR_IF_NOT(in.getElementType() == ElemKind::FloatTy,
                        opErrMsg(op, "Can only cast float to float."));
      break;
    }
    case caffe2::TensorProto_DataType_INT32: {
      RETURN_ERR_IF_NOT(in.getElementType() == ElemKind::Int32ITy,
                        opErrMsg(op, "Can only cast int32 to int32."));
      break;
    }
    case caffe2::TensorProto_DataType_INT64: {
      RETURN_ERR_IF_NOT(in.getElementType() == ElemKind::Int64ITy,
                        opErrMsg(op, "Can only cast int64 to int64."));
      break;
    }
    default:
      return MAKE_ERR(opErrMsg(op, "Unsupported Cast type."));
    }

    RETURN_IF_ERR(addNodeAsOutput(op, in));
    return Error::success();
  }

  if (typeName == "HalfToFloat") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    auto convertedType =
        mod_.uniqueType(ElemKind::FloatTy, in.getType()->dims());
    auto *R = G_->createConvertTo(opName + ".ConvertInput", in, convertedType);
    RETURN_IF_ERR(addNodeAsOutput(op, R));
    return Error::success();
  }

  if (typeName == "ScatterAssign") {
    NodeValue data;
    ASSIGN_VALUE_OR_RETURN_ERR(data, getNodeValueByName(op.input(0)));
    NodeValue indices;
    ASSIGN_VALUE_OR_RETURN_ERR(indices, getNodeValueByName(op.input(1)));
    NodeValue slices;
    ASSIGN_VALUE_OR_RETURN_ERR(slices, getNodeValueByName(op.input(2)));

    assert(indices.dims().size() == 1 && "Indices should be 1-dimensional!");
    NodeValue indices2D = G_->createReshape(opName + ".indices.2d", indices,
                                            {indices.dims()[0], 1});
    Node *SAN = G_->createScatterData(opName, data, indices2D, slices);
    RETURN_IF_ERR(addNodeAsOutput(op, SAN));
    return Error::success();
  }

  if (typeName == "ConstantFill" || typeName == "GivenTensorIntFill" ||
      typeName == "GivenTensorInt64Fill" || typeName == "GaussianFill" ||
      typeName == "UniformFill") {
    RETURN_IF_ERR(loadWeight(op));
    return Error::success();
  }

  if (typeName == "SigmoidCrossEntropyWithLogits") {
    NodeValue logits;
    ASSIGN_VALUE_OR_RETURN_ERR(logits, getNodeValueByName(op.input(0)));
    NodeValue targets;
    ASSIGN_VALUE_OR_RETURN_ERR(targets, getNodeValueByName(op.input(1)));
    Node *SCEL =
        G_->createSigmoidCrossEntropyWithLogits(opName, logits, targets);
    RETURN_IF_ERR(addNodeAsOutput(op, SCEL));
    return Error::success();
  }

  if (typeName == "ElementwiseLinear") {
    NodeValue X, w, b;

    // If the axis argument does not exist in the protobuf, the default
    // value should be 1.
    unsigned axis = 1;

    ASSIGN_VALUE_OR_RETURN_ERR(X, getNodeValueByName(op.input(0)));
    ASSIGN_VALUE_OR_RETURN_ERR(w, getNodeValueByName(op.input(1)));
    ASSIGN_VALUE_OR_RETURN_ERR(b, getNodeValueByName(op.input(2)));

    if (dict.count("axis")) {
      ASSIGN_VALUE_OR_RETURN_ERR(axis, loadInt(dict["axis"]));
    }

    Node *EL = G_->createElementwiseLinear(opName, X, w, b, axis);
    RETURN_IF_ERR(addNodeAsOutput(op, EL));
    return Error::success();
  }

  if (typeName == "AveragedLoss") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    auto *node = G_->createBatchedReduceMean(opName, in, 0);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "Mod") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    int64_t divisor;
    ASSIGN_VALUE_OR_RETURN_ERR(divisor, loadInt(dict["divisor"]));

    RETURN_ERR_IF_NOT(
        divisor >= 1,
        opErrMsg(op,
                 strFormat("Divisor must not be less than 1, but found %ld ",
                           divisor)));

    bool signFollowDivisor = false;
    if (dict.count("sign_follow_divisor")) {
      ASSIGN_VALUE_OR_RETURN_ERR(signFollowDivisor,
                                 loadInt(dict["sign_follow_divisor"]));
    }
1622
1623 auto *node = G_->createModulo(opName, in, divisor, signFollowDivisor);
1624 RETURN_IF_ERR(addNodeAsOutput(op, node));
1625
1626 return Error::success();
1627 }
1628
1629 if (typeName == "Scale") {
1630 NodeValue in;
1631 ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
1632 float scale = 1.0;
1633 if (dict.count("scale")) {
1634 ASSIGN_VALUE_OR_RETURN_ERR(scale, loadFloat(dict["scale"]));
1635 }
1636 auto scaleType = mod_.uniqueType(ElemKind::FloatTy, {in.dims()});
1637 auto scales = G_->createSplat(opName + ".scales", scaleType, scale);
1638 Node *node = G_->createMul(opName, in, scales);
1639
1640 RETURN_IF_ERR(addNodeAsOutput(op, node));
1641 return Error::success();
1642 }
1643
1644 if (typeName == "SparseLengthsWeightedSum8BitsRowwise" ||
1645 typeName == "SparseLengthsSum8BitsRowwise" ||
1646 typeName == "SparseLengthsWeightedSumFused8BitRowwise" ||
1647 typeName == "SparseLengthsSumFused8BitRowwise" ||
1648 typeName == "SparseLengthsWeightedSumFused4BitRowwise" ||
1649 typeName == "SparseLengthsSumFused4BitRowwise") {
1650 const bool isWeighted =
1651 typeName == "SparseLengthsWeightedSum8BitsRowwise" ||
1652 typeName == "SparseLengthsWeightedSumFused8BitRowwise" ||
1653 typeName == "SparseLengthsWeightedSumFused4BitRowwise";
1654 const bool isFused =
1655 typeName == "SparseLengthsWeightedSumFused8BitRowwise" ||
1656 typeName == "SparseLengthsSumFused8BitRowwise" ||
1657 typeName == "SparseLengthsWeightedSumFused4BitRowwise" ||
1658 typeName == "SparseLengthsSumFused4BitRowwise";
1659 const bool is4Bit =
1660 typeName == "SparseLengthsWeightedSumFused4BitRowwise" ||
1661 typeName == "SparseLengthsSumFused4BitRowwise";
1662 // If weighted, then the weights are the second input and so we need to
1663 // shift indices/lengths/scalesBiases.
1664 size_t indicesIdx = 1;
1665 size_t lengthsIdx = 2;
1666 size_t scalesBiasesIdx = 3;
1667 if (isWeighted) {
1668 indicesIdx++;
1669 lengthsIdx++;
1670 scalesBiasesIdx++;
1671 }
1672
1673 NodeValue data;
1674 ASSIGN_VALUE_OR_RETURN_ERR(data, getNodeValueByName(op.input(0)));
1675 NodeValue weights;
1676 if (isWeighted) {
1677 ASSIGN_VALUE_OR_RETURN_ERR(weights, getNodeValueByName(op.input(1)));
1678 }
1679 NodeValue indices;
1680 ASSIGN_VALUE_OR_RETURN_ERR(indices,
1681 getNodeValueByName(op.input(indicesIdx)));
1682 NodeValue lengths;
1683 ASSIGN_VALUE_OR_RETURN_ERR(lengths,
1684 getNodeValueByName(op.input(lengthsIdx)));
1685 Storage *dataS = llvm::dyn_cast<Storage>(data);
1686
1687 const dim_t numRows = data.dims()[0];
1688
1689 // Make sure all the shapes make sense.
1690 RETURN_ERR_IF_NOT(lengths.dims().size() == 1,
1691 opErrMsg(op, "lengths must be a vector."));
1692 RETURN_ERR_IF_NOT(indices.dims().size() == 1,
1693 opErrMsg(op, "indices must be a vector."));
1694
1695 LengthsMode lengthsMode;
1696 ASSIGN_VALUE_OR_RETURN_ERR(lengthsMode, getLengthsMode(dict));
1697
1698 float avgLength;
1699 ASSIGN_VALUE_OR_RETURN_ERR(avgLength, getAvgLength(dict));
1700
1701 Node *node;
1702 if (isFused) {
1703 RETURN_IF_ERR(setFusedTy(dataS, is4Bit ? ElemKind::UInt4FusedFP16QTy
1704 : ElemKind::UInt8FusedQTy));
1705
1706 // No other work to do, since the data is already loaded fused, so just
1707 // create the new node with its inputs.
1708 if (isWeighted) {
1709 node = G_->createFusedRowwiseQuantizedSparseLengthsWeightedSum(
1710 opName, dataS, weights, indices, lengths,
1711 /* useFP16Accumulation */ false, lengthsMode, avgLength);
1712 } else {
1713 node = G_->createFusedRowwiseQuantizedSparseLengthsSum(
1714 opName, dataS, indices, lengths, /* useFP16Accumulation */ false,
1715 lengthsMode, avgLength);
1716 }
1717
1718 if (is4Bit) {
1719 node = G_->createConvertTo(opName, node, ElemKind::FloatTy);
1720 }
1721 } else {
1722 NodeValue scalesBiases;
1723 ASSIGN_VALUE_OR_RETURN_ERR(scalesBiases,
1724 getNodeValueByName(op.input(scalesBiasesIdx)));
1725
1726 Constant *scalesBiasesC = llvm::dyn_cast<Constant>(scalesBiases);
1727 RETURN_ERR_IF_NOT(scalesBiasesC,
1728 opErrMsg(op, "scales_biases must be Constant."));
1729 RETURN_ERR_IF_NOT(scalesBiases.dims().size() == 2,
1730 opErrMsg(op, "scale_bias has to be a matrix."));
1731 RETURN_ERR_IF_NOT(
1732 scalesBiases.dims()[0] == numRows,
1733 opErrMsg(
1734 op,
1735 strFormat("scale_bias must have the same number of rows as data, "
1736 "but found scale_bias %d and rows %d ",
1737 int(scalesBiases.dims()[0]), int(numRows))));
1738 RETURN_ERR_IF_NOT(
1739 scalesBiases.dims()[1] == 2,
1740 opErrMsg(op,
1741 strFormat("Second dim of scale_bias has to be equal to 2 "
1742 "but found %d ",
1743 int(scalesBiases.dims()[1]))));
1744
1745 // Now strip out the scales and biases into their own tensors.
1746 NodeValue sliceScales =
1747 G_->createSlice(scalesBiasesC->getName().str() + "_scale",
1748 scalesBiasesC, {0, 0}, {numRows, 1});
1749 NodeValue sliceBiases =
1750 G_->createSlice(scalesBiasesC->getName().str() + "_bias",
1751 scalesBiasesC, {0, 1}, {numRows, 2});
1752 sliceScales =
1753 G_->createReshape(sliceScales.getNode()->getName().str() + "_1D",
1754 sliceScales, {numRows});
1755 sliceBiases =
1756 G_->createReshape(sliceBiases.getNode()->getName().str() + "_1D",
1757 sliceBiases, {numRows});
1758
1759 // Now create the actual node.
1760 if (isWeighted) {
1761 node = G_->createRowwiseQuantizedSparseLengthsWeightedSum(
1762 opName, dataS, sliceScales, sliceBiases, weights, indices, lengths,
1763 /* precision */ ElemKind::FloatTy,
1764 /* useFP16Accumulation */ false, lengthsMode, avgLength);
1765 } else {
1766 node = G_->createRowwiseQuantizedSparseLengthsSum(
1767 opName, dataS, sliceScales, sliceBiases, indices, lengths,
1768 /* precision */ ElemKind::FloatTy,
1769 /* useFP16Accumulation */ false, lengthsMode, avgLength);
1770 }
1771 }
1772
1773 RETURN_IF_ERR(addNodeAsOutput(op, node));
1774 return Error::success();
1775 }
1776
1777 if (typeName == "LengthsRangeFill") {
1778 NodeValue lengths;
1779 ASSIGN_VALUE_OR_RETURN_ERR(lengths, getNodeValueByName(op.input(0)));
1780 RETURN_ERR_IF_NOT(lengths.dims().size() == 1,
1781 opErrMsg(op, "lengths must be a 1D vector."));
1782
1783 auto maxOutputSizeIt = dict.find("maxOutputSize");
1784 RETURN_ERR_IF_NOT(
1785 maxOutputSizeIt != dict.end(),
1786 opErrMsg(op, "Require maxOutputSize when loading LengthsRangeFill."));
1787 unsigned_t maxOutputSize;
1788 ASSIGN_VALUE_OR_RETURN_ERR(maxOutputSize, loadInt(maxOutputSizeIt->second));
1789
1790 auto *LRF = G_->createLengthsRangeFill(opName, lengths, maxOutputSize);
1791 RETURN_IF_ERR(addNodeAsOutput(op, LRF));
1792
1793 return Error::success();
1794 }
1795
1796 // TODO: add checks for number of inputs and argument values
1797 if (typeName == "ReduceBackSum") {
1798 NodeValue in;
1799 ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
1800 RETURN_ERR_IF_NOT(in.dims().size() >= 2,
1801 opErrMsg(op, "Input should be at least 2D."));
1802 Node *node = G_->createBatchedReduceAdd(opName, in, in.dims().size() - 1);
1803 RETURN_IF_ERR(addNodeAsOutput(op, node));
1804 return Error::success();
1805 }
1806
1807 if (typeName == "RMSNorm") {
1808 NodeValue X, gamma, beta;
1809 ASSIGN_VALUE_OR_RETURN_ERR(X, getNodeValueByName(op.input(0)));
1810 RETURN_ERR_IF_NOT(X.dims().size() == 2,
1811 opErrMsg(op, "X should be a 2D tensor."));
1812 ASSIGN_VALUE_OR_RETURN_ERR(gamma, getNodeValueByName(op.input(1)));
1813 RETURN_ERR_IF_NOT(gamma.dims().size() == 1,
1814 opErrMsg(op, "gamma should be a 1D tensor."));
1815 ASSIGN_VALUE_OR_RETURN_ERR(beta, getNodeValueByName(op.input(2)));
1816 RETURN_ERR_IF_NOT(beta.dims().size() == 1,
1817 opErrMsg(op, "beta should be a 1D tensor."));
1818
1819 float epsilon = .0f;
1820 if (dict.count("eps")) {
1821 ASSIGN_VALUE_OR_RETURN_ERR(epsilon, loadFloat(dict["eps"]));
1822 }
1823
1824 auto nodes = G_->createRMSNorm(opName, X, gamma, beta, epsilon);
1825 nodeValueByName_[op.output(0)] = nodes[0];
1826 nodeValueByName_[op.output(1)] = nodes[1];
1827 return Error::success();
1828 }
1829
1830 if (typeName == "Mean") {
1831 const unsigned numInputs = op.input_size();
1832 RETURN_ERR_IF_NOT(numInputs > 0,
1833 opErrMsg(op, "Expect at least one input."));
1834
1835 std::vector<NodeValue> inputs;
1836 inputs.reserve(numInputs);
1837 for (unsigned i = 0; i < numInputs; i++) {
1838 NodeValue in;
1839 ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(i)));
1840 inputs.push_back(std::move(in));
1841 }
1842
1843 // Check that all inputs have the same shape
1844 const auto shape = inputs[0].dims();
1845 for (unsigned i = 1; i < numInputs; i++) {
1846 RETURN_ERR_IF_NOT(
1847 shape == inputs[i].dims(),
1848 opErrMsg(op,
1849 "All inputs should have the same shape, violating input " +
1850 op.input(i)));
1851 }
1852
1853 if (numInputs == 1) {
1854 RETURN_IF_ERR(addNodeAsOutput(op, inputs[0]));
1855 return Error::success();
1856 }
1857
1858 Node *node = G_->createConcat(opName + ".concat", inputs, 0);
1859
1860 std::vector<dim_t> newShape{numInputs};
1861 newShape.insert(newShape.end(), shape.begin(), shape.end());
1862 node = G_->createReshape(opName + ".reshape", node, newShape);
1863
1864 node = G_->createBatchedReduceMean(opName + ".reduceMean", node, 0);
1865
1866 RETURN_IF_ERR(addNodeAsOutput(op, node));
1867 return Error::success();
1868 }
1869
1870 if (typeName == "Negative") {
1871 RETURN_IF_ERR(loadNeg(op, dict));
1872 return Error::success();
1873 }
1874
1875 if (typeName == "LpNorm") {
1876 NodeValue in;
1877 ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
1878
1879 int p = 2;
1880 if (dict.count("p")) {
1881 ASSIGN_VALUE_OR_RETURN_ERR(p, loadInt(dict["p"]));
1882 RETURN_ERR_IF_NOT(p == 1 || p == 2,
1883 opErrMsg(op, "p should be either 1 or 2."));
1884 }
1885 bool average = false;
1886 if (dict.count("average")) {
1887 ASSIGN_VALUE_OR_RETURN_ERR(average, loadInt(dict["average"]));
1888 }
1889 RETURN_ERR_IF_NOT(!average, opErrMsg(op, "average is not supported."));
1890
1891 Node *node = nullptr;
1892 if (p == 1) {
1893 node = G_->createAbs(opName, in);
1894 } else {
1895 node = G_->createPow(opName, in, 2);
1896 }
1897
1898 const auto dims1D = flattenCdr(in.dims(), in.dims().size());
1899 node = G_->createReshape(opName + ".reshape1D", node, dims1D.first);
1900
1901 auto outputType = mod_.uniqueType(in.getElementType(), {1});
1902 node = G_->createBatchedReduceAdd(opName + ".sum", outputType, node, 0);
1903
1904 RETURN_IF_ERR(addNodeAsOutput(op, node));
1905 return Error::success();
1906 }
1907
1908 if (typeName == "ArgMin") {
1909 NodeValue input;
1910 ASSIGN_VALUE_OR_RETURN_ERR(input, getNodeValueByName(op.input(0)));
1911 int axis = 0;
1912 if (dict.count("axis")) {
1913 ASSIGN_VALUE_OR_RETURN_ERR(axis, loadInt(dict["axis"]));
1914 }
1915 bool keepDims = true;
1916 if (dict.count("keepdims")) {
1917 ASSIGN_VALUE_OR_RETURN_ERR(keepDims, loadInt(dict.at("keepdims")));
1918 }
1919
1920 auto node = G_->createArgMin(opName, input, axis, keepDims);
1921 RETURN_IF_ERR(addNodeAsOutput(op, node));
1922 return Error::success();
1923 }
1924
1925 if (typeName == "Sign") {
1926 NodeValue in;
1927 ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
1928
1929 Node *zeroes = G_->createSplat(opName + ".zeroes", in.getType(), 0.f);
1930
1931 Node *isPos = G_->createCmpLT(opName + ".isPos", zeroes, in);
1932 Node *isNeg = G_->createCmpLT(opName + ".isNeg", in, zeroes);
1933
1934 Node *posOnes = G_->createSplat(opName + ".posOnes", in.getType(), 1);
1935 Node *negOnes = G_->createSplat(opName + ".negOnes", in.getType(), -1);
1936
1937 Node *node = G_->createSelect(opName + ".fillPos", isPos, posOnes, zeroes);
1938 node = G_->createSelect(opName + ".fillNeg", isNeg, negOnes, node);
1939
1940 RETURN_IF_ERR(addNodeAsOutput(op, node));
1941 return Error::success();
1942 }
1943
1944 if (typeName == "Softplus") {
1945 NodeValue in;
1946 ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
1947
1948 Node *node = G_->createSoftPlus(opName, in);
1949
1950 RETURN_IF_ERR(addNodeAsOutput(op, node));
1951 return Error::success();
1952 }
1953
1954 if (typeName == "TopK") {
1955 NodeValue in;
1956 ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
1957 RETURN_ERR_IF_NOT(
1958 op.input_size() <= 2,
1959 opErrMsg(
1960 op,
1961 strFormat(
1962 "TopK: Maximum number of inputs is 2, but found input size %d ",
1963 op.input_size())));
1964 unsigned_t k = 0;
1965 if (op.input_size() > 1) {
1966 Constant *kConst = getConstantByNameOrNull(op.input(1));
1967 RETURN_ERR_IF_NOT(
1968 kConst,
1969 opErrMsg(op, "TopK: Non-constant k is not supported by Glow."));
1970 RETURN_ERR_IF_NOT(
1971 kConst->getElementType() == ElemKind::Int64ITy,
1972 opErrMsg(op, strFormat(
1973 "TopK: k input must be of type Int64, but found "
1974 "input type '%s' ",
1975 kConst->getType()->getElementName().str().c_str())));
1976 auto constH = kConst->getPayload().getHandle<int64_t>();
1977 k = constH.at({0});
1978 } else {
1979 ASSIGN_VALUE_OR_RETURN_ERR(k, loadInt(dict["k"]));
1980 }
1981
1982 int lastDim = in.dims().size() - 1;
1983 int axis = lastDim;
1984 if (dict.count("axis")) {
1985 ASSIGN_VALUE_OR_RETURN_ERR(axis,
1986 loadAxis<int>(dict["axis"], in.dims().size()));
1987 }
1988
1989 RETURN_ERR_IF_NOT(
1990 axis == lastDim,
1991 opErrMsg(
1992 op,
1993 strFormat(
1994 "TopK: Currently only support axis %d being last dimension %d ",
1995 axis, lastDim)));
1996
1997 TopKNode *R = G_->createTopK(opName, in, k, ElemKind::Int32ITy);
1998 RETURN_IF_ERR(addNodeAsOutput(op, R));
1999 return Error::success();
2000 }
2001
2002 if (typeName == "FillExamplesWithIndicator") {
2003 // Support FillExamplesWithIndicator
2004 NodeValue data;
2005 ASSIGN_VALUE_OR_RETURN_ERR(data, getNodeValueByName(op.input(0)));
2006 NodeValue indicator;
2007 ASSIGN_VALUE_OR_RETURN_ERR(indicator, getNodeValueByName(op.input(1)));
2008 // Validating input types and shapes
2009 RETURN_ERR_IF_NOT(
2010 indicator.getElementType() == ElemKind::Int32ITy ||
2011 indicator.getElementType() == ElemKind::Int64ITy,
2012 opErrMsg(op, "Indicator should be of int32 or int64 type."));
2013 RETURN_ERR_IF_NOT(indicator.dims().size() == 1,
2014 opErrMsg(op, "Indicator should be 1D tensor."));
2015 dim_t dataReshapeDim = flattenCdr(data.dims()).second;
2016 ShapeVector outDims{indicator.dims()[0]};
2017 outDims.insert(outDims.end(), data.dims().begin() + 1, data.dims().end());
2018 auto outTy2D = mod_.uniqueTypeWithNewShape(
2019 data.getType(), {indicator.dims()[0], dataReshapeDim});
2020
2021 auto data2D = G_->createReshape(opName + ".data2D", data,
2022 {data.dims()[0], dataReshapeDim});
2023 if (indicator.getElementType() == ElemKind::Int64ITy) {
2024 indicator = G_->createConvertTo(opName + ".int64ToInt32", indicator,
2025 ElemKind::Int32ITy);
2026 }
2027 // Select only takes boolean indicators, and converting from int to bool
2028 // must go from int -> float -> bool. Due to fp16 clipping, since only
2029 // int32 -> fp16 conversions are available, there is an initial conversion
2030 // from int64 to int32 if necessary.
2031 auto indicatorFloat = G_->createConvertTo(opName + ".intToFloat", indicator,
2032 ElemKind::FloatTy);
2033 auto indicatorBool = G_->createConvertTo(opName + ".floatToBool",
2034 indicatorFloat, ElemKind::BoolTy);
2035 auto nzIndices = G_->createNonZero(opName + ".nonzero", indicatorBool);
2036
2037 auto nzIndicesFixed = fixNonZero(G_, mod_, opName, nzIndices);
2038 auto nonZeroCount = data.dims()[0];
2039 RETURN_ERR_IF_NOT(nonZeroCount <= nzIndicesFixed->getNthResult(0).dims()[0],
2040 opErrMsg(op,
2041 "The number of "
2042 "non-zero elements in the indicator must be at "
2043 "least that of the first dimension of data"));
2044
2045 auto indices = G_->createSlice(opName + ".indices", nzIndicesFixed, {0, 0},
2046 {data.dims()[0], 1});
2047
2048 auto zeros = G_->createSplat(opName + ".zeros", outTy2D, 0);
2049
2050 auto res2D = G_->createScatterData(opName + ".scatterData", zeros, indices,
2051 data2D, true);
2052 auto node = G_->createReshape(opName + ".result", res2D, outDims);
2053 RETURN_IF_ERR(addNodeAsOutput(op, node));
2054 return Error::success();
2055 }
2056
2057 if (typeName == "BatchSparseToDense") {
2058 // Support BatchSparseToDense for output second dim = 1 only
2059 NodeValue lengths;
2060 ASSIGN_VALUE_OR_RETURN_ERR(lengths, getNodeValueByName(op.input(0)));
2061 NodeValue indices;
2062 ASSIGN_VALUE_OR_RETURN_ERR(indices, getNodeValueByName(op.input(1)));
2063 NodeValue values;
2064 ASSIGN_VALUE_OR_RETURN_ERR(values, getNodeValueByName(op.input(2)));
2065
2066 dim_t denseLastDim = 1;
2067 if (dict.count("dense_last_dim")) {
2068 ASSIGN_VALUE_OR_RETURN_ERR(denseLastDim,
2069 loadInt(dict.at("dense_last_dim")));
2070 }
2071
2072 RETURN_ERR_IF_NOT(
2073 denseLastDim == 1,
2074 opErrMsg(op, "Only output second dimension = 1 supported"));
2075 // Validating input types and shapes
2076 RETURN_ERR_IF_NOT(
2077 lengths.getElementType() == ElemKind::Int32ITy ||
2078 lengths.getElementType() == ElemKind::Int64ITy,
2079 opErrMsg(op, "Lengths should be of int32 or int64 type."));
2080 RETURN_ERR_IF_NOT(lengths.dims().size() == 1,
2081 opErrMsg(op, "Lengths should be 1D tensor."));
2082 RETURN_ERR_IF_NOT(
2083 indices.getElementType() == ElemKind::Int32ITy ||
2084 indices.getElementType() == ElemKind::Int64ITy,
2085 opErrMsg(op, "Indices should be of int32 or int64 type."));
2086 RETURN_ERR_IF_NOT(indices.dims().size() == 1,
2087 opErrMsg(op, "Indices should be 1D tensor."));
2088 RETURN_ERR_IF_NOT(values.getElementType() == ElemKind::FloatTy,
2089 opErrMsg(op, "Values should be of float type."));
2090 RETURN_ERR_IF_NOT(
2091 indices.dims()[0] == values.dims()[0],
2092 opErrMsg(op, "There should be the same number of values as indices."));
2093
2094 float defaultValue = 0.0;
2095 if (dict.count("default_value")) {
2096 ASSIGN_VALUE_OR_RETURN_ERR(defaultValue,
2097 loadFloat(dict.at("default_value")));
2098 }
2099 // Select only takes boolean indicators, and converting from int to bool
2100 // must go from int -> float -> bool. Due to fp16 clipping, since only
2101 // int32 -> fp16 conversions are available, there is an initial conversion
2102 // from int64 to int32 if necessary.
2103 if (lengths.getElementType() == ElemKind::Int64ITy) {
2104 lengths = G_->createConvertTo(opName + ".int64ToInt32", lengths,
2105 ElemKind::Int32ITy);
2106 }
2107 auto lengthsIntToFloat =
2108 G_->createConvertTo(opName + ".intToFloat", lengths, ElemKind::FloatTy);
2109 auto lengthsFloatToBool = G_->createConvertTo(
2110 opName + ".floatToBool", lengthsIntToFloat, ElemKind::BoolTy);
2111 auto nonZeroIndices =
2112 G_->createNonZero(opName + ".nonzero", lengthsFloatToBool);
2113 auto nonZeroIndicesFixed = fixNonZero(G_, mod_, opName, nonZeroIndices);
2114 auto numIndices = indices.dims()[0];
2115 auto indicesSliced = G_->createSlice(
2116 opName + ".indicesSlice", nonZeroIndicesFixed, {0, 0}, {numIndices, 1});
2117
2118 ShapeVector outDims{lengths.dims()[0], 1};
2119 auto dataTy = mod_.uniqueTypeWithNewShape(values.getType(), outDims);
2120 auto data = G_->createSplat(opName + ".data", dataTy, defaultValue);
2121 auto values2D =
2122 G_->createReshape(opName + ".reshape", values, {numIndices, 1});
2123 auto scatterData = G_->createScatterData(opName + ".scatterData", data,
2124 indicesSliced, values2D, true);
2125
2126 RETURN_IF_ERR(addNodeAsOutput(op, scatterData));
2127 return Error::success();
2128 }
2129
2130 if (typeName == "SparseLabelSplit") {
2131 NodeValue lengths;
2132 ASSIGN_VALUE_OR_RETURN_ERR(lengths, getNodeValueByName(op.input(0)));
2133 NodeValue indices;
2134 ASSIGN_VALUE_OR_RETURN_ERR(indices, getNodeValueByName(op.input(1)));
2135 NodeValue values;
2136 ASSIGN_VALUE_OR_RETURN_ERR(values, getNodeValueByName(op.input(2)));
2137
2138 dim_t numLabels = 0;
2139 RETURN_ERR_IF_NOT(dict.count("num_labels"),
2140 opErrMsg(op, "num_labels was not provided."));
2141 ASSIGN_VALUE_OR_RETURN_ERR(numLabels, loadInt(dict.at("num_labels")));
2142
2143 bool keepGradientOffsetMap = false;
2144 if (dict.count("keep_gradient_offset_map")) {
2145 ASSIGN_VALUE_OR_RETURN_ERR(keepGradientOffsetMap,
2146 loadInt(dict.at("keep_gradient_offset_map")));
2147 }
2148
2149 // Validating input types and shapes
2150 RETURN_ERR_IF_NOT(lengths.getElementType() == ElemKind::Int32ITy,
2151 opErrMsg(op, "Lengths should be of int32 type."));
2152 RETURN_ERR_IF_NOT(lengths.dims().size() == 1 || lengths.dims().size() == 2,
2153 opErrMsg(op, "Lengths should be 1D or 2D tensor."));
2154 RETURN_ERR_IF_NOT(indices.getElementType() == ElemKind::Int64ITy,
2155 opErrMsg(op, "Indices should be of int64 type."));
2156 RETURN_ERR_IF_NOT(indices.dims().size() == 1 || indices.dims().size() == 2,
2157 opErrMsg(op, "Indices should be 1D or 2D tensor."));
2158 RETURN_ERR_IF_NOT(values.getElementType() == ElemKind::FloatTy,
2159 opErrMsg(op, "Values should be of float type."));
2160 RETURN_ERR_IF_NOT(values.dims().size() == 1 || values.dims().size() == 2,
2161 opErrMsg(op, "Values should be 1D or 2D tensor."));
2162 RETURN_ERR_IF_NOT(
2163 indices.dims() == values.dims(),
2164 opErrMsg(op, "Indices and values should have the same shape."));
2165
2166 // Optional conversion from 2D to 1D inputs
2167 if (lengths.dims().size() == 2) {
2168 RETURN_ERR_IF_NOT(
2169 lengths.dims()[1] == 1,
2170 opErrMsg(op, "Second dimension should be 1 in lengths."));
2171 lengths = G_->createReshape(opName + ".lengths1D", lengths,
2172 {lengths.dims()[0]});
2173 }
2174 if (indices.dims().size() == 2) {
2175 RETURN_ERR_IF_NOT(
2176 indices.dims()[1] == 1,
2177 opErrMsg(op, "Second dimension should be 1 in indices."));
2178 indices = G_->createReshape(opName + ".indices1D", indices,
2179 {indices.dims()[0]});
2180 }
2181 if (values.dims().size() == 2) {
2182 RETURN_ERR_IF_NOT(
2183 values.dims()[1] == 1,
2184 opErrMsg(op, "Second dimension should be 1 in values."));
2185 values =
2186 G_->createReshape(opName + ".values1D", values, {values.dims()[0]});
2187 }
2188
2189 SparseLabelSplitNode *node =
2190 G_->createSparseLabelSplit(opName, lengths, indices, values, numLabels);
2191
2192 std::vector<SliceNode *> labelValueSlices;
2193 G_->createSplit(opName + ".splitLabelValues",
2194 node->getNthResult(SparseLabelSplitNode::LabelValuesIdx),
2195 numLabels, 0, {}, labelValueSlices);
2196
2197 std::vector<SliceNode *> exampleIdSlices;
2198 G_->createSplit(opName + ".splitExampleIds",
2199 node->getNthResult(SparseLabelSplitNode::ExampleIdsIdx),
2200 numLabels, 0, {}, exampleIdSlices);
2201
2202 const auto numItems = indices.dims()[0] / numLabels;
2203
2204 std::vector<Node *> labelValues;
2205 for (auto slice : labelValueSlices) {
2206 labelValues.push_back(
2207 G_->createReshape(opName + ".reshapeLabelValue", slice, {numItems}));
2208 }
2209
2210 std::vector<Node *> exampleIds;
2211 for (auto slice : exampleIdSlices) {
2212 exampleIds.push_back(
2213 G_->createReshape(opName + ".reshapeExamplId", slice, {numItems}));
2214 }
2215
2216 for (dim_t i = 0; i < numLabels; ++i) {
2217 nodeValueByName_[op.output(i)] = labelValues[i];
2218 }
2219 for (dim_t i = 0; i < numLabels; ++i) {
2220 nodeValueByName_[op.output(numLabels + i)] = exampleIds[i];
2221 }
2222 if (keepGradientOffsetMap) {
2223 nodeValueByName_[op.output(2 * numLabels)] =
2224 node->getNthResult(SparseLabelSplitNode::GradientOffsetMapIdx);
2225 }
2226 return Error::success();
2227 }
2228
2229 if (typeName == "Log1p") {
2230 NodeValue in;
2231 ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
2232
2233 Node *ones = G_->createSplat(opName + ".ones", in.getType(), 1.0f);
2234 Node *add = G_->createAdd(opName + ".add", in, ones);
2235 Node *node = G_->createLog(opName + ".log", add);
2236
2237 RETURN_IF_ERR(addNodeAsOutput(op, node));
2238 return Error::success();
2239 }
2240
2241 if (typeName == "ReduceBackMean") {
2242 const unsigned numInputs = op.input_size();
2243 RETURN_ERR_IF_NOT(numInputs == 1,
2244 opErrMsg(op, "Only single input is supported."));
2245
2246 NodeValue in;
2247 ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
2248 RETURN_ERR_IF_NOT(in.dims().size() >= 2,
2249 opErrMsg(op, "Input should be at least 2D."));
2250
2251 int numReduceDim = 1;
2252 if (dict.count("num_reduce_dim")) {
2253 ASSIGN_VALUE_OR_RETURN_ERR(numReduceDim, loadInt(dict["num_reduce_dim"]));
2254 }
2255 // TODO: check maybe we can support more dimensions to be reduced
2256 RETURN_ERR_IF_NOT(numReduceDim == 1,
2257 opErrMsg(op, "Supporting reducing only one dimension."));
2258
2259 Node *node = G_->createBatchedReduceMean(opName, in, in.dims().size() - 1);
2260 RETURN_IF_ERR(addNodeAsOutput(op, node));
2261 return Error::success();
2262 }
2263
2264 return MAKE_ERR(unexpectedNodeErrorMessage(op, "Unsupported operator."));
2265}
2266
2267template <class TensorProtoType>
2268Error Caffe2ModelLoader::loadInputsWithTensorProtoType(
2269 const caffe2::NetDef &net,
2270 const std::unordered_set<std::string> &initializers,
2271 const TensorProtoType &in) {
2272 // Skip static weights
2273 if (getConstantByNameOrNull(in.name())) {
2274 return Error::success();
2275 }
2276
2277 if (getStaticPlaceholderByNameOrNull(in.name())) {
2278 return Error::success();
2279 }
2280
2281 LoadWeightResult loadRes;
2282 if (auto resOrErr = createAndSetTensorType(in)) {
2283 loadRes = std::move(*resOrErr);
2284 } else {
2285 RETURN_ERR(resOrErr.takeError());
2286 }
2287
2288 bool multiQParamsLoaded = loadRes.scales || loadRes.offsets;
2289 RETURN_ERR_IF_NOT(
2290 (!multiQParamsLoaded || (loadRes.scales && loadRes.offsets)),
2291 "For tensors with separate qparams, both scales and offsets must be "
2292 "loaded");
2293
2294 bool isInput = !initializers.count(in.name());
2295 if (isInput) {
2296 RETURN_ERR_IF_NOT(!clipQuantRangeToFP16_ ||
2297 !loadRes.t->getType().isQuantizedType() ||
2298 loadRes.t->getType().isFusedQuantizedType(),
2299 "Do not support clipQuantRangeToFP16 with unfused "
2300 "quantized input Placeholders: " +
2301 in.name());
2302 Placeholder *placeholder;
2303 ASSIGN_VALUE_OR_RETURN_ERR(
2304 placeholder,
2305 createAndRegisterPlaceholder(in.name(), &loadRes.t->getType()));
2306
2307 inputVarsByName_.try_emplace(in.name(), placeholder);
2308
2309 if (multiQParamsLoaded) {
2310 auto offsetsName = strFormat("%s_loaded_offsets", in.name().c_str());
2311 auto scalesName = strFormat("%s_loaded_scales", in.name().c_str());
2312 Placeholder *offsetsPlaceholder;
2313 Placeholder *scalesPlaceholder;
2314
2315 ASSIGN_VALUE_OR_RETURN_ERR(offsetsPlaceholder,
2316 createAndRegisterPlaceholder(
2317 offsetsName, &loadRes.offsets->getType()));
2318 inputVarsByName_.try_emplace(offsetsName, offsetsPlaceholder);
2319
2320 ASSIGN_VALUE_OR_RETURN_ERR(
2321 scalesPlaceholder,
2322 createAndRegisterPlaceholder(scalesName, &loadRes.scales->getType()));
2323 inputVarsByName_.try_emplace(scalesName, scalesPlaceholder);
2324 }
2325 } else {
2326 RETURN_IF_ERR(createAndRegisterConstant(in.name(), std::move(*loadRes.t)));
2327
2328 if (multiQParamsLoaded) {
2329 auto offsetsName = strFormat("%s_loaded_offsets", in.name().c_str());
2330 auto scalesName = strFormat("%s_loaded_scales", in.name().c_str());
2331 RETURN_IF_ERR(
2332 createAndRegisterConstant(offsetsName, std::move(*loadRes.offsets)));
2333 RETURN_IF_ERR(
2334 createAndRegisterConstant(scalesName, std::move(*loadRes.scales)));
2335 }
2336 }
2337 return Error::success();
2338}
2339
2340Error Caffe2ModelLoader::loadInputs(
2341 const caffe2::NetDef &net,
2342 const std::unordered_set<std::string> &initializers) {
2343 const caffe2::Argument *arg = nullptr, *qarg = nullptr;
2344 for (auto i = 0, e = net.arg_size(); i < e && (!arg || !qarg); ++i) {
2345 if (net.arg(i).name() == "input_shape_info") {
2346 arg = &net.arg(i);
2347 } else if (net.arg(i).name() == "input_qshape_info") {
2348 qarg = &net.arg(i);
2349 }
2350 }
2351
2352 // Load all regular tensor input
2353 if (arg) {
2354 for (const auto &in : arg->tensors()) {
2355 RETURN_IF_ERR(loadInputsWithTensorProtoType<caffe2::TensorProto>(
2356 net, initializers, in));
2357 }
2358 }
2359
2360 // Load all quantized tensor input
2361 if (qarg) {
2362 for (const auto &in : qarg->qtensors()) {
2363 RETURN_IF_ERR(loadInputsWithTensorProtoType<caffe2::QTensorProto>(
2364 net, initializers, in));
2365 }
2366 }
2367
2368 return Error::success();
2369}
2370
2371Error Caffe2ModelLoader::loadNetwork(caffe2::NetDef &net) {
2372 // Make a claim on the unique name of all output Placeholders.
2373 for (int i = 0; i < net.external_output_size(); i++) {
2374 auto &outputName = net.external_output(i);
2375 mod_.registerStorageName(legalizeName(outputName));
2376 }
2377
2378 /// Load the network operators:
2379 for (int i = 0; i < net.op_size(); i++) {
2380 auto &op = net.op(i);
2381
2382 // Set up current partition to load into if relevant.
2383 if (partNameToFun_.size()) {
2384 auto &pName = op.device_option().node_name();
2385 auto it = partNameToFun_.find(pName);
2386 RETURN_ERR_IF_NOT(
2387 it != partNameToFun_.end(),
2388 strFormat("Did not find partition with name %s", pName.c_str()));
2389 G_ = it->second;
2390 }
2391 RETURN_ERR_IF_NOT(G_, "Internal Glow error; Graph was not valid.");
2392
2393 if (constFoldInLoader_) {
2394 auto tryFold = foldOperator(op);
2395 if (!tryFold) {
2396 // Error during constant folding; load the op normally below.
2397 const std::string errStr = ERR_TO_STRING(tryFold.takeError());
2398 VLOG(1) << "Error while trying to ConstantFold " << loadOperatorName(op)
2399 << ": " << errStr;
2400 } else if (tryFold.get()) {
2401 // Folded successfully, so skip loading the op below.
2402 continue;
2403 }
2404 }
2405 RETURN_IF_ERR(loadOperator(op));
2406 }
2407
2408 RETURN_ERR_IF_NOT(net.external_output_size(),
2409 "Network needs external outputs defined.");
2410
2411 for (int i = 0; i < net.external_output_size(); i++) {
2412 auto &outputName = net.external_output(i);
2413 NodeValue r;
2414 // We want to create the save node in the same Function as the original
2415 // NodeValue. Thus here we ignore the source function when getting the NV,
2416 // which avoids copying the NV to whatever G_ currently is via an
2417 // intermediate Placeholder.
2418 ASSIGN_VALUE_OR_RETURN_ERR(
2419 r, getNodeValueByName(outputName, /* ignoreSrcFun */ true));
2420
2421 PlaceholderList &PHList = mod_.getPlaceholders();
2422 // Create a Placeholder with the previously claimed name.
2423 auto *PH =
2424 new Placeholder(legalizeName(outputName), mod_.uniqueType(*r.getType()),
2425 false, ANY_LAYOUT);
2426 PHList.push_back(PH);
2427 // If r is storage then just use the current last Function to save, since
2428 // we're just saving directly from a Storage node anyway.
2429 Function *F = llvm::isa<Storage>(r) ? G_ : r.getNode()->getParent();
2430 assert(F && "F must be valid here.");
2431 auto *SN = F->createSave(outputName, r, PH);
2432 outputVarsByName_[outputName] = SN->getPlaceholder();
2433 }
2434 return Error::success();
2435}
2436
2437/// Fills \p T with data from \p values.
2438template <typename ElemTy, typename RangeTy>
2439static Error fillTensor(Tensor &T, ElemKind kind, llvm::ArrayRef<dim_t> dim,
2440 RangeTy values) {
2441 T.reset(kind, dim);
2442 auto TH = T.getHandle<ElemTy>();
2443 RETURN_ERR_IF_NOT((size_t)values.size() == T.size(),
2444 llvm::formatv("Wrong number of values for GivenTensorFill "
2445 "({0} given, {1} expected)",
2446 values.size(), T.size())
2447 .str());
2448 size_t i = 0;
2449 for (auto num : values) {
2450 TH.raw(i++) = num;
2451 }
2452 return Error::success();
2453}
2454
2455Error Caffe2ModelLoader::loadWeight(const caffe2::OperatorDef &op) {
2456 ArgumentDictionaryTy dict = loadArgumentMap(op);
2457 const std::string &typeName = op.type();
2458 const std::string &opName = loadOperatorName(op);
2459 // Load tensors with values:
2460 if (typeName == "GivenTensorFill" || typeName == "GivenTensorFp16Fill" ||
2461 typeName == "GivenTensorIntFill" || typeName == "GivenTensorInt64Fill") {
2462 /*
2463 * op {
2464 * output: "conv1_w"
2465 * name: ""
2466 * type: "GivenTensorFill"
2467 * arg {
2468 * name: "shape"
2469 * ints: 96
2470 * ints: 3
2471 * ints: 11
2472 * ints: 11
2473 * }
2474 * arg {
2475 * name: "values"
2476 * floats: -0.028315347
2477 * ...
2478 * }
2479 * }
2480 */
2481
2482 // Note: Explicitly allow for an empty dim here, representing a scalar value
2483 // will be loaded below.
2484 std::vector<dim_t> dim;
2485 ASSIGN_VALUE_OR_RETURN_ERR(
2486 dim, getShape<dim_t>(dict["shape"], /* allowEmptyShape */ true));
2487 auto const &values = dict["values"];
2488 RETURN_ERR_IF_NOT(
2489 op.output_size() == 1,
2490 opErrMsg(
2491 op, strFormat(
2492 "GivenTensorFill must have exactly 1 output, but found %d ",
2493 op.output_size())));
2494 Tensor T;
2495 if (typeName == "GivenTensorFill") {
2496 RETURN_IF_ERR(
2497 fillTensor<float>(T, ElemKind::FloatTy, dim, values->floats()));
2498 } else if (typeName == "GivenTensorFp16Fill") {
2499 RETURN_IF_ERR(
2500 fillTensor<float16_t>(T, ElemKind::Float16Ty, dim, values->floats()));
2501 } else if (typeName == "GivenTensorIntFill") {
2502 RETURN_IF_ERR(
2503 fillTensor<int32_t>(T, ElemKind::Int32ITy, dim, values->ints()));
2504 } else if (typeName == "GivenTensorInt64Fill") {
2505 RETURN_IF_ERR(
2506 fillTensor<int64_t>(T, ElemKind::Int64ITy, dim, values->ints()));
2507 } else {
2508 return MAKE_ERR(
2509 strFormat("Unhandled tensor fill type: %s", typeName.c_str()));
2510 }
2511 RETURN_IF_ERR(createAndRegisterConstant(op.output().Get(0), std::move(T)));
2512 return Error::success();
2513 }
2514
2515 if (typeName == "GivenTensorByteStringToUInt8Fill") {
2516 /*
2517 output: "data"
2518 type: "GivenTensorByteStringToUInt8Fill"
2519 arg {
2520 name: "shape"
2521 ints: 3
2522 ints: 10
2523 }
2524 arg {
2525 name: "values"
2526 s:
2527 "\000\377\152\232\115\072\000\000\200\077\000\377\050\132\215\073\063\063\023\100\000\377\314\063\232\073\000\000\220\100"
2528 }
2529 */
2530
2531 for (auto &o : op.output()) {
2532 Tensor T;
2533 if (getConstantByNameOrNull(o)) {
2534 continue;
2535 }
2536 std::vector<dim_t> dim;
2537 ASSIGN_VALUE_OR_RETURN_ERR(dim, getShape<dim_t>(dict["shape"]));
2538 T.reset(ElemKind::UInt8QTy, dim, 0.0, 0);
2539 auto TH = T.getHandle<uint8_t>();
2540 RETURN_ERR_IF_NOT(
2541 dict["values"]->strings().size() == 1,
2542 "Expect single string input for GivenTensorByteStringToUInt8Fill");
2543 const std::string &str = dict["values"]->strings().Get(0);
2544
2545 size_t pos;
2546 for (pos = 0; pos < str.size(); pos++) {
2547 TH.raw(pos) = (uint8_t)str[pos];
2548 }
2549
2550 RETURN_ERR_IF_NOT(
2551 pos == T.size(),
2552 strFormat("The number of serialized values (%li) does not "
2553 "match the size of the tensor (%li).",
2554 pos, (size_t)T.size()));
2555 RETURN_IF_ERR(createAndRegisterConstant(o, std::move(T)));
2556 }
2557 return Error::success();
2558 }
2559
2560 // Load quantized tensors:
2561 if (typeName == "Int8GivenTensorFill" ||
2562 typeName == "Int8GivenIntTensorFill") {
2563 /*
2564 output: "conv1_w"
2565 name: ""
2566 type: "Int8GivenTensorFill"
2567 arg {
2568 name: "shape"
2569 ints: 96
2570 ints: 3
2571 ints: 11
2572 ints: 11
2573 }
2574 arg {
2575 name: "values"
2576 s: "\x7f\x80\x80\x7"
2577 }
2578 arg {
2579 name: "Y_scale"
2580 f: 0.00044428
2581 }
2582 arg {
2583 name: "Y_zero_point"
2584 i: 127
2585 }
2586 */
2587 for (auto &o : op.output()) {
2588 Tensor T;
2589 if (getConstantByNameOrNull(o)) {
2590 continue;
2591 }
2592
2593 std::vector<dim_t> dim;
2594 ASSIGN_VALUE_OR_RETURN_ERR(dim, getShape<dim_t>(dict["shape"]));
2595
2596 RETURN_ERR_IF_NOT(dict.count("Y_zero_point"),
2597 ("missing zero point for quantized output type"));
2598 RETURN_ERR_IF_NOT(dict.count("Y_scale"),
2599 ("missing Y_scale for quantized output type"));
2600
2601 float scale;
2602 ASSIGN_VALUE_OR_RETURN_ERR(scale, loadFloat(dict["Y_scale"]));
2603 (void)scale;
2604 int32_t offset;
2605 ASSIGN_VALUE_OR_RETURN_ERR(offset, loadInt(dict["Y_zero_point"]));
2606 (void)offset;
2607 size_t i = 0;
2608 if (typeName == "Int8GivenTensorFill") {
2609 // Although in Caffe2 quantized model, the weights is int8 quantized,
2610 // the weights is stored in uint8_t format due to that Caffe2 requires
2611 // the type of input and weights must be the same. Therefore, we need
2612 // to convert it to int8 by subtracting 128.
2613 TypeRef ty;
2614 ASSIGN_VALUE_OR_RETURN_ERR(
2615 ty, loadQuantTy(o, ElemKind::Int8QTy, dim, dict,
2616 /* skipClipQuantRangeToFP16 */ true));
2617 T.reset(*ty);
2618 auto TH = T.getHandle<int8_t>();
2619 std::string str = dict["values"]->s();
2620 for (; i < str.size(); i++) {
2621 TH.raw(i) = ((uint8_t)(str.c_str()[i]) - UINT8_TO_INT8_SHIFT);
2622 }
2623 } else {
2624 TypeRef ty;
2625 ASSIGN_VALUE_OR_RETURN_ERR(
2626 ty, loadQuantTy(o, ElemKind::Int32QTy, dim, dict,
2627 /* skipClipQuantRangeToFP16 */ true));
2628 T.reset(*ty);
2629 auto TH = T.getHandle<int32_t>();
2630 for (auto num : dict["values"]->ints()) {
2631 TH.raw(i++) = num;
2632 }
2633 }
2634
2635 // If we're clipping quantized ranges tp FP16, then we need to rescale the
2636 // Tensor and update its type.
2637 if (clipQuantRangeToFP16_) {
2638 const ElemKind k = T.getType().getElementType();
2639 const auto qMinMax = getQuantizedValueRange(T.getType().getScale(),
2640 T.getType().getOffset(), k);
2641 const float newMin = std::max(qMinMax.first, kMinFP16);
2642 const float newMax = std::min(qMinMax.second, kMaxFP16);
2643 if (newMin != qMinMax.first || newMax != qMinMax.second) {
2644 auto rescaledT = glow::make_unique<Tensor>();
2645 dispatchQuantizedImpl(rescaleQTensor, k, T, *rescaledT, newMin,
2646 newMax);
2647 T = std::move(*rescaledT);
2648 }
2649 }
2650
2651 RETURN_ERR_IF_NOT(
2652 i == T.size(),
2653 strFormat("The number of serialized values (%li) does not "
2654 "match the size of the tensor (%li).",
2655 i, (size_t)T.size()));
2656
2657 RETURN_IF_ERR(createAndRegisterConstant(o, std::move(T)));
2658 }
2659
2660 return Error::success();
2661 }
2662
2663 // Load tensors with constant fill:
2664 if (typeName == "ConstantFill") {
2665 /*
2666 output: "data"
2667 name: ""
2668 type: "ConstantFill"
2669 arg {
2670 name: "shape"
2671 ints: 1
2672 }
2673 */
2674
2675 const auto &name = op.output(0);
2676 // If the tensor is pre-populated by the user of this class then we don't
2677 // need to allocate a new tensor.
2678 if (getConstantByNameOrNull(name)) {
2679 return Error::success();
2680 }
2681
2682 // The shape is set either the shape argument, or from another input
2683 // tensor. Shape takes priority over input.
2684 std::vector<dim_t> dims;
2685 if (dict.count("shape")) {
2686 ASSIGN_VALUE_OR_RETURN_ERR(dims, getShape<dim_t>(dict["shape"]));
2687 } else {
2688 RETURN_ERR_IF_NOT(op.input_size() > 0,
2689 "If no shape provided, must have input shape.");
2690
2691 bool inputAsShape = false;
2692 if (dict.count("input_as_shape")) {
2693 ASSIGN_VALUE_OR_RETURN_ERR(inputAsShape,
2694 loadInt(dict["input_as_shape"]));
2695 }
2696
2697 if (inputAsShape) {
2698 // It must be registered as a Constant because it must be statically set
2699 // already, as shapes must be statically known.
2700 Constant *in;
2701 ASSIGN_VALUE_OR_RETURN_ERR(in, getConstantByName(op.input(0)));
2702 RETURN_ERR_IF_NOT(in->dims().size() == 1,
2703 opErrMsg(op, "Input must be 1D tensor."));
2704 RETURN_ERR_IF_NOT(in->getElementType() == ElemKind::Int64ITy,
2705 opErrMsg(op, "Input must be of int64 type."));
2706 const auto handle = in->getHandle<int64_t>();
2707 dims.reserve(in->dims().size());
2708 for (auto dim : handle) {
2709 dims.push_back(dim);
2710 }
2711 } else {
2712 NodeValue in;
2713 ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
2714 dims = in.dims();
2715 }
2716 }
2717
2718 int to = caffe2::TensorProto_DataType_FLOAT;
2719 if (dict.count("dtype")) {
2720 ASSIGN_VALUE_OR_RETURN_ERR(to, loadInt(dict["dtype"]));
2721 }
2722
2723 SplatNode *splatNode{nullptr};
2724
2725 switch (to) {
2726 case caffe2::TensorProto_DataType_FLOAT: {
2727 float f = 0.0f;
2728 if ((dict.count("value") && dict["value"]->has_f())) {
2729 ASSIGN_VALUE_OR_RETURN_ERR(f, loadFloat(dict["value"]));
2730 }
2731 splatNode =
2732 G_->createSplat(opName, mod_.uniqueType(ElemKind::FloatTy, dims), f);
2733 break;
2734 }
2735 case caffe2::TensorProto_DataType_INT32: {
2736 int i = 0;
2737 if ((dict.count("value") && dict["value"]->has_i())) {
2738 ASSIGN_VALUE_OR_RETURN_ERR(i, loadInt(dict["value"]));
2739 }
2740 splatNode =
2741 G_->createSplat(opName, mod_.uniqueType(ElemKind::Int32ITy, dims), i);
2742 break;
2743 }
2744 case caffe2::TensorProto_DataType_INT64:
2745 case caffe2::TensorProto_DataType_BOOL: {
2746 int i = 0;
2747 if ((dict.count("value") && dict["value"]->has_i())) {
2748 ASSIGN_VALUE_OR_RETURN_ERR(i, loadInt(dict["value"]));
2749 }
2750 splatNode =
2751 G_->createSplat(opName, mod_.uniqueType(ElemKind::Int64ITy, dims), i);
2752 break;
2753 }
2754 default:
2755 return MAKE_ERR("Unsupported datatype for ConstantFill.");
2756 }
2757
2758 RETURN_IF_ERR(addNodeAsOutput(op, splatNode));
2759
2760 return Error::success();
2761 }
2762
2763 if (typeName == "UniformFill") {
2764 /*
2765 output: "fc/w"
2766 name: ""
2767 type: "UniformFill"
2768 arg {
2769 name: "max"
2770 f: 0.25
2771 }
2772 arg {
2773 name: "shape"
2774 ints: 1
2775 ints: 16
2776 }
2777 arg {
2778 name: "min"
2779 f: -0.25
2780 }
2781 */
2782 const auto &name = op.output(0);
2783 Tensor T;
2784 std::vector<dim_t> dim;
2785 if (dict.count("shape")) {
2786 ASSIGN_VALUE_OR_RETURN_ERR(dim, getShape<dim_t>(dict["shape"]));
2787 } else {
2788 RETURN_ERR_IF_NOT(op.input_size() > 0,
2789 "If no shape provided, must have input shape.");
2790
2791 bool inputAsShape = false;
2792 if (dict.count("input_as_shape")) {
2793 ASSIGN_VALUE_OR_RETURN_ERR(inputAsShape,
2794 loadInt(dict["input_as_shape"]));
2795 }
2796
2797 if (inputAsShape) {
2798 Constant *in;
2799 ASSIGN_VALUE_OR_RETURN_ERR(in, getConstantByName(op.input(0)));
2800 RETURN_ERR_IF_NOT(in->dims().size() == 1,
2801 opErrMsg(op, "Input must be 1D tensor."));
2802 RETURN_ERR_IF_NOT(in->getElementType() == ElemKind::Int64ITy,
2803 opErrMsg(op, "Input must be of int64 type."));
2804 const auto handle = in->getHandle<int64_t>();
2805 dim.reserve(in->dims().size());
2806 for (auto d : handle) {
2807 dim.push_back(d);
2808 }
2809 } else {
2810 NodeValue input;
2811 ASSIGN_VALUE_OR_RETURN_ERR(input, getNodeValueByName(op.input(0)));
2812 dim = input.dims();
2813 }
2814 }
2815 T.reset(ElemKind::FloatTy, dim);
2816 auto TH = T.getHandle<>();
2817 float tensorMin;
2818 ASSIGN_VALUE_OR_RETURN_ERR(tensorMin, loadFloat(dict["min"]));
2819 float tensorMax;
2820 ASSIGN_VALUE_OR_RETURN_ERR(tensorMax, loadFloat(dict["max"]));
2821
2822 DLOG(INFO)
2823 << "The model contains UniformFill operator, which generates random "
2824 "numbers. This could be source of discrepancy.";
2825
2826 // Uniformly generate random numbers in [tensorMin; tensorMax).
2827 for (auto &elem : TH) {
2828 elem = mod_.getPRNG().nextRandReal(tensorMin, tensorMax);
2829 }
2830
2831 RETURN_IF_ERR(createAndRegisterConstant(name, std::move(T)));
2832
2833 return Error::success();
2834 }
2835
2836 // Load tensors with constant fill:
2837 if (typeName == "GaussianFill") {
2838 /*
2839 output: "data"
2840 name: ""
2841 type: "GaussianFill"
2842 arg {
2843 name: "mean"
2844 f: 0.0
2845 }
2846 arg {
2847 name: "std"
2848 f: 1.0
2849 }
2850 arg {
2851 name: "shape"
2852 ints: 1
2853 ints: 16
2854 }
2855 */
2856
2857 const auto &name = op.output(0);
2858 if (getConstantByNameOrNull(name)) {
2859 return Error::success();
2860 }
2861
2862 // The shape of the output is set by shape, if provided. Otherwise, it is
2863 // set by the shape of the input or the shape indicated by input if
2864 // input_as_shape is true
2865 NodeValue input;
2866 std::vector<dim_t> dims;
2867 if (dict.count("shape")) {
2868 ASSIGN_VALUE_OR_RETURN_ERR(dims, getShape<dim_t>(dict["shape"]));
2869 } else {
2870 RETURN_ERR_IF_NOT(op.input_size() > 0,
2871 "If no shape provided, must have input shape.");
2872
2873 bool inputAsShape = false;
2874 if (dict.count("input_as_shape")) {
2875 ASSIGN_VALUE_OR_RETURN_ERR(inputAsShape,
2876 loadInt(dict["input_as_shape"]));
2877 }
2878
2879 if (inputAsShape) {
2880 Constant *in;
2881 ASSIGN_VALUE_OR_RETURN_ERR(in, getConstantByName(op.input(0)));
2882 RETURN_ERR_IF_NOT(in->dims().size() == 1,
2883 opErrMsg(op, "Input must be 1D tensor."));
2884 RETURN_ERR_IF_NOT(in->getElementType() == ElemKind::Int64ITy,
2885 opErrMsg(op, "Input must be of int64 type."));
2886 const auto handle = in->getHandle<int64_t>();
2887 dims.reserve(in->dims().size());
2888 for (auto dim : handle) {
2889 dims.push_back(dim);
2890 }
2891 } else {
2892 ASSIGN_VALUE_OR_RETURN_ERR(input, getNodeValueByName(op.input(0)));
2893 dims = input.dims();
2894 }
2895
2896 if (dict.count("extra_shape")) {
2897 std::vector<dim_t> extra_shape;
2898 ASSIGN_VALUE_OR_RETURN_ERR(extra_shape,
2899 getShape<dim_t>(dict["extra_shape"]));
2900 dims.insert(dims.end(), extra_shape.begin(), extra_shape.end());
2901 }
2902 }
2903 if ((!input && !dims.empty()) || input.dims().vec() != dims) {
2904 input =
2905 G_->createSplat("in", mod_.uniqueType(ElemKind::FloatTy, dims), 0.);
2906 }
2907 float mean;
2908 ASSIGN_VALUE_OR_RETURN_ERR(mean, loadFloat(dict["mean"]));
2909 float scale;
2910 ASSIGN_VALUE_OR_RETURN_ERR(scale, loadFloat(dict["std"]));
2911
2912 auto GF = G_->createGaussianFill(opName, input, mean, scale,
2913 std::random_device{}());
2914 auto outputType =
2915 mod_.uniqueType(ElemKind::FloatTy, GF->getResult().dims());
2916 auto node = G_->createConvertTo(opName + ".ConvertOutput", GF, outputType);
2917 RETURN_IF_ERR(addNodeAsOutput(op, node));
2918
2919 return Error::success();
2920 }
2921
2922 return MAKE_ERR(unexpectedNodeErrorMessage(op, "Unsupported weight kind"));
2923}
2924
2925Error Caffe2ModelLoader::loadWeightsFromNet(caffe2::NetDef &net) {
2926 for (auto &op : net.op()) {
2927 RETURN_IF_ERR(loadWeight(op));
2928 }
2929 return Error::success();
2930}
2931
2932Caffe2ModelLoader::Caffe2ModelLoader(Function &F, Error *errPtr)
2933 : CommonOperatorLoader({}, {}, &F, errPtr) {
2934 deleteUnusedConstants();
2935}
2936
2937Caffe2ModelLoader::Caffe2ModelLoader(
2938 const std::string &netDescFilename, const std::string &netWeightFilename,
2939 llvm::ArrayRef<const char *> names, llvm::ArrayRef<TypeRef> types,
2940 Function &F, Error *errPtr, OriginNameToTQPMap *originNameToTQPMap,
2941 bool loadUniquedDummyQParams, bool zeroScaleFP16Clip,
2942 bool clipQuantRangeToFP16)
2943 : CommonOperatorLoader(names, types, &F, errPtr,
2944 /* loadIntoExistingModule */ false,
2945 originNameToTQPMap, loadUniquedDummyQParams,
2946 zeroScaleFP16Clip, clipQuantRangeToFP16) {
2947 // if errPtr already contains an error then don't continue with constructor
2948 if (errPtr && *errPtr) {
2949 return;
2950 }
2951
2952 // Lambda to setup the Caffe2ModelLoader and return any Errors that
2953 // were raised.
2954 auto setup = [&]() -> Error {
2955 // The caffe2 network descriptor that we are deserializing.
2956 caffe2::NetDef networkDef;
2957 ASSIGN_VALUE_OR_RETURN_ERR(networkDef, loadProtoFile(netDescFilename));
2958
2959 // The caffe2 weights that we are deserializing.
2960 caffe2::NetDef weightsDef;
2961 ASSIGN_VALUE_OR_RETURN_ERR(weightsDef, loadProtoFile(netWeightFilename));
2962
2963 RETURN_IF_ERR(loadWeightsFromNet(weightsDef));
2964 RETURN_IF_ERR(loadNetwork(networkDef));
2965
2966 // This is to ensure that the same processing done with
2967 // the same network, even if order of operators is different.
2968 F.orderNodes();
2969 RETURN_ERR_IF_NOT(F.verify(), "Function verification failed.");
2970
2971 deleteUnusedConstants();
2972
2973 RETURN_IF_ERR(verifyDummyQParams());
2974
2975 return Error::success();
2976 };
2977
2978 if (errPtr) {
2979 *errPtr = setup();
2980 } else {
2981 EXIT_ON_ERR(setup());
2982 }
2983}
2984
2985Error Caffe2ModelLoader::initWithModule(caffe2::NetDef &networkDef,
2986 llvm::StringRef funNamePrefix,
2987 runtime::PrePartitionedConfig *PPC) {
2988 // Look for any partitions that will be needed. If there is no
2989 // partition_info then we create a single Function to load into. Otherwise
2990 // we create multiple Functions and switch between them as we load each
2991 // operator.
2992 std::unordered_map<Function *, std::vector<runtime::DeviceIDTy>> funToIDs;
2993 std::unordered_map<Function *, BackendSpecificOptions> funToOpts;
2994 if (networkDef.partition_info_size() == 0) {
2995 G_ = mod_.createFunction(funNamePrefix);
2996 } else {
2997 for (int i = 0; i < networkDef.partition_info_size(); i++) {
2998 const std::string &pName = networkDef.partition_info(i).name();
2999 const std::string funName = funNamePrefix.str() + "_" + pName;
3000 Function *PF = mod_.createFunction(funName);
3001 partNameToFun_[pName] = PF;
3002 for (auto id : networkDef.partition_info(i).device_id()) {
3003 funToIDs[PF].push_back(id);
3004 }
3005
3006 // Now set up device options for this partition.
3007 auto &optsMap = funToOpts[PF];
3008 for (auto &backendOpts : networkDef.partition_info(i).backend_options()) {
3009 const std::string &backendName = backendOpts.backend_name();
3010 for (auto &keyVal : backendOpts.option()) {
3011 optsMap[backendName + "_" + keyVal.key()] = keyVal.val();
3012 }
3013 }
3014 }
3015 }
3016
3017 RETURN_IF_ERR(loadNetwork(networkDef));
3018
3019 // Now setup the pre-partitioned config if relevant.
3020 if (partNameToFun_.size()) {
3021 RETURN_ERR_IF_NOT(
3022 PPC, "Partitioned model but no config to store meta information in.");
3023 PPC->funcName = funNamePrefix.str();
3024
3025 PPC->funcs.reserve(partNameToFun_.size());
3026 PPC->logicalIDs.reserve(partNameToFun_.size());
3027 for (auto &SF : partNameToFun_) {
3028 Function *F = SF.getValue();
3029 // Remove unused Functions from the module and skip them.
3030 if (F->getNodes().size() == 0) {
3031 mod_.eraseFunction(SF.getValue());
3032 continue;
3033 }
3034 // This is to ensure that the same processing done with
3035 // the same network, even if order of operators is different.
3036 F->orderNodes();
3037 PPC->funcs.push_back(F);
3038 PPC->logicalIDs.emplace_back(funToIDs[F]);
3039 PPC->backendSpecificOpts.emplace_back(funToOpts[F]);
3040 // Replication counts not currently loaded through C2, so default to 1.
3041 PPC->replicationCounts.emplace_back(1);
3042 // Backend hints not currently loaded through C2, so use default.
3043 PPC->backendHints.emplace_back();
3044 RETURN_ERR_IF_NOT(F->verify(), "Function verification failed.");
3045 }
3046 }
3047
3048 deleteUnusedConstants();
3049
3050 RETURN_IF_ERR(verifyDummyQParams());
3051
3052 return Error::success();
3053}
3054
3055Caffe2ModelLoader::Caffe2ModelLoader(const std::string &netDescFilename,
3056 const std::string &netWeightFilename,
3057 llvm::ArrayRef<const char *> names,
3058 llvm::ArrayRef<TypeRef> types, Module &mod,
3059 llvm::StringRef funNamePrefix,
3060 runtime::PrePartitionedConfig *PPC,
3061 Error *errPtr)
3062 : CommonOperatorLoader(names, types, mod, errPtr) {
3063 // if errPtr already contains an error then don't continue with constructor
3064 if (errPtr && *errPtr) {
3065 return;
3066 }
3067
3068 // Lambda to setup the Caffe2ModelLoader and return any Errors that
3069 // were raised.
3070 auto setup = [&]() -> Error {
3071 // The caffe2 network descriptor that we are deserializing.
3072 caffe2::NetDef networkDef;
3073 ASSIGN_VALUE_OR_RETURN_ERR(networkDef, loadProtoFile(netDescFilename));
3074
3075 // The caffe2 weights that we are deserializing.
3076 caffe2::NetDef weightsDef;
3077 ASSIGN_VALUE_OR_RETURN_ERR(weightsDef, loadProtoFile(netWeightFilename));
3078
3079 RETURN_IF_ERR(loadWeightsFromNet(weightsDef));
3080
3081 return initWithModule(networkDef, funNamePrefix, PPC);
3082 };
3083
3084 if (errPtr) {
3085 *errPtr = setup();
3086 } else {
3087 EXIT_ON_ERR(setup());
3088 }
3089}
3090
3091Caffe2ModelLoader::Caffe2ModelLoader(
3092 const std::string &modelStr, uint32_t weightsCount,
3093 const onnxTensorDescriptorV1 *weightDescriptors, Module &dummyMod,
3094 Error *errPtr, OriginNameToTQPMap *originNameToTQPMap,
3095 bool clipQuantRangeToFP16)
3096 : CommonOperatorLoader(
3097 {}, {}, dummyMod, errPtr,
3098 /* loadIntoExistingModule */ false, originNameToTQPMap,
3099 /* loadUniquedDummyQParams */ false, /* replaceDummyTQPs */ false,
3100 /* zeroScaleFP16Clip */ false, clipQuantRangeToFP16) {
3101 if (errPtr && *errPtr) {
3102 return;
3103 }
3104
3105 constFoldInLoader_ = false;
3106
3107 // Lambda to setup the Caffe2ModelLoader and return any Errors that were
3108 // raised.
3109 auto setup = [&]() -> Error {
3110 caffe2::NetDef networkDef;
3111 RETURN_ERR_IF_NOT(
3112 google::protobuf::TextFormat::ParseFromString(modelStr, &networkDef),
3113 "Error loading model from string");
3114
3115 ArgumentDictionaryTy dict = loadArgumentMap(networkDef);
3116
3117 std::unordered_set<std::string> initializers;
3118 if (dict.count("initializers")) {
3119 const auto &strings = dict.at("initializers")->strings();
3120 for (const auto &s : strings) {
3121 initializers.insert(s);
3122 }
3123 }
3124
3125 RETURN_IF_ERR(loadWeights(weightsCount, weightDescriptors));
3126
3127 RETURN_IF_ERR(loadInputs(networkDef, initializers));
3128
3129 // Identify primary input sequence
3130 std::unordered_set<std::string> weights;
3131 for (uint32_t i = 0; i < weightsCount; ++i) {
3132 weights.emplace(weightDescriptors[i].name);
3133 }
3134
3135 runtime::PrePartitionedConfig dummyPPC;
3136 return initWithModule(networkDef, "dummy", &dummyPPC);
3137 };
3138
3139 *errPtr = setup();
3140}
3141
3142Caffe2ModelLoader::Caffe2ModelLoader(
3143 const void *model, uint32_t modelSize, uint32_t weightsCount,
3144 const onnxTensorDescriptorV1 *weightDescriptors, Module &mod,
3145 llvm::StringRef funNamePrefix, runtime::PrePartitionedConfig *PPC,
3146 Error *errPtr, bool constFoldInLoader,
3147 OriginNameToTQPMap *originNameToTQPMap, bool loadUniquedDummyQParams,
3148 bool zeroScaleFP16Clip, bool clipQuantRangeToFP16)
3149 : CommonOperatorLoader({}, {}, mod, errPtr,
3150 /* loadIntoExistingModule */ false,
3151 originNameToTQPMap, loadUniquedDummyQParams,
3152 /* replaceDummyTQPs */ false, zeroScaleFP16Clip,
3153 clipQuantRangeToFP16) {
3154 // if errPtr already contains an error then don't continue with constructor
3155 if (errPtr && *errPtr) {
3156 return;
3157 }
3158
3159 // Always override the default for folding in this constructor.
3160 constFoldInLoader_ = constFoldInLoader;
3161
3162 // Lambda to setup the Caffe2ModelLoader and return any Errors that were
3163 // raised.
3164 auto setup = [&]() -> Error {
3165 caffe2::NetDef networkDef;
3166 ASSIGN_VALUE_OR_RETURN_ERR(networkDef, loadProto(model, modelSize));
3167
3168 ArgumentDictionaryTy dict = loadArgumentMap(networkDef);
3169
3170 std::unordered_set<std::string> initializers;
3171 if (dict.count("initializers")) {
3172 const auto &strings = dict.at("initializers")->strings();
3173 for (const auto &s : strings) {
3174 initializers.insert(s);
3175 }
3176 }
3177
3178 RETURN_IF_ERR(loadWeights(weightsCount, weightDescriptors));
3179
3180 RETURN_IF_ERR(loadInputs(networkDef, initializers));
3181
3182 // Identify primary input sequence
3183 std::unordered_set<std::string> weights;
3184 for (uint32_t i = 0; i < weightsCount; ++i) {
3185 weights.emplace(weightDescriptors[i].name);
3186 }
3187 for (const auto &input : networkDef.external_input()) {
3188 if (!weights.count(input)) {
3189 positionalInputNames_.emplace_back(input);
3190 }
3191 }
3192 for (const auto &output : networkDef.external_output()) {
3193 positionalOutputNames_.emplace_back(output);
3194 }
3195
3196 return initWithModule(networkDef, funNamePrefix, PPC);
3197 };
3198
3199 if (errPtr) {
3200 *errPtr = setup();
3201 } else {
3202 EXIT_ON_ERR(setup());
3203 }
3204}
3205