/**
 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "glow/Importer/Caffe2ModelLoader.h"
#include "glow/Base/Tensor.h"
#include "glow/Graph/Graph.h"
#include "glow/Graph/Nodes.h"
#include "glow/Runtime/RuntimeTypes.h"
#include "glow/Support/Error.h"

#include "llvm/Support/Casting.h"

#include "caffe2/proto/caffe2.pb.h"
#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/text_format.h>

#include <cstddef>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

using namespace glow;
using llvm::cast;

using ArgumentDictionaryTy =
    std::unordered_map<std::string, const caffe2::Argument *>;

namespace glow {
/// Template specialization of loadOperatorName for caffe2.
template <>
std::string
loadOperatorName<caffe2::OperatorDef>(const caffe2::OperatorDef &op) {
  if (op.name().length()) {
    return op.name();
  }
  if (op.output_size() > 0) {
    return op.output(0);
  }
  return op.type();
}
// FIXME: this is a temporary solution for the case when NonZero returns
// -2^31 as the boundary for the returned indices. For example, currently
// we get NonZero([0, 1, 1, 0, 0]) -> [1, 2, -2^31, 0, 0], because the
// shapes are static. This function makes sure that the output looks like
// [1, 2, -1, -1, -1], which is more convenient for now.
// The logic: given [1, 2, -2^31, 0, 0], we first convert it to
// [0, 0, 1, 0, 0] by finding the negative element, then take a cumulative
// sum to get [0, 0, 1, 1, 1]; wherever that is 0 we keep the original
// value, and wherever it is 1 we emit -1, yielding [1, 2, -1, -1, -1].
Node *fixNonZero(Function *F, Module &mod, const std::string opName,
                 NodeValue node) {
  auto zeroes = F->createSplat(opName + ".fixNZ.zeroes", node.getType(), 0);
  auto floatTy = mod.uniqueType(ElemKind::Float16Ty, node.dims());
  auto minusOnesFloat =
      F->createSplat(opName + ".fixNZ.minusOnesFloat", floatTy, -1);
  auto zeroesFloat = F->createSplat(opName + ".fixNZ.zeroesFloat", floatTy, 0);
  auto onesFloat = F->createSplat(opName + ".fixNZ.onesFloat", floatTy, 1);
  auto nodeFloat = F->createConvertTo(opName + ".fixNZ.float", node, floatTy);

  // If there is a boundary, it will be marked as true.
  auto isNegBool = F->createCmpLT(opName + ".fixNZ.isNegBool", node, zeroes);
  auto isNegFloat = F->createSelect(opName + ".fixNZ.isNegFloat", isNegBool,
                                    onesFloat, zeroesFloat);
  auto isNegInt = F->createConvertTo(opName + ".fixNZ.isNegInt", isNegFloat,
                                     node.getType());

  // After applying cumsum, every element before the boundary will be 0,
  // and every element starting from the boundary will be 1.
  auto cumSum = F->createCumSum(opName + ".fixNZ.cumSum", isNegInt, 0);

  auto isAfterBoundary =
      F->createCmpGT(opName + ".fixNZ.isAfterBoundary", cumSum, zeroes);

  auto withMinusOnesFloat =
      F->createSelect(opName + ".fixNZ.withMinusOnesFloat", isAfterBoundary,
                      minusOnesFloat, nodeFloat);

  auto withMinusOnesInt = F->createConvertTo(
      opName + ".fixNZ.withMinusOnesInt", withMinusOnesFloat, node.getType());

  return withMinusOnesInt;
}
} // namespace glow

/// Legacy padding modes supported in caffe2. These are used by MaxPool
/// operators, and are defined in caffe2_legacy.proto in the caffe2 source
/// tree.
enum LegacyPaddingMode { NOTSET, VALID, SAME, CAFFE_LEGACY_POOLING, N_MODES };

/// Creates a tensor from the input \p in. Note, there is no data associated
/// with the Tensor. This method makes sure that the tensor is created with
/// the proper shape and element type.
Expected<LoadWeightResult>
Caffe2ModelLoader::createAndSetTensorType(const caffe2::TensorProto &in) {
  std::vector<dim_t> dim;
  for (auto d : in.dims()) {
    if (d == 0) {
      return MAKE_ERR("0 dimension is not supported");
    }
    dim.push_back(d);
  }

  LoadWeightResult result;
  result.t = glow::make_unique<Tensor>();

  if (in.data_type() == caffe2::TensorProto::FLOAT) {
    result.t->reset(ElemKind::FloatTy, dim);
  } else if (in.data_type() == caffe2::TensorProto::FLOAT16) {
    result.t->reset(ElemKind::Float16Ty, dim);
  } else if (in.data_type() == caffe2::TensorProto::INT32) {
    result.t->reset(ElemKind::Int32ITy, dim);
  } else if (in.data_type() == caffe2::TensorProto::INT64) {
    result.t->reset(ElemKind::Int64ITy, dim);
  } else if (in.data_type() == caffe2::TensorProto::UINT8) {
    result.t->reset(ElemKind::UInt8QTy, dim, 1.0, 0);
  } else if (in.data_type() == caffe2::TensorProto::INT8) {
    result.t->reset(ElemKind::Int8QTy, dim, 1.0, 0);
  } else {
    return MAKE_ERR(
        strFormat("FP32/16, Int32/64, Int8/Uint8 are supported. Got type"
                  " %s for tensor %s.",
                  caffe2::TensorProto_DataType_Name(in.data_type()).c_str(),
                  in.name().c_str()));
  }

  return Expected<LoadWeightResult>(std::move(result));
}

Expected<LoadWeightResult>
Caffe2ModelLoader::createAndSetTensorType(const caffe2::QTensorProto &in) {
  std::vector<dim_t> dim;
  for (auto d : in.dims()) {
    if (d == 0) {
      return MAKE_ERR("0 dimension qtensor is not supported");
    }
    dim.push_back(d);
  }

  if (in.axis() != 1) {
    return MAKE_ERR("axis must be 1");
  }

  dim_t qparams = static_cast<dim_t>(in.scales().size());

  RETURN_ERR_IF_NOT(qparams > 0, "No qparams found");

  RETURN_ERR_IF_NOT(in.biases().size() == in.scales().size(),
                    "Found a different number of biases and scales");

  LoadWeightResult result;
  result.t = glow::make_unique<Tensor>();

  float scale = 1.0;
  int32_t offset = 0;

  // If only one set of qparams is present then use them, otherwise load the
  // multiple sets of qparams as separate tensors and use the default qparams
  // for the main tensor result.t.
  // TODO: should we check is_multiparam?
  if (qparams == 1) {
    scale = in.scales(0);
    offset = in.biases(0);
  } else {
    RETURN_ERR_IF_NOT(!originNameToTQPMap_,
                      "Unsupported loading of uniqued qparams for vector of "
                      "scales/biases for " +
                          in.name());
    result.scales = glow::make_unique<Tensor>(ElemKind::FloatTy,
                                              llvm::makeArrayRef({qparams}));
    result.offsets = glow::make_unique<Tensor>(ElemKind::Int32ITy,
                                               llvm::makeArrayRef({qparams}));

    auto scalesH = result.scales->getHandle<float>();
    auto offsetsH = result.offsets->getHandle<int32_t>();
    for (size_t i = 0; i < qparams; ++i) {
      scalesH.raw(i) = in.scales(i);
      offsetsH.raw(i) = in.biases(i);
    }
  }

  if (in.data_type() == caffe2::TensorProto::INT8) {
    TypeRef outTy;
    ASSIGN_VALUE_OR_RETURN_ERR(
        outTy, ProtobufLoader::loadQuantTy(in.name(), ElemKind::Int8QTy, dim,
                                           scale, offset,
                                           /* shiftUInt8ToInt8 */ false));
    result.t->reset(*outTy);
  } else if (in.data_type() == caffe2::TensorProto::UINT8) {
    TypeRef outTy;
    ASSIGN_VALUE_OR_RETURN_ERR(
        outTy, ProtobufLoader::loadQuantTy(in.name(), ElemKind::Int8QTy, dim,
                                           scale, offset));
    result.t->reset(*outTy);
  } else if (in.data_type() == caffe2::TensorProto::INT32) {
    TypeRef outTy;
    ASSIGN_VALUE_OR_RETURN_ERR(
        outTy, ProtobufLoader::loadQuantTy(in.name(), ElemKind::Int32QTy, dim,
                                           scale, offset));
    result.t->reset(*outTy);
  } else {
    return MAKE_ERR("Only int8, uint8, and int32 qtensors are supported");
  }

  return Expected<LoadWeightResult>(std::move(result));
}
/// Translates the protocol buffer node \p t into a random access map.
template <typename T> static ArgumentDictionaryTy loadArgumentMap(const T &t) {
  ArgumentDictionaryTy dict;
  for (auto &arg : t.arg()) {
    dict[arg.name()] = &arg;
  }
  return dict;
}

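// An illustrative example (not from any particular model): for a typical
// Conv operator, loadArgumentMap() produces a dictionary along the lines of
//   {"stride" -> i:2, "pad" -> i:1, "kernel" -> i:3, "order" -> s:"NCHW"},
// which the helpers below then consume by argument name.

/// Translates the padding arguments of \p dict into four pad values in
/// {top, left, bottom, right} order. Accepts a single "pad" value, the four
/// explicit "pad_t"/"pad_l"/"pad_b"/"pad_r" values, or a "pads" list, and
/// defaults to all zeros when no padding argument is present.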
static Expected<std::vector<unsigned_t>> getPads(ArgumentDictionaryTy &dict) {
  if (dict.count("pad")) {
    int pad;
    ASSIGN_VALUE_OR_RETURN_ERR(pad, loadInt(dict.at("pad")));
    std::vector<unsigned_t> pads(4, pad);
    return pads;
  }
  if (dict.count("pad_t")) {
    std::vector<unsigned_t> pads(4);
    ASSIGN_VALUE_OR_RETURN_ERR(pads[0], loadInt(dict.at("pad_t")));
    RETURN_ERR_IF_NOT(dict.count("pad_l"), "missing pad_l");
    ASSIGN_VALUE_OR_RETURN_ERR(pads[1], loadInt(dict.at("pad_l")));
    RETURN_ERR_IF_NOT(dict.count("pad_b"), "missing pad_b");
    ASSIGN_VALUE_OR_RETURN_ERR(pads[2], loadInt(dict.at("pad_b")));
    RETURN_ERR_IF_NOT(dict.count("pad_r"), "missing pad_r");
    ASSIGN_VALUE_OR_RETURN_ERR(pads[3], loadInt(dict.at("pad_r")));
    return pads;
  }
  if (dict.count("pads")) {
    std::vector<unsigned_t> shape;
    ASSIGN_VALUE_OR_RETURN_ERR(shape, getShape<unsigned_t>(dict["pads"]));
    return shape;
  }
  // Return default value 0 for pads.
  return std::vector<unsigned_t>{0, 0, 0, 0};
}

/// Translates the "order" field of dictionary \p dict into a channel number.
static Expected<unsigned_t> getChannel(ArgumentDictionaryTy &dict) {
  std::string order = "NCHW"; // default
  auto orderIt = dict.find("order");
  if (orderIt != dict.end()) {
    ASSIGN_VALUE_OR_RETURN_ERR(order, loadStr(orderIt->second));
  }
  if (order == "NHWC") {
    return 3;
  } else if (order == "NCHW") {
    return 1;
  }
  return MAKE_ERR("Invalid order field");
}

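/// Reads a height/width pair named \p name from \p dict. Accepts a single
/// scalar (e.g. "kernel" -> {k, k}), an explicit pair ("kernel_h" and
/// "kernel_w" -> {h, w}), or a list ("kernels"), and falls back to
/// {\p defaultValue, \p defaultValue} when none is present.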
static Expected<std::vector<unsigned_t>> getSizeHW(ArgumentDictionaryTy &dict,
                                                   const std::string &name,
                                                   unsigned_t defaultValue) {
  if (dict.count(name)) {
    int value;
    ASSIGN_VALUE_OR_RETURN_ERR(value, loadInt(dict[name]));
    std::vector<unsigned_t> result(2, value);
    return result;
  }
  if (dict.count(name + "_h") && dict.count(name + "_w")) {
    std::vector<unsigned_t> result(2);
    ASSIGN_VALUE_OR_RETURN_ERR(result[0], loadInt(dict[name + "_h"]));
    ASSIGN_VALUE_OR_RETURN_ERR(result[1], loadInt(dict[name + "_w"]));
    return result;
  }
  if (dict.count(name + "s")) {
    return getShape<unsigned_t>(dict[name + "s"]);
  }
  return std::vector<unsigned_t>{defaultValue, defaultValue};
}

Expected<caffe2::NetDef>
Caffe2ModelLoader::loadProtoFile(const std::string &filename) {
  std::ifstream ff(filename, std::ios::in | std::ios::binary);
  RETURN_ERR_IF_NOT(ff,
                    strFormat("Can't find the model or network files for %s",
                              filename.c_str()));
  caffe2::NetDef net;

  bool parseNet = false;
  if (filename.find(".pbtxt") != std::string::npos) {
    std::string str((std::istreambuf_iterator<char>(ff)),
                    std::istreambuf_iterator<char>());
    parseNet = google::protobuf::TextFormat::ParseFromString(str, &net);
  } else {
    // Construct and configure a Coded Input Stream.
    google::protobuf::io::IstreamInputStream filestr(&ff);
    google::protobuf::io::CodedInputStream codedstr(&filestr);
    // Don't warn about large file sizes.
#if GOOGLE_PROTOBUF_VERSION >= 3002000
    codedstr.SetTotalBytesLimit(MAX_PROTO_SIZE);
#else
    codedstr.SetTotalBytesLimit(MAX_PROTO_SIZE, MAX_PROTO_SIZE);
#endif
    parseNet = net.ParseFromCodedStream(&codedstr);
  }

  RETURN_ERR_IF_NOT(parseNet, "Failed to parse the network descriptor.");
  return net;
}

Expected<caffe2::NetDef> Caffe2ModelLoader::loadProto(const void *c2Model,
                                                      size_t c2ModelSize) {
  google::protobuf::io::ArrayInputStream arrayStream(c2Model, c2ModelSize);
  // Construct and configure a Coded Input Stream.
  google::protobuf::io::CodedInputStream codedStream(&arrayStream);

  // Don't warn about large file sizes.
#if GOOGLE_PROTOBUF_VERSION >= 3002000
  codedStream.SetTotalBytesLimit(MAX_PROTO_SIZE);
#else
  codedStream.SetTotalBytesLimit(MAX_PROTO_SIZE, MAX_PROTO_SIZE);
#endif
  caffe2::NetDef MP;
  bool parseNet = MP.ParseFromCodedStream(&codedStream);
  RETURN_ERR_IF_NOT(parseNet, "Failed to parse NetDef");
  return MP;
}

Expected<bool> Caffe2ModelLoader::getBroadcast(ArgumentDictionaryTy &dict) {
  if (!dict.count("broadcast")) {
    return false;
  }
  int broadcast;
  ASSIGN_VALUE_OR_RETURN_ERR(broadcast, loadInt(dict.at("broadcast")));
  return broadcast == 1;
}

bool Caffe2ModelLoader::hasMultidirectionalBroadcast(
    const llvm::StringRef typeName) {
  (void)typeName;
  return false;
}

const std::string Caffe2ModelLoader::opErrMsg(const caffe2::OperatorDef &op,
                                              const std::string &errMsg) {
  const std::string &opName = loadOperatorName(op);
  return strFormat(" [Operator-'%s'] : %s ", opName.c_str(), errMsg.c_str());
}

// Caffe2 PRelu
// https://github.com/pytorch/pytorch/blob/master/caffe2/operators/prelu_op.cc
Error Caffe2ModelLoader::loadPRelu(const caffe2::OperatorDef &op,
                                   ArgumentDictionaryTy &dict) {
  const std::string &opName = loadOperatorName(op);

  NodeValue in;
  ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));

  NodeValue slope;
  ASSIGN_VALUE_OR_RETURN_ERR(slope, getNodeValueByName(op.input(1)));

  // Do broadcasting.
  auto targetDim = in.dims();
  // Set the axis assuming the input is in NCHW format.
  int axis = 1;
  auto *finalSlope = G_->createBroadcast(opName, slope, targetDim, axis);
  auto *R = G_->createPRELU(opName, in, finalSlope);
  RETURN_IF_ERR(addNodeAsOutput(op, R));
  return Error::success();
}

Error Caffe2ModelLoader::loadSoftmax(const caffe2::OperatorDef &op,
                                     ArgumentDictionaryTy &dict) {
  const std::string &opName = loadOperatorName(op);

  NodeValue in;
  ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));

  RETURN_ERR_IF_NOT(
      in.dims().size() >= 2,
      opErrMsg(op,
               strFormat(
                   "SoftMax input dims must be >= 2, but found input dims %zu ",
                   in.dims().size())));

  // Create a constant to store labels to be used in SoftMaxGradNode.
  auto *selected = G_->createSplat(
      opName + ".selected",
      mod_.uniqueType(ElemKind::Int64ITy, {in.dims()[0], 1}), 0.f);

  int axis = 1;
  if (dict.count("axis")) {
    ASSIGN_VALUE_OR_RETURN_ERR(axis,
                               loadAxis<int>(dict["axis"], in.dims().size()));
  }

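  // Glow's SoftMax expects a 2D input, so flatten around the axis and reshape
  // back afterwards. For example (illustrative): an input of dims {2, 3, 4}
  // with axis = 1 is flattened to {2, 12}, SoftMax is applied row-wise, and
  // the result is reshaped back to {2, 3, 4}.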
  auto *FN = G_->createFlatten(opName + ".reshapeInput", in, axis);
  auto *SM = G_->createSoftMax(opName, FN, selected);

  // The output should have the same shape as the original input.
  auto origInDims = in.getType()->dims();
  auto *RN = G_->createReshape(opName + ".reshapeOutput", SM, origInDims);
  RETURN_IF_ERR(addNodeAsOutput(op, RN));
  return Error::success();
}

Error Caffe2ModelLoader::loadConv(const caffe2::OperatorDef &op,
                                  ArgumentDictionaryTy &dict) {
  const std::string &opName = loadOperatorName(op);

  // Load the inputs:
  std::vector<unsigned_t> strides;
  ASSIGN_VALUE_OR_RETURN_ERR(strides, getSizeHW(dict, "stride", 1));
  std::vector<unsigned_t> pads;
  ASSIGN_VALUE_OR_RETURN_ERR(pads, getPads(dict));
  std::vector<unsigned_t> kernels;
  ASSIGN_VALUE_OR_RETURN_ERR(kernels, getSizeHW(dict, "kernel", 0));
  unsigned_t group = 1;
  if (dict.count("group")) {
    ASSIGN_VALUE_OR_RETURN_ERR(group, loadInt(dict["group"]));
  }
  std::string order = "NCHW";
  if (dict.count("order")) {
    ASSIGN_VALUE_OR_RETURN_ERR(order, loadStr(dict["order"]));
  }
  std::vector<unsigned_t> dilations;
  ASSIGN_VALUE_OR_RETURN_ERR(dilations,
                             getDilations(dict, std::vector<unsigned_t>{1, 1}));

  NodeValue in;
  ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));

  NodeValue w;
  ASSIGN_VALUE_OR_RETURN_ERR(w, getConstantByName(op.input(1)));

  // Transpose the weights to the right format. Glow expects to read the
  // weights in the format CRSK.
  // C - output_depth, R - filter_height, S - filter_width, K - input_depth.
  // Caffe2 "Conv" op always stores the weight as CKRS.
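  // For example (illustrative): a {64, 3, 7, 7} CKRS filter becomes a
  // {64, 7, 7, 3} CRSK filter under the NCHW2NHWC shuffle {0, 2, 3, 1}.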
  w = G_->createTranspose(w.getNode()->getName().str() + "_NHWC", w, NCHW2NHWC,
                          "NHWC");

  // The structure of the conv weights is: CRSK. We take the C, which is the
  // number of filters. We use this value to calculate the size of the bias
  // if it is not specified.
  dim_t depth = w.dims()[0];

  // We expect the input to be NHWC.
  NodeValue finalIn;
  if (order == "NCHW") {
    finalIn = G_->createTranspose(opName, in, NCHW2NHWC)->getResult();
  } else {
    finalIn = in;
  }

  TypeRef finalInType = finalIn.getType();

  // Calculate the size and allocate the output buffer.
  ShapeNHWC idim = ShapeNHWC(finalInType->dims());
  auto outSz = calculateConvPoolOutputDims(idim.h, idim.w, kernels, strides,
                                           pads, dilations);
  std::array<dim_t, 4> outDims = {{idim.n, outSz.first, outSz.second, depth}};

  // Try to find a loaded bias constant.
  NodeValue bias(nullptr);
  if (op.input_size() > 2) {
    const auto &biasName = op.input(2);
    bias = getConstantByNameOrNull(biasName);
  }
  // Construct the bias constant if one wasn't found.
  if (!bias.getNode()) {
    TypeRef bTy = mod_.uniqueType(ElemKind::FloatTy, {depth});
    bias = G_->createSplat(opName + ".bias", bTy, 0.f);
  }

  TypeRef outTy = mod_.uniqueType(ElemKind::FloatTy, outDims);

  Node *node = G_->createConv(opName, finalIn, w, bias, outTy, kernels, strides,
                              pads, group, dilations);
  if (op.type() == "ConvRelu") {
    node = G_->createRELU(opName + ".relu", node);
  }
  if (order == "NCHW") {
    // Transpose the output back.
    node = G_->createTranspose(opName, node, NHWC2NCHW);
  }
  RETURN_IF_ERR(addNodeAsOutput(op, node));
  return Error::success();
}

Error Caffe2ModelLoader::loadConvQuantized(const caffe2::OperatorDef &op,
                                           ArgumentDictionaryTy &dict) {
  const std::string &opName = loadOperatorName(op);

  // Load the inputs:
  std::vector<unsigned_t> strides;
  ASSIGN_VALUE_OR_RETURN_ERR(strides, getSizeHW(dict, "stride", 1));
  std::vector<unsigned_t> pads;
  ASSIGN_VALUE_OR_RETURN_ERR(pads, getPads(dict));
  std::vector<unsigned_t> kernels;
  ASSIGN_VALUE_OR_RETURN_ERR(kernels, getSizeHW(dict, "kernel", 0));
  unsigned_t group = 1;
  if (dict.count("group")) {
    ASSIGN_VALUE_OR_RETURN_ERR(group, loadInt(dict["group"]));
  }
  std::string order = "NCHW";
  if (dict.count("order")) {
    ASSIGN_VALUE_OR_RETURN_ERR(order, loadStr(dict["order"]));
  }
  bool quantizeGroupwise = false;
  if (dict.count("quantize_groupwise")) {
    ASSIGN_VALUE_OR_RETURN_ERR(quantizeGroupwise,
                               loadInt(dict["quantize_groupwise"]));
  }
  std::vector<unsigned_t> dilations;
  ASSIGN_VALUE_OR_RETURN_ERR(dilations,
                             getDilations(dict, std::vector<unsigned_t>{1, 1}));

  // Group quantization only applies if there is more than one group.
  quantizeGroupwise &= group > 1;

  NodeValue in;
  ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));

  NodeValue w;
  ASSIGN_VALUE_OR_RETURN_ERR(w, getConstantByName(op.input(1)));

  // Transpose the weights to the right format. Glow expects to read the
  // weights in the format CRSK.
  // C - output_depth, R - filter_height, S - filter_width, K - input_depth.
  // For Caffe2 "Int8Conv" and "Int8ConvRelu", the weights always follow the
  // "order" arg.
  if (order != "NHWC") {
    w = G_->createTranspose(w.getNode()->getName().str() + "_NHWC", w,
                            NCHW2NHWC, "NHWC");
  }

  // The structure of the conv weights is: CRSK. We take the C, which is the
  // number of filters. We use this value to calculate the size of the bias
  // if it is not specified.
  dim_t depth = w.dims()[0];

  // We expect the input to be NHWC.
  NodeValue finalIn;
  if (order == "NCHW") {
    finalIn = G_->createTranspose(opName, in, NCHW2NHWC)->getResult();
  } else {
    finalIn = in;
  }

  TypeRef finalInType = finalIn.getType();

  // Calculate the size and allocate the output buffer.
  ShapeNHWC idim = ShapeNHWC(finalInType->dims());
  auto outSz = calculateConvPoolOutputDims(idim.h, idim.w, kernels, strides,
                                           pads, dilations);
  std::array<dim_t, 4> outDims = {{idim.n, outSz.first, outSz.second, depth}};

  TypeRef outTy;

  RETURN_ERR_IF_NOT(dict.count("Y_zero_point"),
                    opErrMsg(op,
                             "ConvQuantized "
                             "missing zero point for quantized output type"));
  RETURN_ERR_IF_NOT(dict.count("Y_scale"),
                    opErrMsg(op, "ConvQuantized "
                                 "missing Y_scale for quantized output type"));

  // Try to find a loaded bias constant.
  NodeValue bias(nullptr);
  if (op.input_size() > 2) {
    const auto &biasName = op.input(2);
    bias = getConstantByNameOrNull(biasName);
  }
  // Construct the bias constant if one wasn't found.
  if (!bias.getNode()) {
    TypeRef bTy = mod_.uniqueType(ElemKind::Int32QTy, {depth}, 1.0, 0);
    bias = G_->createSplat(opName + "_conv.bias", bTy, 0.f);
  }

  RETURN_ERR_IF_NOT(
      bias.getType()->size() == depth,
      opErrMsg(op, strFormat("Loaded bias tensor of incorrect size %d ",
                             int(bias.getType()->size()))));

  // Construct output type
  ASSIGN_VALUE_OR_RETURN_ERR(
      outTy, loadQuantTy(opName, ElemKind::Int8QTy, outDims, dict));

  Node *node;

  if (quantizeGroupwise) {
    auto wScalesName = strFormat("%s_loaded_scales", op.input(1).c_str());
    auto wOffsetsName = strFormat("%s_loaded_offsets", op.input(1).c_str());
    Constant *wScales;
    Constant *wOffsets;
    ASSIGN_VALUE_OR_RETURN_ERR(wScales, getConstantByName(wScalesName));
    ASSIGN_VALUE_OR_RETURN_ERR(wOffsets, getConstantByName(wOffsetsName));

    // Quantize the filter automatically (only if it is float). The bias is NOT
    // quantized automatically and is left at the disposal of each Backend to
    // quantize it later using custom logic.
    node = G_->createChannelwiseQuantizedConv(
        opName, finalIn, w, bias, wScales, wOffsets, /* biasScales */ nullptr,
        /* biasOffsets */ nullptr, outTy, kernels, strides, pads, group,
        dilations, /* quantizeFilter */ true, /* quantizeBias */ false);
  } else {
    // If the bias isn't quantized for a non group quantized conv, quantize it.
    if (bias.getElementType() == ElemKind::FloatTy) {
      int32_t biasOffset = 0;
      float biasScale = finalInType->getScale() * w.getType()->getScale();

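      // This follows the standard convention for quantized convolution: with
      // the bias scale set to inScale * wScale (and a zero offset), the
      // quantized bias, bias_q = round(bias_f / (inScale * wScale)), adds
      // directly into the int32 accumulator of the conv.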
      auto biasTy = mod_.uniqueType(ElemKind::Int32QTy, bias.dims(), biasScale,
                                    biasOffset);
      bias = G_->createQuantize(opName + "_conv.bias", bias, biasTy);
    }

    node = G_->createConv(opName, finalIn, w, bias, outTy, kernels, strides,
                          pads, group, dilations);
  }

  if (op.type() == "Int8ConvRelu") {
    node = G_->createRELU(opName + ".relu", node);
  }

  if (order == "NCHW") {
    // Transpose the output back.
    node = G_->createTranspose(opName, node, NHWC2NCHW);
  }
  RETURN_IF_ERR(addNodeAsOutput(op, node));
  return Error::success();
}

Error Caffe2ModelLoader::loadLayerNorm(const caffe2::OperatorDef &op,
                                       ArgumentDictionaryTy &dict) {
  const std::string &opName = loadOperatorName(op);

  NodeValue in;
  ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));

  unsigned_t axis = 1; // Caffe2 default.
  if (dict.count("axis")) {
    ASSIGN_VALUE_OR_RETURN_ERR(axis, loadInt(dict["axis"]));
    ASSIGN_VALUE_OR_RETURN_ERR(axis,
                               getPositiveAxis<int>(axis, in.dims().size()));
  }

  // Feature shape is based on the input dims, from the axis to the end.
  ShapeVector featDims;
  for (dim_t i = axis, e = in.dims().size(); i < e; ++i) {
    featDims.push_back(in.dims()[i]);
  }
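  // For example (illustrative): an input of dims {2, 3, 4} with axis = 1
  // yields a feature shape of {3, 4} for the weight and the bias.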
  TypeRef featTy = mod_.uniqueTypeWithNewShape(in.getType(), featDims);

  NodeValue weight, bias;
  if (op.input_size() > 1) {
    RETURN_ERR_IF_NOT(op.input_size() == 3,
                      opErrMsg(op, "Must have both weight and bias"));

    ASSIGN_VALUE_OR_RETURN_ERR(weight, getNodeValueByName(op.input(1)));
    RETURN_ERR_IF_NOT(weight.getType() == featTy,
                      opErrMsg(op, "Invalid weight shape"));

    ASSIGN_VALUE_OR_RETURN_ERR(bias, getNodeValueByName(op.input(2)));
    RETURN_ERR_IF_NOT(bias.getType() == featTy,
                      opErrMsg(op, "Invalid bias shape"));
  } else {
    // Caffe2 defaults to a weight of 1 and a bias of 0.
    weight = G_->createSplat(opName + "_weight_ones", featTy, 1.0)->getResult();
    bias = G_->createSplat(opName + "_bias_zeros", featTy, 0.0)->getResult();
  }

  float eps = 0.001; // Caffe2 default.
  if (dict.count("epsilon")) {
    ASSIGN_VALUE_OR_RETURN_ERR(eps, loadFloat(dict["epsilon"]));
  }

  LayerNormalizationNode *node =
      G_->createLayerNormalization(opName, in.getType(), in, weight, bias, eps);

  // We only support one output for LayerNorm; the rest of the outputs are
  // ignored.
  RETURN_IF_ERR(addNodeAsOutput(op, node, /* numOutputs */ 1));

  return Error::success();
}

Expected<bool> Caffe2ModelLoader::foldOperator(const caffe2::OperatorDef &op) {
  const unsigned numInputs = op.input_size();
  const std::string &typeName = op.type();
  llvm::SmallVector<NodeValue, 4> inputs;
  inputs.reserve(numInputs);
  for (unsigned i = 0; i < numInputs; i++) {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(i)));
    inputs.push_back(in);
  }

  if (!isConstantFoldable(inputs, typeName)) {
    return false;
  }

  // Create a temporary lightweight loader to construct a function
  // representing the current op, and then constant fold the function using
  // the Interp backend.
  Function *tmpF = mod_.createFunction("eval_const_fold__");
  Caffe2ModelLoader tmpLoader(*tmpF, nullptr);
  bool foldStatus =
      !ERR_TO_BOOL(constantFoldInLoader<Caffe2ModelLoader, caffe2::OperatorDef>(
                       tmpF, tmpLoader, this, op),
                   /* log */ false);
  mod_.eraseFunction(tmpF);
  return foldStatus;
}

Error Caffe2ModelLoader::loadConvTranspose(const caffe2::OperatorDef &op,
                                           ArgumentDictionaryTy &dict) {
  const std::string &opName = loadOperatorName(op);

  // Load the inputs:
  std::vector<unsigned_t> strides;
  ASSIGN_VALUE_OR_RETURN_ERR(strides, getSizeHW(dict, "stride", 1));
  std::vector<unsigned_t> pads;
  ASSIGN_VALUE_OR_RETURN_ERR(pads, getPads(dict));
  std::vector<unsigned_t> kernels;
  ASSIGN_VALUE_OR_RETURN_ERR(kernels, getSizeHW(dict, "kernel", 0));
  unsigned_t group = 1;
  if (dict.count("group")) {
    ASSIGN_VALUE_OR_RETURN_ERR(group, loadInt(dict["group"]));
  }
  std::string order = "NCHW";
  if (dict.count("order")) {
    ASSIGN_VALUE_OR_RETURN_ERR(order, loadStr(dict["order"]));
  }
  std::vector<unsigned_t> dilations;
  ASSIGN_VALUE_OR_RETURN_ERR(dilations,
                             getDilations(dict, std::vector<unsigned_t>{1, 1}));

  NodeValue in;
  ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));

  NodeValue weight;
  ASSIGN_VALUE_OR_RETURN_ERR(weight, getConstantByName(op.input(1)));

  // Transpose the weights to the right format. Glow expects to read the
  // weights in the format CRSK.
  // C - output_depth, R - filter_height, S - filter_width, K - input_depth.
  // Caffe2 "ConvTranspose" op always stores the weight as KCRS.
  weight = G_->createTranspose(weight.getNode()->getName().str() + "_NHWC",
                               weight, CNHW2NHWC, "NHWC");

  // The structure of the conv weights is: CRSK. We take the C, which is the
  // number of filters. We use this value to calculate the size of the bias
  // if it is not specified.
  dim_t depth = weight.dims()[0];

  // We expect the input to be NHWC.
  NodeValue finalIn;
  if (order == "NCHW") {
    finalIn = G_->createTranspose(opName, in, NCHW2NHWC)->getResult();
  } else {
    finalIn = in;
  }

  TypeRef finalInType = finalIn.getType();

  // Calculate the size and allocate the output buffer.
  ShapeNHWC idim = ShapeNHWC(finalInType->dims());
  auto outSz = calculateConvTransposeOutputDims(idim.h, idim.w, kernels,
                                                strides, pads, dilations);
  std::array<dim_t, 4> outDims = {{idim.n, outSz.first, outSz.second, depth}};

  // Try to find a loaded bias constant.
  NodeValue bias(nullptr);
  if (op.input_size() > 2) {
    const auto &biasName = op.input(2);
    bias = getConstantByNameOrNull(biasName);
  }
  // Construct the bias constant if one wasn't found.
  if (!bias.getNode()) {
    TypeRef bTy = mod_.uniqueType(ElemKind::FloatTy, {depth});
    bias = G_->createSplat(opName + "_conv.bias", bTy, 0.f);
  }

  TypeRef outTy = mod_.uniqueType(ElemKind::FloatTy, outDims);

  Node *node =
      G_->createConvTranspose(opName, finalIn, weight, bias, outTy, kernels,
                              strides, pads, group, dilations);

  if (order == "NCHW") {
    // Transpose the output back.
    node = G_->createTranspose(opName, node, NHWC2NCHW);
  }
  RETURN_IF_ERR(addNodeAsOutput(op, node));
  return Error::success();
}

Error Caffe2ModelLoader::loadOperator(const caffe2::OperatorDef &op) {
  ArgumentDictionaryTy dict = loadArgumentMap(op);
  const std::string &typeName = op.type();
  mod_.registerOriginalName(op.name());

  // Check if the operator is supported in the parent class,
  // CommonOperatorLoader.
  bool loadCommonOperatorSuccess;
  ASSIGN_VALUE_OR_RETURN_ERR(loadCommonOperatorSuccess,
                             tryLoadCommonOperator(typeName, op, dict));
  if (loadCommonOperatorSuccess) {
    return Error::success();
  }
  const std::string &opName = loadOperatorName(op);

  if (typeName == "Gelu") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    Node *node = G_->createGelu(opName, in);

    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "Conv" || typeName == "ConvRelu") {
    return loadConv(op, dict);
  }

  if (typeName == "Softmax") {
    return loadSoftmax(op, dict);
  }

  if (typeName == "PRelu") {
    return loadPRelu(op, dict);
  }

  if (typeName == "ConvTranspose") {
    return loadConvTranspose(op, dict);
  }

  if (typeName == "Int8Conv" || typeName == "Int8ConvRelu") {
    return loadConvQuantized(op, dict);
  }

  if (typeName == "LayerNorm") {
    return loadLayerNorm(op, dict);
  }

  if (typeName == "Int8SumRelu") {
    RETURN_ERR_IF_NOT(op.input_size() == 2,
                      opErrMsg(op, "Only Sum of 2 inputs is supported."));
    RETURN_ERR_IF_NOT(
        dict.count("Y_zero_point"),
        opErrMsg(op, "missing zero point for quantized output type"));
    RETURN_ERR_IF_NOT(
        dict.count("Y_scale"),
        opErrMsg(op, "missing Y_scale for quantized output type"));
    NodeValue in0;
    ASSIGN_VALUE_OR_RETURN_ERR(in0, getNodeValueByName(op.input(0)));
    NodeValue in1;
    ASSIGN_VALUE_OR_RETURN_ERR(in1, getNodeValueByName(op.input(1)));
    auto outDims = in0.getType()->dims();
    TypeRef outTy;
    ASSIGN_VALUE_OR_RETURN_ERR(
        outTy, loadQuantTy(opName, ElemKind::Int8QTy, outDims, dict));
    auto *add = G_->createAdd(opName + ".sum", outTy, in0, in1);
    auto *relu = G_->createRELU(opName + ".relu", add);
    RETURN_IF_ERR(addNodeAsOutput(op, relu));
    return Error::success();
  }

  if (typeName == "Int8Relu") {
    RETURN_ERR_IF_NOT(op.input_size() == 1,
                      opErrMsg(op, "Only one input is supported."));
    RETURN_ERR_IF_NOT(
        dict.count("Y_zero_point"),
        opErrMsg(op, "missing zero point for quantized output type"));
    RETURN_ERR_IF_NOT(
        dict.count("Y_scale"),
        opErrMsg(op, "missing Y_scale for quantized output type"));
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    auto outDims = in.getType()->dims();
    TypeRef outTy;
    ASSIGN_VALUE_OR_RETURN_ERR(
        outTy, loadQuantTy(opName, ElemKind::Int8QTy, outDims, dict));
    auto *relu = G_->createRELU(opName, in, outTy);
    RETURN_IF_ERR(addNodeAsOutput(op, relu));
    return Error::success();
  }

  if (typeName == "Int8Quantize") {
    RETURN_ERR_IF_NOT(
        op.input_size() == 1,
        opErrMsg(op, "Glow only supports Int8Quantize with 1 input"));
    RETURN_ERR_IF_NOT(
        dict.count("Y_zero_point"),
        opErrMsg(op, "missing zero point for quantized output type"));
    RETURN_ERR_IF_NOT(
        dict.count("Y_scale"),
        opErrMsg(op, "missing Y_scale for quantized output type"));
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    auto outDims = in.getType()->dims();
    TypeRef outTy;
    ASSIGN_VALUE_OR_RETURN_ERR(
        outTy, loadQuantTy(opName, ElemKind::Int8QTy, outDims, dict));
    Node *N = G_->createQuantize(opName, in, outTy);
    RETURN_IF_ERR(addNodeAsOutput(op, N));
    return Error::success();
  }

  if (typeName == "Int8Dequantize") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    auto *node = G_->createDequantize(opName, in, ElemKind::FloatTy);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "MaxPool" || typeName == "AveragePool" ||
      typeName == "Int8MaxPool" || typeName == "Int8AveragePool") {
    // Load the inputs:
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    std::vector<unsigned_t> strides;
    ASSIGN_VALUE_OR_RETURN_ERR(strides, getSizeHW(dict, "stride", 1));
    std::vector<unsigned_t> kernels;
    ASSIGN_VALUE_OR_RETURN_ERR(kernels, getSizeHW(dict, "kernel", 0));
    std::vector<unsigned_t> pads;
    ASSIGN_VALUE_OR_RETURN_ERR(pads, getPads(dict));
    bool countIncludePads;
    ASSIGN_VALUE_OR_RETURN_ERR(
        countIncludePads, getCountIncludePads(dict, /* defaultValue */ true));
    std::string order = "NCHW";
    if (dict.count("order")) {
      ASSIGN_VALUE_OR_RETURN_ERR(order, loadStr(dict["order"]));
    }
    // We expect the input to be NHWC.
    NodeValue finalIn;
    if (order == "NCHW") {
      finalIn = G_->createTranspose(opName, in, NCHW2NHWC)->getResult();
    } else {
      finalIn = in;
    }

    // If 'global_pooling' is set then the operation will pool over the size
    // of the input by doing: kernels = {height, width}.
    if (dict.count("global_pooling")) {
      auto Ty = in.getType();
      kernels[0] = Ty->dims()[2];
      kernels[1] = Ty->dims()[3];
    }

    // Check the padding style.
    if (dict.count("legacy_pad")) {
      int mode;
      ASSIGN_VALUE_OR_RETURN_ERR(mode, loadInt(dict["legacy_pad"]));
      // Caffe1 (legacy) rounded up, while Caffe2 rounds down.
      // This style is deprecated according to caffe2's caffe2_legacy.proto
      // definition.
      if (static_cast<LegacyPaddingMode>(mode) ==
          LegacyPaddingMode::CAFFE_LEGACY_POOLING) {
        return MAKE_ERR(opErrMsg(op,
                                 "MaxPool nodes with legacy caffe padding are "
                                 "deprecated and not supported."));
      }
    }

    Node *node = nullptr;

    if (typeName == "Int8MaxPool" || typeName == "Int8AveragePool") {
      // Create the node with quantized type.
      RETURN_ERR_IF_NOT(
          dict.count("Y_zero_point"),
          opErrMsg(op, "missing zero point for quantized output type"));
      RETURN_ERR_IF_NOT(
          dict.count("Y_scale"),
          opErrMsg(op, "missing Y_scale for quantized output type"));

      TypeRef finalInType = finalIn.getType();
      ShapeNHWC idim = ShapeNHWC(finalInType->dims());
      auto outSz =
          calculateConvPoolOutputDims(idim.h, idim.w, kernels, strides, pads);
      std::array<dim_t, 4> outDims = {
          {idim.n, outSz.first, outSz.second, idim.c}};
      if (typeName == "Int8MaxPool") {
        // Int8MaxPool output quantization should be the same as the input,
        // so just ignore the given params.
        node = G_->createMaxPool(opName, finalIn, kernels, strides, pads);
      } else {
        TypeRef outTy;
        ASSIGN_VALUE_OR_RETURN_ERR(
            outTy, loadQuantTy(opName, ElemKind::Int8QTy, outDims, dict));
        node = G_->createAvgPool(opName, finalIn, outTy, kernels, strides, pads,
                                 NHWC, countIncludePads);
      }
    } else if (typeName == "MaxPool") {
      node = G_->createMaxPool(opName, finalIn, kernels, strides, pads);
    } else {
      node = G_->createAvgPool(opName, finalIn, kernels, strides, pads, NHWC,
                               countIncludePads);
    }
    if (order == "NCHW") {
      unsigned resIdx = 0;
      if (llvm::isa<MaxPoolNode>(node)) {
        resIdx = MaxPoolNode::ResultIdx;
      } else if (llvm::isa<AvgPoolNode>(node)) {
        resIdx = AvgPoolNode::ResultIdx;
      } else {
        return MAKE_ERR("Expected either Max or Avg Pool.");
      }
      // Transpose the output back.
      node = G_->createTranspose(opName, node->getNthResult(resIdx), NHWC2NCHW);
    }
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "SpatialBN") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    Constant *scale;
    ASSIGN_VALUE_OR_RETURN_ERR(scale, getConstantByName(op.input(1)));
    Constant *bias;
    ASSIGN_VALUE_OR_RETURN_ERR(bias, getConstantByName(op.input(2)));
    Constant *mean;
    ASSIGN_VALUE_OR_RETURN_ERR(mean, getConstantByName(op.input(3)));
    Constant *var;
    ASSIGN_VALUE_OR_RETURN_ERR(var, getConstantByName(op.input(4)));
    float epsilon = 1e-5f; // default
    auto epsilonIt = dict.find("epsilon");
    if (epsilonIt != dict.end()) {
      ASSIGN_VALUE_OR_RETURN_ERR(epsilon, loadFloat(epsilonIt->second));
    }

    unsigned_t channel;
    ASSIGN_VALUE_OR_RETURN_ERR(channel, getChannel(dict));
    auto *node = G_->createBatchNormalization(
        opName, in.getType(), in, bias, scale, mean, var, channel, epsilon);

    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "Bucketize") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    RETURN_ERR_IF_NOT(
        dict.count("boundaries"),
        opErrMsg(op, "Bucketize: Expected a boundaries member vector"));
    std::vector<float> boundaries;
    ASSIGN_VALUE_OR_RETURN_ERR(boundaries, getFloats(dict["boundaries"]));
    auto *node = G_->createBucketizeNode(opName, in, boundaries);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "ResizeNearest") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));

    std::string order = "NCHW";
    if (dict.count("order")) {
      ASSIGN_VALUE_OR_RETURN_ERR(order, loadStr(dict["order"]));
    }
    // We expect the input to be NHWC.
    NodeValue finalIn;
    if (order == "NCHW") {
      finalIn = G_->createTranspose(opName, in, NCHW2NHWC)->getResult();
    } else {
      finalIn = in;
    }

    float heightScale;
    ASSIGN_VALUE_OR_RETURN_ERR(heightScale, loadFloat(dict["height_scale"]));
    float widthScale;
    ASSIGN_VALUE_OR_RETURN_ERR(widthScale, loadFloat(dict["width_scale"]));

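    // The scale vector is in NHWC order; the batch and channel dimensions
    // keep a scale of 1.0 so that only the spatial dimensions are resized.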
    std::vector<float> scales;
    scales.push_back(1.0f);
    scales.push_back(heightScale);
    scales.push_back(widthScale);
    scales.push_back(1.0f);

    auto *node = G_->createResizeNearest(opName, finalIn, scales);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "Concat") {
    const unsigned numInputs = op.input_size();
    llvm::SmallVector<NodeValue, 4> inputs;
    inputs.reserve(numInputs);
    for (unsigned i = 0; i < numInputs; i++) {
      NodeValue in;
      ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(i)));
      inputs.push_back(std::move(in));
    }

    // If axis exists it takes priority over channel.
    unsigned_t channel;
    if (dict.count("axis")) {
      ASSIGN_VALUE_OR_RETURN_ERR(channel, loadInt(dict["axis"]));
    } else {
      ASSIGN_VALUE_OR_RETURN_ERR(channel, getChannel(dict));
    }

    unsigned_t addAxis = 0;
    if (dict.count("add_axis")) {
      ASSIGN_VALUE_OR_RETURN_ERR(addAxis, loadInt(dict["add_axis"]));
    }

    Node *node{nullptr};

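    // A worked example of the add_axis path below (illustrative): three
    // inputs of dims {2, 4} with channel = 1 are concatenated to {2, 12}
    // and then reshaped to {2, 3, 4}, i.e. a new dimension of size
    // numInputs is inserted at the channel position.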
    if (addAxis) {
      // When add_axis is used, we have to add a new dimension before the
      // axis, instead of merging on the axis.
      std::vector<dim_t> outputDims = inputs[0].dims();

      if (channel < outputDims.size()) {
        unsigned i = 0;
        for (const auto &input : inputs) {
          RETURN_ERR_IF_NOT(
              outputDims[channel] == input.dims()[channel],
              opErrMsg(op,
                       strFormat("inputs must all have the same dims for "
                                 "concat with add_axis: input 0 (%s) vs "
                                 "input %u (%s), %u vs %u, channel = %u",
                                 op.input(0).c_str(), i, op.input(i).c_str(),
                                 static_cast<unsigned>(outputDims[channel]),
                                 static_cast<unsigned>(input.dims()[channel]),
                                 channel)));
          ++i;
        }
        outputDims.insert(outputDims.begin() + channel, numInputs);
        node = G_->createConcat(opName, inputs, channel);
        node = G_->createReshape(opName, node, outputDims);
      } else if (channel == outputDims.size()) {
        // We convert inputs into 2D arrays with single columns, thus the
        // number of rows will be equal to the product of all original dims.
        // Every converted input will look like a vertical line of numbers.
        const auto flatVerticalShape = flattenCdr(inputs[0].dims(), channel);
        llvm::SmallVector<NodeValue, 4> verticalInputs;
        for (auto &input : inputs) {
          verticalInputs.push_back(G_->createReshape(
              opName, input,
              {flatVerticalShape.first, flatVerticalShape.second}));
        }

        // We glue together the vertical lines, so the number of columns
        // becomes equal to the number of original inputs.
        node = G_->createConcat(opName, verticalInputs, 1);

        // Reshape to convert to the desired shape.
        outputDims.push_back(numInputs);
        node = G_->createReshape(opName, node, outputDims);
      } else {
        return MAKE_ERR(opErrMsg(
            op, strFormat("Invalid input: channel (=%u) > number of dims (=%u)",
                          channel, static_cast<unsigned>(outputDims.size()))));
      }
    } else {
      // In the normal case (i.e. when we are not adding a new dimension),
      // plain createConcat() suffices.
      node = G_->createConcat(opName, inputs, channel);
    }

    // If we added the axis then node is a Reshape, otherwise it should be a
    // Concat.
    RETURN_ERR_IF_NOT(
        llvm::isa<ConcatNode>(node) || llvm::isa<ReshapeNode>(node),
        opErrMsg(op,
                 "Internal error: Node should either be a Concat or Reshape."));
    NodeValue finalNode = llvm::isa<ConcatNode>(node)
                              ? NodeValue(node, ConcatNode::ResultIdx)
                              : NodeValue(node, ReshapeNode::ResultIdx);
    nodeValueByName_[op.output(0)] = finalNode;
    // Concat may have a second output in Caffe2 (split_info), but we don't
    // use it for inference.
    return Error::success();
  }

  if (typeName == "FC" || typeName == "FCTransposed" || typeName == "Int8FC" ||
      typeName == "FbFCPacked") {
    RETURN_ERR_IF_NOT(op.input_size() == 3,
                      "Glow only supports FC with 3 inputs");
    // Load the inputs:
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));

    auto originalInputDims = in.getType()->dims();

    size_t axis = 1;
    if (dict.count("axis")) {
      ASSIGN_VALUE_OR_RETURN_ERR(axis, loadInt(dict["axis"]));
    }

    // Load the weights.
    unsigned_t axis_w = 1;
    if (dict.count("axis_w")) {
      ASSIGN_VALUE_OR_RETURN_ERR(axis_w, loadInt(dict["axis_w"]));
    }

    NodeValue W;
    if (hasConstantByName(op.input(1))) {
      ASSIGN_VALUE_OR_RETURN_ERR(W, getConstantByName(op.input(1)));
    } else {
      ASSIGN_VALUE_OR_RETURN_ERR(W, getNodeValueByName(op.input(1)));
    }

    // Caffe2 stores the transposed W matrix. Here we first coerce W to a
    // 2D matrix size if necessary and then transpose it back.
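    // For example (illustrative): a W of dims {4, 2, 3} with axis_w = 1 is
    // flattened by flattenCdr() to the 2D pair (4, 6) before being
    // transposed.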
    auto wDims = flattenCdr(W.dims(), axis_w);
    if (W.dims().size() > 2) {
      W = G_->createReshape(W.getNode()->getName(), W,
                            {wDims.first, wDims.second});
    }

    if (typeName == "FC" || typeName == "Int8FC" || typeName == "FbFCPacked") {
      W = G_->createTranspose(W.getNode()->getName(), W, {1, 0});
    }

    NodeValue B;
    if (hasConstantByName(op.input(2))) {
      ASSIGN_VALUE_OR_RETURN_ERR(B, getConstantByName(op.input(2)));
    } else {
      ASSIGN_VALUE_OR_RETURN_ERR(B, getNodeValueByName(op.input(2)));
    }

    Node *node = nullptr;
    if (typeName == "Int8FC") {
      // Create a node with quantized type.
      auto outputDims = flattenCdr(in.dims(), axis);
      TypeRef outTy;
      ASSIGN_VALUE_OR_RETURN_ERR(
          outTy, loadQuantTy(opName, ElemKind::Int8QTy,
                             {outputDims.first, B.dims()[0]}, dict));
      int dequantizeOutput = 0;
      if (dict.count("dequantize_output")) {
        ASSIGN_VALUE_OR_RETURN_ERR(dequantizeOutput,
                                   loadInt(dict["dequantize_output"]));
      }
      if (dequantizeOutput == 1) {
        node = G_->createDynamicQuantizedFullyConnected(opName, in, W, B);
      } else {
        node = G_->createFullyConnected(opName, in, W, B, outTy, axis);
      }
    } else if (typeName == "FbFCPacked") {
      RETURN_ERR_IF_NOT(W.getElementType() == ElemKind::Float16Ty,
                        opErrMsg(op, "Expected float16 weights."));
      auto fp16InputType =
          mod_.uniqueType(ElemKind::Float16Ty, in.getType()->dims());
      in = G_->createConvertTo(opName + ".ConvertInput", in, fp16InputType);

      auto fp16BiasType = mod_.uniqueType(ElemKind::Float16Ty, B.dims());
      auto *fp16Bias =
          G_->createConvertTo(opName + ".ConvertBias", B, fp16BiasType);

      auto outputDims = flattenCdr(in.dims(), axis);
      TypeRef OT =
          mod_.uniqueType(ElemKind::Float16Ty, {outputDims.first, B.dims()[0]});
      auto fc = G_->createFullyConnected(opName, in, W, fp16Bias, OT, axis);
      auto outputType =
          mod_.uniqueType(ElemKind::FloatTy, fc->getResult().dims());
      node = G_->createConvertTo(opName + ".ConvertOutput", fc, outputType);
    } else {
      auto outputDims = flattenCdr(in.dims(), axis);
      TypeRef outputType =
          mod_.uniqueType(ElemKind::FloatTy, {outputDims.first, B.dims()[0]});
      node = G_->createFullyConnected(opName, in, W, B, outputType, axis);
    }

    // If the number of original input dims is greater than 2, expand the
    // output dims back with the same axis.
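    // For example (illustrative): an input of dims {2, 3, 16} with axis = 2
    // is seen by the FC as {6, 16}, and its {6, outDim} result is reshaped
    // back here to {2, 3, outDim}.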
    if (axis != 1) {
      llvm::SmallVector<dim_t, max_tensor_dimensions> reshapeDims;
      size_t totalReshapeSize = 1;
      for (size_t i = 0; i < axis; ++i) {
        auto d = originalInputDims[i];
        reshapeDims.push_back(d);
        totalReshapeSize *= static_cast<dim_t>(d);
      }

      size_t finalDim = typeName == "FCTransposed" ? wDims.second : wDims.first;

      reshapeDims.push_back(finalDim);
      totalReshapeSize *= finalDim;

      size_t totalOriginalOutputSize = node->getNthResult(0).getType()->size();
      RETURN_ERR_IF_NOT(
          totalReshapeSize == totalOriginalOutputSize,
          opErrMsg(op, strFormat("Cannot reshape from size %lu to size %lu",
                                 totalOriginalOutputSize, totalReshapeSize)));

      node = G_->createReshape(opName + ".fc.out", node, reshapeDims);
    }

    // Save the outputs:
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "ChannelShuffle") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));

    size_t group;
    ASSIGN_VALUE_OR_RETURN_ERR(group, loadInt(dict["group"]));
    size_t kernel;
    ASSIGN_VALUE_OR_RETURN_ERR(kernel, loadInt(dict["kernel"]));

    Node *node = G_->createChannelShuffle(opName, in, group, kernel);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "Squeeze") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    std::vector<dim_t> dims;
    ASSIGN_VALUE_OR_RETURN_ERR(dims, getShape<dim_t>(dict["dims"]));
    Node *node = G_->createSqueeze(opName, in, dims);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "Log") {
    // Load the inputs:
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    // Create the log:
    auto *R = G_->createLog(opName, in);
    RETURN_IF_ERR(addNodeAsOutput(op, R));
    return Error::success();
  }

  if (typeName == "Swish") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    auto *S = G_->createSwish(opName, in);
    RETURN_IF_ERR(addNodeAsOutput(op, S));
    return Error::success();
  }

  if (typeName == "Logit") {
    // Load the input and (optional) epsilon clamping value:
    NodeValue input;
    ASSIGN_VALUE_OR_RETURN_ERR(input, getNodeValueByName(op.input(0)));
    auto epsIt = dict.find("eps");
    // Default: 1e-6 (as in Caffe2).
    float eps = 1E-6f;
    if (epsIt != dict.end()) {
      ASSIGN_VALUE_OR_RETURN_ERR(eps, loadFloat(epsIt->second));
    }

    auto *node = G_->createLogit(opName, input, eps);
    // Save the outputs:
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "EQ") {
    NodeValue in0;
    ASSIGN_VALUE_OR_RETURN_ERR(in0, getNodeValueByName(op.input(0)));
    NodeValue in1;
    ASSIGN_VALUE_OR_RETURN_ERR(in1, getNodeValueByName(op.input(1)));
    auto *node = G_->createCmpEQ(opName, in0, in1);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "Tile") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    unsigned_t tiles;
    ASSIGN_VALUE_OR_RETURN_ERR(tiles, loadInt(dict["tiles"]));
    unsigned_t axis;
    ASSIGN_VALUE_OR_RETURN_ERR(axis, loadInt(dict["axis"]));

    auto *node = G_->createTile(opName, in, tiles, axis);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "Free") {
    // Glow frees memory automatically.
    return Error::success();
  }
  if (typeName == "StopGradient" || typeName == "ScaleGradient") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    // Currently the Caffe2 importer only supports inference.
    RETURN_IF_ERR(addNodeAsOutput(op, in));
    return Error::success();
  }

  if (typeName == "Transpose") {
    RETURN_IF_ERR(loadTranspose(op, dict, "axes"));
    return Error::success();
  }

  if (typeName == "NCHW2NHWC") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    auto *node = G_->createTranspose(opName, in, NCHW2NHWC);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "CopyCPUToMKL" || typeName == "CopyMKLToCPU" ||
      typeName == "Copy" || typeName == "EnsureCPUOutput" ||
      typeName == "EnsureDense" || typeName == "Dropout") {
    // Glow does not support any of these ops now, so implement them as
    // no-ops. Note: Implement this as a no-op reshape because these ops may
    // have partition information, and we need a node to maintain the parent
    // Function partition it specified. This reshape will get eliminated later
    // on during graph optimizations.
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    ReshapeNode *RN = G_->createReshape(in.getNode()->getName(), in, in.dims());
    RETURN_IF_ERR(addNodeAsOutput(op, RN));
    return Error::success();
  }

  if (typeName == "Slice") {
    NodeValue data;
    ASSIGN_VALUE_OR_RETURN_ERR(data, getNodeValueByName(op.input(0)));

    std::vector<ssize_t> starts;
    ASSIGN_VALUE_OR_RETURN_ERR(starts, getShape<ssize_t>(dict["starts"]));
    std::vector<ssize_t> ends;
    ASSIGN_VALUE_OR_RETURN_ERR(ends, getShape<ssize_t>(dict["ends"]));

    std::vector<dim_t> newStarts, newEnds;
    RETURN_ERR_IF_NOT(
        starts.size() == ends.size(),
        opErrMsg(op,
                 strFormat("Slice starts (%lu) and ends (%lu) must have the "
                           "same size.",
                           starts.size(), ends.size())));
    for (size_t i = 0; i < starts.size(); i++) {
      ssize_t newStart = starts[i];
      if (newStart == -1) {
        newStart = data.dims()[i];
      }
      RETURN_ERR_IF_NOT(
          newStart >= 0,
          opErrMsg(op,
                   strFormat("Indices should never be negative, but found %ld ",
                             newStart)));
      newStarts.push_back(newStart);

      ssize_t newEnd = ends[i];
      if (newEnd == -1) {
        newEnd = data.dims()[i];
      }
      RETURN_ERR_IF_NOT(
          newEnd >= 0,
          opErrMsg(op,
                   strFormat("Indices should never be negative, but found %ld ",
                             newEnd)));
      newEnds.push_back(newEnd);
    }
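    // For example (illustrative): data of dims {4, 5} with starts = {1, 0}
    // and ends = {3, -1} selects rows [1, 3) and all five columns, since an
    // end (or start) of -1 is replaced by the corresponding dimension size.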

    Node *SN = G_->createSlice(opName, data, newStarts, newEnds);
    RETURN_IF_ERR(addNodeAsOutput(op, SN));
    return Error::success();
  }

  if (typeName == "Clip") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    float cmin = std::numeric_limits<float>::lowest();
    if (dict.count("min")) {
      ASSIGN_VALUE_OR_RETURN_ERR(cmin, loadFloat(dict.find("min")->second));
    }

    float cmax = std::numeric_limits<float>::max();
    if (dict.count("max")) {
      ASSIGN_VALUE_OR_RETURN_ERR(cmax, loadFloat(dict.find("max")->second));
    }

    auto *node = G_->createClip(loadOperatorName(op), in, cmin, cmax);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "MatMul") {
    RETURN_IF_ERR(loadMatMul(op, dict));
    return Error::success();
  }

  if (typeName == "Cast") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    int to;
    ASSIGN_VALUE_OR_RETURN_ERR(to, loadInt(dict["to"]));

    switch (to) {
    case caffe2::TensorProto_DataType_FLOAT: {
      RETURN_ERR_IF_NOT(in.getElementType() == ElemKind::FloatTy,
                        opErrMsg(op, "Can only cast float to float."));
      break;
    }
    case caffe2::TensorProto_DataType_INT32: {
      RETURN_ERR_IF_NOT(in.getElementType() == ElemKind::Int32ITy,
                        opErrMsg(op, "Can only cast int32 to int32."));
      break;
    }
    case caffe2::TensorProto_DataType_INT64: {
      RETURN_ERR_IF_NOT(in.getElementType() == ElemKind::Int64ITy,
                        opErrMsg(op, "Can only cast int64 to int64."));
      break;
    }
    default:
      return MAKE_ERR(opErrMsg(op, "Unsupported Cast type."));
    }

    RETURN_IF_ERR(addNodeAsOutput(op, in));
    return Error::success();
  }

  if (typeName == "HalfToFloat") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    auto convertedType =
        mod_.uniqueType(ElemKind::FloatTy, in.getType()->dims());
    auto *R = G_->createConvertTo(opName + ".ConvertInput", in, convertedType);
    RETURN_IF_ERR(addNodeAsOutput(op, R));
    return Error::success();
  }

  if (typeName == "ScatterAssign") {
    NodeValue data;
    ASSIGN_VALUE_OR_RETURN_ERR(data, getNodeValueByName(op.input(0)));
    NodeValue indices;
    ASSIGN_VALUE_OR_RETURN_ERR(indices, getNodeValueByName(op.input(1)));
    NodeValue slices;
    ASSIGN_VALUE_OR_RETURN_ERR(slices, getNodeValueByName(op.input(2)));

    assert(indices.dims().size() == 1 && "Indices should be 1-dimensional!");
    NodeValue indices2D = G_->createReshape(opName + ".indices.2d", indices,
                                            {indices.dims()[0], 1});
    Node *SAN = G_->createScatterData(opName, data, indices2D, slices);
    RETURN_IF_ERR(addNodeAsOutput(op, SAN));
    return Error::success();
  }

  if (typeName == "ConstantFill" || typeName == "GivenTensorIntFill" ||
      typeName == "GivenTensorInt64Fill" || typeName == "GaussianFill" ||
      typeName == "UniformFill") {
    RETURN_IF_ERR(loadWeight(op));
    return Error::success();
  }

  if (typeName == "SigmoidCrossEntropyWithLogits") {
    NodeValue logits;
    ASSIGN_VALUE_OR_RETURN_ERR(logits, getNodeValueByName(op.input(0)));
    NodeValue targets;
    ASSIGN_VALUE_OR_RETURN_ERR(targets, getNodeValueByName(op.input(1)));
    Node *SCEL =
        G_->createSigmoidCrossEntropyWithLogits(opName, logits, targets);
    RETURN_IF_ERR(addNodeAsOutput(op, SCEL));
    return Error::success();
  }

  if (typeName == "ElementwiseLinear") {
    NodeValue X, w, b;

    // If the axis argument does not exist in the protobuf, the default
    // value should be 1.
    unsigned axis = 1;

    ASSIGN_VALUE_OR_RETURN_ERR(X, getNodeValueByName(op.input(0)));
    ASSIGN_VALUE_OR_RETURN_ERR(w, getNodeValueByName(op.input(1)));
    ASSIGN_VALUE_OR_RETURN_ERR(b, getNodeValueByName(op.input(2)));

    if (dict.count("axis")) {
      ASSIGN_VALUE_OR_RETURN_ERR(axis, loadInt(dict["axis"]));
    }

    Node *EL = G_->createElementwiseLinear(opName, X, w, b, axis);
    RETURN_IF_ERR(addNodeAsOutput(op, EL));
    return Error::success();
  }

  if (typeName == "AveragedLoss") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    auto *node = G_->createBatchedReduceMean(opName, in, 0);
    RETURN_IF_ERR(addNodeAsOutput(op, node));
    return Error::success();
  }

  if (typeName == "Mod") {
    NodeValue in;
    ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
    int64_t divisor;
    ASSIGN_VALUE_OR_RETURN_ERR(divisor, loadInt(dict["divisor"]));

    RETURN_ERR_IF_NOT(
        divisor >= 1,
        opErrMsg(op,
                 strFormat("Divisor must not be less than 1, but found %ld ",
                           divisor)));

    bool signFollowDivisor = false;
    if (dict.count("sign_follow_divisor")) {
      ASSIGN_VALUE_OR_RETURN_ERR(signFollowDivisor,
                                 loadInt(dict["sign_follow_divisor"]));
    }
1622
1623 auto *node = G_->createModulo(opName, in, divisor, signFollowDivisor);
1624 RETURN_IF_ERR(addNodeAsOutput(op, node));
1625
1626 return Error::success();
1627 }
1628
1629 if (typeName == "Scale") {
1630 NodeValue in;
1631 ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
1632 float scale = 1.0;
1633 if (dict.count("scale")) {
1634 ASSIGN_VALUE_OR_RETURN_ERR(scale, loadFloat(dict["scale"]));
1635 }
1636 auto scaleType = mod_.uniqueType(ElemKind::FloatTy, {in.dims()});
1637 auto scales = G_->createSplat(opName + ".scales", scaleType, scale);
1638 Node *node = G_->createMul(opName, in, scales);
1639
1640 RETURN_IF_ERR(addNodeAsOutput(op, node));
1641 return Error::success();
1642 }
1643
1644 if (typeName == "SparseLengthsWeightedSum8BitsRowwise" ||
1645 typeName == "SparseLengthsSum8BitsRowwise" ||
1646 typeName == "SparseLengthsWeightedSumFused8BitRowwise" ||
1647 typeName == "SparseLengthsSumFused8BitRowwise" ||
1648 typeName == "SparseLengthsWeightedSumFused4BitRowwise" ||
1649 typeName == "SparseLengthsSumFused4BitRowwise") {
1650 const bool isWeighted =
1651 typeName == "SparseLengthsWeightedSum8BitsRowwise" ||
1652 typeName == "SparseLengthsWeightedSumFused8BitRowwise" ||
1653 typeName == "SparseLengthsWeightedSumFused4BitRowwise";
1654 const bool isFused =
1655 typeName == "SparseLengthsWeightedSumFused8BitRowwise" ||
1656 typeName == "SparseLengthsSumFused8BitRowwise" ||
1657 typeName == "SparseLengthsWeightedSumFused4BitRowwise" ||
1658 typeName == "SparseLengthsSumFused4BitRowwise";
1659 const bool is4Bit =
1660 typeName == "SparseLengthsWeightedSumFused4BitRowwise" ||
1661 typeName == "SparseLengthsSumFused4BitRowwise";
1662 // If weighted, then the weights are the second input and so we need to
1663 // shift indices/lengths/scalesBiases.
1664 size_t indicesIdx = 1;
1665 size_t lengthsIdx = 2;
1666 size_t scalesBiasesIdx = 3;
1667 if (isWeighted) {
1668 indicesIdx++;
1669 lengthsIdx++;
1670 scalesBiasesIdx++;
1671 }
1672
1673 NodeValue data;
1674 ASSIGN_VALUE_OR_RETURN_ERR(data, getNodeValueByName(op.input(0)));
1675 NodeValue weights;
1676 if (isWeighted) {
1677 ASSIGN_VALUE_OR_RETURN_ERR(weights, getNodeValueByName(op.input(1)));
1678 }
1679 NodeValue indices;
1680 ASSIGN_VALUE_OR_RETURN_ERR(indices,
1681 getNodeValueByName(op.input(indicesIdx)));
1682 NodeValue lengths;
1683 ASSIGN_VALUE_OR_RETURN_ERR(lengths,
1684 getNodeValueByName(op.input(lengthsIdx)));
1685 Storage *dataS = llvm::dyn_cast<Storage>(data);
1686
1687 const dim_t numRows = data.dims()[0];
1688
1689 // Make sure all the shapes make sense.
1690 RETURN_ERR_IF_NOT(lengths.dims().size() == 1,
1691 opErrMsg(op, "lengths must be a vector."));
1692 RETURN_ERR_IF_NOT(indices.dims().size() == 1,
1693 opErrMsg(op, "indices must be a vector."));
1694
1695 LengthsMode lengthsMode;
1696 ASSIGN_VALUE_OR_RETURN_ERR(lengthsMode, getLengthsMode(dict));
1697
1698 float avgLength;
1699 ASSIGN_VALUE_OR_RETURN_ERR(avgLength, getAvgLength(dict));
1700
1701 Node *node;
1702 if (isFused) {
1703 RETURN_IF_ERR(setFusedTy(dataS, is4Bit ? ElemKind::UInt4FusedFP16QTy
1704 : ElemKind::UInt8FusedQTy));
1705
1706 // No other work to do, since the data is already loaded fused, so just
1707 // create the new node with its inputs.
1708 if (isWeighted) {
1709 node = G_->createFusedRowwiseQuantizedSparseLengthsWeightedSum(
1710 opName, dataS, weights, indices, lengths,
1711 /* useFP16Accumulation */ false, lengthsMode, avgLength);
1712 } else {
1713 node = G_->createFusedRowwiseQuantizedSparseLengthsSum(
1714 opName, dataS, indices, lengths, /* useFP16Accumulation */ false,
1715 lengthsMode, avgLength);
1716 }
1717
1718 if (is4Bit) {
1719 node = G_->createConvertTo(opName, node, ElemKind::FloatTy);
1720 }
1721 } else {
1722 NodeValue scalesBiases;
1723 ASSIGN_VALUE_OR_RETURN_ERR(scalesBiases,
1724 getNodeValueByName(op.input(scalesBiasesIdx)));
1725
1726 Constant *scalesBiasesC = llvm::dyn_cast<Constant>(scalesBiases);
1727 RETURN_ERR_IF_NOT(scalesBiasesC,
1728 opErrMsg(op, "scales_biases must be Constant."));
1729 RETURN_ERR_IF_NOT(scalesBiases.dims().size() == 2,
1730 opErrMsg(op, "scale_bias has to be a matrix."));
1731 RETURN_ERR_IF_NOT(
1732 scalesBiases.dims()[0] == numRows,
1733 opErrMsg(
1734 op,
1735 strFormat("scale_bias must have the same number of rows as data, "
1736 "but found scale_bias %d and rows %d ",
1737 int(scalesBiases.dims()[0]), int(numRows))));
1738 RETURN_ERR_IF_NOT(
1739 scalesBiases.dims()[1] == 2,
1740 opErrMsg(op,
1741 strFormat("Second dim of scale_bias has to be equal to 2 "
1742 "but found %d ",
1743 int(scalesBiases.dims()[1]))));
1744
1745 // Now strip out the scales and biases into their own tensors.
1746 NodeValue sliceScales =
1747 G_->createSlice(scalesBiasesC->getName().str() + "_scale",
1748 scalesBiasesC, {0, 0}, {numRows, 1});
1749 NodeValue sliceBiases =
1750 G_->createSlice(scalesBiasesC->getName().str() + "_bias",
1751 scalesBiasesC, {0, 1}, {numRows, 2});
1752 sliceScales =
1753 G_->createReshape(sliceScales.getNode()->getName().str() + "_1D",
1754 sliceScales, {numRows});
1755 sliceBiases =
1756 G_->createReshape(sliceBiases.getNode()->getName().str() + "_1D",
1757 sliceBiases, {numRows});
1758
1759 // Now create the actual node.
1760 if (isWeighted) {
1761 node = G_->createRowwiseQuantizedSparseLengthsWeightedSum(
1762 opName, dataS, sliceScales, sliceBiases, weights, indices, lengths,
1763 /* precision */ ElemKind::FloatTy,
1764 /* useFP16Accumulation */ false, lengthsMode, avgLength);
1765 } else {
1766 node = G_->createRowwiseQuantizedSparseLengthsSum(
1767 opName, dataS, sliceScales, sliceBiases, indices, lengths,
1768 /* precision */ ElemKind::FloatTy,
1769 /* useFP16Accumulation */ false, lengthsMode, avgLength);
1770 }
1771 }
1772
1773 RETURN_IF_ERR(addNodeAsOutput(op, node));
1774 return Error::success();
1775 }
1776
1777 if (typeName == "LengthsRangeFill") {
1778 NodeValue lengths;
1779 ASSIGN_VALUE_OR_RETURN_ERR(lengths, getNodeValueByName(op.input(0)));
1780 RETURN_ERR_IF_NOT(lengths.dims().size() == 1,
1781 opErrMsg(op, "lengths must be a 1D vector."));
1782
1783 auto maxOutputSizeIt = dict.find("maxOutputSize");
1784 RETURN_ERR_IF_NOT(
1785 maxOutputSizeIt != dict.end(),
1786 opErrMsg(op, "Require maxOutputSize when loading LengthsRangeFill."));
1787 unsigned_t maxOutputSize;
1788 ASSIGN_VALUE_OR_RETURN_ERR(maxOutputSize, loadInt(maxOutputSizeIt->second));
1789
1790 auto *LRF = G_->createLengthsRangeFill(opName, lengths, maxOutputSize);
1791 RETURN_IF_ERR(addNodeAsOutput(op, LRF));
1792
1793 return Error::success();
1794 }
1795
1796 // TODO: add checks for number of inputs and argument values
1797 if (typeName == "ReduceBackSum") {
1798 NodeValue in;
1799 ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
1800 RETURN_ERR_IF_NOT(in.dims().size() >= 2,
1801 opErrMsg(op, "Input should be at least 2D."));
1802 Node *node = G_->createBatchedReduceAdd(opName, in, in.dims().size() - 1);
1803 RETURN_IF_ERR(addNodeAsOutput(op, node));
1804 return Error::success();
1805 }
1806
1807 if (typeName == "RMSNorm") {
1808 NodeValue X, gamma, beta;
1809 ASSIGN_VALUE_OR_RETURN_ERR(X, getNodeValueByName(op.input(0)));
1810 RETURN_ERR_IF_NOT(X.dims().size() == 2,
1811 opErrMsg(op, "X should be a 2D tensor."));
1812 ASSIGN_VALUE_OR_RETURN_ERR(gamma, getNodeValueByName(op.input(1)));
1813 RETURN_ERR_IF_NOT(gamma.dims().size() == 1,
1814 opErrMsg(op, "gamma should be a 1D tensor."));
1815 ASSIGN_VALUE_OR_RETURN_ERR(beta, getNodeValueByName(op.input(2)));
1816 RETURN_ERR_IF_NOT(beta.dims().size() == 1,
1817 opErrMsg(op, "beta should be a 1D tensor."));
1818
1819 float epsilon = .0f;
1820 if (dict.count("eps")) {
1821 ASSIGN_VALUE_OR_RETURN_ERR(epsilon, loadFloat(dict["eps"]));
1822 }
1823
1824 auto nodes = G_->createRMSNorm(opName, X, gamma, beta, epsilon);
1825 nodeValueByName_[op.output(0)] = nodes[0];
1826 nodeValueByName_[op.output(1)] = nodes[1];
1827 return Error::success();
1828 }
1829
1830 if (typeName == "Mean") {
1831 const unsigned numInputs = op.input_size();
1832 RETURN_ERR_IF_NOT(numInputs > 0,
1833 opErrMsg(op, "Expect at least one input."));
1834
1835 std::vector<NodeValue> inputs;
1836 inputs.reserve(numInputs);
1837 for (unsigned i = 0; i < numInputs; i++) {
1838 NodeValue in;
1839 ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(i)));
1840 inputs.push_back(std::move(in));
1841 }
1842
1843 // Check that all inputs have the same shape
1844 const auto shape = inputs[0].dims();
1845 for (unsigned i = 1; i < numInputs; i++) {
1846 RETURN_ERR_IF_NOT(
1847 shape == inputs[i].dims(),
1848 opErrMsg(op,
1849 "All inputs should have the same shape, violating input " +
1850 op.input(i)));
1851 }
1852
1853 if (numInputs == 1) {
1854 RETURN_IF_ERR(addNodeAsOutput(op, inputs[0]));
1855 return Error::success();
1856 }
1857
1858 Node *node = G_->createConcat(opName + ".concat", inputs, 0);
1859
1860 std::vector<dim_t> newShape{numInputs};
1861 newShape.insert(newShape.end(), shape.begin(), shape.end());
1862 node = G_->createReshape(opName + ".reshape", node, newShape);
1863
1864 node = G_->createBatchedReduceMean(opName + ".reduceMean", node, 0);
1865
1866 RETURN_IF_ERR(addNodeAsOutput(op, node));
1867 return Error::success();
1868 }
1869
1870 if (typeName == "Negative") {
1871 RETURN_IF_ERR(loadNeg(op, dict));
1872 return Error::success();
1873 }
1874
1875 if (typeName == "LpNorm") {
1876 NodeValue in;
1877 ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
1878
1879 int p = 2;
1880 if (dict.count("p")) {
1881 ASSIGN_VALUE_OR_RETURN_ERR(p, loadInt(dict["p"]));
1882 RETURN_ERR_IF_NOT(p == 1 || p == 2,
1883 opErrMsg(op, "p should be either 1 or 2."));
1884 }
1885 bool average = false;
1886 if (dict.count("average")) {
1887 ASSIGN_VALUE_OR_RETURN_ERR(average, loadInt(dict["average"]));
1888 }
1889 RETURN_ERR_IF_NOT(!average, opErrMsg(op, "average is not supported."));
1890
1891 Node *node = nullptr;
1892 if (p == 1) {
1893 node = G_->createAbs(opName, in);
1894 } else {
1895 node = G_->createPow(opName, in, 2);
1896 }
1897
1898 const auto dims1D = flattenCdr(in.dims(), in.dims().size());
1899 node = G_->createReshape(opName + ".reshape1D", node, dims1D.first);
1900
1901 auto outputType = mod_.uniqueType(in.getElementType(), {1});
1902 node = G_->createBatchedReduceAdd(opName + ".sum", outputType, node, 0);
1903
1904 RETURN_IF_ERR(addNodeAsOutput(op, node));
1905 return Error::success();
1906 }
1907
1908 if (typeName == "ArgMin") {
1909 NodeValue input;
1910 ASSIGN_VALUE_OR_RETURN_ERR(input, getNodeValueByName(op.input(0)));
1911 int axis = 0;
1912 if (dict.count("axis")) {
1913 ASSIGN_VALUE_OR_RETURN_ERR(axis, loadInt(dict["axis"]));
1914 }
1915 bool keepDims = true;
1916 if (dict.count("keepdims")) {
1917 ASSIGN_VALUE_OR_RETURN_ERR(keepDims, loadInt(dict.at("keepdims")));
1918 }
1919
1920 auto node = G_->createArgMin(opName, input, axis, keepDims);
1921 RETURN_IF_ERR(addNodeAsOutput(op, node));
1922 return Error::success();
1923 }
1924
1925 if (typeName == "Sign") {
1926 NodeValue in;
1927 ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
1928
1929 Node *zeroes = G_->createSplat(opName + ".zeroes", in.getType(), 0.f);
1930
1931 Node *isPos = G_->createCmpLT(opName + ".isPos", zeroes, in);
1932 Node *isNeg = G_->createCmpLT(opName + ".isNeg", in, zeroes);
1933
1934 Node *posOnes = G_->createSplat(opName + ".posOnes", in.getType(), 1);
1935 Node *negOnes = G_->createSplat(opName + ".negOnes", in.getType(), -1);
1936
1937 Node *node = G_->createSelect(opName + ".fillPos", isPos, posOnes, zeroes);
1938 node = G_->createSelect(opName + ".fillNeg", isNeg, negOnes, node);
1939
1940 RETURN_IF_ERR(addNodeAsOutput(op, node));
1941 return Error::success();
1942 }
1943
1944 if (typeName == "Softplus") {
1945 NodeValue in;
1946 ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
1947
1948 Node *node = G_->createSoftPlus(opName, in);
1949
1950 RETURN_IF_ERR(addNodeAsOutput(op, node));
1951 return Error::success();
1952 }
1953
1954 if (typeName == "TopK") {
1955 NodeValue in;
1956 ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
1957 RETURN_ERR_IF_NOT(
1958 op.input_size() <= 2,
1959 opErrMsg(
1960 op,
1961 strFormat(
1962 "TopK: Maximum number of inputs is 2, but found input size %d ",
1963 op.input_size())));
1964 unsigned_t k = 0;
1965 if (op.input_size() > 1) {
1966 Constant *kConst = getConstantByNameOrNull(op.input(1));
1967 RETURN_ERR_IF_NOT(
1968 kConst,
1969 opErrMsg(op, "TopK: Non-constant k is not supported by Glow."));
1970 RETURN_ERR_IF_NOT(
1971 kConst->getElementType() == ElemKind::Int64ITy,
1972 opErrMsg(op, strFormat(
1973 "TopK: k input must be of type Int64, but found "
1974 "input type '%s' ",
1975 kConst->getType()->getElementName().str().c_str())));
1976 auto constH = kConst->getPayload().getHandle<int64_t>();
1977 k = constH.at({0});
1978 } else {
1979 ASSIGN_VALUE_OR_RETURN_ERR(k, loadInt(dict["k"]));
1980 }
1981
1982 int lastDim = in.dims().size() - 1;
1983 int axis = lastDim;
1984 if (dict.count("axis")) {
1985 ASSIGN_VALUE_OR_RETURN_ERR(axis,
1986 loadAxis<int>(dict["axis"], in.dims().size()));
1987 }
1988
1989 RETURN_ERR_IF_NOT(
1990 axis == lastDim,
1991 opErrMsg(
1992 op,
1993 strFormat(
1994 "TopK: Currently only support axis %d being last dimension %d ",
1995 axis, lastDim)));
1996
1997 TopKNode *R = G_->createTopK(opName, in, k, ElemKind::Int32ITy);
1998 RETURN_IF_ERR(addNodeAsOutput(op, R));
1999 return Error::success();
2000 }
2001
2002 if (typeName == "FillExamplesWithIndicator") {
2003 // Support FillExamplesWithIndicator
2004 NodeValue data;
2005 ASSIGN_VALUE_OR_RETURN_ERR(data, getNodeValueByName(op.input(0)));
2006 NodeValue indicator;
2007 ASSIGN_VALUE_OR_RETURN_ERR(indicator, getNodeValueByName(op.input(1)));
2008 // Validating input types and shapes
2009 RETURN_ERR_IF_NOT(
2010 indicator.getElementType() == ElemKind::Int32ITy ||
2011 indicator.getElementType() == ElemKind::Int64ITy,
2012 opErrMsg(op, "Indicator should be of int32 or int64 type."));
2013 RETURN_ERR_IF_NOT(indicator.dims().size() == 1,
2014 opErrMsg(op, "Indicator should be 1D tensor."));
2015 dim_t dataReshapeDim = flattenCdr(data.dims()).second;
2016 ShapeVector outDims{indicator.dims()[0]};
2017 outDims.insert(outDims.end(), data.dims().begin() + 1, data.dims().end());
2018 auto outTy2D = mod_.uniqueTypeWithNewShape(
2019 data.getType(), {indicator.dims()[0], dataReshapeDim});
2020
2021 auto data2D = G_->createReshape(opName + ".data2D", data,
2022 {data.dims()[0], dataReshapeDim});
2023 if (indicator.getElementType() == ElemKind::Int64ITy) {
2024 indicator = G_->createConvertTo(opName + ".int64ToInt32", indicator,
2025 ElemKind::Int32ITy);
2026 }
2027 // Select only takes boolean indicators, and converting from int to bool
2028 // must go from int -> float -> bool. Due to fp16 clipping, since only
2029 // int32 -> fp16 conversions are available, there is an initial conversion
2030 // from int64 to int32 if necessary.
2031 auto indicatorFloat = G_->createConvertTo(opName + ".intToFloat", indicator,
2032 ElemKind::FloatTy);
2033 auto indicatorBool = G_->createConvertTo(opName + ".floatToBool",
2034 indicatorFloat, ElemKind::BoolTy);
2035 auto nzIndices = G_->createNonZero(opName + ".nonzero", indicatorBool);
2036
2037 auto nzIndicesFixed = fixNonZero(G_, mod_, opName, nzIndices);
2038 auto nonZeroCount = data.dims()[0];
2039 RETURN_ERR_IF_NOT(nonZeroCount <= nzIndicesFixed->getNthResult(0).dims()[0],
2040 opErrMsg(op,
2041 "The number of "
2042 "non-zero elements in the indicator must be at "
2043 "least that of the first dimension of data"));
2044
2045 auto indices = G_->createSlice(opName + ".indices", nzIndicesFixed, {0, 0},
2046 {data.dims()[0], 1});
2047
2048 auto zeros = G_->createSplat(opName + ".zeros", outTy2D, 0);
2049
2050 auto res2D = G_->createScatterData(opName + ".scatterData", zeros, indices,
2051 data2D, true);
2052 auto node = G_->createReshape(opName + ".result", res2D, outDims);
2053 RETURN_IF_ERR(addNodeAsOutput(op, node));
2054 return Error::success();
2055 }
2056
2057 if (typeName == "BatchSparseToDense") {
2058 // Support BatchSparseToDense for output second dim = 1 only
2059 NodeValue lengths;
2060 ASSIGN_VALUE_OR_RETURN_ERR(lengths, getNodeValueByName(op.input(0)));
2061 NodeValue indices;
2062 ASSIGN_VALUE_OR_RETURN_ERR(indices, getNodeValueByName(op.input(1)));
2063 NodeValue values;
2064 ASSIGN_VALUE_OR_RETURN_ERR(values, getNodeValueByName(op.input(2)));
2065
2066 dim_t denseLastDim = 1;
2067 if (dict.count("dense_last_dim")) {
2068 ASSIGN_VALUE_OR_RETURN_ERR(denseLastDim,
2069 loadInt(dict.at("dense_last_dim")));
2070 }
2071
2072 RETURN_ERR_IF_NOT(
2073 denseLastDim == 1,
2074 opErrMsg(op, "Only output second dimension = 1 supported"));
2075 // Validating input types and shapes
2076 RETURN_ERR_IF_NOT(
2077 lengths.getElementType() == ElemKind::Int32ITy ||
2078 lengths.getElementType() == ElemKind::Int64ITy,
2079 opErrMsg(op, "Lengths should be of int32 or int64 type."));
2080 RETURN_ERR_IF_NOT(lengths.dims().size() == 1,
2081 opErrMsg(op, "Lengths should be 1D tensor."));
2082 RETURN_ERR_IF_NOT(
2083 indices.getElementType() == ElemKind::Int32ITy ||
2084 indices.getElementType() == ElemKind::Int64ITy,
2085 opErrMsg(op, "Indices should be of int32 or int64 type."));
2086 RETURN_ERR_IF_NOT(indices.dims().size() == 1,
2087 opErrMsg(op, "Indices should be 1D tensor."));
2088 RETURN_ERR_IF_NOT(values.getElementType() == ElemKind::FloatTy,
2089 opErrMsg(op, "Values should be of float type."));
2090 RETURN_ERR_IF_NOT(
2091 indices.dims()[0] == values.dims()[0],
2092 opErrMsg(op, "There should be the same number of values as indices."));
2093
2094 float defaultValue = 0.0;
2095 if (dict.count("default_value")) {
2096 ASSIGN_VALUE_OR_RETURN_ERR(defaultValue,
2097 loadFloat(dict.at("default_value")));
2098 }
2099 // Select only takes boolean indicators, and converting from int to bool
2100 // must go from int -> float -> bool. Due to fp16 clipping, since only
2101 // int32 -> fp16 conversions are available, there is an initial conversion
2102 // from int64 to int32 if necessary.
2103 if (lengths.getElementType() == ElemKind::Int64ITy) {
2104 lengths = G_->createConvertTo(opName + ".int64ToInt32", lengths,
2105 ElemKind::Int32ITy);
2106 }
2107 auto lengthsIntToFloat =
2108 G_->createConvertTo(opName + ".intToFloat", lengths, ElemKind::FloatTy);
2109 auto lengthsFloatToBool = G_->createConvertTo(
2110 opName + ".floatToBool", lengthsIntToFloat, ElemKind::BoolTy);
2111 auto nonZeroIndices =
2112 G_->createNonZero(opName + ".nonzero", lengthsFloatToBool);
2113 auto nonZeroIndicesFixed = fixNonZero(G_, mod_, opName, nonZeroIndices);
2114 auto numIndices = indices.dims()[0];
2115 auto indicesSliced = G_->createSlice(
2116 opName + ".indicesSlice", nonZeroIndicesFixed, {0, 0}, {numIndices, 1});
2117
2118 ShapeVector outDims{lengths.dims()[0], 1};
2119 auto dataTy = mod_.uniqueTypeWithNewShape(values.getType(), outDims);
2120 auto data = G_->createSplat(opName + ".data", dataTy, defaultValue);
2121 auto values2D =
2122 G_->createReshape(opName + ".reshape", values, {numIndices, 1});
2123 auto scatterData = G_->createScatterData(opName + ".scatterData", data,
2124 indicesSliced, values2D, true);
2125
2126 RETURN_IF_ERR(addNodeAsOutput(op, scatterData));
2127 return Error::success();
2128 }
2129
2130 if (typeName == "SparseLabelSplit") {
2131 NodeValue lengths;
2132 ASSIGN_VALUE_OR_RETURN_ERR(lengths, getNodeValueByName(op.input(0)));
2133 NodeValue indices;
2134 ASSIGN_VALUE_OR_RETURN_ERR(indices, getNodeValueByName(op.input(1)));
2135 NodeValue values;
2136 ASSIGN_VALUE_OR_RETURN_ERR(values, getNodeValueByName(op.input(2)));
2137
2138 dim_t numLabels = 0;
2139 RETURN_ERR_IF_NOT(dict.count("num_labels"),
2140 opErrMsg(op, "num_labels was not provided."));
2141 ASSIGN_VALUE_OR_RETURN_ERR(numLabels, loadInt(dict.at("num_labels")));
2142
2143 bool keepGradientOffsetMap = false;
2144 if (dict.count("keep_gradient_offset_map")) {
2145 ASSIGN_VALUE_OR_RETURN_ERR(keepGradientOffsetMap,
2146 loadInt(dict.at("keep_gradient_offset_map")));
2147 }
2148
2149 // Validating input types and shapes
2150 RETURN_ERR_IF_NOT(lengths.getElementType() == ElemKind::Int32ITy,
2151 opErrMsg(op, "Lengths should be of int32 type."));
2152 RETURN_ERR_IF_NOT(lengths.dims().size() == 1 || lengths.dims().size() == 2,
2153 opErrMsg(op, "Lengths should be 1D or 2D tensor."));
2154 RETURN_ERR_IF_NOT(indices.getElementType() == ElemKind::Int64ITy,
2155 opErrMsg(op, "Indices should be of int64 type."));
2156 RETURN_ERR_IF_NOT(indices.dims().size() == 1 || indices.dims().size() == 2,
2157 opErrMsg(op, "Indices should be 1D or 2D tensor."));
2158 RETURN_ERR_IF_NOT(values.getElementType() == ElemKind::FloatTy,
2159 opErrMsg(op, "Values should be of float type."));
2160 RETURN_ERR_IF_NOT(values.dims().size() == 1 || values.dims().size() == 2,
2161 opErrMsg(op, "Values should be 1D or 2D tensor."));
2162 RETURN_ERR_IF_NOT(
2163 indices.dims() == values.dims(),
2164 opErrMsg(op, "Indices and values should have the same shape."));
2165
2166 // Optional conversion from 2D to 1D inputs
2167 if (lengths.dims().size() == 2) {
2168 RETURN_ERR_IF_NOT(
2169 lengths.dims()[1] == 1,
2170 opErrMsg(op, "Second dimension should be 1 in lengths."));
2171 lengths = G_->createReshape(opName + ".lengths1D", lengths,
2172 {lengths.dims()[0]});
2173 }
2174 if (indices.dims().size() == 2) {
2175 RETURN_ERR_IF_NOT(
2176 indices.dims()[1] == 1,
2177 opErrMsg(op, "Second dimension should be 1 in indices."));
2178 indices = G_->createReshape(opName + ".indices1D", indices,
2179 {indices.dims()[0]});
2180 }
2181 if (values.dims().size() == 2) {
2182 RETURN_ERR_IF_NOT(
2183 values.dims()[1] == 1,
2184 opErrMsg(op, "Second dimension should be 1 in values."));
2185 values =
2186 G_->createReshape(opName + ".values1D", values, {values.dims()[0]});
2187 }
2188
2189 SparseLabelSplitNode *node =
2190 G_->createSparseLabelSplit(opName, lengths, indices, values, numLabels);
2191
2192 std::vector<SliceNode *> labelValueSlices;
2193 G_->createSplit(opName + ".splitLabelValues",
2194 node->getNthResult(SparseLabelSplitNode::LabelValuesIdx),
2195 numLabels, 0, {}, labelValueSlices);
2196
2197 std::vector<SliceNode *> exampleIdSlices;
2198 G_->createSplit(opName + ".splitExampleIds",
2199 node->getNthResult(SparseLabelSplitNode::ExampleIdsIdx),
2200 numLabels, 0, {}, exampleIdSlices);
2201
2202 const auto numItems = indices.dims()[0] / numLabels;
2203
2204 std::vector<Node *> labelValues;
2205 for (auto slice : labelValueSlices) {
2206 labelValues.push_back(
2207 G_->createReshape(opName + ".reshapeLabelValue", slice, {numItems}));
2208 }
2209
2210 std::vector<Node *> exampleIds;
2211 for (auto slice : exampleIdSlices) {
2212 exampleIds.push_back(
2213 G_->createReshape(opName + ".reshapeExamplId", slice, {numItems}));
2214 }
2215
2216 for (dim_t i = 0; i < numLabels; ++i) {
2217 nodeValueByName_[op.output(i)] = labelValues[i];
2218 }
2219 for (dim_t i = 0; i < numLabels; ++i) {
2220 nodeValueByName_[op.output(numLabels + i)] = exampleIds[i];
2221 }
2222 if (keepGradientOffsetMap) {
2223 nodeValueByName_[op.output(2 * numLabels)] =
2224 node->getNthResult(SparseLabelSplitNode::GradientOffsetMapIdx);
2225 }
2226 return Error::success();
2227 }
2228
2229 if (typeName == "Log1p") {
2230 NodeValue in;
2231 ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
2232
2233 Node *ones = G_->createSplat(opName + ".ones", in.getType(), 1.0f);
2234 Node *add = G_->createAdd(opName + ".add", in, ones);
2235 Node *node = G_->createLog(opName + ".log", add);
2236
2237 RETURN_IF_ERR(addNodeAsOutput(op, node));
2238 return Error::success();
2239 }
2240
2241 if (typeName == "ReduceBackMean") {
2242 const unsigned numInputs = op.input_size();
2243 RETURN_ERR_IF_NOT(numInputs == 1,
2244 opErrMsg(op, "Only single input is supported."));
2245
2246 NodeValue in;
2247 ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
2248 RETURN_ERR_IF_NOT(in.dims().size() >= 2,
2249 opErrMsg(op, "Input should be at least 2D."));
2250
2251 int numReduceDim = 1;
2252 if (dict.count("num_reduce_dim")) {
2253 ASSIGN_VALUE_OR_RETURN_ERR(numReduceDim, loadInt(dict["num_reduce_dim"]));
2254 }
2255 // TODO: check maybe we can support more dimensions to be reduced
2256 RETURN_ERR_IF_NOT(numReduceDim == 1,
2257 opErrMsg(op, "Supporting reducing only one dimension."));
2258
2259 Node *node = G_->createBatchedReduceMean(opName, in, in.dims().size() - 1);
2260 RETURN_IF_ERR(addNodeAsOutput(op, node));
2261 return Error::success();
2262 }
2263
2264 return MAKE_ERR(unexpectedNodeErrorMessage(op, "Unsupported operator."));
2265}
2266
2267template <class TensorProtoType>
2268Error Caffe2ModelLoader::loadInputsWithTensorProtoType(
2269 const caffe2::NetDef &net,
2270 const std::unordered_set<std::string> &initializers,
2271 const TensorProtoType &in) {
2272 // Skip static weights
2273 if (getConstantByNameOrNull(in.name())) {
2274 return Error::success();
2275 }
2276
2277 if (getStaticPlaceholderByNameOrNull(in.name())) {
2278 return Error::success();
2279 }
2280
2281 LoadWeightResult loadRes;
2282 if (auto resOrErr = createAndSetTensorType(in)) {
2283 loadRes = std::move(*resOrErr);
2284 } else {
2285 RETURN_ERR(resOrErr.takeError());
2286 }
2287
2288 bool multiQParamsLoaded = loadRes.scales || loadRes.offsets;
2289 RETURN_ERR_IF_NOT(
2290 (!multiQParamsLoaded || (loadRes.scales && loadRes.offsets)),
2291 "For tensors with separate qparams, both scales and offsets must be "
2292 "loaded");
2293
2294 bool isInput = !initializers.count(in.name());
2295 if (isInput) {
2296 RETURN_ERR_IF_NOT(!clipQuantRangeToFP16_ ||
2297 !loadRes.t->getType().isQuantizedType() ||
2298 loadRes.t->getType().isFusedQuantizedType(),
2299 "Do not support clipQuantRangeToFP16 with unfused "
2300 "quantized input Placeholders: " +
2301 in.name());
2302 Placeholder *placeholder;
2303 ASSIGN_VALUE_OR_RETURN_ERR(
2304 placeholder,
2305 createAndRegisterPlaceholder(in.name(), &loadRes.t->getType()));
2306
2307 inputVarsByName_.try_emplace(in.name(), placeholder);
2308
2309 if (multiQParamsLoaded) {
2310 auto offsetsName = strFormat("%s_loaded_offsets", in.name().c_str());
2311 auto scalesName = strFormat("%s_loaded_scales", in.name().c_str());
2312 Placeholder *offsetsPlaceholder;
2313 Placeholder *scalesPlaceholder;
2314
2315 ASSIGN_VALUE_OR_RETURN_ERR(offsetsPlaceholder,
2316 createAndRegisterPlaceholder(
2317 offsetsName, &loadRes.offsets->getType()));
2318 inputVarsByName_.try_emplace(offsetsName, offsetsPlaceholder);
2319
2320 ASSIGN_VALUE_OR_RETURN_ERR(
2321 scalesPlaceholder,
2322 createAndRegisterPlaceholder(scalesName, &loadRes.scales->getType()));
2323 inputVarsByName_.try_emplace(scalesName, scalesPlaceholder);
2324 }
2325 } else {
2326 RETURN_IF_ERR(createAndRegisterConstant(in.name(), std::move(*loadRes.t)));
2327
2328 if (multiQParamsLoaded) {
2329 auto offsetsName = strFormat("%s_loaded_offsets", in.name().c_str());
2330 auto scalesName = strFormat("%s_loaded_scales", in.name().c_str());
2331 RETURN_IF_ERR(
2332 createAndRegisterConstant(offsetsName, std::move(*loadRes.offsets)));
2333 RETURN_IF_ERR(
2334 createAndRegisterConstant(scalesName, std::move(*loadRes.scales)));
2335 }
2336 }
2337 return Error::success();
2338}
2339
2340Error Caffe2ModelLoader::loadInputs(
2341 const caffe2::NetDef &net,
2342 const std::unordered_set<std::string> &initializers) {
2343 const caffe2::Argument *arg = nullptr, *qarg = nullptr;
2344 for (auto i = 0, e = net.arg_size(); i < e && (!arg || !qarg); ++i) {
2345 if (net.arg(i).name() == "input_shape_info") {
2346 arg = &net.arg(i);
2347 } else if (net.arg(i).name() == "input_qshape_info") {
2348 qarg = &net.arg(i);
2349 }
2350 }
2351
2352 // Load all regular tensor input
2353 if (arg) {
2354 for (const auto &in : arg->tensors()) {
2355 RETURN_IF_ERR(loadInputsWithTensorProtoType<caffe2::TensorProto>(
2356 net, initializers, in));
2357 }
2358 }
2359
2360 // Load all quantized tensor input
2361 if (qarg) {
2362 for (const auto &in : qarg->qtensors()) {
2363 RETURN_IF_ERR(loadInputsWithTensorProtoType<caffe2::QTensorProto>(
2364 net, initializers, in));
2365 }
2366 }
2367
2368 return Error::success();
2369}
2370
2371Error Caffe2ModelLoader::loadNetwork(caffe2::NetDef &net) {
2372 // Make a claim on the unique name of all output Placeholders.
2373 for (int i = 0; i < net.external_output_size(); i++) {
2374 auto &outputName = net.external_output(i);
2375 mod_.registerStorageName(legalizeName(outputName));
2376 }
2377
2378 /// Load the network operators:
2379 for (int i = 0; i < net.op_size(); i++) {
2380 auto &op = net.op(i);
2381
2382 // Set up current partition to load into if relevant.
2383 if (partNameToFun_.size()) {
2384 auto &pName = op.device_option().node_name();
2385 auto it = partNameToFun_.find(pName);
2386 RETURN_ERR_IF_NOT(
2387 it != partNameToFun_.end(),
2388 strFormat("Did not find partition with name %s", pName.c_str()));
2389 G_ = it->second;
2390 }
2391 RETURN_ERR_IF_NOT(G_, "Internal Glow error; Graph was not valid.");
2392
2393 if (constFoldInLoader_) {
2394 auto tryFold = foldOperator(op);
2395 if (!tryFold) {
2396 // Error during constant folding; load the op normally below.
2397 const std::string errStr = ERR_TO_STRING(tryFold.takeError());
2398 VLOG(1) << "Error while trying to ConstantFold " << loadOperatorName(op)
2399 << ": " << errStr;
2400 } else if (tryFold.get()) {
2401 // Folded successfully, so skip loading the op below.
2402 continue;
2403 }
2404 }
2405 RETURN_IF_ERR(loadOperator(op));
2406 }
2407
2408 RETURN_ERR_IF_NOT(net.external_output_size(),
2409 "Network needs external outputs defined.");
2410
2411 for (int i = 0; i < net.external_output_size(); i++) {
2412 auto &outputName = net.external_output(i);
2413 NodeValue r;
2414 // We want to create the save node in the same Function as the original
2415 // NodeValue. Thus here we ignore the source function when getting the NV,
2416 // which avoids copying the NV to whatever G_ currently is via an
2417 // intermediate Placeholder.
2418 ASSIGN_VALUE_OR_RETURN_ERR(
2419 r, getNodeValueByName(outputName, /* ignoreSrcFun */ true));
2420
2421 PlaceholderList &PHList = mod_.getPlaceholders();
2422 // Create a Placeholder with the previously claimed name.
2423 auto *PH =
2424 new Placeholder(legalizeName(outputName), mod_.uniqueType(*r.getType()),
2425 false, ANY_LAYOUT);
2426 PHList.push_back(PH);
2427 // If r is storage then just use the current last Function to save, since
2428 // we're just saving directly from a Storage node anyway.
2429 Function *F = llvm::isa<Storage>(r) ? G_ : r.getNode()->getParent();
2430 assert(F && "F must be valid here.");
2431 auto *SN = F->createSave(outputName, r, PH);
2432 outputVarsByName_[outputName] = SN->getPlaceholder();
2433 }
2434 return Error::success();
2435}
2436
2437/// Fills \p T with data from \p values.
2438template <typename ElemTy, typename RangeTy>
2439static Error fillTensor(Tensor &T, ElemKind kind, llvm::ArrayRef<dim_t> dim,
2440 RangeTy values) {
2441 T.reset(kind, dim);
2442 auto TH = T.getHandle<ElemTy>();
2443 RETURN_ERR_IF_NOT((size_t)values.size() == T.size(),
2444 llvm::formatv("Wrong number of values for GivenTensorFill "
2445 "({0} given, {1} expected)",
2446 values.size(), T.size())
2447 .str());
2448 size_t i = 0;
2449 for (auto num : values) {
2450 TH.raw(i++) = num;
2451 }
2452 return Error::success();
2453}
2454
2455Error Caffe2ModelLoader::loadWeight(const caffe2::OperatorDef &op) {
2456 ArgumentDictionaryTy dict = loadArgumentMap(op);
2457 const std::string &typeName = op.type();
2458 const std::string &opName = loadOperatorName(op);
2459 // Load tensors with values:
2460 if (typeName == "GivenTensorFill" || typeName == "GivenTensorFp16Fill" ||
2461 typeName == "GivenTensorIntFill" || typeName == "GivenTensorInt64Fill") {
2462 /*
2463 * op {
2464 * output: "conv1_w"
2465 * name: ""
2466 * type: "GivenTensorFill"
2467 * arg {
2468 * name: "shape"
2469 * ints: 96
2470 * ints: 3
2471 * ints: 11
2472 * ints: 11
2473 * }
2474 * arg {
2475 * name: "values"
2476 * floats: -0.028315347
2477 * ...
2478 * }
2479 * }
2480 */
2481
2482 // Note: Explicitly allow for an empty dim here, representing a scalar value
2483 // will be loaded below.
2484 std::vector<dim_t> dim;
2485 ASSIGN_VALUE_OR_RETURN_ERR(
2486 dim, getShape<dim_t>(dict["shape"], /* allowEmptyShape */ true));
2487 auto const &values = dict["values"];
2488 RETURN_ERR_IF_NOT(
2489 op.output_size() == 1,
2490 opErrMsg(
2491 op, strFormat(
2492 "GivenTensorFill must have exactly 1 output, but found %d ",
2493 op.output_size())));
2494 Tensor T;
2495 if (typeName == "GivenTensorFill") {
2496 RETURN_IF_ERR(
2497 fillTensor<float>(T, ElemKind::FloatTy, dim, values->floats()));
2498 } else if (typeName == "GivenTensorFp16Fill") {
2499 RETURN_IF_ERR(
2500 fillTensor<float16_t>(T, ElemKind::Float16Ty, dim, values->floats()));
2501 } else if (typeName == "GivenTensorIntFill") {
2502 RETURN_IF_ERR(
2503 fillTensor<int32_t>(T, ElemKind::Int32ITy, dim, values->ints()));
2504 } else if (typeName == "GivenTensorInt64Fill") {
2505 RETURN_IF_ERR(
2506 fillTensor<int64_t>(T, ElemKind::Int64ITy, dim, values->ints()));
2507 } else {
2508 return MAKE_ERR(
2509 strFormat("Unhandled tensor fill type: %s", typeName.c_str()));
2510 }
2511 RETURN_IF_ERR(createAndRegisterConstant(op.output().Get(0), std::move(T)));
2512 return Error::success();
2513 }
2514
2515 if (typeName == "GivenTensorByteStringToUInt8Fill") {
2516 /*
2517 output: "data"
2518 type: "GivenTensorByteStringToUInt8Fill"
2519 arg {
2520 name: "shape"
2521 ints: 3
2522 ints: 10
2523 }
2524 arg {
2525 name: "values"
2526 s:
2527 "\000\377\152\232\115\072\000\000\200\077\000\377\050\132\215\073\063\063\023\100\000\377\314\063\232\073\000\000\220\100"
2528 }
2529 */
2530
2531 for (auto &o : op.output()) {
2532 Tensor T;
2533 if (getConstantByNameOrNull(o)) {
2534 continue;
2535 }
2536 std::vector<dim_t> dim;
2537 ASSIGN_VALUE_OR_RETURN_ERR(dim, getShape<dim_t>(dict["shape"]));
2538 T.reset(ElemKind::UInt8QTy, dim, 0.0, 0);
2539 auto TH = T.getHandle<uint8_t>();
2540 RETURN_ERR_IF_NOT(
2541 dict["values"]->strings().size() == 1,
2542 "Expect single string input for GivenTensorByteStringToUInt8Fill");
2543 const std::string &str = dict["values"]->strings().Get(0);
2544
2545 size_t pos;
2546 for (pos = 0; pos < str.size(); pos++) {
2547 TH.raw(pos) = (uint8_t)str[pos];
2548 }
2549
2550 RETURN_ERR_IF_NOT(
2551 pos == T.size(),
2552 strFormat("The number of serialized values (%li) does not "
2553 "match the size of the tensor (%li).",
2554 pos, (size_t)T.size()));
2555 RETURN_IF_ERR(createAndRegisterConstant(o, std::move(T)));
2556 }
2557 return Error::success();
2558 }
2559
2560 // Load quantized tensors:
2561 if (typeName == "Int8GivenTensorFill" ||
2562 typeName == "Int8GivenIntTensorFill") {
2563 /*
2564 output: "conv1_w"
2565 name: ""
2566 type: "Int8GivenTensorFill"
2567 arg {
2568 name: "shape"
2569 ints: 96
2570 ints: 3
2571 ints: 11
2572 ints: 11
2573 }
2574 arg {
2575 name: "values"
2576 s: "\x7f\x80\x80\x7"
2577 }
2578 arg {
2579 name: "Y_scale"
2580 f: 0.00044428
2581 }
2582 arg {
2583 name: "Y_zero_point"
2584 i: 127
2585 }
2586 */
2587 for (auto &o : op.output()) {
2588 Tensor T;
2589 if (getConstantByNameOrNull(o)) {
2590 continue;
2591 }
2592
2593 std::vector<dim_t> dim;
2594 ASSIGN_VALUE_OR_RETURN_ERR(dim, getShape<dim_t>(dict["shape"]));
2595
2596 RETURN_ERR_IF_NOT(dict.count("Y_zero_point"),
2597 ("missing zero point for quantized output type"));
2598 RETURN_ERR_IF_NOT(dict.count("Y_scale"),
2599 ("missing Y_scale for quantized output type"));
2600
2601 float scale;
2602 ASSIGN_VALUE_OR_RETURN_ERR(scale, loadFloat(dict["Y_scale"]));
2603 (void)scale;
2604 int32_t offset;
2605 ASSIGN_VALUE_OR_RETURN_ERR(offset, loadInt(dict["Y_zero_point"]));
2606 (void)offset;
2607 size_t i = 0;
2608 if (typeName == "Int8GivenTensorFill") {
2609 // Although in Caffe2 quantized model, the weights is int8 quantized,
2610 // the weights is stored in uint8_t format due to that Caffe2 requires
2611 // the type of input and weights must be the same. Therefore, we need
2612 // to convert it to int8 by subtracting 128.
2613 TypeRef ty;
2614 ASSIGN_VALUE_OR_RETURN_ERR(
2615 ty, loadQuantTy(o, ElemKind::Int8QTy, dim, dict,
2616 /* skipClipQuantRangeToFP16 */ true));
2617 T.reset(*ty);
2618 auto TH = T.getHandle<int8_t>();
2619 std::string str = dict["values"]->s();
2620 for (; i < str.size(); i++) {
2621 TH.raw(i) = ((uint8_t)(str.c_str()[i]) - UINT8_TO_INT8_SHIFT);
2622 }
2623 } else {
2624 TypeRef ty;
2625 ASSIGN_VALUE_OR_RETURN_ERR(
2626 ty, loadQuantTy(o, ElemKind::Int32QTy, dim, dict,
2627 /* skipClipQuantRangeToFP16 */ true));
2628 T.reset(*ty);
2629 auto TH = T.getHandle<int32_t>();
2630 for (auto num : dict["values"]->ints()) {
2631 TH.raw(i++) = num;
2632 }
2633 }
2634
2635 // If we're clipping quantized ranges tp FP16, then we need to rescale the
2636 // Tensor and update its type.
2637 if (clipQuantRangeToFP16_) {
2638 const ElemKind k = T.getType().getElementType();
2639 const auto qMinMax = getQuantizedValueRange(T.getType().getScale(),
2640 T.getType().getOffset(), k);
2641 const float newMin = std::max(qMinMax.first, kMinFP16);
2642 const float newMax = std::min(qMinMax.second, kMaxFP16);
2643 if (newMin != qMinMax.first || newMax != qMinMax.second) {
2644 auto rescaledT = glow::make_unique<Tensor>();
2645 dispatchQuantizedImpl(rescaleQTensor, k, T, *rescaledT, newMin,
2646 newMax);
2647 T = std::move(*rescaledT);
2648 }
2649 }
2650
2651 RETURN_ERR_IF_NOT(
2652 i == T.size(),
2653 strFormat("The number of serialized values (%li) does not "
2654 "match the size of the tensor (%li).",
2655 i, (size_t)T.size()));
2656
2657 RETURN_IF_ERR(createAndRegisterConstant(o, std::move(T)));
2658 }
2659
2660 return Error::success();
2661 }
2662
2663 // Load tensors with constant fill:
2664 if (typeName == "ConstantFill") {
2665 /*
2666 output: "data"
2667 name: ""
2668 type: "ConstantFill"
2669 arg {
2670 name: "shape"
2671 ints: 1
2672 }
2673 */
2674
2675 const auto &name = op.output(0);
2676 // If the tensor is pre-populated by the user of this class then we don't
2677 // need to allocate a new tensor.
2678 if (getConstantByNameOrNull(name)) {
2679 return Error::success();
2680 }
2681
2682 // The shape is set either the shape argument, or from another input
2683 // tensor. Shape takes priority over input.
2684 std::vector<dim_t> dims;
2685 if (dict.count("shape")) {
2686 ASSIGN_VALUE_OR_RETURN_ERR(dims, getShape<dim_t>(dict["shape"]));
2687 } else {
2688 RETURN_ERR_IF_NOT(op.input_size() > 0,
2689 "If no shape provided, must have input shape.");
2690
2691 bool inputAsShape = false;
2692 if (dict.count("input_as_shape")) {
2693 ASSIGN_VALUE_OR_RETURN_ERR(inputAsShape,
2694 loadInt(dict["input_as_shape"]));
2695 }
2696
2697 if (inputAsShape) {
2698 // It must be registered as a Constant because it must be statically set
2699 // already, as shapes must be statically known.
2700 Constant *in;
2701 ASSIGN_VALUE_OR_RETURN_ERR(in, getConstantByName(op.input(0)));
2702 RETURN_ERR_IF_NOT(in->dims().size() == 1,
2703 opErrMsg(op, "Input must be 1D tensor."));
2704 RETURN_ERR_IF_NOT(in->getElementType() == ElemKind::Int64ITy,
2705 opErrMsg(op, "Input must be of int64 type."));
2706 const auto handle = in->getHandle<int64_t>();
2707 dims.reserve(in->dims().size());
2708 for (auto dim : handle) {
2709 dims.push_back(dim);
2710 }
2711 } else {
2712 NodeValue in;
2713 ASSIGN_VALUE_OR_RETURN_ERR(in, getNodeValueByName(op.input(0)));
2714 dims = in.dims();
2715 }
2716 }
2717
2718 int to = caffe2::TensorProto_DataType_FLOAT;
2719 if (dict.count("dtype")) {
2720 ASSIGN_VALUE_OR_RETURN_ERR(to, loadInt(dict["dtype"]));
2721 }
2722
2723 SplatNode *splatNode{nullptr};
2724
2725 switch (to) {
2726 case caffe2::TensorProto_DataType_FLOAT: {
2727 float f = 0.0f;
2728 if ((dict.count("value") && dict["value"]->has_f())) {
2729 ASSIGN_VALUE_OR_RETURN_ERR(f, loadFloat(dict["value"]));
2730 }
2731 splatNode =
2732 G_->createSplat(opName, mod_.uniqueType(ElemKind::FloatTy, dims), f);
2733 break;
2734 }
2735 case caffe2::TensorProto_DataType_INT32: {
2736 int i = 0;
2737 if ((dict.count("value") && dict["value"]->has_i())) {
2738 ASSIGN_VALUE_OR_RETURN_ERR(i, loadInt(dict["value"]));
2739 }
2740 splatNode =
2741 G_->createSplat(opName, mod_.uniqueType(ElemKind::Int32ITy, dims), i);
2742 break;
2743 }
2744 case caffe2::TensorProto_DataType_INT64:
2745 case caffe2::TensorProto_DataType_BOOL: {
2746 int i = 0;
2747 if ((dict.count("value") && dict["value"]->has_i())) {
2748 ASSIGN_VALUE_OR_RETURN_ERR(i, loadInt(dict["value"]));
2749 }
2750 splatNode =
2751 G_->createSplat(opName, mod_.uniqueType(ElemKind::Int64ITy, dims), i);
2752 break;
2753 }
2754 default:
2755 return MAKE_ERR("Unsupported datatype for ConstantFill.");
2756 }
2757
2758 RETURN_IF_ERR(addNodeAsOutput(op, splatNode));
2759
2760 return Error::success();
2761 }
2762
2763 if (typeName == "UniformFill") {
2764 /*
2765 output: "fc/w"
2766 name: ""
2767 type: "UniformFill"
2768 arg {
2769 name: "max"
2770 f: 0.25
2771 }
2772 arg {
2773 name: "shape"
2774 ints: 1
2775 ints: 16
2776 }
2777 arg {
2778 name: "min"
2779 f: -0.25
2780 }
2781 */
2782 const auto &name = op.output(0);
2783 Tensor T;
2784 std::vector<dim_t> dim;
2785 if (dict.count("shape")) {
2786 ASSIGN_VALUE_OR_RETURN_ERR(dim, getShape<dim_t>(dict["shape"]));
2787 } else {
2788 RETURN_ERR_IF_NOT(op.input_size() > 0,
2789 "If no shape provided, must have input shape.");
2790
2791 bool inputAsShape = false;
2792 if (dict.count("input_as_shape")) {
2793 ASSIGN_VALUE_OR_RETURN_ERR(inputAsShape,
2794 loadInt(dict["input_as_shape"]));
2795 }
2796
2797 if (inputAsShape) {
2798 Constant *in;
2799 ASSIGN_VALUE_OR_RETURN_ERR(in, getConstantByName(op.input(0)));
2800 RETURN_ERR_IF_NOT(in->dims().size() == 1,
2801 opErrMsg(op, "Input must be 1D tensor."));
2802 RETURN_ERR_IF_NOT(in->getElementType() == ElemKind::Int64ITy,
2803 opErrMsg(op, "Input must be of int64 type."));
2804 const auto handle = in->getHandle<int64_t>();
2805 dim.reserve(in->dims().size());
2806 for (auto d : handle) {
2807 dim.push_back(d);
2808 }
2809 } else {
2810 NodeValue input;
2811 ASSIGN_VALUE_OR_RETURN_ERR(input, getNodeValueByName(op.input(0)));
2812 dim = input.dims();
2813 }
2814 }
2815 T.reset(ElemKind::FloatTy, dim);
2816 auto TH = T.getHandle<>();
2817 float tensorMin;
2818 ASSIGN_VALUE_OR_RETURN_ERR(tensorMin, loadFloat(dict["min"]));
2819 float tensorMax;
2820 ASSIGN_VALUE_OR_RETURN_ERR(tensorMax, loadFloat(dict["max"]));
2821
2822 DLOG(INFO)
2823 << "The model contains UniformFill operator, which generates random "
2824 "numbers. This could be source of discrepancy.";
2825
2826 // Uniformly generate random numbers in [tensorMin; tensorMax).
2827 for (auto &elem : TH) {
2828 elem = mod_.getPRNG().nextRandReal(tensorMin, tensorMax);
2829 }
2830
2831 RETURN_IF_ERR(createAndRegisterConstant(name, std::move(T)));
2832
2833 return Error::success();
2834 }
2835
2836 // Load tensors with constant fill:
2837 if (typeName == "GaussianFill") {
2838 /*
2839 output: "data"
2840 name: ""
2841 type: "GaussianFill"
2842 arg {
2843 name: "mean"
2844 f: 0.0
2845 }
2846 arg {
2847 name: "std"
2848 f: 1.0
2849 }
2850 arg {
2851 name: "shape"
2852 ints: 1
2853 ints: 16
2854 }
2855 */
2856
2857 const auto &name = op.output(0);
2858 if (getConstantByNameOrNull(name)) {
2859 return Error::success();
2860 }
2861
2862 // The shape of the output is set by shape, if provided. Otherwise, it is
2863 // set by the shape of the input or the shape indicated by input if
2864 // input_as_shape is true
2865 NodeValue input;
2866 std::vector<dim_t> dims;
2867 if (dict.count("shape")) {
2868 ASSIGN_VALUE_OR_RETURN_ERR(dims, getShape<dim_t>(dict["shape"]));
2869 } else {
2870 RETURN_ERR_IF_NOT(op.input_size() > 0,
2871 "If no shape provided, must have input shape.");
2872
2873 bool inputAsShape = false;
2874 if (dict.count("input_as_shape")) {
2875 ASSIGN_VALUE_OR_RETURN_ERR(inputAsShape,
2876 loadInt(dict["input_as_shape"]));
2877 }
2878
2879 if (inputAsShape) {
2880 Constant *in;
2881 ASSIGN_VALUE_OR_RETURN_ERR(in, getConstantByName(op.input(0)));
2882 RETURN_ERR_IF_NOT(in->dims().size() == 1,
2883 opErrMsg(op, "Input must be 1D tensor."));
2884 RETURN_ERR_IF_NOT(in->getElementType() == ElemKind::Int64ITy,
2885 opErrMsg(op, "Input must be of int64 type."));
2886 const auto handle = in->getHandle<int64_t>();
2887 dims.reserve(in->dims().size());
2888 for (auto dim : handle) {
2889 dims.push_back(dim);
2890 }
2891 } else {
2892 ASSIGN_VALUE_OR_RETURN_ERR(input, getNodeValueByName(op.input(0)));
2893 dims = input.dims();
2894 }
2895
2896 if (dict.count("extra_shape")) {
2897 std::vector<dim_t> extra_shape;
2898 ASSIGN_VALUE_OR_RETURN_ERR(extra_shape,
2899 getShape<dim_t>(dict["extra_shape"]));
2900 dims.insert(dims.end(), extra_shape.begin(), extra_shape.end());
2901 }
2902 }
2903 if ((!input && !dims.empty()) || input.dims().vec() != dims) {
2904 input =
2905 G_->createSplat("in", mod_.uniqueType(ElemKind::FloatTy, dims), 0.);
2906 }
2907 float mean;
2908 ASSIGN_VALUE_OR_RETURN_ERR(mean, loadFloat(dict["mean"]));
2909 float scale;
2910 ASSIGN_VALUE_OR_RETURN_ERR(scale, loadFloat(dict["std"]));
2911
2912 auto GF = G_->createGaussianFill(opName, input, mean, scale,
2913 std::random_device{}());
2914 auto outputType =
2915 mod_.uniqueType(ElemKind::FloatTy, GF->getResult().dims());
2916 auto node = G_->createConvertTo(opName + ".ConvertOutput", GF, outputType);
2917 RETURN_IF_ERR(addNodeAsOutput(op, node));
2918
2919 return Error::success();
2920 }
2921
2922 return MAKE_ERR(unexpectedNodeErrorMessage(op, "Unsupported weight kind"));
2923}
2924
2925Error Caffe2ModelLoader::loadWeightsFromNet(caffe2::NetDef &net) {
2926 for (auto &op : net.op()) {
2927 RETURN_IF_ERR(loadWeight(op));
2928 }
2929 return Error::success();
2930}
2931
2932Caffe2ModelLoader::Caffe2ModelLoader(Function &F, Error *errPtr)
2933 : CommonOperatorLoader({}, {}, &F, errPtr) {
2934 deleteUnusedConstants();
2935}
2936
2937Caffe2ModelLoader::Caffe2ModelLoader(
2938 const std::string &netDescFilename, const std::string &netWeightFilename,
2939 llvm::ArrayRef<const char *> names, llvm::ArrayRef<TypeRef> types,
2940 Function &F, Error *errPtr, OriginNameToTQPMap *originNameToTQPMap,
2941 bool loadUniquedDummyQParams, bool zeroScaleFP16Clip,
2942 bool clipQuantRangeToFP16)
2943 : CommonOperatorLoader(names, types, &F, errPtr,
2944 /* loadIntoExistingModule */ false,
2945 originNameToTQPMap, loadUniquedDummyQParams,
2946 zeroScaleFP16Clip, clipQuantRangeToFP16) {
2947 // if errPtr already contains an error then don't continue with constructor
2948 if (errPtr && *errPtr) {
2949 return;
2950 }
2951
2952 // Lambda to setup the Caffe2ModelLoader and return any Errors that
2953 // were raised.
2954 auto setup = [&]() -> Error {
2955 // The caffe2 network descriptor that we are deserializing.
2956 caffe2::NetDef networkDef;
2957 ASSIGN_VALUE_OR_RETURN_ERR(networkDef, loadProtoFile(netDescFilename));
2958
2959 // The caffe2 weights that we are deserializing.
2960 caffe2::NetDef weightsDef;
2961 ASSIGN_VALUE_OR_RETURN_ERR(weightsDef, loadProtoFile(netWeightFilename));
2962
2963 RETURN_IF_ERR(loadWeightsFromNet(weightsDef));
2964 RETURN_IF_ERR(loadNetwork(networkDef));
2965
2966 // This is to ensure that the same processing done with
2967 // the same network, even if order of operators is different.
2968 F.orderNodes();
2969 RETURN_ERR_IF_NOT(F.verify(), "Function verification failed.");
2970
2971 deleteUnusedConstants();
2972
2973 RETURN_IF_ERR(verifyDummyQParams());
2974
2975 return Error::success();
2976 };
2977
2978 if (errPtr) {
2979 *errPtr = setup();
2980 } else {
2981 EXIT_ON_ERR(setup());
2982 }
2983}
2984
2985Error Caffe2ModelLoader::initWithModule(caffe2::NetDef &networkDef,
2986 llvm::StringRef funNamePrefix,
2987 runtime::PrePartitionedConfig *PPC) {
2988 // Look for any partitions that will be needed. If there is no
2989 // partition_info then we create a single Function to load into. Otherwise
2990 // we create multiple Functions and switch between them as we load each
2991 // operator.
2992 std::unordered_map<Function *, std::vector<runtime::DeviceIDTy>> funToIDs;
2993 std::unordered_map<Function *, BackendSpecificOptions> funToOpts;
2994 if (networkDef.partition_info_size() == 0) {
2995 G_ = mod_.createFunction(funNamePrefix);
2996 } else {
2997 for (int i = 0; i < networkDef.partition_info_size(); i++) {
2998 const std::string &pName = networkDef.partition_info(i).name();
2999 const std::string funName = funNamePrefix.str() + "_" + pName;
3000 Function *PF = mod_.createFunction(funName);
3001 partNameToFun_[pName] = PF;
3002 for (auto id : networkDef.partition_info(i).device_id()) {
3003 funToIDs[PF].push_back(id);
3004 }
3005
3006 // Now set up device options for this partition.
3007 auto &optsMap = funToOpts[PF];
3008 for (auto &backendOpts : networkDef.partition_info(i).backend_options()) {
3009 const std::string &backendName = backendOpts.backend_name();
3010 for (auto &keyVal : backendOpts.option()) {
3011 optsMap[backendName + "_" + keyVal.key()] = keyVal.val();
3012 }
3013 }
3014 }
3015 }
3016
3017 RETURN_IF_ERR(loadNetwork(networkDef));
3018
3019 // Now setup the pre-partitioned config if relevant.
3020 if (partNameToFun_.size()) {
3021 RETURN_ERR_IF_NOT(
3022 PPC, "Partitioned model but no config to store meta information in.");
3023 PPC->funcName = funNamePrefix.str();
3024
3025 PPC->funcs.reserve(partNameToFun_.size());
3026 PPC->logicalIDs.reserve(partNameToFun_.size());
3027 for (auto &SF : partNameToFun_) {
3028 Function *F = SF.getValue();
3029 // Remove unused Functions from the module and skip them.
3030 if (F->getNodes().size() == 0) {
3031 mod_.eraseFunction(SF.getValue());
3032 continue;
3033 }
3034 // This is to ensure that the same processing done with
3035 // the same network, even if order of operators is different.
3036 F->orderNodes();
3037 PPC->funcs.push_back(F);
3038 PPC->logicalIDs.emplace_back(funToIDs[F]);
3039 PPC->backendSpecificOpts.emplace_back(funToOpts[F]);
3040 // Replication counts not currently loaded through C2, so default to 1.
3041 PPC->replicationCounts.emplace_back(1);
3042 // Backend hints not currently loaded through C2, so use default.
3043 PPC->backendHints.emplace_back();
3044 RETURN_ERR_IF_NOT(F->verify(), "Function verification failed.");
3045 }
3046 }
3047
3048 deleteUnusedConstants();
3049
3050 RETURN_IF_ERR(verifyDummyQParams());
3051
3052 return Error::success();
3053}
3054
3055Caffe2ModelLoader::Caffe2ModelLoader(const std::string &netDescFilename,
3056 const std::string &netWeightFilename,
3057 llvm::ArrayRef<const char *> names,
3058 llvm::ArrayRef<TypeRef> types, Module &mod,
3059 llvm::StringRef funNamePrefix,
3060 runtime::PrePartitionedConfig *PPC,
3061 Error *errPtr)
3062 : CommonOperatorLoader(names, types, mod, errPtr) {
3063 // if errPtr already contains an error then don't continue with constructor
3064 if (errPtr && *errPtr) {
3065 return;
3066 }
3067
3068 // Lambda to setup the Caffe2ModelLoader and return any Errors that
3069 // were raised.
3070 auto setup = [&]() -> Error {
3071 // The caffe2 network descriptor that we are deserializing.
3072 caffe2::NetDef networkDef;
3073 ASSIGN_VALUE_OR_RETURN_ERR(networkDef, loadProtoFile(netDescFilename));
3074
3075 // The caffe2 weights that we are deserializing.
3076 caffe2::NetDef weightsDef;
3077 ASSIGN_VALUE_OR_RETURN_ERR(weightsDef, loadProtoFile(netWeightFilename));
3078
3079 RETURN_IF_ERR(loadWeightsFromNet(weightsDef));
3080
3081 return initWithModule(networkDef, funNamePrefix, PPC);
3082 };
3083
3084 if (errPtr) {
3085 *errPtr = setup();
3086 } else {
3087 EXIT_ON_ERR(setup());
3088 }
3089}
3090
3091Caffe2ModelLoader::Caffe2ModelLoader(
3092 const std::string &modelStr, uint32_t weightsCount,
3093 const onnxTensorDescriptorV1 *weightDescriptors, Module &dummyMod,
3094 Error *errPtr, OriginNameToTQPMap *originNameToTQPMap,
3095 bool clipQuantRangeToFP16)
3096 : CommonOperatorLoader(
3097 {}, {}, dummyMod, errPtr,
3098 /* loadIntoExistingModule */ false, originNameToTQPMap,
3099 /* loadUniquedDummyQParams */ false, /* replaceDummyTQPs */ false,
3100 /* zeroScaleFP16Clip */ false, clipQuantRangeToFP16) {
3101 if (errPtr && *errPtr) {
3102 return;
3103 }
3104
3105 constFoldInLoader_ = false;
3106
3107 // Lambda to setup the Caffe2ModelLoader and return any Errors that were
3108 // raised.
3109 auto setup = [&]() -> Error {
3110 caffe2::NetDef networkDef;
3111 RETURN_ERR_IF_NOT(
3112 google::protobuf::TextFormat::ParseFromString(modelStr, &networkDef),
3113 "Error loading model from string");
3114
3115 ArgumentDictionaryTy dict = loadArgumentMap(networkDef);
3116
3117 std::unordered_set<std::string> initializers;
3118 if (dict.count("initializers")) {
3119 const auto &strings = dict.at("initializers")->strings();
3120 for (const auto &s : strings) {
3121 initializers.insert(s);
3122 }
3123 }
3124
3125 RETURN_IF_ERR(loadWeights(weightsCount, weightDescriptors));
3126
3127 RETURN_IF_ERR(loadInputs(networkDef, initializers));
3128
3129 // Identify primary input sequence
3130 std::unordered_set<std::string> weights;
3131 for (uint32_t i = 0; i < weightsCount; ++i) {
3132 weights.emplace(weightDescriptors[i].name);
3133 }
3134
3135 runtime::PrePartitionedConfig dummyPPC;
3136 return initWithModule(networkDef, "dummy", &dummyPPC);
3137 };
3138
3139 *errPtr = setup();
3140}
3141
3142Caffe2ModelLoader::Caffe2ModelLoader(
3143 const void *model, uint32_t modelSize, uint32_t weightsCount,
3144 const onnxTensorDescriptorV1 *weightDescriptors, Module &mod,
3145 llvm::StringRef funNamePrefix, runtime::PrePartitionedConfig *PPC,
3146 Error *errPtr, bool constFoldInLoader,
3147 OriginNameToTQPMap *originNameToTQPMap, bool loadUniquedDummyQParams,
3148 bool zeroScaleFP16Clip, bool clipQuantRangeToFP16)
3149 : CommonOperatorLoader({}, {}, mod, errPtr,
3150 /* loadIntoExistingModule */ false,
3151 originNameToTQPMap, loadUniquedDummyQParams,
3152 /* replaceDummyTQPs */ false, zeroScaleFP16Clip,
3153 clipQuantRangeToFP16) {
3154 // if errPtr already contains an error then don't continue with constructor
3155 if (errPtr && *errPtr) {
3156 return;
3157 }
3158
3159 // Always override the default for folding in this constructor.
3160 constFoldInLoader_ = constFoldInLoader;
3161
3162 // Lambda to setup the Caffe2ModelLoader and return any Errors that were
3163 // raised.
3164 auto setup = [&]() -> Error {
3165 caffe2::NetDef networkDef;
3166 ASSIGN_VALUE_OR_RETURN_ERR(networkDef, loadProto(model, modelSize));
3167
3168 ArgumentDictionaryTy dict = loadArgumentMap(networkDef);
3169
3170 std::unordered_set<std::string> initializers;
3171 if (dict.count("initializers")) {
3172 const auto &strings = dict.at("initializers")->strings();
3173 for (const auto &s : strings) {
3174 initializers.insert(s);
3175 }
3176 }
3177
3178 RETURN_IF_ERR(loadWeights(weightsCount, weightDescriptors));
3179
3180 RETURN_IF_ERR(loadInputs(networkDef, initializers));
3181
3182 // Identify primary input sequence
3183 std::unordered_set<std::string> weights;
3184 for (uint32_t i = 0; i < weightsCount; ++i) {
3185 weights.emplace(weightDescriptors[i].name);
3186 }
3187 for (const auto &input : networkDef.external_input()) {
3188 if (!weights.count(input)) {
3189 positionalInputNames_.emplace_back(input);
3190 }
3191 }
3192 for (const auto &output : networkDef.external_output()) {
3193 positionalOutputNames_.emplace_back(output);
3194 }
3195
3196 return initWithModule(networkDef, funNamePrefix, PPC);
3197 };
3198
3199 if (errPtr) {
3200 *errPtr = setup();
3201 } else {
3202 EXIT_ON_ERR(setup());
3203 }
3204}
3205