1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one |
3 | * or more contributor license agreements. See the NOTICE file |
4 | * distributed with this work for additional information |
5 | * regarding copyright ownership. The ASF licenses this file |
6 | * to you under the Apache License, Version 2.0 (the |
7 | * "License"); you may not use this file except in compliance |
8 | * with the License. You may obtain a copy of the License at |
9 | * |
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
11 | * |
12 | * Unless required by applicable law or agreed to in writing, |
13 | * software distributed under the License is distributed on an |
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
15 | * KIND, either express or implied. See the License for the |
16 | * specific language governing permissions and limitations |
17 | * under the License. |
18 | */ |
19 | |
/*!
 * \file rcnn_op.cc
 * \brief Faster RCNN and Mask RCNN operators
 */
24 | #include <tvm/relay/attrs/vision.h> |
25 | #include <tvm/relay/op.h> |
26 | #include <tvm/relay/op_attr_types.h> |
27 | |
28 | #include "../../transforms/infer_layout_utils.h" |
29 | |
30 | namespace tvm { |
31 | namespace relay { |
32 | |
33 | TVM_REGISTER_NODE_TYPE(ROIAlignAttrs); |
34 | |
35 | bool ROIAlignRel(const Array<Type>& types, int num_inputs, const Attrs& attrs, |
36 | const TypeReporter& reporter) { |
37 | auto roi_align_attrs = attrs.as<ROIAlignAttrs>(); |
38 | ICHECK_EQ(types.size(), 3); |
39 | const auto* data = types[0].as<TensorTypeNode>(); |
40 | const auto* rois = types[1].as<TensorTypeNode>(); |
41 | ICHECK(data); |
42 | ICHECK(rois); |
43 | const auto& dshape = data->shape; |
44 | const auto& rshape = rois->shape; |
45 | ICHECK(roi_align_attrs); |
46 | ICHECK_EQ(dshape.size(), 4) << "Input data should be 4-D." ; |
47 | ICHECK_EQ(rshape.size(), 2) << "Input rois should be 2-D." ; |
48 | // assign output type |
49 | std::vector<IndexExpr> oshape; |
50 | if (roi_align_attrs->layout == "NCHW" ) { |
51 | oshape = {rshape[0], dshape[1], roi_align_attrs->pooled_size[0], |
52 | roi_align_attrs->pooled_size[1]}; |
53 | } else { |
54 | ICHECK_EQ(roi_align_attrs->layout, "NHWC" ) << "Unexpected ROI Align layout" ; |
55 | oshape = {rshape[0], roi_align_attrs->pooled_size[0], roi_align_attrs->pooled_size[1], |
56 | dshape[3]}; |
57 | } |
58 | |
59 | reporter->Assign(types[2], TensorType(oshape, data->dtype)); |
60 | return true; |
61 | } |
62 | |
63 | template <typename T> |
64 | InferCorrectLayoutOutput ROIAlignInferCorrectLayout(const Attrs& attrs, |
65 | const Array<Layout>& new_in_layouts, |
66 | const Array<Layout>& old_in_layouts, |
67 | const Array<tvm::relay::Type>& old_in_types) { |
68 | const T* params = attrs.as<T>(); |
69 | Layout data_layout = params->layout; |
70 | |
71 | // Layout inference needs to define the layout for all inputs and output data layouts. |
72 | // For roi_align, the second inputs is 2-D tensor with shape [num_roi, 5]. |
73 | // So, we set the layout as "N5". |
74 | return InferCorrectLayoutOutput({data_layout, Layout("N5" )}, {data_layout}, attrs); |
75 | } |
76 | |
77 | Expr MakeROIAlign(Expr data, Expr rois, Array<IndexExpr> pooled_size, double spatial_scale, |
78 | int sample_ratio, String layout, String mode) { |
79 | auto attrs = make_object<ROIAlignAttrs>(); |
80 | attrs->pooled_size = pooled_size; |
81 | attrs->spatial_scale = spatial_scale; |
82 | attrs->sample_ratio = sample_ratio; |
83 | attrs->layout = layout; |
84 | attrs->mode = mode; |
85 | static const Op& op = Op::Get("vision.roi_align" ); |
86 | return Call(op, {data, rois}, Attrs(attrs), {}); |
87 | } |
88 | |
89 | TVM_REGISTER_GLOBAL("relay.op.vision._make.roi_align" ).set_body_typed(MakeROIAlign); |
90 | |
// Registration of the vision.roi_align operator: two inputs (data, rois),
// output type inferred by ROIAlignRel, layout handled by
// ROIAlignInferCorrectLayout above.
RELAY_REGISTER_OP("vision.roi_align")
    .describe(R"doc(ROI Align operator.

 - **data**: This depends on the `layout` parameter. Input is 4D array of shape
             (batch_size, channels, height, width) if `layout` is `NCHW`.
 - **rois**: 2D array of shape (num_roi, 5). The last dimension should be in format of
             [batch_index, w_start, h_start, w_end, h_end].
 - **out**: This depends on the `layout` parameter. Output is 4D array of shape
            (num_roi, channels, pooled_height, pooled_width) if `layout` is `NCHW`.
 )doc" TVM_ADD_FILELINE)
    .set_num_inputs(2)
    .add_argument("data", "Tensor", "The input tensor.")
    .add_argument("rois", "Tensor", "The input rois")
    .set_support_level(5)
    .add_type_rel("ROIAlign", ROIAlignRel)
    .set_attr<FInferCorrectLayout>("FInferCorrectLayout",
                                   ROIAlignInferCorrectLayout<ROIAlignAttrs>);
108 | |
109 | TVM_REGISTER_NODE_TYPE(ROIPoolAttrs); |
110 | |
111 | bool ROIPoolRel(const Array<Type>& types, int num_inputs, const Attrs& attrs, |
112 | const TypeReporter& reporter) { |
113 | auto roi_pool_attrs = attrs.as<ROIPoolAttrs>(); |
114 | ICHECK_EQ(types.size(), 3); |
115 | const auto* data = types[0].as<TensorTypeNode>(); |
116 | const auto* rois = types[1].as<TensorTypeNode>(); |
117 | const auto& dshape = data->shape; |
118 | const auto& rshape = rois->shape; |
119 | ICHECK(roi_pool_attrs); |
120 | ICHECK_EQ(dshape.size(), 4) << "Input data should be 4-D." ; |
121 | ICHECK_EQ(rshape.size(), 2) << "Input rois should be 2-D." ; |
122 | // assign output type |
123 | std::vector<IndexExpr> oshape; |
124 | if (roi_pool_attrs->layout == "NCHW" ) { |
125 | oshape = {rshape[0], dshape[1], roi_pool_attrs->pooled_size[0], roi_pool_attrs->pooled_size[1]}; |
126 | } else if (roi_pool_attrs->layout == "NHWC" ) { |
127 | oshape = {rshape[0], roi_pool_attrs->pooled_size[0], roi_pool_attrs->pooled_size[1], dshape[3]}; |
128 | } else { |
129 | LOG(FATAL) << "vision.roi_pool does not support " << roi_pool_attrs->layout << " layout" ; |
130 | } |
131 | |
132 | reporter->Assign(types[2], TensorType(oshape, data->dtype)); |
133 | return true; |
134 | } |
135 | |
136 | template <typename T> |
137 | InferCorrectLayoutOutput ROIPoolInferCorrectLayout(const Attrs& attrs, |
138 | const Array<Layout>& new_in_layouts, |
139 | const Array<Layout>& old_in_layouts, |
140 | const Array<tvm::relay::Type>& old_in_types) { |
141 | const T* params = attrs.as<T>(); |
142 | Layout data_layout = params->layout; |
143 | |
144 | // Layout inference needs to define the layout for all inputs and output data layouts. |
145 | // For roi_pool, the second inputs is 2-D tensor with shape [num_roi, 5]. |
146 | // So, we set the layout as "N5". |
147 | return InferCorrectLayoutOutput({data_layout, Layout("N5" )}, {data_layout}, attrs); |
148 | } |
149 | |
150 | Expr MakeROIPool(Expr data, Expr rois, Array<IndexExpr> pooled_size, double spatial_scale, |
151 | String layout) { |
152 | auto attrs = make_object<ROIPoolAttrs>(); |
153 | attrs->pooled_size = pooled_size; |
154 | attrs->spatial_scale = spatial_scale; |
155 | attrs->layout = layout; |
156 | static const Op& op = Op::Get("vision.roi_pool" ); |
157 | return Call(op, {data, rois}, Attrs(attrs), {}); |
158 | } |
159 | |
160 | TVM_REGISTER_GLOBAL("relay.op.vision._make.roi_pool" ).set_body_typed(MakeROIPool); |
161 | |
// Registration of the vision.roi_pool operator: two inputs (data, rois),
// output type inferred by ROIPoolRel, layout handled by
// ROIPoolInferCorrectLayout above.
RELAY_REGISTER_OP("vision.roi_pool")
    .describe(R"doc(ROI Pool operator.

 - **data**: This depends on the `layout` parameter. Input is 4D array of shape
             (batch_size, channels, height, width) if `layout` is `NCHW`.
 - **rois**: 2D array of shape (num_roi, 5). The last dimension should be in format of
             [batch_index, w_start, h_start, w_end, h_end].
 - **out**: This depends on the `layout` parameter. Output is 4D array of shape
            (num_roi, channels, pooled_height, pooled_width) if `layout` is `NCHW`.
 )doc" TVM_ADD_FILELINE)
    .set_num_inputs(2)
    .add_argument("data", "Tensor", "The input tensor.")
    .add_argument("rois", "Tensor", "The input rois")
    .set_support_level(5)
    .add_type_rel("ROIPool", ROIPoolRel)
    .set_attr<FInferCorrectLayout>("FInferCorrectLayout", ROIPoolInferCorrectLayout<ROIPoolAttrs>);
178 | |
179 | TVM_REGISTER_NODE_TYPE(ProposalAttrs); |
180 | |
181 | bool ProposalRel(const Array<Type>& types, int num_inputs, const Attrs& attrs, |
182 | const TypeReporter& reporter) { |
183 | auto proposal_attrs = attrs.as<ProposalAttrs>(); |
184 | ICHECK_EQ(types.size(), 4); |
185 | const auto* cls_prob = types[0].as<TensorTypeNode>(); |
186 | const auto* bbox_pred = types[1].as<TensorTypeNode>(); |
187 | const auto* im_info = types[2].as<TensorTypeNode>(); |
188 | |
189 | if (!cls_prob || !bbox_pred || !im_info) { |
190 | return false; |
191 | } |
192 | |
193 | ICHECK_EQ(cls_prob->shape.size(), 4U) |
194 | << "The dimension of class probability should be 4, but received " << cls_prob->shape.size(); |
195 | ICHECK_EQ(bbox_pred->shape.size(), 4U) |
196 | << "The dimension of box prediction should be 4, but received " << bbox_pred->shape.size(); |
197 | ICHECK_EQ(im_info->shape.size(), 2U) |
198 | << "The dimension of image info should be 2, but received " << im_info->shape.size(); |
199 | ICHECK(reporter->AssertEQ(im_info->shape[1], 3)); |
200 | |
201 | auto batch = cls_prob->shape[0]; |
202 | |
203 | std::vector<IndexExpr> oshape({batch * proposal_attrs->rpn_post_nms_top_n, 5}); |
204 | reporter->Assign(types[3], TensorType(oshape, cls_prob->dtype)); |
205 | return true; |
206 | } |
207 | |
208 | Expr MakeProposal(Expr cls_prob, Expr bbox_pred, Expr im_info, Array<IndexExpr> scales, |
209 | Array<IndexExpr> ratios, int feature_stride, double threshold, |
210 | int rpn_pre_nms_top_n, int rpn_post_nms_top_n, int rpn_min_size, bool iou_loss) { |
211 | auto attrs = make_object<ProposalAttrs>(); |
212 | attrs->scales = scales; |
213 | attrs->ratios = ratios; |
214 | attrs->feature_stride = feature_stride; |
215 | attrs->threshold = threshold; |
216 | attrs->rpn_pre_nms_top_n = rpn_pre_nms_top_n; |
217 | attrs->rpn_post_nms_top_n = rpn_post_nms_top_n; |
218 | attrs->rpn_min_size = rpn_min_size; |
219 | attrs->iou_loss = iou_loss; |
220 | static const Op& op = Op::Get("vision.proposal" ); |
221 | return Call(op, {cls_prob, bbox_pred, im_info}, Attrs(attrs), {}); |
222 | } |
223 | |
224 | TVM_REGISTER_GLOBAL("relay.op.vision._make.proposal" ).set_body_typed(MakeProposal); |
225 | |
// Registration of the vision.proposal operator: three inputs
// (cls_prob, bbox_pred, im_info), output type inferred by ProposalRel.
RELAY_REGISTER_OP("vision.proposal")
    .describe(R"code(Generate region proposals via RPN.

 - **cls_prob**: 4-D with shape [batch, 2 * num_anchors, height, width].
 - **bbox_pred**: 4-D with shape [batch, 4 * num_anchors, height, width].
 - **im_info**: 2-D with shape [batch, 3].
 - **out**: 2-D with shape [batch * rpn_post_nms_top_n, 5].
 )code" TVM_ADD_FILELINE)
    .set_num_inputs(3)
    .add_argument("cls_prob", "Tensor", "Score of how likely proposal is object")
    .add_argument("bbox_pred", "Tensor", "BBox predicted deltas from anchors for proposals")
    .add_argument("im_info", "Tensor", "Image size and scale")
    .set_support_level(5)
    .add_type_rel("Proposal", ProposalRel);
240 | |
241 | } // namespace relay |
242 | } // namespace tvm |
243 | |