1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20/*!
21 * \file rcnn_op.cc
22 * \brief Faster RCNN and Mask RCNN operators
23 */
24#include <tvm/relay/attrs/vision.h>
25#include <tvm/relay/op.h>
26#include <tvm/relay/op_attr_types.h>
27
28#include "../../transforms/infer_layout_utils.h"
29
30namespace tvm {
31namespace relay {
32
// Register the ROIAlignAttrs node type. MakeROIAlign instantiates this attribute
// node and ROIAlignRel reads it back via attrs.as<ROIAlignAttrs>().
TVM_REGISTER_NODE_TYPE(ROIAlignAttrs);
34
35bool ROIAlignRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
36 const TypeReporter& reporter) {
37 auto roi_align_attrs = attrs.as<ROIAlignAttrs>();
38 ICHECK_EQ(types.size(), 3);
39 const auto* data = types[0].as<TensorTypeNode>();
40 const auto* rois = types[1].as<TensorTypeNode>();
41 ICHECK(data);
42 ICHECK(rois);
43 const auto& dshape = data->shape;
44 const auto& rshape = rois->shape;
45 ICHECK(roi_align_attrs);
46 ICHECK_EQ(dshape.size(), 4) << "Input data should be 4-D.";
47 ICHECK_EQ(rshape.size(), 2) << "Input rois should be 2-D.";
48 // assign output type
49 std::vector<IndexExpr> oshape;
50 if (roi_align_attrs->layout == "NCHW") {
51 oshape = {rshape[0], dshape[1], roi_align_attrs->pooled_size[0],
52 roi_align_attrs->pooled_size[1]};
53 } else {
54 ICHECK_EQ(roi_align_attrs->layout, "NHWC") << "Unexpected ROI Align layout";
55 oshape = {rshape[0], roi_align_attrs->pooled_size[0], roi_align_attrs->pooled_size[1],
56 dshape[3]};
57 }
58
59 reporter->Assign(types[2], TensorType(oshape, data->dtype));
60 return true;
61}
62
63template <typename T>
64InferCorrectLayoutOutput ROIAlignInferCorrectLayout(const Attrs& attrs,
65 const Array<Layout>& new_in_layouts,
66 const Array<Layout>& old_in_layouts,
67 const Array<tvm::relay::Type>& old_in_types) {
68 const T* params = attrs.as<T>();
69 Layout data_layout = params->layout;
70
71 // Layout inference needs to define the layout for all inputs and output data layouts.
72 // For roi_align, the second inputs is 2-D tensor with shape [num_roi, 5].
73 // So, we set the layout as "N5".
74 return InferCorrectLayoutOutput({data_layout, Layout("N5")}, {data_layout}, attrs);
75}
76
77Expr MakeROIAlign(Expr data, Expr rois, Array<IndexExpr> pooled_size, double spatial_scale,
78 int sample_ratio, String layout, String mode) {
79 auto attrs = make_object<ROIAlignAttrs>();
80 attrs->pooled_size = pooled_size;
81 attrs->spatial_scale = spatial_scale;
82 attrs->sample_ratio = sample_ratio;
83 attrs->layout = layout;
84 attrs->mode = mode;
85 static const Op& op = Op::Get("vision.roi_align");
86 return Call(op, {data, rois}, Attrs(attrs), {});
87}
88
// Expose MakeROIAlign as a typed global function under the name
// "relay.op.vision._make.roi_align".
TVM_REGISTER_GLOBAL("relay.op.vision._make.roi_align").set_body_typed(MakeROIAlign);

// Registration of the "vision.roi_align" operator: two tensor arguments (data, rois),
// support level 5, ROIAlignRel as type relation and
// ROIAlignInferCorrectLayout<ROIAlignAttrs> as the FInferCorrectLayout attribute.
// NOTE(review): the description string only spells out the NCHW shapes, while
// ROIAlignRel also accepts the "NHWC" layout -- consider documenting it as well.
RELAY_REGISTER_OP("vision.roi_align")
 .describe(R"doc(ROI Align operator.

 - **data**: This depends on the `layout` parameter. Input is 4D array of shape
 (batch_size, channels, height, width) if `layout` is `NCHW`.
 - **rois**: 2D array of shape (num_roi, 5). The last dimension should be in format of
 [batch_index, w_start, h_start, w_end, h_end].
 - **out**: This depends on the `layout` parameter. Output is 4D array of shape
 (num_roi, channels, pooled_height, pooled_width) if `layout` is `NCHW`.
 )doc" TVM_ADD_FILELINE)
 .set_num_inputs(2)
 .add_argument("data", "Tensor", "The input tensor.")
 .add_argument("rois", "Tensor", "The input rois")
 .set_support_level(5)
 .add_type_rel("ROIAlign", ROIAlignRel)
 .set_attr<FInferCorrectLayout>("FInferCorrectLayout",
 ROIAlignInferCorrectLayout<ROIAlignAttrs>);

// Register the ROIPoolAttrs node type. MakeROIPool instantiates this attribute
// node and ROIPoolRel reads it back via attrs.as<ROIPoolAttrs>().
TVM_REGISTER_NODE_TYPE(ROIPoolAttrs);
110
111bool ROIPoolRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
112 const TypeReporter& reporter) {
113 auto roi_pool_attrs = attrs.as<ROIPoolAttrs>();
114 ICHECK_EQ(types.size(), 3);
115 const auto* data = types[0].as<TensorTypeNode>();
116 const auto* rois = types[1].as<TensorTypeNode>();
117 const auto& dshape = data->shape;
118 const auto& rshape = rois->shape;
119 ICHECK(roi_pool_attrs);
120 ICHECK_EQ(dshape.size(), 4) << "Input data should be 4-D.";
121 ICHECK_EQ(rshape.size(), 2) << "Input rois should be 2-D.";
122 // assign output type
123 std::vector<IndexExpr> oshape;
124 if (roi_pool_attrs->layout == "NCHW") {
125 oshape = {rshape[0], dshape[1], roi_pool_attrs->pooled_size[0], roi_pool_attrs->pooled_size[1]};
126 } else if (roi_pool_attrs->layout == "NHWC") {
127 oshape = {rshape[0], roi_pool_attrs->pooled_size[0], roi_pool_attrs->pooled_size[1], dshape[3]};
128 } else {
129 LOG(FATAL) << "vision.roi_pool does not support " << roi_pool_attrs->layout << " layout";
130 }
131
132 reporter->Assign(types[2], TensorType(oshape, data->dtype));
133 return true;
134}
135
136template <typename T>
137InferCorrectLayoutOutput ROIPoolInferCorrectLayout(const Attrs& attrs,
138 const Array<Layout>& new_in_layouts,
139 const Array<Layout>& old_in_layouts,
140 const Array<tvm::relay::Type>& old_in_types) {
141 const T* params = attrs.as<T>();
142 Layout data_layout = params->layout;
143
144 // Layout inference needs to define the layout for all inputs and output data layouts.
145 // For roi_pool, the second inputs is 2-D tensor with shape [num_roi, 5].
146 // So, we set the layout as "N5".
147 return InferCorrectLayoutOutput({data_layout, Layout("N5")}, {data_layout}, attrs);
148}
149
150Expr MakeROIPool(Expr data, Expr rois, Array<IndexExpr> pooled_size, double spatial_scale,
151 String layout) {
152 auto attrs = make_object<ROIPoolAttrs>();
153 attrs->pooled_size = pooled_size;
154 attrs->spatial_scale = spatial_scale;
155 attrs->layout = layout;
156 static const Op& op = Op::Get("vision.roi_pool");
157 return Call(op, {data, rois}, Attrs(attrs), {});
158}
159
// Expose MakeROIPool as a typed global function under the name
// "relay.op.vision._make.roi_pool".
TVM_REGISTER_GLOBAL("relay.op.vision._make.roi_pool").set_body_typed(MakeROIPool);

// Registration of the "vision.roi_pool" operator: two tensor arguments (data, rois),
// support level 5, ROIPoolRel as type relation and
// ROIPoolInferCorrectLayout<ROIPoolAttrs> as the FInferCorrectLayout attribute.
// NOTE(review): the description string only spells out the NCHW shapes, while
// ROIPoolRel also accepts the "NHWC" layout -- consider documenting it as well.
RELAY_REGISTER_OP("vision.roi_pool")
 .describe(R"doc(ROI Pool operator.

 - **data**: This depends on the `layout` parameter. Input is 4D array of shape
 (batch_size, channels, height, width) if `layout` is `NCHW`.
 - **rois**: 2D array of shape (num_roi, 5). The last dimension should be in format of
 [batch_index, w_start, h_start, w_end, h_end].
 - **out**: This depends on the `layout` parameter. Output is 4D array of shape
 (num_roi, channels, pooled_height, pooled_width) if `layout` is `NCHW`.
 )doc" TVM_ADD_FILELINE)
 .set_num_inputs(2)
 .add_argument("data", "Tensor", "The input tensor.")
 .add_argument("rois", "Tensor", "The input rois")
 .set_support_level(5)
 .add_type_rel("ROIPool", ROIPoolRel)
 .set_attr<FInferCorrectLayout>("FInferCorrectLayout", ROIPoolInferCorrectLayout<ROIPoolAttrs>);

// Register the ProposalAttrs node type. MakeProposal instantiates this attribute
// node and ProposalRel reads it back via attrs.as<ProposalAttrs>().
TVM_REGISTER_NODE_TYPE(ProposalAttrs);
180
181bool ProposalRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
182 const TypeReporter& reporter) {
183 auto proposal_attrs = attrs.as<ProposalAttrs>();
184 ICHECK_EQ(types.size(), 4);
185 const auto* cls_prob = types[0].as<TensorTypeNode>();
186 const auto* bbox_pred = types[1].as<TensorTypeNode>();
187 const auto* im_info = types[2].as<TensorTypeNode>();
188
189 if (!cls_prob || !bbox_pred || !im_info) {
190 return false;
191 }
192
193 ICHECK_EQ(cls_prob->shape.size(), 4U)
194 << "The dimension of class probability should be 4, but received " << cls_prob->shape.size();
195 ICHECK_EQ(bbox_pred->shape.size(), 4U)
196 << "The dimension of box prediction should be 4, but received " << bbox_pred->shape.size();
197 ICHECK_EQ(im_info->shape.size(), 2U)
198 << "The dimension of image info should be 2, but received " << im_info->shape.size();
199 ICHECK(reporter->AssertEQ(im_info->shape[1], 3));
200
201 auto batch = cls_prob->shape[0];
202
203 std::vector<IndexExpr> oshape({batch * proposal_attrs->rpn_post_nms_top_n, 5});
204 reporter->Assign(types[3], TensorType(oshape, cls_prob->dtype));
205 return true;
206}
207
208Expr MakeProposal(Expr cls_prob, Expr bbox_pred, Expr im_info, Array<IndexExpr> scales,
209 Array<IndexExpr> ratios, int feature_stride, double threshold,
210 int rpn_pre_nms_top_n, int rpn_post_nms_top_n, int rpn_min_size, bool iou_loss) {
211 auto attrs = make_object<ProposalAttrs>();
212 attrs->scales = scales;
213 attrs->ratios = ratios;
214 attrs->feature_stride = feature_stride;
215 attrs->threshold = threshold;
216 attrs->rpn_pre_nms_top_n = rpn_pre_nms_top_n;
217 attrs->rpn_post_nms_top_n = rpn_post_nms_top_n;
218 attrs->rpn_min_size = rpn_min_size;
219 attrs->iou_loss = iou_loss;
220 static const Op& op = Op::Get("vision.proposal");
221 return Call(op, {cls_prob, bbox_pred, im_info}, Attrs(attrs), {});
222}
223
// Expose MakeProposal as a typed global function under the name
// "relay.op.vision._make.proposal".
TVM_REGISTER_GLOBAL("relay.op.vision._make.proposal").set_body_typed(MakeProposal);

// Registration of the "vision.proposal" operator: three tensor arguments
// (cls_prob, bbox_pred, im_info), support level 5 and ProposalRel as type relation.
// No FInferCorrectLayout attribute is set for this operator here.
RELAY_REGISTER_OP("vision.proposal")
 .describe(R"code(Generate region proposals via RPN.

 - **cls_prob**: 4-D with shape [batch, 2 * num_anchors, height, width].
 - **bbox_pred**: 4-D with shape [batch, 4 * num_anchors, height, width].
 - **im_info**: 2-D with shape [batch, 3].
 - **out**: 2-D with shape [batch * rpn_post_nms_top_n, 5].
 )code" TVM_ADD_FILELINE)
 .set_num_inputs(3)
 .add_argument("cls_prob", "Tensor", "Score of how likely proposal is object")
 .add_argument("bbox_pred", "Tensor", "BBox predicted deltas from anchors for proposals")
 .add_argument("im_info", "Tensor", "Image size and scale")
 .set_support_level(5)
 .add_type_rel("Proposal", ProposalRel);
240
241} // namespace relay
242} // namespace tvm
243