1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one |
3 | * or more contributor license agreements. See the NOTICE file |
4 | * distributed with this work for additional information |
5 | * regarding copyright ownership. The ASF licenses this file |
6 | * to you under the Apache License, Version 2.0 (the |
7 | * "License"); you may not use this file except in compliance |
8 | * with the License. You may obtain a copy of the License at |
9 | * |
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
11 | * |
12 | * Unless required by applicable law or agreed to in writing, |
13 | * software distributed under the License is distributed on an |
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
15 | * KIND, either express or implied. See the License for the |
16 | * specific language governing permissions and limitations |
17 | * under the License. |
18 | */ |
19 | |
/*!
 * \file rcnn_op.cc
 * \brief Faster RCNN and Mask RCNN operators
 */
24 | #include <tvm/relay/attrs/vision.h> |
25 | #include <tvm/relay/op.h> |
26 | #include <tvm/relay/op_attr_types.h> |
27 | |
28 | #include "../../transforms/infer_layout_utils.h" |
29 | |
30 | namespace tvm { |
31 | namespace relay { |
32 | |
33 | TVM_REGISTER_NODE_TYPE(ROIAlignAttrs); |
34 | |
35 | bool ROIAlignRel(const Array<Type>& types, int num_inputs, const Attrs& attrs, |
36 | const TypeReporter& reporter) { |
37 | auto roi_align_attrs = attrs.as<ROIAlignAttrs>(); |
38 | ICHECK_EQ(types.size(), 3); |
39 | const auto* data = types[0].as<TensorTypeNode>(); |
40 | const auto* rois = types[1].as<TensorTypeNode>(); |
41 | ICHECK(data); |
42 | ICHECK(rois); |
43 | const auto& dshape = data->shape; |
44 | const auto& rshape = rois->shape; |
45 | ICHECK(roi_align_attrs); |
46 | ICHECK_EQ(dshape.size(), 4) << "Input data should be 4-D." ; |
47 | ICHECK_EQ(rshape.size(), 2) << "Input rois should be 2-D." ; |
48 | // assign output type |
49 | std::vector<IndexExpr> oshape; |
50 | if (roi_align_attrs->layout == "NCHW" ) { |
51 | oshape = {rshape[0], dshape[1], roi_align_attrs->pooled_size[0], |
52 | roi_align_attrs->pooled_size[1]}; |
53 | } else { |
54 | ICHECK_EQ(roi_align_attrs->layout, "NHWC" ) << "Unexpected ROI Align layout" ; |
55 | oshape = {rshape[0], roi_align_attrs->pooled_size[0], roi_align_attrs->pooled_size[1], |
56 | dshape[3]}; |
57 | } |
58 | |
59 | reporter->Assign(types[2], TensorType(oshape, data->dtype)); |
60 | return true; |
61 | } |
62 | |
63 | template <typename T> |
64 | InferCorrectLayoutOutput ROIAlignInferCorrectLayout(const Attrs& attrs, |
65 | const Array<Layout>& new_in_layouts, |
66 | const Array<Layout>& old_in_layouts, |
67 | const Array<tvm::relay::Type>& old_in_types) { |
68 | const T* params = attrs.as<T>(); |
69 | Layout data_layout = params->layout; |
70 | |
71 | // Layout inference needs to define the layout for all inputs and output data layouts. |
72 | // For roi_align, the second inputs is 2-D tensor with shape [num_roi, 5]. |
73 | // So, we set the layout as "N5". |
74 | return InferCorrectLayoutOutput({data_layout, Layout("N5" )}, {data_layout}, attrs); |
75 | } |
76 | |
77 | Expr MakeROIAlign(Expr data, Expr rois, Array<IndexExpr> pooled_size, double spatial_scale, |
78 | int sample_ratio, String layout, String mode) { |
79 | auto attrs = make_object<ROIAlignAttrs>(); |
80 | attrs->pooled_size = pooled_size; |
81 | attrs->spatial_scale = spatial_scale; |
82 | attrs->sample_ratio = sample_ratio; |
83 | attrs->layout = layout; |
84 | attrs->mode = mode; |
85 | static const Op& op = Op::Get("vision.roi_align" ); |
86 | return Call(op, {data, rois}, Attrs(attrs), {}); |
87 | } |
88 | |
89 | TVM_REGISTER_GLOBAL("relay.op.vision._make.roi_align" ).set_body_typed(MakeROIAlign); |
90 | |
// Registration of the vision.roi_align operator: two inputs (data, rois),
// output type inferred by ROIAlignRel, layout handled by
// ROIAlignInferCorrectLayout above.
RELAY_REGISTER_OP("vision.roi_align")
    .describe(R"doc(ROI Align operator.

 - **data**: This depends on the `layout` parameter. Input is 4D array of shape
             (batch_size, channels, height, width) if `layout` is `NCHW`.
 - **rois**: 2D array of shape (num_roi, 5). The last dimension should be in format of
             [batch_index, w_start, h_start, w_end, h_end].
 - **out**: This depends on the `layout` parameter. Output is 4D array of shape
            (num_roi, channels, pooled_height, pooled_width) if `layout` is `NCHW`.
 )doc" TVM_ADD_FILELINE)
    .set_num_inputs(2)
    .add_argument("data", "Tensor", "The input tensor.")
    .add_argument("rois", "Tensor", "The input rois")
    .set_support_level(5)
    .add_type_rel("ROIAlign", ROIAlignRel)
    .set_attr<FInferCorrectLayout>("FInferCorrectLayout",
                                   ROIAlignInferCorrectLayout<ROIAlignAttrs>);
108 | |
109 | TVM_REGISTER_NODE_TYPE(ROIPoolAttrs); |
110 | |
111 | bool ROIPoolRel(const Array<Type>& types, int num_inputs, const Attrs& attrs, |
112 | const TypeReporter& reporter) { |
113 | auto roi_pool_attrs = attrs.as<ROIPoolAttrs>(); |
114 | ICHECK_EQ(types.size(), 3); |
115 | const auto* data = types[0].as<TensorTypeNode>(); |
116 | const auto* rois = types[1].as<TensorTypeNode>(); |
117 | const auto& dshape = data->shape; |
118 | const auto& rshape = rois->shape; |
119 | ICHECK(roi_pool_attrs); |
120 | ICHECK_EQ(dshape.size(), 4) << "Input data should be 4-D." ; |
121 | ICHECK_EQ(rshape.size(), 2) << "Input rois should be 2-D." ; |
122 | // assign output type |
123 | std::vector<IndexExpr> oshape; |
124 | if (roi_pool_attrs->layout == "NCHW" ) { |
125 | oshape = {rshape[0], dshape[1], roi_pool_attrs->pooled_size[0], roi_pool_attrs->pooled_size[1]}; |
126 | } else if (roi_pool_attrs->layout == "NHWC" ) { |
127 | oshape = {rshape[0], roi_pool_attrs->pooled_size[0], roi_pool_attrs->pooled_size[1], dshape[3]}; |
128 | } else { |
129 | LOG(FATAL) << "vision.roi_pool does not support " << roi_pool_attrs->layout << " layout" ; |
130 | } |
131 | |
132 | reporter->Assign(types[2], TensorType(oshape, data->dtype)); |
133 | return true; |
134 | } |
135 | |
136 | template <typename T> |
137 | InferCorrectLayoutOutput ROIPoolInferCorrectLayout(const Attrs& attrs, |
138 | const Array<Layout>& new_in_layouts, |
139 | const Array<Layout>& old_in_layouts, |
140 | const Array<tvm::relay::Type>& old_in_types) { |
141 | const T* params = attrs.as<T>(); |
142 | Layout data_layout = params->layout; |
143 | |
144 | // Layout inference needs to define the layout for all inputs and output data layouts. |
145 | // For roi_pool, the second inputs is 2-D tensor with shape [num_roi, 5]. |
146 | // So, we set the layout as "N5". |
147 | return InferCorrectLayoutOutput({data_layout, Layout("N5" )}, {data_layout}, attrs); |
148 | } |
149 | |
150 | Expr MakeROIPool(Expr data, Expr rois, Array<IndexExpr> pooled_size, double spatial_scale, |
151 | String layout) { |
152 | auto attrs = make_object<ROIPoolAttrs>(); |
153 | attrs->pooled_size = pooled_size; |
154 | attrs->spatial_scale = spatial_scale; |
155 | attrs->layout = layout; |
156 | static const Op& op = Op::Get("vision.roi_pool" ); |
157 | return Call(op, {data, rois}, Attrs(attrs), {}); |
158 | } |
159 | |
160 | TVM_REGISTER_GLOBAL("relay.op.vision._make.roi_pool" ).set_body_typed(MakeROIPool); |
161 | |
// Registration of the vision.roi_pool operator: two inputs (data, rois),
// output type inferred by ROIPoolRel, layout handled by
// ROIPoolInferCorrectLayout above.
RELAY_REGISTER_OP("vision.roi_pool")
    .describe(R"doc(ROI Pool operator.

 - **data**: This depends on the `layout` parameter. Input is 4D array of shape
             (batch_size, channels, height, width) if `layout` is `NCHW`.
 - **rois**: 2D array of shape (num_roi, 5). The last dimension should be in format of
             [batch_index, w_start, h_start, w_end, h_end].
 - **out**: This depends on the `layout` parameter. Output is 4D array of shape
            (num_roi, channels, pooled_height, pooled_width) if `layout` is `NCHW`.
 )doc" TVM_ADD_FILELINE)
    .set_num_inputs(2)
    .add_argument("data", "Tensor", "The input tensor.")
    .add_argument("rois", "Tensor", "The input rois")
    .set_support_level(5)
    .add_type_rel("ROIPool", ROIPoolRel)
    .set_attr<FInferCorrectLayout>("FInferCorrectLayout", ROIPoolInferCorrectLayout<ROIPoolAttrs>);
178 | |
179 | TVM_REGISTER_NODE_TYPE(ProposalAttrs); |
180 | |
181 | bool ProposalRel(const Array<Type>& types, int num_inputs, const Attrs& attrs, |
182 | const TypeReporter& reporter) { |
183 | auto proposal_attrs = attrs.as<ProposalAttrs>(); |
184 | ICHECK_EQ(types.size(), 4); |
185 | const auto* cls_prob = types[0].as<TensorTypeNode>(); |
186 | const auto* bbox_pred = types[1].as<TensorTypeNode>(); |
187 | const auto* im_info = types[2].as<TensorTypeNode>(); |
188 | |
189 | if (!cls_prob || !bbox_pred || !im_info) { |
190 | return false; |
191 | } |
192 | |
193 | ICHECK_EQ(cls_prob->shape.size(), 4U) |
194 | << "The dimension of class probability should be 4, but received " << cls_prob->shape.size(); |
195 | ICHECK_EQ(bbox_pred->shape.size(), 4U) |
196 | << "The dimension of box prediction should be 4, but received " << bbox_pred->shape.size(); |
197 | ICHECK_EQ(im_info->shape.size(), 2U) |
198 | << "The dimension of image info should be 2, but received " << im_info->shape.size(); |
199 | ICHECK(reporter->AssertEQ(im_info->shape[1], 3)); |
200 | |
201 | auto batch = cls_prob->shape[0]; |
202 | |
203 | std::vector<IndexExpr> oshape({batch * proposal_attrs->rpn_post_nms_top_n, 5}); |
204 | reporter->Assign(types[3], TensorType(oshape, cls_prob->dtype)); |
205 | return true; |
206 | } |
207 | |
208 | Expr MakeProposal(Expr cls_prob, Expr bbox_pred, Expr im_info, Array<IndexExpr> scales, |
209 | Array<IndexExpr> ratios, int feature_stride, double threshold, |
210 | int rpn_pre_nms_top_n, int rpn_post_nms_top_n, int rpn_min_size, bool iou_loss) { |
211 | auto attrs = make_object<ProposalAttrs>(); |
212 | attrs->scales = scales; |
213 | attrs->ratios = ratios; |
214 | attrs->feature_stride = feature_stride; |
215 | attrs->threshold = threshold; |
216 | attrs->rpn_pre_nms_top_n = rpn_pre_nms_top_n; |
217 | attrs->rpn_post_nms_top_n = rpn_post_nms_top_n; |
218 | attrs->rpn_min_size = rpn_min_size; |
219 | attrs->iou_loss = iou_loss; |
220 | static const Op& op = Op::Get("vision.proposal" ); |
221 | return Call(op, {cls_prob, bbox_pred, im_info}, Attrs(attrs), {}); |
222 | } |
223 | |
224 | TVM_REGISTER_GLOBAL("relay.op.vision._make.proposal" ).set_body_typed(MakeProposal); |
225 | |
// Registration of the vision.proposal operator: three inputs
// (cls_prob, bbox_pred, im_info), output type inferred by ProposalRel.
RELAY_REGISTER_OP("vision.proposal")
    .describe(R"code(Generate region proposals via RPN.

 - **cls_prob**: 4-D with shape [batch, 2 * num_anchors, height, width].
 - **bbox_pred**: 4-D with shape [batch, 4 * num_anchors, height, width].
 - **im_info**: 2-D with shape [batch, 3].
 - **out**: 2-D with shape [batch * rpn_post_nms_top_n, 5].
 )code" TVM_ADD_FILELINE)
    .set_num_inputs(3)
    .add_argument("cls_prob", "Tensor", "Score of how likely proposal is object")
    .add_argument("bbox_pred", "Tensor", "BBox predicted deltas from anchors for proposals")
    .add_argument("im_info", "Tensor", "Image size and scale")
    .set_support_level(5)
    .add_type_rel("Proposal", ProposalRel);
240 | |
241 | } // namespace relay |
242 | } // namespace tvm |
243 | |