1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19
20/*!
21 * \file src/contrib/ethosu/cascader/graph.h
22 * \brief Graph objects (Tensor and Part) for the Ethos-U cascader
23 */
24#ifndef TVM_CONTRIB_ETHOSU_CASCADER_GRAPH_H_
25#define TVM_CONTRIB_ETHOSU_CASCADER_GRAPH_H_
26
27#include <tvm/runtime/data_type.h>
28#include <tvm/runtime/object.h>
29#include <tvm/te/operation.h>
30#include <tvm/te/tensor.h>
31
32#include <unordered_map>
33#include <utility>
34#include <vector>
35
36#include "block_config.h"
37#include "propagator.h"
38
39namespace tvm {
40namespace contrib {
41namespace ethosu {
42namespace cascader {
43
44class Tensor;
45class Part;
46class StripeConfig;
47
48/*!
49 * \brief The buffering mode to use when realizing a tensor.
50 * RECOMPUTE - The 'default' behaviour of TVM. Overlapping stripes will be recomputed.
51 * ROLLING - Apply both the sliding window and storage folding optimizations to the tensor
52 * realization.
53 */
54enum BufferMode { RECOMPUTE, ROLLING };
55
56/*! \brief A struct to hold a Tensor Expression subgraph */
57struct TESubgraph {
58 /*! \brief The input te::Tensors to the subgraph */
59 std::vector<te::Tensor> input_tensors;
60 /*! \brief The output te::Tensor of the subgraph */
61 te::Tensor output_tensor;
62};
63
64/*! \brief Node to hold performance information for a Part */
65class PerformanceInfoNode : public Object {
66 public:
67 void VisitAttrs(AttrVisitor* v);
68
69 /*! \brief The cycles to compute a block */
70 int64_t compute_cycles;
71 /*! \brief The number of bytes read per input tensor */
72 std::vector<int64_t> read_bytes;
73 /*! \brief The number of bytes written to the output tensor */
74 int64_t write_bytes;
75 /*! \brief The block config used for this performance point */
76 BlockConfig block_config;
77
78 static constexpr const char* _type_key = "contrib.ethosu.cascader.PerformanceInfo";
79 TVM_DECLARE_FINAL_OBJECT_INFO(PerformanceInfoNode, Object);
80};
81
82/*!
83 * \brief A class to hold the performance information for a Part.
84 * \note The performance information for a Part is composed of 3 factors: the compute cycles,
85 * the number of bytes read from each input tensor and the number of bytes written to the output
86 * tensor. Bytes read/written is reported in favour of read/write bandwidth cycles so the
87 * calculation of the performance information can be re-used with different memory homing.
88 */
89class PerformanceInfo : public ObjectRef {
90 public:
91 PerformanceInfo(int64_t compute_cycles, std::vector<int64_t> read_bytes, int64_t write_bytes,
92 BlockConfig block_config) {
93 auto n = make_object<PerformanceInfoNode>();
94 n->compute_cycles = compute_cycles;
95 n->read_bytes = std::move(read_bytes);
96 n->write_bytes = write_bytes;
97 n->block_config = block_config;
98 data_ = std::move(n);
99 }
100
101 TVM_DEFINE_OBJECT_REF_METHODS(PerformanceInfo, ObjectRef, PerformanceInfoNode);
102};
103
104/*! \brief Node to represent a Tensor */
105class TensorNode : public Object {
106 public:
107 void VisitAttrs(AttrVisitor* v);
108
109 /*! \return The shape of the tensor */
110 std::vector<int> GetShape() const { return shape_; }
111 /*! \return The data type of the tensor */
112 DataType GetDataType() const { return dtype_; }
113 /*! \return Whether the tensor stores a constant value */
114 bool IsConstant() const { return is_constant_; }
115 /*! \return The compression ratio of the tensor */
116 float GetCompressionRatio() const { return compression_ratio_; }
117 /*! \return The producers of the tensor */
118 const std::vector<Part> GetProducers() const { return producers_; }
119 /*! \return The consumers of the tensor */
120 const std::vector<Part> GetConsumers() const { return consumers_; }
121 /*! \return The size of the tensor in bytes */
122 int GetSize() const { return size_ * compression_ratio_; }
123
124 /*! \brief Add a producer of the tensor */
125 inline void AddProducer(const Part& part) { producers_.push_back(part); }
126 /*! \brief Add a consumer of the tensor */
127 inline void AddConsumer(const Part& part) { consumers_.push_back(part); }
128
129 static constexpr const char* _type_key = "contrib.ethosu.cascader.Tensor";
130 TVM_DECLARE_FINAL_OBJECT_INFO(TensorNode, Object);
131
132 protected:
133 friend class Tensor;
134
135 /*! \brief The shape of the tensor */
136 std::vector<int> shape_;
137 /*! \brief The data type of the tensor */
138 DataType dtype_;
139 /*! \brief Whether the tensor stores a constant value */
140 bool is_constant_;
141 /*! \brief The compression ratio of the tensor */
142 float compression_ratio_;
143 /*! \brief The producers of the tensor */
144 std::vector<Part> producers_;
145 /*! \brief The consumers of the tensor */
146 std::vector<Part> consumers_;
147 /*! \brief The size of the tensor in bytes */
148 int size_;
149};
150
151/*!
152 * \brief A class to describe a Tensor in a Cascader graph.
153 * \note Cascader graphs consist of two object types: Tensors and Parts. This class
154 * defines the Tensors which represent the tensors that are consumed and produced
155 * as part of the graph. They are augmented with information about their 'kind'
156 * (input/output/constant/intermediate), their default memory home (which memory they
157 * are expected to be allocated in) and a compression ratio where applicable (weights
158 * for instance are compressed).
159 */
160class Tensor : public ObjectRef {
161 public:
162 Tensor(const std::vector<int>& shape, DataType dtype, bool is_constant, float compression_ratio);
163
164 TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(Tensor, ObjectRef, TensorNode);
165};
166
167/*! \brief Node to represent a Part */
168class PartNode : public Object {
169 public:
170 virtual void VisitAttrs(AttrVisitor* v);
171
172 /*! \return The TE subgraph represented by the Part */
173 const TESubgraph GetSubgraph() const { return subgraph_; }
174 /*! \return The output->input propagators */
175 const std::vector<Propagator> GetPropagators() const { return propagators_; }
176 /*! \return Whether the Part is inline */
177 bool IsInline() const { return in_line_; }
178 /*! \return The input tensors */
179 const std::vector<Tensor> GetInputTensors() const { return input_tensors_; }
180 /*! \return The output tensor */
181 const Tensor GetOutputTensor() const { return output_tensor_; }
182
183 /*! \brief Add a producer of the tensor */
184 void SetInput(uint64_t input_index, const Tensor& input_tensor);
185 /*! \brief Add a consumer of the tensor */
186 void SetOutput(const Tensor& output_tensor) { output_tensor_ = output_tensor; }
187 /*!
188 * \brief Calculate the input stripe configs for a given output stripe config using the
189 * Propagators. \param output_stripe_config The output stripe config to propagate. \return The
190 * calculated input stripe configs.
191 */
192 std::vector<StripeConfig> CalculateInputStripeConfigs(const StripeConfig& output_stripe_config);
193 /*!
194 * \brief Get the preferred alignment in each axis for a stripe of the Part.
195 * \note This is used to bias the selection of StripeConfigs towards those that are integer
196 * multiples of a tensor intrinsic used to compute the Part.
197 */
198 virtual const std::vector<int> GetStripeAlignHint() const;
199 /*!
200 * \brief Get the performance information for a given output stripe config.
201 * \param output_stripe_config The output stripe config to compute the performance for.
202 * \param is_rolling Whether the output config should be computed as a rolling buffer.
203 * \return The performance information containing the compute cycles and read/write bytes.
204 */
205 virtual const PerformanceInfo GetPerformanceInfo(const StripeConfig& output_stripe_config,
206 BufferMode buffer_mode) = 0;
207
208 static constexpr const char* _type_key = "contrib.ethosu.cascader.Part";
209 TVM_DECLARE_BASE_OBJECT_INFO(PartNode, Object);
210
211 protected:
212 friend class Part;
213
214 /*! \brief The Tensor Expression subgraph represented by the Part */
215 TESubgraph subgraph_;
216 /*! \brief The output->input propagators */
217 std::vector<Propagator> propagators_;
218 /*! \brief Whether the Part is computed in-line */
219 bool in_line_;
220 /*! \brief The input tensors */
221 std::vector<Tensor> input_tensors_;
222 /*! \brief The output tensor */
223 Tensor output_tensor_;
224};
225
226/*!
227 * \brief A class to describe a Part in a Cascader graph.
228 * \note Cascader graphs consist of two object types: Tensors and Parts. This class
229 * defines the Parts which represent the operations which produce and consume Tensors.
230 *
231 * A Part can represent one or more Tensor Expression compute operations but the subgraph
232 * it represents must have only a single output. Multiple TE compute operations should be
233 * represented under a single Part if the intermediate tensors between them won't be
234 * realized. This is a common pattern in Ethos-U where a sequence of TE compute operations
235 * are used to represent a single hardware primitive operation.
236 *
237 * Parts contain a Propagator per input which describes how a given output stripe config
238 * should be transformed into an input stripe config for each input. This is essential
239 * to analyse both the performance of Parts (determining the data that will be read) and
240 * in cascading Parts together (determining compatible stripe config choices).
241 *
242 * A Part can be marked as 'in_line', in which case it is assumed that it doesn't need to
243 * allocate space for its output tensor.
244 *
245 * This is only a base class and concrete Parts must be derived from it, implementing a
246 * function to model the performance of the Part as well as to determine its compute
247 * quantum.
248 */
249class Part : public ObjectRef {
250 public:
251 TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(Part, ObjectRef, PartNode);
252};
253
254/*! \brief Node to represent a CascaderGraph */
255class CascaderGraphNode : public Object {
256 public:
257 CascaderGraphNode() {}
258 CascaderGraphNode(std::vector<Tensor> input_tensors, std::vector<Tensor> output_tensors);
259
260 void VisitAttrs(AttrVisitor* v);
261
262 /*! \return The input Tensors of the CascaderGraph */
263 std::vector<Tensor> GetInputTensors() const { return input_tensors_; }
264 /*! \return The output Tensors of the CascaderGraph */
265 std::vector<Tensor> GetOutputTensors() const { return output_tensors_; }
266 /*! \return The order of the Parts in the CascaderGraph */
267 std::vector<Part> GetPartOrder() const { return part_order_; }
268 /*!
269 * \brief Get the ID of a Part in the CascaderGraph.
270 * \param part The Part to get the ID of.
271 * \return The ID of the Part in the CascaderGraph.
272 * \note Each Part is given a unique ID within the CascaderGraph.
273 */
274 int GetPartID(const Part& part) const;
275 /*!
276 * \brief Get the ID of a Tensor in the CascaderGraph.
277 * \param tensor The Tensor to get the ID of.
278 * \return The ID of the Tensor in the CascaderGraph.
279 * \note Each Tensor is given a unique ID within the CascaderGraph.
280 */
281 int GetTensorID(const Tensor& tensor) const;
282
283 static constexpr const char* _type_key = "contrib.ethosu.cascader.CascaderGraph";
284 TVM_DECLARE_FINAL_OBJECT_INFO(CascaderGraphNode, Object);
285
286 protected:
287 /*!
288 * \brief Initialize the CascaderGraph by defining a topological ordering.
289 * \note This will traverse the Parts and Tensors using a depth-first
290 * visiting pattern and use the traversal order to initialize both the
291 * 'order' vectors and the ID maps. The order vectors define the ordering
292 * that the cascader expects the CascaderGraph to be executed in, but reversed.
293 * The ID maps assign a unique integer ID to each Part and Tensor corresponding
294 * to their position in their respective order vector.
295 */
296 void Init_();
297
298 /*! \brief The input Tensors of the CascaderGraph */
299 std::vector<Tensor> input_tensors_;
300 /*! \brief The output Tensors of the CascaderGraph */
301 std::vector<Tensor> output_tensors_;
302 /*! \brief The order of the Tensors in the CascaderGraph */
303 std::vector<Tensor> tensor_order_;
304 /*! \brief The order of the Parts in the CascaderGraph */
305 std::vector<Part> part_order_;
306 /*! \brief A map between Parts in the CascaderGraph and their IDs */
307 std::unordered_map<Part, int, ObjectPtrHash, ObjectPtrEqual> part_id_map_;
308 /*! \brief A map between Tensors in the CascaderGraph and their IDs */
309 std::unordered_map<Tensor, int, ObjectPtrHash, ObjectPtrEqual> tensor_id_map_;
310};
311
312/*!
313 * \brief A class to describe a graph of Parts and Tensors used by the cascader.
314 * \note This class describes a graph consisting of two object types: Tensors and Parts.
315 * It defines a topological ordering on the graph such that each Part and Tensor has a
316 * position in the ordering. This ordering is used by the Plan and Proposal generation
317 * algorithms. It is also the ordering the Parts are expected to be executed in.
318 *
319 * In addition to defining an ordering, the Parts and Tensors are also all given unique
320 * IDs which they can be referred to by.
321 */
322class CascaderGraph : public ObjectRef {
323 public:
324 CascaderGraph(std::vector<Tensor> input_tensors, std::vector<Tensor> output_tensors);
325
326 TVM_DEFINE_OBJECT_REF_METHODS(CascaderGraph, ObjectRef, CascaderGraphNode);
327};
328
329} // namespace cascader
330} // namespace ethosu
331} // namespace contrib
332} // namespace tvm
333
334#endif // TVM_CONTRIB_ETHOSU_CASCADER_GRAPH_H_
335