1/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19#include <tvm/runtime/container/array.h>
20#include <tvm/runtime/container/map.h>
21#include <tvm/runtime/object.h>
22#include <tvm/runtime/registry.h>
23#include <tvm/support/parallel_for.h>
24
25#include <algorithm>
26#include <unordered_map>
27#include <unordered_set>
28#include <vector>
29
30#include "cascader_options.h"
31#include "graph.h"
32#include "pareto.h"
33#include "plan.h"
34#include "plan_generator.h"
35#include "proposal.h"
36#include "stripe_config.h"
37#include "tensor_config.h"
38
39namespace tvm {
40namespace contrib {
41namespace ethosu {
42namespace cascader {
43
44std::unordered_set<TensorConfig> GetPlanBoundaryConfigs(const Plan& plan) {
45 std::unordered_set<TensorConfig> boundary_configs;
46 for (const auto& config : plan->GetTensorConfigs()) {
47 if (config->GetState() == TensorConfigState::BOUNDARY) {
48 boundary_configs.insert(config);
49 }
50 }
51 return boundary_configs;
52}
53
54bool IsPlanCompatible(const Proposal& proposal, const std::vector<Part>& plan_part_group,
55 const std::unordered_set<TensorConfig>& plan_boundary_configs) {
56 // Check the Plan Part group is disjoint with the Proposal Part group
57 for (const auto& plan_part : plan_part_group) {
58 for (const auto& proposal_part : proposal->GetPartGroup()) {
59 if (plan_part == proposal_part) {
60 return false;
61 }
62 }
63 }
64 // If the Plan and Proposal disagree on the memory home of a Tensor, they
65 // are incompatible and can't be used to create a new Proposal
66 auto tensor_configs = proposal->GetInputTensorConfigs();
67 for (const auto& plan_config : plan_boundary_configs) {
68 if (tensor_configs.find(plan_config->GetTensor()) != tensor_configs.end()) {
69 auto proposal_config = tensor_configs.at(plan_config->GetTensor());
70 if (proposal_config->GetHomeRegion() != plan_config->GetHomeRegion()) {
71 return false;
72 }
73 }
74 }
75 return true;
76}
77
78std::unordered_map<Part, std::vector<Plan>, ObjectPtrHash, ObjectPtrEqual> CreatePlansByPart(
79 const std::unordered_map<std::vector<Part>, std::vector<Plan>>& plans_by_group,
80 const CascaderGraph& graph) {
81 std::unordered_map<Part, std::vector<Plan>, ObjectPtrHash, ObjectPtrEqual> plans_by_part;
82 for (const auto& it : plans_by_group) {
83 auto part_group = it.first;
84 auto plans = it.second;
85 int highest_index = 0;
86 Part& index_part = part_group.front();
87 // Determine the Part in the Part group with the highest ID - this will be used to index
88 // the Plans
89 for (const auto& part : part_group) {
90 int pid = graph->GetPartID(part);
91 if (pid >= highest_index) {
92 index_part = part;
93 highest_index = pid;
94 }
95 }
96 plans_by_part[index_part].insert(plans_by_part[index_part].begin(), plans.begin(), plans.end());
97 }
98 return plans_by_part;
99}
100
101Proposal AddPlanToProposal(const Proposal& proposal, const Plan& plan,
102 const std::unordered_set<TensorConfig>& plan_boundary_configs) {
103 std::vector<Plan> new_plans = proposal->GetPlans();
104 new_plans.push_back(plan);
105 TensorConfigMap new_configs = proposal->GetInputTensorConfigs();
106 // Add input configs from the Plan if they're homed in the cascade region
107 for (const auto& config : plan_boundary_configs) {
108 if (config->GetHomeRegion() == proposal->GetCascadeRegion()) {
109 new_configs[config->GetTensor()] = config;
110 }
111 }
112 // Remove the Plan's output config from the new_configs if it's present because
113 // it won't be an input to the Proposal any more
114 if (new_configs.find(plan->GetOutputConfig()->GetTensor()) != new_configs.end()) {
115 new_configs.erase(plan->GetOutputConfig()->GetTensor());
116 }
117 // The updated memory usage is the memory required to run the Plan plus the
118 // non-local memory that's required in the Proposal at that point in time
119 int new_memory_usage = plan->GetMemoryUsage();
120 for (const auto& it : new_configs) {
121 if (plan_boundary_configs.find(it.second) == plan_boundary_configs.end()) {
122 new_memory_usage += it.first->GetSize();
123 }
124 }
125 new_memory_usage = std::max(new_memory_usage, proposal->GetMemoryUsage());
126 int new_cycles = proposal->GetCycles() + plan->GetCycles();
127 std::vector<Part> new_part_group = proposal->GetPartGroup();
128 new_part_group.insert(new_part_group.end(), plan->GetPartGroup().begin(),
129 plan->GetPartGroup().end());
130 std::sort(new_part_group.begin(), new_part_group.end());
131 return Proposal(proposal->GetGraph(), new_part_group, new_plans, new_configs,
132 proposal->GetCascadeRegion(), new_memory_usage, new_cycles);
133}
134
135std::vector<Proposal> GeneratePartialProposals(
136 const CascaderGraph& graph, const HomeMap& home_map, const CascaderOptions options,
137 const std::unordered_map<Part, std::vector<Plan>, ObjectPtrHash, ObjectPtrEqual>& plans_by_part,
138 const std::vector<Part>& partial_proposal_group,
139 std::unordered_map<std::vector<Part>, std::vector<Proposal>>* proposals_by_group) {
140 if (proposals_by_group->find(partial_proposal_group) != proposals_by_group->end()) {
141 return proposals_by_group->at(partial_proposal_group);
142 }
143 if (partial_proposal_group.size() == 0) {
144 (*proposals_by_group)[partial_proposal_group] =
145 std::vector<Proposal>{Proposal(graph, std::vector<Part>(), std::vector<Plan>(),
146 TensorConfigMap(), options->cascade_region, 0, 0)};
147 } else {
148 Part part = partial_proposal_group.back();
149 const auto& plans = plans_by_part.at(part);
150 for (const auto& plan : plans) {
151 if (plan->GetInteriorRegion() == options->cascade_region) {
152 // Doing this isn't very efficient, but it improves the performance of the Plan
153 // generator
154 std::unordered_set<TensorConfig> plan_boundary_configs = GetPlanBoundaryConfigs(plan);
155 // The residual_proposal_group is a Part group indicating the Parts which aren't
156 // covered by the current Plan. It's the group for which we must find 'residual
157 // Proposals', meaning Proposals which cover the rest of the CascaderGraph assuming we
158 // pick the current Plan.
159 std::vector<Part> residual_proposal_group;
160 std::copy_if(partial_proposal_group.begin(), partial_proposal_group.end(),
161 std::back_inserter(residual_proposal_group), [&plan](Part value) {
162 return std::find(plan->GetPartGroup().begin(), plan->GetPartGroup().end(),
163 value) == plan->GetPartGroup().end();
164 });
165 // std::sort(residual_proposal_group.begin(), residual_proposal_group.end());
166 const auto& residual_proposals = GeneratePartialProposals(
167 graph, home_map, options, plans_by_part, residual_proposal_group, proposals_by_group);
168 auto plan_output_tensor = plan->GetOutputConfig()->GetTensor();
169 ICHECK_LE(plan_output_tensor->GetProducers().size(), 1)
170 << "All tensors must have at most one producer.";
171 for (const auto& residual_proposal : residual_proposals) {
172 if (IsPlanCompatible(residual_proposal, plan->GetPartGroup(), plan_boundary_configs)) {
173 (*proposals_by_group)[partial_proposal_group].push_back(
174 AddPlanToProposal(residual_proposal, plan, plan_boundary_configs));
175 }
176 }
177 }
178 }
179 (*proposals_by_group)[partial_proposal_group] =
180 ParetoCullProposals(proposals_by_group->at(partial_proposal_group), options->max_proposals,
181 options->disable_pareto_proposals);
182 }
183 return proposals_by_group->at(partial_proposal_group);
184}
185
186std::vector<Proposal> GenerateProposals(const CascaderGraph& graph, const HomeMap& home_map,
187 const CascaderOptions options) {
188 // First generate all the Pareto optimal Plans for the CascaderGraph
189 auto plans_by_group = GenerateGraphPlans(graph, home_map, options);
190 // First create a map between every Part in the CascaderGraph and all the Plans for which that
191 // Part is the lowest ID Part within the Plan's Part group
192 std::unordered_map<Part, std::vector<Plan>, ObjectPtrHash, ObjectPtrEqual> plans_by_part =
193 CreatePlansByPart(plans_by_group, graph);
194 // The Part group that partial Proposals are current being generated for
195 std::vector<Part> partial_proposal_group = graph->GetPartOrder();
196 // A map of Proposals indexed by the Part group they cover
197 std::unordered_map<std::vector<Part>, std::vector<Proposal>> proposals_by_group;
198 return GeneratePartialProposals(graph, home_map, options, plans_by_part, partial_proposal_group,
199 &proposals_by_group);
200}
201
202TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.GenerateProposals")
203 .set_body_typed([](CascaderGraph graph, Map<Tensor, Array<MemoryRegion>> home_map,
204 CascaderOptions options) {
205 std::unordered_map<Tensor, std::vector<MemoryRegion>, ObjectPtrHash, ObjectPtrEqual>
206 mhome_map;
207 for (const auto& it : home_map) {
208 std::vector<MemoryRegion> home_regions;
209 for (const auto& i : it.second) {
210 home_regions.push_back(i);
211 }
212 mhome_map[it.first] = home_regions;
213 }
214 return Array<Proposal>(GenerateProposals(graph, mhome_map, options));
215 });
216
217} // namespace cascader
218} // namespace ethosu
219} // namespace contrib
220} // namespace tvm
221