1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one |
3 | * or more contributor license agreements. See the NOTICE file |
4 | * distributed with this work for additional information |
5 | * regarding copyright ownership. The ASF licenses this file |
6 | * to you under the Apache License, Version 2.0 (the |
7 | * "License"); you may not use this file except in compliance |
8 | * with the License. You may obtain a copy of the License at |
9 | * |
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
11 | * |
12 | * Unless required by applicable law or agreed to in writing, |
13 | * software distributed under the License is distributed on an |
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
15 | * KIND, either express or implied. See the License for the |
16 | * specific language governing permissions and limitations |
17 | * under the License. |
18 | */ |
19 | #include <tvm/runtime/container/array.h> |
20 | #include <tvm/runtime/container/map.h> |
21 | #include <tvm/runtime/object.h> |
22 | #include <tvm/runtime/registry.h> |
23 | #include <tvm/support/parallel_for.h> |
24 | |
25 | #include <algorithm> |
26 | #include <unordered_map> |
27 | #include <unordered_set> |
28 | #include <vector> |
29 | |
30 | #include "cascader_options.h" |
31 | #include "graph.h" |
32 | #include "pareto.h" |
33 | #include "plan.h" |
34 | #include "plan_generator.h" |
35 | #include "proposal.h" |
36 | #include "stripe_config.h" |
37 | #include "tensor_config.h" |
38 | |
39 | namespace tvm { |
40 | namespace contrib { |
41 | namespace ethosu { |
42 | namespace cascader { |
43 | |
44 | std::unordered_set<TensorConfig> GetPlanBoundaryConfigs(const Plan& plan) { |
45 | std::unordered_set<TensorConfig> boundary_configs; |
46 | for (const auto& config : plan->GetTensorConfigs()) { |
47 | if (config->GetState() == TensorConfigState::BOUNDARY) { |
48 | boundary_configs.insert(config); |
49 | } |
50 | } |
51 | return boundary_configs; |
52 | } |
53 | |
54 | bool IsPlanCompatible(const Proposal& proposal, const std::vector<Part>& plan_part_group, |
55 | const std::unordered_set<TensorConfig>& plan_boundary_configs) { |
56 | // Check the Plan Part group is disjoint with the Proposal Part group |
57 | for (const auto& plan_part : plan_part_group) { |
58 | for (const auto& proposal_part : proposal->GetPartGroup()) { |
59 | if (plan_part == proposal_part) { |
60 | return false; |
61 | } |
62 | } |
63 | } |
64 | // If the Plan and Proposal disagree on the memory home of a Tensor, they |
65 | // are incompatible and can't be used to create a new Proposal |
66 | auto tensor_configs = proposal->GetInputTensorConfigs(); |
67 | for (const auto& plan_config : plan_boundary_configs) { |
68 | if (tensor_configs.find(plan_config->GetTensor()) != tensor_configs.end()) { |
69 | auto proposal_config = tensor_configs.at(plan_config->GetTensor()); |
70 | if (proposal_config->GetHomeRegion() != plan_config->GetHomeRegion()) { |
71 | return false; |
72 | } |
73 | } |
74 | } |
75 | return true; |
76 | } |
77 | |
78 | std::unordered_map<Part, std::vector<Plan>, ObjectPtrHash, ObjectPtrEqual> CreatePlansByPart( |
79 | const std::unordered_map<std::vector<Part>, std::vector<Plan>>& plans_by_group, |
80 | const CascaderGraph& graph) { |
81 | std::unordered_map<Part, std::vector<Plan>, ObjectPtrHash, ObjectPtrEqual> plans_by_part; |
82 | for (const auto& it : plans_by_group) { |
83 | auto part_group = it.first; |
84 | auto plans = it.second; |
85 | int highest_index = 0; |
86 | Part& index_part = part_group.front(); |
87 | // Determine the Part in the Part group with the highest ID - this will be used to index |
88 | // the Plans |
89 | for (const auto& part : part_group) { |
90 | int pid = graph->GetPartID(part); |
91 | if (pid >= highest_index) { |
92 | index_part = part; |
93 | highest_index = pid; |
94 | } |
95 | } |
96 | plans_by_part[index_part].insert(plans_by_part[index_part].begin(), plans.begin(), plans.end()); |
97 | } |
98 | return plans_by_part; |
99 | } |
100 | |
101 | Proposal AddPlanToProposal(const Proposal& proposal, const Plan& plan, |
102 | const std::unordered_set<TensorConfig>& plan_boundary_configs) { |
103 | std::vector<Plan> new_plans = proposal->GetPlans(); |
104 | new_plans.push_back(plan); |
105 | TensorConfigMap new_configs = proposal->GetInputTensorConfigs(); |
106 | // Add input configs from the Plan if they're homed in the cascade region |
107 | for (const auto& config : plan_boundary_configs) { |
108 | if (config->GetHomeRegion() == proposal->GetCascadeRegion()) { |
109 | new_configs[config->GetTensor()] = config; |
110 | } |
111 | } |
112 | // Remove the Plan's output config from the new_configs if it's present because |
113 | // it won't be an input to the Proposal any more |
114 | if (new_configs.find(plan->GetOutputConfig()->GetTensor()) != new_configs.end()) { |
115 | new_configs.erase(plan->GetOutputConfig()->GetTensor()); |
116 | } |
117 | // The updated memory usage is the memory required to run the Plan plus the |
118 | // non-local memory that's required in the Proposal at that point in time |
119 | int new_memory_usage = plan->GetMemoryUsage(); |
120 | for (const auto& it : new_configs) { |
121 | if (plan_boundary_configs.find(it.second) == plan_boundary_configs.end()) { |
122 | new_memory_usage += it.first->GetSize(); |
123 | } |
124 | } |
125 | new_memory_usage = std::max(new_memory_usage, proposal->GetMemoryUsage()); |
126 | int new_cycles = proposal->GetCycles() + plan->GetCycles(); |
127 | std::vector<Part> new_part_group = proposal->GetPartGroup(); |
128 | new_part_group.insert(new_part_group.end(), plan->GetPartGroup().begin(), |
129 | plan->GetPartGroup().end()); |
130 | std::sort(new_part_group.begin(), new_part_group.end()); |
131 | return Proposal(proposal->GetGraph(), new_part_group, new_plans, new_configs, |
132 | proposal->GetCascadeRegion(), new_memory_usage, new_cycles); |
133 | } |
134 | |
135 | std::vector<Proposal> GeneratePartialProposals( |
136 | const CascaderGraph& graph, const HomeMap& home_map, const CascaderOptions options, |
137 | const std::unordered_map<Part, std::vector<Plan>, ObjectPtrHash, ObjectPtrEqual>& plans_by_part, |
138 | const std::vector<Part>& partial_proposal_group, |
139 | std::unordered_map<std::vector<Part>, std::vector<Proposal>>* proposals_by_group) { |
140 | if (proposals_by_group->find(partial_proposal_group) != proposals_by_group->end()) { |
141 | return proposals_by_group->at(partial_proposal_group); |
142 | } |
143 | if (partial_proposal_group.size() == 0) { |
144 | (*proposals_by_group)[partial_proposal_group] = |
145 | std::vector<Proposal>{Proposal(graph, std::vector<Part>(), std::vector<Plan>(), |
146 | TensorConfigMap(), options->cascade_region, 0, 0)}; |
147 | } else { |
148 | Part part = partial_proposal_group.back(); |
149 | const auto& plans = plans_by_part.at(part); |
150 | for (const auto& plan : plans) { |
151 | if (plan->GetInteriorRegion() == options->cascade_region) { |
152 | // Doing this isn't very efficient, but it improves the performance of the Plan |
153 | // generator |
154 | std::unordered_set<TensorConfig> plan_boundary_configs = GetPlanBoundaryConfigs(plan); |
155 | // The residual_proposal_group is a Part group indicating the Parts which aren't |
156 | // covered by the current Plan. It's the group for which we must find 'residual |
157 | // Proposals', meaning Proposals which cover the rest of the CascaderGraph assuming we |
158 | // pick the current Plan. |
159 | std::vector<Part> residual_proposal_group; |
160 | std::copy_if(partial_proposal_group.begin(), partial_proposal_group.end(), |
161 | std::back_inserter(residual_proposal_group), [&plan](Part value) { |
162 | return std::find(plan->GetPartGroup().begin(), plan->GetPartGroup().end(), |
163 | value) == plan->GetPartGroup().end(); |
164 | }); |
165 | // std::sort(residual_proposal_group.begin(), residual_proposal_group.end()); |
166 | const auto& residual_proposals = GeneratePartialProposals( |
167 | graph, home_map, options, plans_by_part, residual_proposal_group, proposals_by_group); |
168 | auto plan_output_tensor = plan->GetOutputConfig()->GetTensor(); |
169 | ICHECK_LE(plan_output_tensor->GetProducers().size(), 1) |
170 | << "All tensors must have at most one producer." ; |
171 | for (const auto& residual_proposal : residual_proposals) { |
172 | if (IsPlanCompatible(residual_proposal, plan->GetPartGroup(), plan_boundary_configs)) { |
173 | (*proposals_by_group)[partial_proposal_group].push_back( |
174 | AddPlanToProposal(residual_proposal, plan, plan_boundary_configs)); |
175 | } |
176 | } |
177 | } |
178 | } |
179 | (*proposals_by_group)[partial_proposal_group] = |
180 | ParetoCullProposals(proposals_by_group->at(partial_proposal_group), options->max_proposals, |
181 | options->disable_pareto_proposals); |
182 | } |
183 | return proposals_by_group->at(partial_proposal_group); |
184 | } |
185 | |
186 | std::vector<Proposal> GenerateProposals(const CascaderGraph& graph, const HomeMap& home_map, |
187 | const CascaderOptions options) { |
188 | // First generate all the Pareto optimal Plans for the CascaderGraph |
189 | auto plans_by_group = GenerateGraphPlans(graph, home_map, options); |
190 | // First create a map between every Part in the CascaderGraph and all the Plans for which that |
191 | // Part is the lowest ID Part within the Plan's Part group |
192 | std::unordered_map<Part, std::vector<Plan>, ObjectPtrHash, ObjectPtrEqual> plans_by_part = |
193 | CreatePlansByPart(plans_by_group, graph); |
194 | // The Part group that partial Proposals are current being generated for |
195 | std::vector<Part> partial_proposal_group = graph->GetPartOrder(); |
196 | // A map of Proposals indexed by the Part group they cover |
197 | std::unordered_map<std::vector<Part>, std::vector<Proposal>> proposals_by_group; |
198 | return GeneratePartialProposals(graph, home_map, options, plans_by_part, partial_proposal_group, |
199 | &proposals_by_group); |
200 | } |
201 | |
202 | TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.GenerateProposals" ) |
203 | .set_body_typed([](CascaderGraph graph, Map<Tensor, Array<MemoryRegion>> home_map, |
204 | CascaderOptions options) { |
205 | std::unordered_map<Tensor, std::vector<MemoryRegion>, ObjectPtrHash, ObjectPtrEqual> |
206 | mhome_map; |
207 | for (const auto& it : home_map) { |
208 | std::vector<MemoryRegion> home_regions; |
209 | for (const auto& i : it.second) { |
210 | home_regions.push_back(i); |
211 | } |
212 | mhome_map[it.first] = home_regions; |
213 | } |
214 | return Array<Proposal>(GenerateProposals(graph, mhome_map, options)); |
215 | }); |
216 | |
217 | } // namespace cascader |
218 | } // namespace ethosu |
219 | } // namespace contrib |
220 | } // namespace tvm |
221 | |