1 | // Copyright (c) 2015-2016 The Khronos Group Inc. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #ifndef SOURCE_CFA_H_ |
16 | #define SOURCE_CFA_H_ |
17 | |
18 | #include <algorithm> |
19 | #include <cassert> |
20 | #include <cstdint> |
21 | #include <functional> |
22 | #include <map> |
23 | #include <unordered_map> |
24 | #include <unordered_set> |
25 | #include <utility> |
26 | #include <vector> |
27 | |
28 | namespace spvtools { |
29 | |
// Control Flow Analysis of control flow graphs of basic block nodes |BB|.
//
// Every member function is static; the class only groups the algorithms and
// the helper types they share. As used by the definitions in this header,
// |BB| must provide an id() member whose result can be used as a uint32_t.
template <class BB>
class CFA {
  using bb_ptr = BB*;
  using cbb_ptr = const BB*;
  using bb_iter = typename std::vector<BB*>::const_iterator;
  /// Callback mapping a block to a pointer to its neighbouring blocks
  /// (successors or predecessors, depending on how the caller uses it).
  using get_blocks_func = std::function<const std::vector<BB*>*(const BB*)>;

  /// One entry of the explicit stack used by DepthFirstTraversal.
  struct block_info {
    cbb_ptr block;  ///< pointer to the block
    bb_iter iter;   ///< Iterator to the current child node being processed
  };

  /// Returns true if a block with @p id is found in the @p work_list vector
  ///
  /// @param[in] work_list  Stack of blocks on the current path of the depth
  ///                       first traversal of the CFG
  /// @param[in] id         The ID of the block being checked
  ///
  /// @return true if the edge work_list.back().block->id() => id is a back-edge
  static bool FindInWorkList(const std::vector<block_info>& work_list,
                             uint32_t id);

 public:
  /// @brief Depth first traversal starting from the \p entry BasicBlock
  ///
  /// This function performs a depth first traversal from the \p entry
  /// BasicBlock and calls the pre/postorder functions when it needs to process
  /// the node in pre order, post order. It also calls the backedge function
  /// when a back edge is encountered.
  ///
  /// @param[in] entry          The root BasicBlock of a CFG
  /// @param[in] successor_func A function which will return a pointer to the
  ///                           successor nodes
  /// @param[in] preorder       A function that will be called for every block
  ///                           in a CFG following preorder traversal semantics
  /// @param[in] postorder      A function that will be called for every block
  ///                           in a CFG following postorder traversal
  ///                           semantics
  /// @param[in] backedge       A function that will be called when a backedge
  ///                           is encountered during a traversal; it receives
  ///                           the source and the target of the edge
  /// @param[in] terminal       A function that will be called to determine if
  ///                           the search should stop at the given node.
  ///
  /// NOTE: @p successor_func must return a pointer to a collection such that
  /// iterators to that collection remain valid for the lifetime of the
  /// algorithm; the traversal stores such iterators on its work list.
  static void DepthFirstTraversal(
      const BB* entry, get_blocks_func successor_func,
      std::function<void(cbb_ptr)> preorder,
      std::function<void(cbb_ptr)> postorder,
      std::function<void(cbb_ptr, cbb_ptr)> backedge,
      std::function<bool(cbb_ptr)> terminal);

  /// @brief Calculates dominator edges for a set of blocks
  ///
  /// Computes dominators using the algorithm of Cooper, Harvey, and Kennedy
  /// "A Simple, Fast Dominance Algorithm", 2001.
  ///
  /// The algorithm assumes there is a unique root node (a node without
  /// predecessors), and it is therefore at the end of the postorder vector.
  ///
  /// @param[in] postorder        A vector of blocks in post order traversal
  ///                             order in a CFG
  /// @param[in] predecessor_func Function used to get the predecessor nodes of
  ///                             a block
  ///
  /// @return the dominator tree of the graph, as a vector of pairs of nodes.
  /// The first node in the pair is a node in the graph. The second node in the
  /// pair is its immediate dominator in the sense of Cooper et.al., where a
  /// block without predecessors (such as the root node) is its own immediate
  /// dominator. The pairs are sorted by post order index so that the result
  /// is deterministic.
  static std::vector<std::pair<BB*, BB*>> CalculateDominators(
      const std::vector<cbb_ptr>& postorder, get_blocks_func predecessor_func);

  // Computes a minimal set of root nodes required to traverse, in the forward
  // direction, the CFG represented by the given vector of blocks, and successor
  // and predecessor functions.  Blocks without predecessors are selected
  // first; any block still unvisited afterwards is then added in input order.
  // When considering adding two nodes, each having predecessors, favour using
  // the one that appears earlier on the input blocks list.
  static std::vector<BB*> TraversalRoots(const std::vector<BB*>& blocks,
                                         get_blocks_func succ_func,
                                         get_blocks_func pred_func);

  // Connects a pseudo entry block and a pseudo exit block to the CFG described
  // by |ordered_blocks|, |succ_func| and |pred_func|.
  //
  // The forward traversal roots of |ordered_blocks| become the successors of
  // |pseudo_entry_block|, and the backward traversal roots of the reversed
  // block list become the predecessors of |pseudo_exit_block|.  For every
  // such root the pseudo block is placed in front of a copy of the block's
  // real predecessors (respectively successors).  Only the two pseudo blocks
  // and those roots are written to |augmented_successors_map| and
  // |augmented_predecessors_map|; both pointers are dereferenced
  // unconditionally.
  static void ComputeAugmentedCFG(
      std::vector<BB*>& ordered_blocks, BB* pseudo_entry_block,
      BB* pseudo_exit_block,
      std::unordered_map<const BB*, std::vector<BB*>>* augmented_successors_map,
      std::unordered_map<const BB*, std::vector<BB*>>*
          augmented_predecessors_map,
      get_blocks_func succ_func, get_blocks_func pred_func);
};
129 | |
130 | template <class BB> |
131 | bool CFA<BB>::FindInWorkList(const std::vector<block_info>& work_list, |
132 | uint32_t id) { |
133 | for (const auto& b : work_list) { |
134 | if (b.block->id() == id) return true; |
135 | } |
136 | return false; |
137 | } |
138 | |
139 | template <class BB> |
140 | void CFA<BB>::DepthFirstTraversal( |
141 | const BB* entry, get_blocks_func successor_func, |
142 | std::function<void(cbb_ptr)> preorder, |
143 | std::function<void(cbb_ptr)> postorder, |
144 | std::function<void(cbb_ptr, cbb_ptr)> backedge, |
145 | std::function<bool(cbb_ptr)> terminal) { |
146 | std::unordered_set<uint32_t> processed; |
147 | |
148 | /// NOTE: work_list is the sequence of nodes from the root node to the node |
149 | /// being processed in the traversal |
150 | std::vector<block_info> work_list; |
151 | work_list.reserve(10); |
152 | |
153 | work_list.push_back({entry, std::begin(*successor_func(entry))}); |
154 | preorder(entry); |
155 | processed.insert(entry->id()); |
156 | |
157 | while (!work_list.empty()) { |
158 | block_info& top = work_list.back(); |
159 | if (terminal(top.block) || top.iter == end(*successor_func(top.block))) { |
160 | postorder(top.block); |
161 | work_list.pop_back(); |
162 | } else { |
163 | BB* child = *top.iter; |
164 | top.iter++; |
165 | if (FindInWorkList(work_list, child->id())) { |
166 | backedge(top.block, child); |
167 | } |
168 | if (processed.count(child->id()) == 0) { |
169 | preorder(child); |
170 | work_list.emplace_back( |
171 | block_info{child, std::begin(*successor_func(child))}); |
172 | processed.insert(child->id()); |
173 | } |
174 | } |
175 | } |
176 | } |
177 | |
178 | template <class BB> |
179 | std::vector<std::pair<BB*, BB*>> CFA<BB>::CalculateDominators( |
180 | const std::vector<cbb_ptr>& postorder, get_blocks_func predecessor_func) { |
181 | struct block_detail { |
182 | size_t dominator; ///< The index of blocks's dominator in post order array |
183 | size_t postorder_index; ///< The index of the block in the post order array |
184 | }; |
185 | const size_t undefined_dom = postorder.size(); |
186 | |
187 | std::unordered_map<cbb_ptr, block_detail> idoms; |
188 | for (size_t i = 0; i < postorder.size(); i++) { |
189 | idoms[postorder[i]] = {undefined_dom, i}; |
190 | } |
191 | idoms[postorder.back()].dominator = idoms[postorder.back()].postorder_index; |
192 | |
193 | bool changed = true; |
194 | while (changed) { |
195 | changed = false; |
196 | for (auto b = postorder.rbegin() + 1; b != postorder.rend(); ++b) { |
197 | const std::vector<BB*>& predecessors = *predecessor_func(*b); |
198 | // Find the first processed/reachable predecessor that is reachable |
199 | // in the forward traversal. |
200 | auto res = std::find_if(std::begin(predecessors), std::end(predecessors), |
201 | [&idoms, undefined_dom](BB* pred) { |
202 | return idoms.count(pred) && |
203 | idoms[pred].dominator != undefined_dom; |
204 | }); |
205 | if (res == end(predecessors)) continue; |
206 | const BB* idom = *res; |
207 | size_t idom_idx = idoms[idom].postorder_index; |
208 | |
209 | // all other predecessors |
210 | for (const auto* p : predecessors) { |
211 | if (idom == p) continue; |
212 | // Only consider nodes reachable in the forward traversal. |
213 | // Otherwise the intersection doesn't make sense and will never |
214 | // terminate. |
215 | if (!idoms.count(p)) continue; |
216 | if (idoms[p].dominator != undefined_dom) { |
217 | size_t finger1 = idoms[p].postorder_index; |
218 | size_t finger2 = idom_idx; |
219 | while (finger1 != finger2) { |
220 | while (finger1 < finger2) { |
221 | finger1 = idoms[postorder[finger1]].dominator; |
222 | } |
223 | while (finger2 < finger1) { |
224 | finger2 = idoms[postorder[finger2]].dominator; |
225 | } |
226 | } |
227 | idom_idx = finger1; |
228 | } |
229 | } |
230 | if (idoms[*b].dominator != idom_idx) { |
231 | idoms[*b].dominator = idom_idx; |
232 | changed = true; |
233 | } |
234 | } |
235 | } |
236 | |
237 | std::vector<std::pair<bb_ptr, bb_ptr>> out; |
238 | for (auto idom : idoms) { |
239 | // NOTE: performing a const cast for convenient usage with |
240 | // UpdateImmediateDominators |
241 | out.push_back({const_cast<BB*>(std::get<0>(idom)), |
242 | const_cast<BB*>(postorder[std::get<1>(idom).dominator])}); |
243 | } |
244 | |
245 | // Sort by postorder index to generate a deterministic ordering of edges. |
246 | std::sort( |
247 | out.begin(), out.end(), |
248 | [&idoms](const std::pair<bb_ptr, bb_ptr>& lhs, |
249 | const std::pair<bb_ptr, bb_ptr>& rhs) { |
250 | assert(lhs.first); |
251 | assert(lhs.second); |
252 | assert(rhs.first); |
253 | assert(rhs.second); |
254 | auto lhs_indices = std::make_pair(idoms[lhs.first].postorder_index, |
255 | idoms[lhs.second].postorder_index); |
256 | auto rhs_indices = std::make_pair(idoms[rhs.first].postorder_index, |
257 | idoms[rhs.second].postorder_index); |
258 | return lhs_indices < rhs_indices; |
259 | }); |
260 | return out; |
261 | } |
262 | |
263 | template <class BB> |
264 | std::vector<BB*> CFA<BB>::TraversalRoots(const std::vector<BB*>& blocks, |
265 | get_blocks_func succ_func, |
266 | get_blocks_func pred_func) { |
267 | // The set of nodes which have been visited from any of the roots so far. |
268 | std::unordered_set<const BB*> visited; |
269 | |
270 | auto mark_visited = [&visited](const BB* b) { visited.insert(b); }; |
271 | auto ignore_block = [](const BB*) {}; |
272 | auto ignore_blocks = [](const BB*, const BB*) {}; |
273 | auto no_terminal_blocks = [](const BB*) { return false; }; |
274 | |
275 | auto traverse_from_root = [&mark_visited, &succ_func, &ignore_block, |
276 | &ignore_blocks, |
277 | &no_terminal_blocks](const BB* entry) { |
278 | DepthFirstTraversal(entry, succ_func, mark_visited, ignore_block, |
279 | ignore_blocks, no_terminal_blocks); |
280 | }; |
281 | |
282 | std::vector<BB*> result; |
283 | |
284 | // First collect nodes without predecessors. |
285 | for (auto block : blocks) { |
286 | if (pred_func(block)->empty()) { |
287 | assert(visited.count(block) == 0 && "Malformed graph!" ); |
288 | result.push_back(block); |
289 | traverse_from_root(block); |
290 | } |
291 | } |
292 | |
293 | // Now collect other stranded nodes. These must be in unreachable cycles. |
294 | for (auto block : blocks) { |
295 | if (visited.count(block) == 0) { |
296 | result.push_back(block); |
297 | traverse_from_root(block); |
298 | } |
299 | } |
300 | |
301 | return result; |
302 | } |
303 | |
304 | template <class BB> |
305 | void CFA<BB>::ComputeAugmentedCFG( |
306 | std::vector<BB*>& ordered_blocks, BB* pseudo_entry_block, |
307 | BB* pseudo_exit_block, |
308 | std::unordered_map<const BB*, std::vector<BB*>>* augmented_successors_map, |
309 | std::unordered_map<const BB*, std::vector<BB*>>* augmented_predecessors_map, |
310 | get_blocks_func succ_func, get_blocks_func pred_func) { |
311 | // Compute the successors of the pseudo-entry block, and |
312 | // the predecessors of the pseudo exit block. |
313 | auto sources = TraversalRoots(ordered_blocks, succ_func, pred_func); |
314 | |
315 | // For the predecessor traversals, reverse the order of blocks. This |
316 | // will affect the post-dominance calculation as follows: |
317 | // - Suppose you have blocks A and B, with A appearing before B in |
318 | // the list of blocks. |
319 | // - Also, A branches only to B, and B branches only to A. |
320 | // - We want to compute A as dominating B, and B as post-dominating B. |
321 | // By using reversed blocks for predecessor traversal roots discovery, |
322 | // we'll add an edge from B to the pseudo-exit node, rather than from A. |
323 | // All this is needed to correctly process the dominance/post-dominance |
324 | // constraint when A is a loop header that points to itself as its |
325 | // own continue target, and B is the latch block for the loop. |
326 | std::vector<BB*> reversed_blocks(ordered_blocks.rbegin(), |
327 | ordered_blocks.rend()); |
328 | auto sinks = TraversalRoots(reversed_blocks, pred_func, succ_func); |
329 | |
330 | // Wire up the pseudo entry block. |
331 | (*augmented_successors_map)[pseudo_entry_block] = sources; |
332 | for (auto block : sources) { |
333 | auto& augmented_preds = (*augmented_predecessors_map)[block]; |
334 | const auto preds = pred_func(block); |
335 | augmented_preds.reserve(1 + preds->size()); |
336 | augmented_preds.push_back(pseudo_entry_block); |
337 | augmented_preds.insert(augmented_preds.end(), preds->begin(), preds->end()); |
338 | } |
339 | |
340 | // Wire up the pseudo exit block. |
341 | (*augmented_predecessors_map)[pseudo_exit_block] = sinks; |
342 | for (auto block : sinks) { |
343 | auto& augmented_succ = (*augmented_successors_map)[block]; |
344 | const auto succ = succ_func(block); |
345 | augmented_succ.reserve(1 + succ->size()); |
346 | augmented_succ.push_back(pseudo_exit_block); |
347 | augmented_succ.insert(augmented_succ.end(), succ->begin(), succ->end()); |
348 | } |
349 | } |
350 | |
351 | } // namespace spvtools |
352 | |
353 | #endif // SOURCE_CFA_H_ |
354 | |