1 | //===- CGSCCPassManager.h - Call graph pass management ----------*- C++ -*-===// |
2 | // |
3 | // The LLVM Compiler Infrastructure |
4 | // |
5 | // This file is distributed under the University of Illinois Open Source |
6 | // License. See LICENSE.TXT for details. |
7 | // |
8 | //===----------------------------------------------------------------------===// |
9 | /// \file |
10 | /// |
11 | /// This header provides classes for managing passes over SCCs of the call |
12 | /// graph. These passes form an important component of LLVM's interprocedural |
13 | /// optimizations. Because they operate on the SCCs of the call graph, and they |
14 | /// traverse the graph in post-order, they can effectively do pair-wise |
15 | /// interprocedural optimizations for all call edges in the program while |
16 | /// incrementally refining it and improving the context of these pair-wise |
17 | /// optimizations. At each call site edge, the callee has already been |
18 | /// optimized as much as is possible. This in turn allows very accurate |
19 | /// analysis of it for IPO. |
20 | /// |
21 | /// A secondary more general goal is to be able to isolate optimization on |
22 | /// unrelated parts of the IR module. This is useful to ensure our |
23 | /// optimizations are principled and don't miss oportunities where refinement |
24 | /// of one part of the module influence transformations in another part of the |
25 | /// module. But this is also useful if we want to parallelize the optimizations |
26 | /// across common large module graph shapes which tend to be very wide and have |
27 | /// large regions of unrelated cliques. |
28 | /// |
29 | /// To satisfy these goals, we use the LazyCallGraph which provides two graphs |
30 | /// nested inside each other (and built lazily from the bottom-up): the call |
31 | /// graph proper, and a reference graph. The reference graph is super set of |
32 | /// the call graph and is a conservative approximation of what could through |
33 | /// scalar or CGSCC transforms *become* the call graph. Using this allows us to |
34 | /// ensure we optimize functions prior to them being introduced into the call |
35 | /// graph by devirtualization or other technique, and thus ensures that |
36 | /// subsequent pair-wise interprocedural optimizations observe the optimized |
37 | /// form of these functions. The (potentially transitive) reference |
38 | /// reachability used by the reference graph is a conservative approximation |
39 | /// that still allows us to have independent regions of the graph. |
40 | /// |
41 | /// FIXME: There is one major drawback of the reference graph: in its naive |
42 | /// form it is quadratic because it contains a distinct edge for each |
43 | /// (potentially indirect) reference, even if are all through some common |
44 | /// global table of function pointers. This can be fixed in a number of ways |
45 | /// that essentially preserve enough of the normalization. While it isn't |
46 | /// expected to completely preclude the usability of this, it will need to be |
47 | /// addressed. |
48 | /// |
49 | /// |
50 | /// All of these issues are made substantially more complex in the face of |
51 | /// mutations to the call graph while optimization passes are being run. When |
52 | /// mutations to the call graph occur we want to achieve two different things: |
53 | /// |
54 | /// - We need to update the call graph in-flight and invalidate analyses |
55 | /// cached on entities in the graph. Because of the cache-based analysis |
56 | /// design of the pass manager, it is essential to have stable identities for |
57 | /// the elements of the IR that passes traverse, and to invalidate any |
58 | /// analyses cached on these elements as the mutations take place. |
59 | /// |
60 | /// - We want to preserve the incremental and post-order traversal of the |
61 | /// graph even as it is refined and mutated. This means we want optimization |
62 | /// to observe the most refined form of the call graph and to do so in |
63 | /// post-order. |
64 | /// |
65 | /// To address this, the CGSCC manager uses both worklists that can be expanded |
66 | /// by passes which transform the IR, and provides invalidation tests to skip |
67 | /// entries that become dead. This extra data is provided to every SCC pass so |
68 | /// that it can carefully update the manager's traversal as the call graph |
69 | /// mutates. |
70 | /// |
71 | /// We also provide support for running function passes within the CGSCC walk, |
72 | /// and there we provide automatic update of the call graph including of the |
73 | /// pass manager to reflect call graph changes that fall out naturally as part |
74 | /// of scalar transformations. |
75 | /// |
76 | /// The patterns used to ensure the goals of post-order visitation of the fully |
77 | /// refined graph: |
78 | /// |
79 | /// 1) Sink toward the "bottom" as the graph is refined. This means that any |
80 | /// iteration continues in some valid post-order sequence after the mutation |
81 | /// has altered the structure. |
82 | /// |
83 | /// 2) Enqueue in post-order, including the current entity. If the current |
84 | /// entity's shape changes, it and everything after it in post-order needs |
85 | /// to be visited to observe that shape. |
86 | /// |
87 | //===----------------------------------------------------------------------===// |
88 | |
89 | #ifndef LLVM_ANALYSIS_CGSCCPASSMANAGER_H |
90 | #define LLVM_ANALYSIS_CGSCCPASSMANAGER_H |
91 | |
92 | #include "llvm/ADT/DenseSet.h" |
93 | #include "llvm/ADT/PriorityWorklist.h" |
94 | #include "llvm/ADT/STLExtras.h" |
95 | #include "llvm/ADT/SmallPtrSet.h" |
96 | #include "llvm/ADT/SmallVector.h" |
97 | #include "llvm/Analysis/LazyCallGraph.h" |
98 | #include "llvm/IR/CallSite.h" |
99 | #include "llvm/IR/Function.h" |
100 | #include "llvm/IR/InstIterator.h" |
101 | #include "llvm/IR/PassManager.h" |
102 | #include "llvm/IR/ValueHandle.h" |
103 | #include "llvm/Support/Debug.h" |
104 | #include "llvm/Support/raw_ostream.h" |
105 | #include <algorithm> |
106 | #include <cassert> |
107 | #include <utility> |
108 | |
109 | namespace llvm { |
110 | |
111 | struct CGSCCUpdateResult; |
112 | class Module; |
113 | |
114 | // Allow debug logging in this inline function. |
115 | #define DEBUG_TYPE "cgscc" |
116 | |
117 | /// Extern template declaration for the analysis set for this IR unit. |
118 | extern template class AllAnalysesOn<LazyCallGraph::SCC>; |
119 | |
120 | extern template class AnalysisManager<LazyCallGraph::SCC, LazyCallGraph &>; |
121 | |
122 | /// The CGSCC analysis manager. |
123 | /// |
124 | /// See the documentation for the AnalysisManager template for detail |
125 | /// documentation. This type serves as a convenient way to refer to this |
126 | /// construct in the adaptors and proxies used to integrate this into the larger |
127 | /// pass manager infrastructure. |
128 | using CGSCCAnalysisManager = |
129 | AnalysisManager<LazyCallGraph::SCC, LazyCallGraph &>; |
130 | |
131 | // Explicit specialization and instantiation declarations for the pass manager. |
132 | // See the comments on the definition of the specialization for details on how |
133 | // it differs from the primary template. |
134 | template <> |
135 | PreservedAnalyses |
136 | PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, |
137 | CGSCCUpdateResult &>::run(LazyCallGraph::SCC &InitialC, |
138 | CGSCCAnalysisManager &AM, |
139 | LazyCallGraph &G, CGSCCUpdateResult &UR); |
140 | extern template class PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, |
141 | LazyCallGraph &, CGSCCUpdateResult &>; |
142 | |
143 | /// The CGSCC pass manager. |
144 | /// |
145 | /// See the documentation for the PassManager template for details. It runs |
146 | /// a sequence of SCC passes over each SCC that the manager is run over. This |
147 | /// type serves as a convenient way to refer to this construct. |
148 | using CGSCCPassManager = |
149 | PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, |
150 | CGSCCUpdateResult &>; |
151 | |
152 | /// An explicit specialization of the require analysis template pass. |
153 | template <typename AnalysisT> |
154 | struct RequireAnalysisPass<AnalysisT, LazyCallGraph::SCC, CGSCCAnalysisManager, |
155 | LazyCallGraph &, CGSCCUpdateResult &> |
156 | : PassInfoMixin<RequireAnalysisPass<AnalysisT, LazyCallGraph::SCC, |
157 | CGSCCAnalysisManager, LazyCallGraph &, |
158 | CGSCCUpdateResult &>> { |
159 | PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, |
160 | LazyCallGraph &CG, CGSCCUpdateResult &) { |
161 | (void)AM.template getResult<AnalysisT>(C, CG); |
162 | return PreservedAnalyses::all(); |
163 | } |
164 | }; |
165 | |
166 | /// A proxy from a \c CGSCCAnalysisManager to a \c Module. |
167 | using CGSCCAnalysisManagerModuleProxy = |
168 | InnerAnalysisManagerProxy<CGSCCAnalysisManager, Module>; |
169 | |
170 | /// We need a specialized result for the \c CGSCCAnalysisManagerModuleProxy so |
171 | /// it can have access to the call graph in order to walk all the SCCs when |
172 | /// invalidating things. |
173 | template <> class CGSCCAnalysisManagerModuleProxy::Result { |
174 | public: |
175 | explicit Result(CGSCCAnalysisManager &InnerAM, LazyCallGraph &G) |
176 | : InnerAM(&InnerAM), G(&G) {} |
177 | |
178 | /// Accessor for the analysis manager. |
179 | CGSCCAnalysisManager &getManager() { return *InnerAM; } |
180 | |
181 | /// Handler for invalidation of the Module. |
182 | /// |
183 | /// If the proxy analysis itself is preserved, then we assume that the set of |
184 | /// SCCs in the Module hasn't changed. Thus any pointers to SCCs in the |
185 | /// CGSCCAnalysisManager are still valid, and we don't need to call \c clear |
186 | /// on the CGSCCAnalysisManager. |
187 | /// |
188 | /// Regardless of whether this analysis is marked as preserved, all of the |
189 | /// analyses in the \c CGSCCAnalysisManager are potentially invalidated based |
190 | /// on the set of preserved analyses. |
191 | bool invalidate(Module &M, const PreservedAnalyses &PA, |
192 | ModuleAnalysisManager::Invalidator &Inv); |
193 | |
194 | private: |
195 | CGSCCAnalysisManager *InnerAM; |
196 | LazyCallGraph *G; |
197 | }; |
198 | |
199 | /// Provide a specialized run method for the \c CGSCCAnalysisManagerModuleProxy |
200 | /// so it can pass the lazy call graph to the result. |
201 | template <> |
202 | CGSCCAnalysisManagerModuleProxy::Result |
203 | CGSCCAnalysisManagerModuleProxy::run(Module &M, ModuleAnalysisManager &AM); |
204 | |
205 | // Ensure the \c CGSCCAnalysisManagerModuleProxy is provided as an extern |
206 | // template. |
207 | extern template class InnerAnalysisManagerProxy<CGSCCAnalysisManager, Module>; |
208 | |
209 | extern template class OuterAnalysisManagerProxy< |
210 | ModuleAnalysisManager, LazyCallGraph::SCC, LazyCallGraph &>; |
211 | |
212 | /// A proxy from a \c ModuleAnalysisManager to an \c SCC. |
213 | using ModuleAnalysisManagerCGSCCProxy = |
214 | OuterAnalysisManagerProxy<ModuleAnalysisManager, LazyCallGraph::SCC, |
215 | LazyCallGraph &>; |
216 | |
217 | /// Support structure for SCC passes to communicate updates the call graph back |
218 | /// to the CGSCC pass manager infrsatructure. |
219 | /// |
220 | /// The CGSCC pass manager runs SCC passes which are allowed to update the call |
221 | /// graph and SCC structures. This means the structure the pass manager works |
222 | /// on is mutating underneath it. In order to support that, there needs to be |
223 | /// careful communication about the precise nature and ramifications of these |
224 | /// updates to the pass management infrastructure. |
225 | /// |
226 | /// All SCC passes will have to accept a reference to the management layer's |
227 | /// update result struct and use it to reflect the results of any CG updates |
228 | /// performed. |
229 | /// |
230 | /// Passes which do not change the call graph structure in any way can just |
231 | /// ignore this argument to their run method. |
232 | struct CGSCCUpdateResult { |
233 | /// Worklist of the RefSCCs queued for processing. |
234 | /// |
235 | /// When a pass refines the graph and creates new RefSCCs or causes them to |
236 | /// have a different shape or set of component SCCs it should add the RefSCCs |
237 | /// to this worklist so that we visit them in the refined form. |
238 | /// |
239 | /// This worklist is in reverse post-order, as we pop off the back in order |
240 | /// to observe RefSCCs in post-order. When adding RefSCCs, clients should add |
241 | /// them in reverse post-order. |
242 | SmallPriorityWorklist<LazyCallGraph::RefSCC *, 1> &RCWorklist; |
243 | |
244 | /// Worklist of the SCCs queued for processing. |
245 | /// |
246 | /// When a pass refines the graph and creates new SCCs or causes them to have |
247 | /// a different shape or set of component functions it should add the SCCs to |
248 | /// this worklist so that we visit them in the refined form. |
249 | /// |
250 | /// Note that if the SCCs are part of a RefSCC that is added to the \c |
251 | /// RCWorklist, they don't need to be added here as visiting the RefSCC will |
252 | /// be sufficient to re-visit the SCCs within it. |
253 | /// |
254 | /// This worklist is in reverse post-order, as we pop off the back in order |
255 | /// to observe SCCs in post-order. When adding SCCs, clients should add them |
256 | /// in reverse post-order. |
257 | SmallPriorityWorklist<LazyCallGraph::SCC *, 1> &CWorklist; |
258 | |
259 | /// The set of invalidated RefSCCs which should be skipped if they are found |
260 | /// in \c RCWorklist. |
261 | /// |
262 | /// This is used to quickly prune out RefSCCs when they get deleted and |
263 | /// happen to already be on the worklist. We use this primarily to avoid |
264 | /// scanning the list and removing entries from it. |
265 | SmallPtrSetImpl<LazyCallGraph::RefSCC *> &InvalidatedRefSCCs; |
266 | |
267 | /// The set of invalidated SCCs which should be skipped if they are found |
268 | /// in \c CWorklist. |
269 | /// |
270 | /// This is used to quickly prune out SCCs when they get deleted and happen |
271 | /// to already be on the worklist. We use this primarily to avoid scanning |
272 | /// the list and removing entries from it. |
273 | SmallPtrSetImpl<LazyCallGraph::SCC *> &InvalidatedSCCs; |
274 | |
275 | /// If non-null, the updated current \c RefSCC being processed. |
276 | /// |
277 | /// This is set when a graph refinement takes place an the "current" point in |
278 | /// the graph moves "down" or earlier in the post-order walk. This will often |
279 | /// cause the "current" RefSCC to be a newly created RefSCC object and the |
280 | /// old one to be added to the above worklist. When that happens, this |
281 | /// pointer is non-null and can be used to continue processing the "top" of |
282 | /// the post-order walk. |
283 | LazyCallGraph::RefSCC *UpdatedRC; |
284 | |
285 | /// If non-null, the updated current \c SCC being processed. |
286 | /// |
287 | /// This is set when a graph refinement takes place an the "current" point in |
288 | /// the graph moves "down" or earlier in the post-order walk. This will often |
289 | /// cause the "current" SCC to be a newly created SCC object and the old one |
290 | /// to be added to the above worklist. When that happens, this pointer is |
291 | /// non-null and can be used to continue processing the "top" of the |
292 | /// post-order walk. |
293 | LazyCallGraph::SCC *UpdatedC; |
294 | |
295 | /// A hacky area where the inliner can retain history about inlining |
296 | /// decisions that mutated the call graph's SCC structure in order to avoid |
297 | /// infinite inlining. See the comments in the inliner's CG update logic. |
298 | /// |
299 | /// FIXME: Keeping this here seems like a big layering issue, we should look |
300 | /// for a better technique. |
301 | SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4> |
302 | &InlinedInternalEdges; |
303 | }; |
304 | |
305 | /// The core module pass which does a post-order walk of the SCCs and |
306 | /// runs a CGSCC pass over each one. |
307 | /// |
308 | /// Designed to allow composition of a CGSCCPass(Manager) and |
309 | /// a ModulePassManager. Note that this pass must be run with a module analysis |
310 | /// manager as it uses the LazyCallGraph analysis. It will also run the |
311 | /// \c CGSCCAnalysisManagerModuleProxy analysis prior to running the CGSCC |
312 | /// pass over the module to enable a \c FunctionAnalysisManager to be used |
313 | /// within this run safely. |
314 | template <typename CGSCCPassT> |
315 | class ModuleToPostOrderCGSCCPassAdaptor |
316 | : public PassInfoMixin<ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>> { |
317 | public: |
318 | explicit ModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass) |
319 | : Pass(std::move(Pass)) {} |
320 | |
321 | // We have to explicitly define all the special member functions because MSVC |
322 | // refuses to generate them. |
323 | ModuleToPostOrderCGSCCPassAdaptor( |
324 | const ModuleToPostOrderCGSCCPassAdaptor &Arg) |
325 | : Pass(Arg.Pass) {} |
326 | |
327 | ModuleToPostOrderCGSCCPassAdaptor(ModuleToPostOrderCGSCCPassAdaptor &&Arg) |
328 | : Pass(std::move(Arg.Pass)) {} |
329 | |
330 | friend void swap(ModuleToPostOrderCGSCCPassAdaptor &LHS, |
331 | ModuleToPostOrderCGSCCPassAdaptor &RHS) { |
332 | std::swap(LHS.Pass, RHS.Pass); |
333 | } |
334 | |
335 | ModuleToPostOrderCGSCCPassAdaptor & |
336 | operator=(ModuleToPostOrderCGSCCPassAdaptor RHS) { |
337 | swap(*this, RHS); |
338 | return *this; |
339 | } |
340 | |
341 | /// Runs the CGSCC pass across every SCC in the module. |
342 | PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) { |
343 | // Setup the CGSCC analysis manager from its proxy. |
344 | CGSCCAnalysisManager &CGAM = |
345 | AM.getResult<CGSCCAnalysisManagerModuleProxy>(M).getManager(); |
346 | |
347 | // Get the call graph for this module. |
348 | LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M); |
349 | |
350 | // We keep worklists to allow us to push more work onto the pass manager as |
351 | // the passes are run. |
352 | SmallPriorityWorklist<LazyCallGraph::RefSCC *, 1> RCWorklist; |
353 | SmallPriorityWorklist<LazyCallGraph::SCC *, 1> CWorklist; |
354 | |
355 | // Keep sets for invalidated SCCs and RefSCCs that should be skipped when |
356 | // iterating off the worklists. |
357 | SmallPtrSet<LazyCallGraph::RefSCC *, 4> InvalidRefSCCSet; |
358 | SmallPtrSet<LazyCallGraph::SCC *, 4> InvalidSCCSet; |
359 | |
360 | SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4> |
361 | InlinedInternalEdges; |
362 | |
363 | CGSCCUpdateResult UR = {RCWorklist, CWorklist, InvalidRefSCCSet, |
364 | InvalidSCCSet, nullptr, nullptr, |
365 | InlinedInternalEdges}; |
366 | |
367 | // Request PassInstrumentation from analysis manager, will use it to run |
368 | // instrumenting callbacks for the passes later. |
369 | PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(M); |
370 | |
371 | PreservedAnalyses PA = PreservedAnalyses::all(); |
372 | CG.buildRefSCCs(); |
373 | for (auto RCI = CG.postorder_ref_scc_begin(), |
374 | RCE = CG.postorder_ref_scc_end(); |
375 | RCI != RCE;) { |
376 | assert(RCWorklist.empty() && |
377 | "Should always start with an empty RefSCC worklist" ); |
378 | // The postorder_ref_sccs range we are walking is lazily constructed, so |
379 | // we only push the first one onto the worklist. The worklist allows us |
380 | // to capture *new* RefSCCs created during transformations. |
381 | // |
382 | // We really want to form RefSCCs lazily because that makes them cheaper |
383 | // to update as the program is simplified and allows us to have greater |
384 | // cache locality as forming a RefSCC touches all the parts of all the |
385 | // functions within that RefSCC. |
386 | // |
387 | // We also eagerly increment the iterator to the next position because |
388 | // the CGSCC passes below may delete the current RefSCC. |
389 | RCWorklist.insert(&*RCI++); |
390 | |
391 | do { |
392 | LazyCallGraph::RefSCC *RC = RCWorklist.pop_back_val(); |
393 | if (InvalidRefSCCSet.count(RC)) { |
394 | LLVM_DEBUG(dbgs() << "Skipping an invalid RefSCC...\n" ); |
395 | continue; |
396 | } |
397 | |
398 | assert(CWorklist.empty() && |
399 | "Should always start with an empty SCC worklist" ); |
400 | |
401 | LLVM_DEBUG(dbgs() << "Running an SCC pass across the RefSCC: " << *RC |
402 | << "\n" ); |
403 | |
404 | // Push the initial SCCs in reverse post-order as we'll pop off the |
405 | // back and so see this in post-order. |
406 | for (LazyCallGraph::SCC &C : llvm::reverse(*RC)) |
407 | CWorklist.insert(&C); |
408 | |
409 | do { |
410 | LazyCallGraph::SCC *C = CWorklist.pop_back_val(); |
411 | // Due to call graph mutations, we may have invalid SCCs or SCCs from |
412 | // other RefSCCs in the worklist. The invalid ones are dead and the |
413 | // other RefSCCs should be queued above, so we just need to skip both |
414 | // scenarios here. |
415 | if (InvalidSCCSet.count(C)) { |
416 | LLVM_DEBUG(dbgs() << "Skipping an invalid SCC...\n" ); |
417 | continue; |
418 | } |
419 | if (&C->getOuterRefSCC() != RC) { |
420 | LLVM_DEBUG(dbgs() |
421 | << "Skipping an SCC that is now part of some other " |
422 | "RefSCC...\n" ); |
423 | continue; |
424 | } |
425 | |
426 | do { |
427 | // Check that we didn't miss any update scenario. |
428 | assert(!InvalidSCCSet.count(C) && "Processing an invalid SCC!" ); |
429 | assert(C->begin() != C->end() && "Cannot have an empty SCC!" ); |
430 | assert(&C->getOuterRefSCC() == RC && |
431 | "Processing an SCC in a different RefSCC!" ); |
432 | |
433 | UR.UpdatedRC = nullptr; |
434 | UR.UpdatedC = nullptr; |
435 | |
436 | // Check the PassInstrumentation's BeforePass callbacks before |
437 | // running the pass, skip its execution completely if asked to |
438 | // (callback returns false). |
439 | if (!PI.runBeforePass<LazyCallGraph::SCC>(Pass, *C)) |
440 | continue; |
441 | |
442 | PreservedAnalyses PassPA = Pass.run(*C, CGAM, CG, UR); |
443 | |
444 | if (UR.InvalidatedSCCs.count(C)) |
445 | PI.runAfterPassInvalidated<LazyCallGraph::SCC>(Pass); |
446 | else |
447 | PI.runAfterPass<LazyCallGraph::SCC>(Pass, *C); |
448 | |
449 | // Update the SCC and RefSCC if necessary. |
450 | C = UR.UpdatedC ? UR.UpdatedC : C; |
451 | RC = UR.UpdatedRC ? UR.UpdatedRC : RC; |
452 | |
453 | // If the CGSCC pass wasn't able to provide a valid updated SCC, |
454 | // the current SCC may simply need to be skipped if invalid. |
455 | if (UR.InvalidatedSCCs.count(C)) { |
456 | LLVM_DEBUG(dbgs() |
457 | << "Skipping invalidated root or island SCC!\n" ); |
458 | break; |
459 | } |
460 | // Check that we didn't miss any update scenario. |
461 | assert(C->begin() != C->end() && "Cannot have an empty SCC!" ); |
462 | |
463 | // We handle invalidating the CGSCC analysis manager's information |
464 | // for the (potentially updated) SCC here. Note that any other SCCs |
465 | // whose structure has changed should have been invalidated by |
466 | // whatever was updating the call graph. This SCC gets invalidated |
467 | // late as it contains the nodes that were actively being |
468 | // processed. |
469 | CGAM.invalidate(*C, PassPA); |
470 | |
471 | // Then intersect the preserved set so that invalidation of module |
472 | // analyses will eventually occur when the module pass completes. |
473 | PA.intersect(std::move(PassPA)); |
474 | |
475 | // The pass may have restructured the call graph and refined the |
476 | // current SCC and/or RefSCC. We need to update our current SCC and |
477 | // RefSCC pointers to follow these. Also, when the current SCC is |
478 | // refined, re-run the SCC pass over the newly refined SCC in order |
479 | // to observe the most precise SCC model available. This inherently |
480 | // cannot cycle excessively as it only happens when we split SCCs |
481 | // apart, at most converging on a DAG of single nodes. |
482 | // FIXME: If we ever start having RefSCC passes, we'll want to |
483 | // iterate there too. |
484 | if (UR.UpdatedC) |
485 | LLVM_DEBUG(dbgs() |
486 | << "Re-running SCC passes after a refinement of the " |
487 | "current SCC: " |
488 | << *UR.UpdatedC << "\n" ); |
489 | |
490 | // Note that both `C` and `RC` may at this point refer to deleted, |
491 | // invalid SCC and RefSCCs respectively. But we will short circuit |
492 | // the processing when we check them in the loop above. |
493 | } while (UR.UpdatedC); |
494 | } while (!CWorklist.empty()); |
495 | |
496 | // We only need to keep internal inlined edge information within |
497 | // a RefSCC, clear it to save on space and let the next time we visit |
498 | // any of these functions have a fresh start. |
499 | InlinedInternalEdges.clear(); |
500 | } while (!RCWorklist.empty()); |
501 | } |
502 | |
503 | // By definition we preserve the call garph, all SCC analyses, and the |
504 | // analysis proxies by handling them above and in any nested pass managers. |
505 | PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>(); |
506 | PA.preserve<LazyCallGraphAnalysis>(); |
507 | PA.preserve<CGSCCAnalysisManagerModuleProxy>(); |
508 | PA.preserve<FunctionAnalysisManagerModuleProxy>(); |
509 | return PA; |
510 | } |
511 | |
512 | private: |
513 | CGSCCPassT Pass; |
514 | }; |
515 | |
516 | /// A function to deduce a function pass type and wrap it in the |
517 | /// templated adaptor. |
518 | template <typename CGSCCPassT> |
519 | ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT> |
520 | createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass) { |
521 | return ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>(std::move(Pass)); |
522 | } |
523 | |
524 | /// A proxy from a \c FunctionAnalysisManager to an \c SCC. |
525 | /// |
526 | /// When a module pass runs and triggers invalidation, both the CGSCC and |
527 | /// Function analysis manager proxies on the module get an invalidation event. |
528 | /// We don't want to fully duplicate responsibility for most of the |
529 | /// invalidation logic. Instead, this layer is only responsible for SCC-local |
530 | /// invalidation events. We work with the module's FunctionAnalysisManager to |
531 | /// invalidate function analyses. |
532 | class FunctionAnalysisManagerCGSCCProxy |
533 | : public AnalysisInfoMixin<FunctionAnalysisManagerCGSCCProxy> { |
534 | public: |
535 | class Result { |
536 | public: |
537 | explicit Result(FunctionAnalysisManager &FAM) : FAM(&FAM) {} |
538 | |
539 | /// Accessor for the analysis manager. |
540 | FunctionAnalysisManager &getManager() { return *FAM; } |
541 | |
542 | bool invalidate(LazyCallGraph::SCC &C, const PreservedAnalyses &PA, |
543 | CGSCCAnalysisManager::Invalidator &Inv); |
544 | |
545 | private: |
546 | FunctionAnalysisManager *FAM; |
547 | }; |
548 | |
549 | /// Computes the \c FunctionAnalysisManager and stores it in the result proxy. |
550 | Result run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &); |
551 | |
552 | private: |
553 | friend AnalysisInfoMixin<FunctionAnalysisManagerCGSCCProxy>; |
554 | |
555 | static AnalysisKey Key; |
556 | }; |
557 | |
558 | extern template class OuterAnalysisManagerProxy<CGSCCAnalysisManager, Function>; |
559 | |
560 | /// A proxy from a \c CGSCCAnalysisManager to a \c Function. |
561 | using CGSCCAnalysisManagerFunctionProxy = |
562 | OuterAnalysisManagerProxy<CGSCCAnalysisManager, Function>; |
563 | |
564 | /// Helper to update the call graph after running a function pass. |
565 | /// |
566 | /// Function passes can only mutate the call graph in specific ways. This |
567 | /// routine provides a helper that updates the call graph in those ways |
568 | /// including returning whether any changes were made and populating a CG |
569 | /// update result struct for the overall CGSCC walk. |
570 | LazyCallGraph::SCC &updateCGAndAnalysisManagerForFunctionPass( |
571 | LazyCallGraph &G, LazyCallGraph::SCC &C, LazyCallGraph::Node &N, |
572 | CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR); |
573 | |
574 | /// Adaptor that maps from a SCC to its functions. |
575 | /// |
576 | /// Designed to allow composition of a FunctionPass(Manager) and |
577 | /// a CGSCCPassManager. Note that if this pass is constructed with a pointer |
578 | /// to a \c CGSCCAnalysisManager it will run the |
579 | /// \c FunctionAnalysisManagerCGSCCProxy analysis prior to running the function |
580 | /// pass over the SCC to enable a \c FunctionAnalysisManager to be used |
581 | /// within this run safely. |
582 | template <typename FunctionPassT> |
583 | class CGSCCToFunctionPassAdaptor |
584 | : public PassInfoMixin<CGSCCToFunctionPassAdaptor<FunctionPassT>> { |
585 | public: |
586 | explicit CGSCCToFunctionPassAdaptor(FunctionPassT Pass) |
587 | : Pass(std::move(Pass)) {} |
588 | |
589 | // We have to explicitly define all the special member functions because MSVC |
590 | // refuses to generate them. |
591 | CGSCCToFunctionPassAdaptor(const CGSCCToFunctionPassAdaptor &Arg) |
592 | : Pass(Arg.Pass) {} |
593 | |
594 | CGSCCToFunctionPassAdaptor(CGSCCToFunctionPassAdaptor &&Arg) |
595 | : Pass(std::move(Arg.Pass)) {} |
596 | |
597 | friend void swap(CGSCCToFunctionPassAdaptor &LHS, |
598 | CGSCCToFunctionPassAdaptor &RHS) { |
599 | std::swap(LHS.Pass, RHS.Pass); |
600 | } |
601 | |
602 | CGSCCToFunctionPassAdaptor &operator=(CGSCCToFunctionPassAdaptor RHS) { |
603 | swap(*this, RHS); |
604 | return *this; |
605 | } |
606 | |
607 | /// Runs the function pass across every function in the module. |
608 | PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, |
609 | LazyCallGraph &CG, CGSCCUpdateResult &UR) { |
610 | // Setup the function analysis manager from its proxy. |
611 | FunctionAnalysisManager &FAM = |
612 | AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); |
613 | |
614 | SmallVector<LazyCallGraph::Node *, 4> Nodes; |
615 | for (LazyCallGraph::Node &N : C) |
616 | Nodes.push_back(&N); |
617 | |
618 | // The SCC may get split while we are optimizing functions due to deleting |
619 | // edges. If this happens, the current SCC can shift, so keep track of |
620 | // a pointer we can overwrite. |
621 | LazyCallGraph::SCC *CurrentC = &C; |
622 | |
623 | LLVM_DEBUG(dbgs() << "Running function passes across an SCC: " << C |
624 | << "\n" ); |
625 | |
626 | PreservedAnalyses PA = PreservedAnalyses::all(); |
627 | for (LazyCallGraph::Node *N : Nodes) { |
628 | // Skip nodes from other SCCs. These may have been split out during |
629 | // processing. We'll eventually visit those SCCs and pick up the nodes |
630 | // there. |
631 | if (CG.lookupSCC(*N) != CurrentC) |
632 | continue; |
633 | |
634 | Function &F = N->getFunction(); |
635 | |
636 | PassInstrumentation PI = FAM.getResult<PassInstrumentationAnalysis>(F); |
637 | if (!PI.runBeforePass<Function>(Pass, F)) |
638 | continue; |
639 | |
640 | PreservedAnalyses PassPA = Pass.run(F, FAM); |
641 | |
642 | PI.runAfterPass<Function>(Pass, F); |
643 | |
644 | // We know that the function pass couldn't have invalidated any other |
645 | // function's analyses (that's the contract of a function pass), so |
646 | // directly handle the function analysis manager's invalidation here. |
647 | FAM.invalidate(F, PassPA); |
648 | |
649 | // Then intersect the preserved set so that invalidation of module |
650 | // analyses will eventually occur when the module pass completes. |
651 | PA.intersect(std::move(PassPA)); |
652 | |
653 | // If the call graph hasn't been preserved, update it based on this |
654 | // function pass. This may also update the current SCC to point to |
655 | // a smaller, more refined SCC. |
656 | auto PAC = PA.getChecker<LazyCallGraphAnalysis>(); |
657 | if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Module>>()) { |
658 | CurrentC = &updateCGAndAnalysisManagerForFunctionPass(CG, *CurrentC, *N, |
659 | AM, UR); |
660 | assert( |
661 | CG.lookupSCC(*N) == CurrentC && |
662 | "Current SCC not updated to the SCC containing the current node!" ); |
663 | } |
664 | } |
665 | |
666 | // By definition we preserve the proxy. And we preserve all analyses on |
667 | // Functions. This precludes *any* invalidation of function analyses by the |
668 | // proxy, but that's OK because we've taken care to invalidate analyses in |
669 | // the function analysis manager incrementally above. |
670 | PA.preserveSet<AllAnalysesOn<Function>>(); |
671 | PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); |
672 | |
673 | // We've also ensured that we updated the call graph along the way. |
674 | PA.preserve<LazyCallGraphAnalysis>(); |
675 | |
676 | return PA; |
677 | } |
678 | |
679 | private: |
680 | FunctionPassT Pass; |
681 | }; |
682 | |
683 | /// A function to deduce a function pass type and wrap it in the |
684 | /// templated adaptor. |
685 | template <typename FunctionPassT> |
686 | CGSCCToFunctionPassAdaptor<FunctionPassT> |
687 | createCGSCCToFunctionPassAdaptor(FunctionPassT Pass) { |
688 | return CGSCCToFunctionPassAdaptor<FunctionPassT>(std::move(Pass)); |
689 | } |
690 | |
691 | /// A helper that repeats an SCC pass each time an indirect call is refined to |
692 | /// a direct call by that pass. |
693 | /// |
694 | /// While the CGSCC pass manager works to re-visit SCCs and RefSCCs as they |
695 | /// change shape, we may also want to repeat an SCC pass if it simply refines |
696 | /// an indirect call to a direct call, even if doing so does not alter the |
697 | /// shape of the graph. Note that this only pertains to direct calls to |
698 | /// functions where IPO across the SCC may be able to compute more precise |
699 | /// results. For intrinsics, we assume scalar optimizations already can fully |
700 | /// reason about them. |
701 | /// |
702 | /// This repetition has the potential to be very large however, as each one |
703 | /// might refine a single call site. As a consequence, in practice we use an |
704 | /// upper bound on the number of repetitions to limit things. |
705 | template <typename PassT> |
706 | class DevirtSCCRepeatedPass |
707 | : public PassInfoMixin<DevirtSCCRepeatedPass<PassT>> { |
708 | public: |
709 | explicit DevirtSCCRepeatedPass(PassT Pass, int MaxIterations) |
710 | : Pass(std::move(Pass)), MaxIterations(MaxIterations) {} |
711 | |
712 | /// Runs the wrapped pass up to \c MaxIterations on the SCC, iterating |
713 | /// whenever an indirect call is refined. |
714 | PreservedAnalyses run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM, |
715 | LazyCallGraph &CG, CGSCCUpdateResult &UR) { |
716 | PreservedAnalyses PA = PreservedAnalyses::all(); |
717 | PassInstrumentation PI = |
718 | AM.getResult<PassInstrumentationAnalysis>(InitialC, CG); |
719 | |
720 | // The SCC may be refined while we are running passes over it, so set up |
721 | // a pointer that we can update. |
722 | LazyCallGraph::SCC *C = &InitialC; |
723 | |
724 | // Collect value handles for all of the indirect call sites. |
725 | SmallVector<WeakTrackingVH, 8> CallHandles; |
726 | |
727 | // Struct to track the counts of direct and indirect calls in each function |
728 | // of the SCC. |
729 | struct CallCount { |
730 | int Direct; |
731 | int Indirect; |
732 | }; |
733 | |
734 | // Put value handles on all of the indirect calls and return the number of |
735 | // direct calls for each function in the SCC. |
736 | auto ScanSCC = [](LazyCallGraph::SCC &C, |
737 | SmallVectorImpl<WeakTrackingVH> &CallHandles) { |
738 | assert(CallHandles.empty() && "Must start with a clear set of handles." ); |
739 | |
740 | SmallVector<CallCount, 4> CallCounts; |
741 | for (LazyCallGraph::Node &N : C) { |
742 | CallCounts.push_back({0, 0}); |
743 | CallCount &Count = CallCounts.back(); |
744 | for (Instruction &I : instructions(N.getFunction())) |
745 | if (auto CS = CallSite(&I)) { |
746 | if (CS.getCalledFunction()) { |
747 | ++Count.Direct; |
748 | } else { |
749 | ++Count.Indirect; |
750 | CallHandles.push_back(WeakTrackingVH(&I)); |
751 | } |
752 | } |
753 | } |
754 | |
755 | return CallCounts; |
756 | }; |
757 | |
758 | // Populate the initial call handles and get the initial call counts. |
759 | auto CallCounts = ScanSCC(*C, CallHandles); |
760 | |
761 | for (int Iteration = 0;; ++Iteration) { |
762 | |
763 | if (!PI.runBeforePass<LazyCallGraph::SCC>(Pass, *C)) |
764 | continue; |
765 | |
766 | PreservedAnalyses PassPA = Pass.run(*C, AM, CG, UR); |
767 | |
768 | if (UR.InvalidatedSCCs.count(C)) |
769 | PI.runAfterPassInvalidated<LazyCallGraph::SCC>(Pass); |
770 | else |
771 | PI.runAfterPass<LazyCallGraph::SCC>(Pass, *C); |
772 | |
773 | // If the SCC structure has changed, bail immediately and let the outer |
774 | // CGSCC layer handle any iteration to reflect the refined structure. |
775 | if (UR.UpdatedC && UR.UpdatedC != C) { |
776 | PA.intersect(std::move(PassPA)); |
777 | break; |
778 | } |
779 | |
780 | // Check that we didn't miss any update scenario. |
781 | assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!" ); |
782 | assert(C->begin() != C->end() && "Cannot have an empty SCC!" ); |
783 | assert((int)CallCounts.size() == C->size() && |
784 | "Cannot have changed the size of the SCC!" ); |
785 | |
786 | // Check whether any of the handles were devirtualized. |
787 | auto IsDevirtualizedHandle = [&](WeakTrackingVH &CallH) { |
788 | if (!CallH) |
789 | return false; |
790 | auto CS = CallSite(CallH); |
791 | if (!CS) |
792 | return false; |
793 | |
794 | // If the call is still indirect, leave it alone. |
795 | Function *F = CS.getCalledFunction(); |
796 | if (!F) |
797 | return false; |
798 | |
799 | LLVM_DEBUG(dbgs() << "Found devirutalized call from " |
800 | << CS.getParent()->getParent()->getName() << " to " |
801 | << F->getName() << "\n" ); |
802 | |
803 | // We now have a direct call where previously we had an indirect call, |
804 | // so iterate to process this devirtualization site. |
805 | return true; |
806 | }; |
807 | bool Devirt = llvm::any_of(CallHandles, IsDevirtualizedHandle); |
808 | |
809 | // Rescan to build up a new set of handles and count how many direct |
810 | // calls remain. If we decide to iterate, this also sets up the input to |
811 | // the next iteration. |
812 | CallHandles.clear(); |
813 | auto NewCallCounts = ScanSCC(*C, CallHandles); |
814 | |
815 | // If we haven't found an explicit devirtualization already see if we |
816 | // have decreased the number of indirect calls and increased the number |
817 | // of direct calls for any function in the SCC. This can be fooled by all |
818 | // manner of transformations such as DCE and other things, but seems to |
819 | // work well in practice. |
820 | if (!Devirt) |
821 | for (int i = 0, Size = C->size(); i < Size; ++i) |
822 | if (CallCounts[i].Indirect > NewCallCounts[i].Indirect && |
823 | CallCounts[i].Direct < NewCallCounts[i].Direct) { |
824 | Devirt = true; |
825 | break; |
826 | } |
827 | |
828 | if (!Devirt) { |
829 | PA.intersect(std::move(PassPA)); |
830 | break; |
831 | } |
832 | |
833 | // Otherwise, if we've already hit our max, we're done. |
834 | if (Iteration >= MaxIterations) { |
835 | LLVM_DEBUG( |
836 | dbgs() << "Found another devirtualization after hitting the max " |
837 | "number of repetitions (" |
838 | << MaxIterations << ") on SCC: " << *C << "\n" ); |
839 | PA.intersect(std::move(PassPA)); |
840 | break; |
841 | } |
842 | |
843 | LLVM_DEBUG( |
844 | dbgs() |
845 | << "Repeating an SCC pass after finding a devirtualization in: " << *C |
846 | << "\n" ); |
847 | |
848 | // Move over the new call counts in preparation for iterating. |
849 | CallCounts = std::move(NewCallCounts); |
850 | |
851 | // Update the analysis manager with each run and intersect the total set |
852 | // of preserved analyses so we're ready to iterate. |
853 | AM.invalidate(*C, PassPA); |
854 | PA.intersect(std::move(PassPA)); |
855 | } |
856 | |
857 | // Note that we don't add any preserved entries here unlike a more normal |
858 | // "pass manager" because we only handle invalidation *between* iterations, |
859 | // not after the last iteration. |
860 | return PA; |
861 | } |
862 | |
863 | private: |
864 | PassT Pass; |
865 | int MaxIterations; |
866 | }; |
867 | |
868 | /// A function to deduce a function pass type and wrap it in the |
869 | /// templated adaptor. |
870 | template <typename PassT> |
871 | DevirtSCCRepeatedPass<PassT> createDevirtSCCRepeatedPass(PassT Pass, |
872 | int MaxIterations) { |
873 | return DevirtSCCRepeatedPass<PassT>(std::move(Pass), MaxIterations); |
874 | } |
875 | |
876 | // Clear out the debug logging macro. |
877 | #undef DEBUG_TYPE |
878 | |
879 | } // end namespace llvm |
880 | |
881 | #endif // LLVM_ANALYSIS_CGSCCPASSMANAGER_H |
882 | |