1 | //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// This pass exposes codegen information to IR-level passes. Every |
10 | /// transformation that uses codegen information is broken into three parts: |
11 | /// 1. The IR-level analysis pass. |
12 | /// 2. The IR-level transformation interface which provides the needed |
13 | /// information. |
14 | /// 3. Codegen-level implementation which uses target-specific hooks. |
15 | /// |
16 | /// This file defines #2, which is the interface that IR-level transformations |
17 | /// use for querying the codegen. |
18 | /// |
19 | //===----------------------------------------------------------------------===// |
20 | |
21 | #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H |
22 | #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H |
23 | |
24 | #include "llvm/IR/InstrTypes.h" |
25 | #include "llvm/IR/Operator.h" |
26 | #include "llvm/IR/PassManager.h" |
27 | #include "llvm/Pass.h" |
28 | #include "llvm/Support/AtomicOrdering.h" |
29 | #include "llvm/Support/BranchProbability.h" |
30 | #include "llvm/Support/DataTypes.h" |
31 | #include "llvm/Support/InstructionCost.h" |
32 | #include <functional> |
33 | #include <utility> |
34 | |
35 | namespace llvm { |
36 | |
37 | namespace Intrinsic { |
38 | typedef unsigned ID; |
39 | } |
40 | |
41 | class AssumptionCache; |
42 | class BlockFrequencyInfo; |
43 | class DominatorTree; |
44 | class BranchInst; |
45 | class CallBase; |
46 | class Function; |
47 | class GlobalValue; |
48 | class InstCombiner; |
class OptimizationRemarkEmitter;
50 | class IntrinsicInst; |
51 | class LoadInst; |
52 | class LoopAccessInfo; |
53 | class Loop; |
54 | class LoopInfo; |
55 | class ProfileSummaryInfo; |
56 | class RecurrenceDescriptor; |
57 | class SCEV; |
58 | class ScalarEvolution; |
59 | class StoreInst; |
60 | class SwitchInst; |
61 | class TargetLibraryInfo; |
62 | class Type; |
63 | class User; |
64 | class Value; |
65 | class VPIntrinsic; |
66 | struct KnownBits; |
67 | template <typename T> class Optional; |
68 | |
69 | /// Information about a load/store intrinsic defined by the target. |
70 | struct MemIntrinsicInfo { |
71 | /// This is the pointer that the intrinsic is loading from or storing to. |
72 | /// If this is non-null, then analysis/optimization passes can assume that |
73 | /// this intrinsic is functionally equivalent to a load/store from this |
74 | /// pointer. |
75 | Value *PtrVal = nullptr; |
76 | |
77 | // Ordering for atomic operations. |
78 | AtomicOrdering Ordering = AtomicOrdering::NotAtomic; |
79 | |
80 | // Same Id is set by the target for corresponding load/store intrinsics. |
81 | unsigned short MatchingId = 0; |
82 | |
83 | bool ReadMem = false; |
84 | bool WriteMem = false; |
85 | bool IsVolatile = false; |
86 | |
87 | bool isUnordered() const { |
88 | return (Ordering == AtomicOrdering::NotAtomic || |
89 | Ordering == AtomicOrdering::Unordered) && |
90 | !IsVolatile; |
91 | } |
92 | }; |
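
// Illustrative sketch only: how an analysis might consume this struct. The
// hook that populates it (getTgtMemIntrinsic) is assumed to be available on
// the TargetTransformInfo object TTI, and II is an IntrinsicInst* in scope.
//
//   MemIntrinsicInfo Info;
//   if (TTI.getTgtMemIntrinsic(II, Info) && Info.ReadMem && Info.isUnordered())
//     ; // II can be treated like a simple load from Info.PtrVal.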
93 | |
94 | /// Attributes of a target dependent hardware loop. |
95 | struct HardwareLoopInfo { |
96 | HardwareLoopInfo() = delete; |
97 | HardwareLoopInfo(Loop *L) : L(L) {} |
98 | Loop *L = nullptr; |
99 | BasicBlock *ExitBlock = nullptr; |
100 | BranchInst *ExitBranch = nullptr; |
101 | const SCEV *ExitCount = nullptr; |
102 | IntegerType *CountType = nullptr; |
103 | Value *LoopDecrement = nullptr; // Decrement the loop counter by this |
104 | // value in every iteration. |
105 | bool IsNestingLegal = false; // Can a hardware loop be a parent to |
106 | // another hardware loop? |
107 | bool CounterInReg = false; // Should loop counter be updated in |
108 | // the loop via a phi? |
109 | bool PerformEntryTest = false; // Generate the intrinsic which also performs |
110 | // icmp ne zero on the loop counter value and |
111 | // produces an i1 to guard the loop entry. |
112 | bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, |
113 | DominatorTree &DT, bool ForceNestedLoop = false, |
114 | bool ForceHardwareLoopPHI = false); |
115 | bool canAnalyze(LoopInfo &LI); |
116 | }; |
117 | |
118 | class IntrinsicCostAttributes { |
119 | const IntrinsicInst *II = nullptr; |
120 | Type *RetTy = nullptr; |
121 | Intrinsic::ID IID; |
122 | SmallVector<Type *, 4> ParamTys; |
123 | SmallVector<const Value *, 4> Arguments; |
124 | FastMathFlags FMF; |
125 | // If ScalarizationCost is UINT_MAX, the cost of scalarizing the |
126 | // arguments and the return value will be computed based on types. |
127 | InstructionCost ScalarizationCost = InstructionCost::getInvalid(); |
128 | |
129 | public: |
130 | IntrinsicCostAttributes( |
131 | Intrinsic::ID Id, const CallBase &CI, |
132 | InstructionCost ScalarCost = InstructionCost::getInvalid()); |
133 | |
134 | IntrinsicCostAttributes( |
135 | Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys, |
136 | FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr, |
137 | InstructionCost ScalarCost = InstructionCost::getInvalid()); |
138 | |
139 | IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, |
140 | ArrayRef<const Value *> Args); |
141 | |
142 | IntrinsicCostAttributes( |
143 | Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args, |
144 | ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(), |
145 | const IntrinsicInst *I = nullptr, |
146 | InstructionCost ScalarCost = InstructionCost::getInvalid()); |
147 | |
148 | Intrinsic::ID getID() const { return IID; } |
149 | const IntrinsicInst *getInst() const { return II; } |
150 | Type *getReturnType() const { return RetTy; } |
151 | FastMathFlags getFlags() const { return FMF; } |
152 | InstructionCost getScalarizationCost() const { return ScalarizationCost; } |
153 | const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; } |
154 | const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; } |
155 | |
156 | bool isTypeBasedOnly() const { |
157 | return Arguments.empty(); |
158 | } |
159 | |
160 | bool skipScalarizationCost() const { return ScalarizationCost.isValid(); } |
161 | }; |
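
// Illustrative sketch only: a cost query built from an intrinsic call site.
// The getIntrinsicInstrCost hook is assumed to be available on the
// TargetTransformInfo object TTI, and CB is a CallBase known to call
// llvm.fmuladd.
//
//   IntrinsicCostAttributes ICA(Intrinsic::fmuladd, CB);
//   InstructionCost Cost = TTI.getIntrinsicInstrCost(
//       ICA, TargetTransformInfo::TCK_RecipThroughput);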
162 | |
163 | class TargetTransformInfo; |
164 | typedef TargetTransformInfo TTI; |
165 | |
166 | /// This pass provides access to the codegen interfaces that are needed |
167 | /// for IR-level transformations. |
168 | class TargetTransformInfo { |
169 | public: |
170 | /// Construct a TTI object using a type implementing the \c Concept |
171 | /// API below. |
172 | /// |
173 | /// This is used by targets to construct a TTI wrapping their target-specific |
174 | /// implementation that encodes appropriate costs for their target. |
175 | template <typename T> TargetTransformInfo(T Impl); |
176 | |
177 | /// Construct a baseline TTI object using a minimal implementation of |
178 | /// the \c Concept API below. |
179 | /// |
180 | /// The TTI implementation will reflect the information in the DataLayout |
181 | /// provided if non-null. |
182 | explicit TargetTransformInfo(const DataLayout &DL); |
183 | |
184 | // Provide move semantics. |
185 | TargetTransformInfo(TargetTransformInfo &&Arg); |
186 | TargetTransformInfo &operator=(TargetTransformInfo &&RHS); |
187 | |
188 | // We need to define the destructor out-of-line to define our sub-classes |
189 | // out-of-line. |
190 | ~TargetTransformInfo(); |
191 | |
192 | /// Handle the invalidation of this information. |
193 | /// |
194 | /// When used as a result of \c TargetIRAnalysis this method will be called |
195 | /// when the function this was computed for changes. When it returns false, |
196 | /// the information is preserved across those changes. |
197 | bool invalidate(Function &, const PreservedAnalyses &, |
198 | FunctionAnalysisManager::Invalidator &) { |
199 | // FIXME: We should probably in some way ensure that the subtarget |
200 | // information for a function hasn't changed. |
201 | return false; |
202 | } |
203 | |
204 | /// \name Generic Target Information |
205 | /// @{ |
206 | |
207 | /// The kind of cost model. |
208 | /// |
209 | /// There are several different cost models that can be customized by the |
210 | /// target. The normalization of each cost model may be target specific. |
211 | enum TargetCostKind { |
212 | TCK_RecipThroughput, ///< Reciprocal throughput. |
213 | TCK_Latency, ///< The latency of instruction. |
214 | TCK_CodeSize, ///< Instruction code size. |
215 | TCK_SizeAndLatency ///< The weighted sum of size and latency. |
216 | }; |
217 | |
218 | /// Query the cost of a specified instruction. |
219 | /// |
220 | /// Clients should use this interface to query the cost of an existing |
221 | /// instruction. The instruction must have a valid parent (basic block). |
222 | /// |
223 | /// Note, this method does not cache the cost calculation and it |
224 | /// can be expensive in some cases. |
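  ///
  /// A minimal usage sketch (illustrative; assumes a TargetTransformInfo
  /// reference TTI and a const Instruction *I with a parent block):
  /// \code
  ///   InstructionCost Cost =
  ///       TTI.getInstructionCost(I, TargetTransformInfo::TCK_Latency);
  ///   if (Cost.isValid() && Cost > TargetTransformInfo::TCC_Basic)
  ///     ; // Treat I as more expensive than a simple add.
  /// \endcode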
225 | InstructionCost getInstructionCost(const Instruction *I, |
226 | enum TargetCostKind kind) const { |
227 | InstructionCost Cost; |
228 | switch (kind) { |
229 | case TCK_RecipThroughput: |
230 | Cost = getInstructionThroughput(I); |
231 | break; |
232 | case TCK_Latency: |
233 | Cost = getInstructionLatency(I); |
234 | break; |
235 | case TCK_CodeSize: |
236 | case TCK_SizeAndLatency: |
237 | Cost = getUserCost(I, kind); |
238 | break; |
239 | } |
240 | return Cost; |
241 | } |
242 | |
243 | /// Underlying constants for 'cost' values in this interface. |
244 | /// |
245 | /// Many APIs in this interface return a cost. This enum defines the |
246 | /// fundamental values that should be used to interpret (and produce) those |
247 | /// costs. The costs are returned as an int rather than a member of this |
248 | /// enumeration because it is expected that the cost of one IR instruction |
249 | /// may have a multiplicative factor to it or otherwise won't fit directly |
250 | /// into the enum. Moreover, it is common to sum or average costs which works |
251 | /// better as simple integral values. Thus this enum only provides constants. |
252 | /// Also note that the returned costs are signed integers to make it natural |
253 | /// to add, subtract, and test with zero (a common boundary condition). It is |
254 | /// not expected that 2^32 is a realistic cost to be modeling at any point. |
255 | /// |
256 | /// Note that these costs should usually reflect the intersection of code-size |
257 | /// cost and execution cost. A free instruction is typically one that folds |
258 | /// into another instruction. For example, reg-to-reg moves can often be |
259 | /// skipped by renaming the registers in the CPU, but they still are encoded |
260 | /// and thus wouldn't be considered 'free' here. |
261 | enum TargetCostConstants { |
262 | TCC_Free = 0, ///< Expected to fold away in lowering. |
263 | TCC_Basic = 1, ///< The cost of a typical 'add' instruction. |
264 | TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86. |
265 | }; |
266 | |
267 | /// Estimate the cost of a GEP operation when lowered. |
268 | InstructionCost |
269 | getGEPCost(Type *PointeeType, const Value *Ptr, |
270 | ArrayRef<const Value *> Operands, |
271 | TargetCostKind CostKind = TCK_SizeAndLatency) const; |
272 | |
273 | /// \returns A value by which our inlining threshold should be multiplied. |
274 | /// This is primarily used to bump up the inlining threshold wholesale on |
275 | /// targets where calls are unusually expensive. |
276 | /// |
277 | /// TODO: This is a rather blunt instrument. Perhaps altering the costs of |
278 | /// individual classes of instructions would be better. |
279 | unsigned getInliningThresholdMultiplier() const; |
280 | |
281 | /// \returns A value to be added to the inlining threshold. |
282 | unsigned adjustInliningThreshold(const CallBase *CB) const; |
283 | |
284 | /// \returns Vector bonus in percent. |
285 | /// |
286 | /// Vector bonuses: We want to more aggressively inline vector-dense kernels |
287 | /// and apply this bonus based on the percentage of vector instructions. A |
288 | /// bonus is applied if the vector instructions exceed 50% and half that |
  /// amount is applied if it exceeds 10%. Note that these bonuses are somewhat
290 | /// arbitrary and evolved over time by accident as much as because they are |
291 | /// principled bonuses. |
292 | /// FIXME: It would be nice to base the bonus values on something more |
  /// scientific. A target may have no bonus on vector instructions.
294 | int getInlinerVectorBonusPercent() const; |
295 | |
296 | /// \return the expected cost of a memcpy, which could e.g. depend on the |
297 | /// source/destination type and alignment and the number of bytes copied. |
298 | InstructionCost getMemcpyCost(const Instruction *I) const; |
299 | |
300 | /// \return The estimated number of case clusters when lowering \p 'SI'. |
301 | /// \p JTSize Set a jump table size only when \p SI is suitable for a jump |
302 | /// table. |
303 | unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, |
304 | unsigned &JTSize, |
305 | ProfileSummaryInfo *PSI, |
306 | BlockFrequencyInfo *BFI) const; |
307 | |
308 | /// Estimate the cost of a given IR user when lowered. |
309 | /// |
310 | /// This can estimate the cost of either a ConstantExpr or Instruction when |
311 | /// lowered. |
312 | /// |
  /// \p Operands is a list of operands which can be a result of transformations
  /// of the current operands. The number of operands on the list must equal
  /// the number of current operands the IR user has, and their order on the
  /// list must match the order of the current operands.
318 | /// |
319 | /// The returned cost is defined in terms of \c TargetCostConstants, see its |
320 | /// comments for a detailed explanation of the cost values. |
321 | InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands, |
322 | TargetCostKind CostKind) const; |
323 | |
324 | /// This is a helper function which calls the two-argument getUserCost |
325 | /// with \p Operands which are the current operands U has. |
326 | InstructionCost getUserCost(const User *U, TargetCostKind CostKind) const { |
327 | SmallVector<const Value *, 4> Operands(U->operand_values()); |
328 | return getUserCost(U, Operands, CostKind); |
329 | } |
330 | |
331 | /// If a branch or a select condition is skewed in one direction by more than |
332 | /// this factor, it is very likely to be predicted correctly. |
333 | BranchProbability getPredictableBranchThreshold() const; |
334 | |
335 | /// Return true if branch divergence exists. |
336 | /// |
337 | /// Branch divergence has a significantly negative impact on GPU performance |
338 | /// when threads in the same wavefront take different paths due to conditional |
339 | /// branches. |
340 | bool hasBranchDivergence() const; |
341 | |
342 | /// Return true if the target prefers to use GPU divergence analysis to |
343 | /// replace the legacy version. |
344 | bool useGPUDivergenceAnalysis() const; |
345 | |
346 | /// Returns whether V is a source of divergence. |
347 | /// |
348 | /// This function provides the target-dependent information for |
349 | /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis |
350 | /// first builds the dependency graph, and then runs the reachability |
351 | /// algorithm starting with the sources of divergence. |
352 | bool isSourceOfDivergence(const Value *V) const; |
353 | |
  /// Returns true for the target-specific set of operations that produce a
  /// uniform result even when given non-uniform arguments.
357 | bool isAlwaysUniform(const Value *V) const; |
358 | |
359 | /// Returns the address space ID for a target's 'flat' address space. Note |
360 | /// this is not necessarily the same as addrspace(0), which LLVM sometimes |
361 | /// refers to as the generic address space. The flat address space is a |
  /// generic address space that can be used to access multiple segments of
  /// memory with different address spaces. Access of a memory location through
  /// a pointer with this address space is expected to be legal but slower
365 | /// compared to the same memory location accessed through a pointer with a |
366 | /// different address space. |
  ///
368 | /// This is for targets with different pointer representations which can |
369 | /// be converted with the addrspacecast instruction. If a pointer is converted |
370 | /// to this address space, optimizations should attempt to replace the access |
371 | /// with the source address space. |
372 | /// |
373 | /// \returns ~0u if the target does not have such a flat address space to |
374 | /// optimize away. |
375 | unsigned getFlatAddressSpace() const; |
376 | |
377 | /// Return any intrinsic address operand indexes which may be rewritten if |
378 | /// they use a flat address space pointer. |
379 | /// |
380 | /// \returns true if the intrinsic was handled. |
381 | bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, |
382 | Intrinsic::ID IID) const; |
383 | |
384 | bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const; |
385 | |
386 | /// Return true if globals in this address space can have initializers other |
387 | /// than `undef`. |
388 | bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const; |
389 | |
390 | unsigned getAssumedAddrSpace(const Value *V) const; |
391 | |
392 | std::pair<const Value *, unsigned> |
393 | getPredicatedAddrSpace(const Value *V) const; |
394 | |
395 | /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p |
396 | /// NewV, which has a different address space. This should happen for every |
397 | /// operand index that collectFlatAddressOperands returned for the intrinsic. |
398 | /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the |
399 | /// new value (which may be the original \p II with modified operands). |
400 | Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, |
401 | Value *NewV) const; |
402 | |
403 | /// Test whether calls to a function lower to actual program function |
404 | /// calls. |
405 | /// |
406 | /// The idea is to test whether the program is likely to require a 'call' |
407 | /// instruction or equivalent in order to call the given function. |
408 | /// |
  /// FIXME: It's not clear that this is a good or useful query API. Clients
410 | /// should probably move to simpler cost metrics using the above. |
411 | /// Alternatively, we could split the cost interface into distinct code-size |
412 | /// and execution-speed costs. This would allow modelling the core of this |
413 | /// query more accurately as a call is a single small instruction, but |
414 | /// incurs significant execution cost. |
415 | bool isLoweredToCall(const Function *F) const; |
416 | |
417 | struct LSRCost { |
418 | /// TODO: Some of these could be merged. Also, a lexical ordering |
419 | /// isn't always optimal. |
420 | unsigned Insns; |
421 | unsigned NumRegs; |
422 | unsigned AddRecCost; |
423 | unsigned NumIVMuls; |
424 | unsigned NumBaseAdds; |
425 | unsigned ImmCost; |
426 | unsigned SetupCost; |
427 | unsigned ScaleCost; |
428 | }; |
429 | |
430 | /// Parameters that control the generic loop unrolling transformation. |
431 | struct UnrollingPreferences { |
432 | /// The cost threshold for the unrolled loop. Should be relative to the |
433 | /// getUserCost values returned by this API, and the expectation is that |
434 | /// the unrolled loop's instructions when run through that interface should |
435 | /// not exceed this cost. However, this is only an estimate. Also, specific |
436 | /// loops may be unrolled even with a cost above this threshold if deemed |
437 | /// profitable. Set this to UINT_MAX to disable the loop body cost |
438 | /// restriction. |
439 | unsigned Threshold; |
440 | /// If complete unrolling will reduce the cost of the loop, we will boost |
441 | /// the Threshold by a certain percent to allow more aggressive complete |
442 | /// unrolling. This value provides the maximum boost percentage that we |
443 | /// can apply to Threshold (The value should be no less than 100). |
444 | /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost, |
445 | /// MaxPercentThresholdBoost / 100) |
446 | /// E.g. if complete unrolling reduces the loop execution time by 50% |
447 | /// then we boost the threshold by the factor of 2x. If unrolling is not |
448 | /// expected to reduce the running time, then we do not increase the |
449 | /// threshold. |
450 | unsigned MaxPercentThresholdBoost; |
451 | /// The cost threshold for the unrolled loop when optimizing for size (set |
452 | /// to UINT_MAX to disable). |
453 | unsigned OptSizeThreshold; |
454 | /// The cost threshold for the unrolled loop, like Threshold, but used |
455 | /// for partial/runtime unrolling (set to UINT_MAX to disable). |
456 | unsigned PartialThreshold; |
457 | /// The cost threshold for the unrolled loop when optimizing for size, like |
458 | /// OptSizeThreshold, but used for partial/runtime unrolling (set to |
459 | /// UINT_MAX to disable). |
460 | unsigned PartialOptSizeThreshold; |
461 | /// A forced unrolling factor (the number of concatenated bodies of the |
462 | /// original loop in the unrolled loop body). When set to 0, the unrolling |
463 | /// transformation will select an unrolling factor based on the current cost |
464 | /// threshold and other factors. |
465 | unsigned Count; |
466 | /// Default unroll count for loops with run-time trip count. |
467 | unsigned DefaultUnrollRuntimeCount; |
468 | // Set the maximum unrolling factor. The unrolling factor may be selected |
469 | // using the appropriate cost threshold, but may not exceed this number |
470 | // (set to UINT_MAX to disable). This does not apply in cases where the |
471 | // loop is being fully unrolled. |
472 | unsigned MaxCount; |
473 | /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but |
474 | /// applies even if full unrolling is selected. This allows a target to fall |
475 | /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount. |
476 | unsigned FullUnrollMaxCount; |
477 | // Represents number of instructions optimized when "back edge" |
478 | // becomes "fall through" in unrolled loop. |
479 | // For now we count a conditional branch on a backedge and a comparison |
480 | // feeding it. |
481 | unsigned BEInsns; |
482 | /// Allow partial unrolling (unrolling of loops to expand the size of the |
483 | /// loop body, not only to eliminate small constant-trip-count loops). |
484 | bool Partial; |
485 | /// Allow runtime unrolling (unrolling of loops to expand the size of the |
486 | /// loop body even when the number of loop iterations is not known at |
487 | /// compile time). |
488 | bool Runtime; |
489 | /// Allow generation of a loop remainder (extra iterations after unroll). |
490 | bool AllowRemainder; |
491 | /// Allow emitting expensive instructions (such as divisions) when computing |
492 | /// the trip count of a loop for runtime unrolling. |
493 | bool AllowExpensiveTripCount; |
494 | /// Apply loop unroll on any kind of loop |
495 | /// (mainly to loops that fail runtime unrolling). |
496 | bool Force; |
497 | /// Allow using trip count upper bound to unroll loops. |
498 | bool UpperBound; |
499 | /// Allow unrolling of all the iterations of the runtime loop remainder. |
500 | bool UnrollRemainder; |
501 | /// Allow unroll and jam. Used to enable unroll and jam for the target. |
502 | bool UnrollAndJam; |
503 | /// Threshold for unroll and jam, for inner loop size. The 'Threshold' |
504 | /// value above is used during unroll and jam for the outer loop size. |
505 | /// This value is used in the same manner to limit the size of the inner |
506 | /// loop. |
507 | unsigned UnrollAndJamInnerLoopThreshold; |
508 | /// Don't allow loop unrolling to simulate more than this number of |
509 | /// iterations when checking full unroll profitability |
510 | unsigned MaxIterationsCountToAnalyze; |
511 | }; |
512 | |
513 | /// Get target-customized preferences for the generic loop unrolling |
514 | /// transformation. The caller will initialize UP with the current |
515 | /// target-independent defaults. |
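  ///
  /// A hedged sketch of a target override; the implementation class name and
  /// the chosen values are purely illustrative, not defaults of any target:
  /// \code
  ///   void MyTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
  ///                                           UnrollingPreferences &UP,
  ///                                           OptimizationRemarkEmitter *ORE) {
  ///     UP.Partial = UP.Runtime = true; // Allow partial and runtime unrolling.
  ///     UP.PartialThreshold = 60;       // Tighten the partial-unroll budget.
  ///   }
  /// \endcode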
  void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                               UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) const;
519 | |
520 | /// Query the target whether it would be profitable to convert the given loop |
521 | /// into a hardware loop. |
522 | bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, |
523 | AssumptionCache &AC, TargetLibraryInfo *LibInfo, |
524 | HardwareLoopInfo &HWLoopInfo) const; |
525 | |
  /// Query the target whether it would be preferred to create a predicated
527 | /// vector loop, which can avoid the need to emit a scalar epilogue loop. |
528 | bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, |
529 | AssumptionCache &AC, TargetLibraryInfo *TLI, |
530 | DominatorTree *DT, |
531 | const LoopAccessInfo *LAI) const; |
532 | |
533 | /// Query the target whether lowering of the llvm.get.active.lane.mask |
534 | /// intrinsic is supported. |
535 | bool emitGetActiveLaneMask() const; |
536 | |
537 | // Parameters that control the loop peeling transformation |
538 | struct PeelingPreferences { |
    /// A forced peeling factor (the number of bodies of the original loop
    /// that should be peeled off before the loop body). When set to 0, a
    /// peeling factor based on profile information and other factors will be
    /// used.
542 | unsigned PeelCount; |
543 | /// Allow peeling off loop iterations. |
544 | bool AllowPeeling; |
545 | /// Allow peeling off loop iterations for loop nests. |
546 | bool AllowLoopNestsPeeling; |
    /// Allow peeling based on profile. Used to enable peeling off all
    /// iterations based on the provided profile.
549 | /// If the value is true the peeling cost model can decide to peel only |
550 | /// some iterations and in this case it will set this to false. |
551 | bool PeelProfiledIterations; |
552 | }; |
553 | |
554 | /// Get target-customized preferences for the generic loop peeling |
555 | /// transformation. The caller will initialize \p PP with the current |
556 | /// target-independent defaults with information from \p L and \p SE. |
557 | void getPeelingPreferences(Loop *L, ScalarEvolution &SE, |
558 | PeelingPreferences &PP) const; |
559 | |
560 | /// Targets can implement their own combinations for target-specific |
561 | /// intrinsics. This function will be called from the InstCombine pass every |
562 | /// time a target-specific intrinsic is encountered. |
563 | /// |
  /// \returns None to not do anything target specific or a value that will be
  /// returned from the InstCombiner. It is possible to stop further processing
  /// of the intrinsic by returning nullptr.
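  ///
  /// A hedged sketch of a target override; the implementation class and the
  /// intrinsic ID checked are illustrative placeholders:
  /// \code
  ///   Optional<Instruction *>
  ///   MyTTIImpl::instCombineIntrinsic(InstCombiner &IC,
  ///                                   IntrinsicInst &II) const {
  ///     // Fold a hypothetical target intrinsic known to return its first
  ///     // argument unchanged.
  ///     if (II.getIntrinsicID() == MyTargetPassthroughIntrinsicID)
  ///       return IC.replaceInstUsesWith(II, II.getArgOperand(0));
  ///     return None; // Nothing target specific to do.
  ///   }
  /// \endcode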
567 | Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, |
568 | IntrinsicInst &II) const; |
569 | /// Can be used to implement target-specific instruction combining. |
570 | /// \see instCombineIntrinsic |
571 | Optional<Value *> |
572 | simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, |
573 | APInt DemandedMask, KnownBits &Known, |
574 | bool &KnownBitsComputed) const; |
575 | /// Can be used to implement target-specific instruction combining. |
576 | /// \see instCombineIntrinsic |
577 | Optional<Value *> simplifyDemandedVectorEltsIntrinsic( |
578 | InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, |
579 | APInt &UndefElts2, APInt &UndefElts3, |
580 | std::function<void(Instruction *, unsigned, APInt, APInt &)> |
581 | SimplifyAndSetOp) const; |
582 | /// @} |
583 | |
584 | /// \name Scalar Target Information |
585 | /// @{ |
586 | |
587 | /// Flags indicating the kind of support for population count. |
588 | /// |
589 | /// Compared to the SW implementation, HW support is supposed to |
590 | /// significantly boost the performance when the population is dense, and it |
  /// may or may not degrade performance if the population is sparse. HW
  /// support is considered "Fast" if it can outperform, or is on a par with,
  /// the SW implementation when the population is sparse; otherwise, it is
594 | /// considered as "Slow". |
595 | enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware }; |
596 | |
597 | /// Return true if the specified immediate is legal add immediate, that |
598 | /// is the target has add instructions which can add a register with the |
599 | /// immediate without having to materialize the immediate into a register. |
600 | bool isLegalAddImmediate(int64_t Imm) const; |
601 | |
602 | /// Return true if the specified immediate is legal icmp immediate, |
603 | /// that is the target has icmp instructions which can compare a register |
604 | /// against the immediate without having to materialize the immediate into a |
605 | /// register. |
606 | bool isLegalICmpImmediate(int64_t Imm) const; |
607 | |
608 | /// Return true if the addressing mode represented by AM is legal for |
609 | /// this target, for a load/store of the specified type. |
610 | /// The type may be VoidTy, in which case only return true if the addressing |
611 | /// mode is legal for a load/store of any legal type. |
612 | /// If target returns true in LSRWithInstrQueries(), I may be valid. |
613 | /// TODO: Handle pre/postinc as well. |
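  ///
  /// For example (illustrative; Ctx is an LLVMContext in scope), a pass could
  /// ask whether "base-register + 4 * index-register + 16" is legal for an
  /// i32 load in the default address space:
  /// \code
  ///   bool Legal = TTI.isLegalAddressingMode(Type::getInt32Ty(Ctx),
  ///                                          /*BaseGV=*/nullptr,
  ///                                          /*BaseOffset=*/16,
  ///                                          /*HasBaseReg=*/true,
  ///                                          /*Scale=*/4);
  /// \endcode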
614 | bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, |
615 | bool HasBaseReg, int64_t Scale, |
616 | unsigned AddrSpace = 0, |
617 | Instruction *I = nullptr) const; |
618 | |
  /// Return true if LSR cost of C1 is lower than C2.
620 | bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, |
621 | TargetTransformInfo::LSRCost &C2) const; |
622 | |
623 | /// Return true if LSR major cost is number of registers. Targets which |
624 | /// implement their own isLSRCostLess and unset number of registers as major |
625 | /// cost should return false, otherwise return true. |
626 | bool isNumRegsMajorCostOfLSR() const; |
627 | |
628 | /// \returns true if LSR should not optimize a chain that includes \p I. |
629 | bool isProfitableLSRChainElement(Instruction *I) const; |
630 | |
631 | /// Return true if the target can fuse a compare and branch. |
632 | /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost |
633 | /// calculation for the instructions in a loop. |
634 | bool canMacroFuseCmp() const; |
635 | |
636 | /// Return true if the target can save a compare for loop count, for example |
637 | /// hardware loop saves a compare. |
638 | bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, |
639 | DominatorTree *DT, AssumptionCache *AC, |
640 | TargetLibraryInfo *LibInfo) const; |
641 | |
642 | enum AddressingModeKind { |
643 | AMK_PreIndexed, |
644 | AMK_PostIndexed, |
645 | AMK_None |
646 | }; |
647 | |
648 | /// Return the preferred addressing mode LSR should make efforts to generate. |
649 | AddressingModeKind getPreferredAddressingMode(const Loop *L, |
650 | ScalarEvolution *SE) const; |
651 | |
652 | /// Return true if the target supports masked store. |
653 | bool isLegalMaskedStore(Type *DataType, Align Alignment) const; |
654 | /// Return true if the target supports masked load. |
655 | bool isLegalMaskedLoad(Type *DataType, Align Alignment) const; |
656 | |
657 | /// Return true if the target supports nontemporal store. |
658 | bool isLegalNTStore(Type *DataType, Align Alignment) const; |
659 | /// Return true if the target supports nontemporal load. |
660 | bool isLegalNTLoad(Type *DataType, Align Alignment) const; |
661 | |
662 | /// Return true if the target supports masked scatter. |
663 | bool isLegalMaskedScatter(Type *DataType, Align Alignment) const; |
664 | /// Return true if the target supports masked gather. |
665 | bool isLegalMaskedGather(Type *DataType, Align Alignment) const; |
666 | /// Return true if the target forces scalarizing of llvm.masked.gather |
667 | /// intrinsics. |
668 | bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const; |
669 | /// Return true if the target forces scalarizing of llvm.masked.scatter |
670 | /// intrinsics. |
671 | bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const; |
672 | |
673 | /// Return true if the target supports masked compress store. |
674 | bool isLegalMaskedCompressStore(Type *DataType) const; |
675 | /// Return true if the target supports masked expand load. |
676 | bool isLegalMaskedExpandLoad(Type *DataType) const; |
677 | |
678 | /// Return true if we should be enabling ordered reductions for the target. |
679 | bool enableOrderedReductions() const; |
680 | |
681 | /// Return true if the target has a unified operation to calculate division |
682 | /// and remainder. If so, the additional implicit multiplication and |
683 | /// subtraction required to calculate a remainder from division are free. This |
684 | /// can enable more aggressive transformations for division and remainder than |
685 | /// would typically be allowed using throughput or size cost models. |
686 | bool hasDivRemOp(Type *DataType, bool IsSigned) const; |
687 | |
688 | /// Return true if the given instruction (assumed to be a memory access |
689 | /// instruction) has a volatile variant. If that's the case then we can avoid |
690 | /// addrspacecast to generic AS for volatile loads/stores. Default |
691 | /// implementation returns false, which prevents address space inference for |
692 | /// volatile loads/stores. |
693 | bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const; |
694 | |
695 | /// Return true if target doesn't mind addresses in vectors. |
696 | bool prefersVectorizedAddressing() const; |
697 | |
698 | /// Return the cost of the scaling factor used in the addressing |
699 | /// mode represented by AM for this target, for a load/store |
700 | /// of the specified type. |
701 | /// If the AM is supported, the return value must be >= 0. |
702 | /// If the AM is not supported, it returns a negative value. |
703 | /// TODO: Handle pre/postinc as well. |
704 | InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, |
705 | int64_t BaseOffset, bool HasBaseReg, |
706 | int64_t Scale, |
707 | unsigned AddrSpace = 0) const; |
708 | |
709 | /// Return true if the loop strength reduce pass should make |
710 | /// Instruction* based TTI queries to isLegalAddressingMode(). This is |
711 | /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned |
712 | /// immediate offset and no index register. |
713 | bool LSRWithInstrQueries() const; |
714 | |
715 | /// Return true if it's free to truncate a value of type Ty1 to type |
  /// Ty2. e.g. On x86 it's free to truncate an i32 value in register EAX to i16
717 | /// by referencing its sub-register AX. |
718 | bool isTruncateFree(Type *Ty1, Type *Ty2) const; |
719 | |
  /// Return true if it is profitable to hoist an instruction from the
  /// then/else blocks to before the if.
722 | bool isProfitableToHoist(Instruction *I) const; |
723 | |
724 | bool useAA() const; |
725 | |
726 | /// Return true if this type is legal. |
727 | bool isTypeLegal(Type *Ty) const; |
728 | |
729 | /// Returns the estimated number of registers required to represent \p Ty. |
730 | InstructionCost getRegUsageForType(Type *Ty) const; |
731 | |
732 | /// Return true if switches should be turned into lookup tables for the |
733 | /// target. |
734 | bool shouldBuildLookupTables() const; |
735 | |
736 | /// Return true if switches should be turned into lookup tables |
737 | /// containing this constant value for the target. |
738 | bool shouldBuildLookupTablesForConstant(Constant *C) const; |
739 | |
740 | /// Return true if lookup tables should be turned into relative lookup tables. |
741 | bool shouldBuildRelLookupTables() const; |
742 | |
  /// Return true if the input function, which is cold at all call sites,
  /// should use the coldcc calling convention.
745 | bool useColdCCForColdCall(Function &F) const; |
746 | |
747 | /// Estimate the overhead of scalarizing an instruction. Insert and Extract |
748 | /// are set if the demanded result elements need to be inserted and/or |
749 | /// extracted from vectors. |
750 | InstructionCost getScalarizationOverhead(VectorType *Ty, |
751 | const APInt &DemandedElts, |
                                           bool Insert, bool Extract) const;
753 | |
  /// Estimate the overhead of scalarizing an instruction's unique
  /// non-constant operands. The (potentially vector) types to use for each
  /// argument are passed via Tys.
757 | InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, |
758 | ArrayRef<Type *> Tys) const; |
759 | |
760 | /// If target has efficient vector element load/store instructions, it can |
761 | /// return true here so that insertion/extraction costs are not added to |
762 | /// the scalarization cost of a load/store. |
763 | bool supportsEfficientVectorElementLoadStore() const; |
764 | |
765 | /// Don't restrict interleaved unrolling to small loops. |
766 | bool enableAggressiveInterleaving(bool LoopHasReductions) const; |
767 | |
768 | /// Returns options for expansion of memcmp. IsZeroCmp is |
  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
770 | struct MemCmpExpansionOptions { |
771 | // Return true if memcmp expansion is enabled. |
772 | operator bool() const { return MaxNumLoads > 0; } |
773 | |
774 | // Maximum number of load operations. |
775 | unsigned MaxNumLoads = 0; |
776 | |
777 | // The list of available load sizes (in bytes), sorted in decreasing order. |
778 | SmallVector<unsigned, 8> LoadSizes; |
779 | |
780 | // For memcmp expansion when the memcmp result is only compared equal or |
781 | // not-equal to 0, allow up to this number of load pairs per block. As an |
782 | // example, this may allow 'memcmp(a, b, 3) == 0' in a single block: |
783 | // a0 = load2bytes &a[0] |
784 | // b0 = load2bytes &b[0] |
785 | // a2 = load1byte &a[2] |
786 | // b2 = load1byte &b[2] |
787 | // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0 |
788 | unsigned NumLoadsPerBlock = 1; |
789 | |
790 | // Set to true to allow overlapping loads. For example, 7-byte compares can |
791 | // be done with two 4-byte compares instead of 4+2+1-byte compares. This |
792 | // requires all loads in LoadSizes to be doable in an unaligned way. |
793 | bool AllowOverlappingLoads = false; |
794 | }; |
795 | MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, |
796 | bool IsZeroCmp) const; |
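
  // Illustrative sketch only of what a target override of
  // enableMemCmpExpansion might return when expansion is desired; the values
  // are examples, not defaults taken from any in-tree target.
  //
  //   MemCmpExpansionOptions Options;
  //   Options.MaxNumLoads = 4;
  //   Options.LoadSizes = {8, 4, 2, 1}; // Prefer the widest legal loads.
  //   Options.AllowOverlappingLoads = true;
  //   return Options;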
797 | |
798 | /// Enable matching of interleaved access groups. |
799 | bool enableInterleavedAccessVectorization() const; |
800 | |
801 | /// Enable matching of interleaved access groups that contain predicated |
802 | /// accesses or gaps and therefore vectorized using masked |
803 | /// vector loads/stores. |
804 | bool enableMaskedInterleavedAccessVectorization() const; |
805 | |
806 | /// Indicate that it is potentially unsafe to automatically vectorize |
807 | /// floating-point operations because the semantics of vector and scalar |
808 | /// floating-point semantics may differ. For example, ARM NEON v7 SIMD math |
809 | /// does not support IEEE-754 denormal numbers, while depending on the |
810 | /// platform, scalar floating-point math does. |
811 | /// This applies to floating-point math operations and calls, not memory |
812 | /// operations, shuffles, or casts. |
813 | bool isFPVectorizationPotentiallyUnsafe() const; |
814 | |
815 | /// Determine if the target supports unaligned memory accesses. |
816 | bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, |
817 | unsigned AddressSpace = 0, |
818 | Align Alignment = Align(1), |
819 | bool *Fast = nullptr) const; |
820 | |
821 | /// Return hardware support for population count. |
822 | PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; |
823 | |
824 | /// Return true if the hardware has a fast square-root instruction. |
825 | bool haveFastSqrt(Type *Ty) const; |
826 | |
827 | /// Return true if it is faster to check if a floating-point value is NaN |
828 | /// (or not-NaN) versus a comparison against a constant FP zero value. |
829 | /// Targets should override this if materializing a 0.0 for comparison is |
830 | /// generally as cheap as checking for ordered/unordered. |
831 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const; |
832 | |
833 | /// Return the expected cost of supporting the floating point operation |
834 | /// of the specified type. |
835 | InstructionCost getFPOpCost(Type *Ty) const; |
836 | |
837 | /// Return the expected cost of materializing for the given integer |
838 | /// immediate of the specified type. |
839 | InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, |
840 | TargetCostKind CostKind) const; |
841 | |
842 | /// Return the expected cost of materialization for the given integer |
843 | /// immediate of the specified type for a given instruction. The cost can be |
844 | /// zero if the immediate can be folded into the specified instruction. |
845 | InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, |
846 | const APInt &Imm, Type *Ty, |
847 | TargetCostKind CostKind, |
848 | Instruction *Inst = nullptr) const; |
849 | InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, |
850 | const APInt &Imm, Type *Ty, |
851 | TargetCostKind CostKind) const; |
852 | |
853 | /// Return the expected cost for the given integer when optimising |
854 | /// for size. This is different than the other integer immediate cost |
855 | /// functions in that it is subtarget agnostic. This is useful when you e.g. |
856 | /// target one ISA such as Aarch32 but smaller encodings could be possible |
857 | /// with another such as Thumb. This return value is used as a penalty when |
858 | /// the total costs for a constant is calculated (the bigger the cost, the |
859 | /// more beneficial constant hoisting is). |
860 | InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, |
861 | const APInt &Imm, Type *Ty) const; |
862 | /// @} |
863 | |
864 | /// \name Vector Target Information |
865 | /// @{ |
866 | |
867 | /// The various kinds of shuffle patterns for vector queries. |
868 | enum ShuffleKind { |
869 | SK_Broadcast, ///< Broadcast element 0 to all other elements. |
870 | SK_Reverse, ///< Reverse the order of the vector. |
871 | SK_Select, ///< Selects elements from the corresponding lane of |
872 | ///< either source operand. This is equivalent to a |
873 | ///< vector select with a constant condition operand. |
874 | SK_Transpose, ///< Transpose two vectors. |
875 | SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset. |
    SK_ExtractSubvector,  ///< ExtractSubvector Index indicates start offset.
877 | SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one |
878 | ///< with any shuffle mask. |
879 | SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any |
880 | ///< shuffle mask. |
881 | SK_Splice ///< Concatenates elements from the first input vector |
882 | ///< with elements of the second input vector. Returning |
883 | ///< a vector of the same type as the input vectors. |
884 | }; |
885 | |
886 | /// Additional information about an operand's possible values. |
887 | enum OperandValueKind { |
888 | OK_AnyValue, // Operand can have any value. |
889 | OK_UniformValue, // Operand is uniform (splat of a value). |
890 | OK_UniformConstantValue, // Operand is uniform constant. |
891 | OK_NonUniformConstantValue // Operand is a non uniform constant value. |
892 | }; |
893 | |
894 | /// Additional properties of an operand's values. |
895 | enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 }; |
896 | |
897 | /// \return the number of registers in the target-provided register class. |
898 | unsigned getNumberOfRegisters(unsigned ClassID) const; |
899 | |
900 | /// \return the target-provided register class ID for the provided type, |
901 | /// accounting for type promotion and other type-legalization techniques that |
902 | /// the target might apply. However, it specifically does not account for the |
903 | /// scalarization or splitting of vector types. Should a vector type require |
904 | /// scalarization or splitting into multiple underlying vector registers, that |
905 | /// type should be mapped to a register class containing no registers. |
906 | /// Specifically, this is designed to provide a simple, high-level view of the |
907 | /// register allocation later performed by the backend. These register classes |
908 | /// don't necessarily map onto the register classes used by the backend. |
909 | /// FIXME: It's not currently possible to determine how many registers |
910 | /// are used by the provided type. |
911 | unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const; |
912 | |
913 | /// \return the target-provided register class name |
914 | const char *getRegisterClassName(unsigned ClassID) const; |
915 | |
916 | enum RegisterKind { RGK_Scalar, RGK_FixedWidthVector, RGK_ScalableVector }; |
917 | |
918 | /// \return The width of the largest scalar or vector register type. |
919 | TypeSize getRegisterBitWidth(RegisterKind K) const; |
920 | |
921 | /// \return The width of the smallest vector register type. |
922 | unsigned getMinVectorRegisterBitWidth() const; |
923 | |
924 | /// \return The maximum value of vscale if the target specifies an |
925 | /// architectural maximum vector length, and None otherwise. |
926 | Optional<unsigned> getMaxVScale() const; |
927 | |
928 | /// \return the value of vscale to tune the cost model for. |
929 | Optional<unsigned> getVScaleForTuning() const; |
930 | |
931 | /// \return True if the vectorization factor should be chosen to |
932 | /// make the vector of the smallest element type match the size of a |
933 | /// vector register. For wider element types, this could result in |
934 | /// creating vectors that span multiple vector registers. |
935 | /// If false, the vectorization factor will be chosen based on the |
936 | /// size of the widest element type. |
937 | bool shouldMaximizeVectorBandwidth() const; |
938 | |
939 | /// \return The minimum vectorization factor for types of given element |
940 | /// bit width, or 0 if there is no minimum VF. The returned value only |
941 | /// applies when shouldMaximizeVectorBandwidth returns true. |
942 | /// If IsScalable is true, the returned ElementCount must be a scalable VF. |
943 | ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const; |
944 | |
945 | /// \return The maximum vectorization factor for types of given element |
946 | /// bit width and opcode, or 0 if there is no maximum VF. |
947 | /// Currently only used by the SLP vectorizer. |
948 | unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const; |
949 | |
950 | /// \return True if it should be considered for address type promotion. |
951 | /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is |
952 | /// profitable without finding other extensions fed by the same input. |
953 | bool shouldConsiderAddressTypePromotion( |
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
955 | |
956 | /// \return The size of a cache line in bytes. |
957 | unsigned getCacheLineSize() const; |
958 | |
959 | /// The possible cache levels |
960 | enum class CacheLevel { |
961 | L1D, // The L1 data cache |
962 | L2D, // The L2 data cache |
963 | |
964 | // We currently do not model L3 caches, as their sizes differ widely between |
965 | // microarchitectures. Also, we currently do not have a use for L3 cache |
966 | // size modeling yet. |
967 | }; |
968 | |
969 | /// \return The size of the cache level in bytes, if available. |
970 | Optional<unsigned> getCacheSize(CacheLevel Level) const; |
971 | |
972 | /// \return The associativity of the cache level, if available. |
973 | Optional<unsigned> getCacheAssociativity(CacheLevel Level) const; |
974 | |
975 | /// \return How much before a load we should place the prefetch |
976 | /// instruction. This is currently measured in number of |
977 | /// instructions. |
978 | unsigned getPrefetchDistance() const; |
979 | |
980 | /// Some HW prefetchers can handle accesses up to a certain constant stride. |
981 | /// Sometimes prefetching is beneficial even below the HW prefetcher limit, |
982 | /// and the arguments provided are meant to serve as a basis for deciding this |
983 | /// for a particular loop. |
984 | /// |
985 | /// \param NumMemAccesses Number of memory accesses in the loop. |
986 | /// \param NumStridedMemAccesses Number of the memory accesses that |
987 | /// ScalarEvolution could find a known stride |
988 | /// for. |
989 | /// \param NumPrefetches Number of software prefetches that will be |
990 | /// emitted as determined by the addresses |
991 | /// involved and the cache line size. |
992 | /// \param HasCall True if the loop contains a call. |
993 | /// |
994 | /// \return This is the minimum stride in bytes where it makes sense to start |
995 | /// adding SW prefetches. The default is 1, i.e. prefetch with any |
996 | /// stride. |
997 | unsigned getMinPrefetchStride(unsigned NumMemAccesses, |
998 | unsigned NumStridedMemAccesses, |
999 | unsigned NumPrefetches, bool HasCall) const; |
1000 | |
1001 | /// \return The maximum number of iterations to prefetch ahead. If |
1002 | /// the required number of iterations is more than this number, no |
1003 | /// prefetching is performed. |
1004 | unsigned getMaxPrefetchIterationsAhead() const; |
1005 | |
1006 | /// \return True if prefetching should also be done for writes. |
1007 | bool enableWritePrefetching() const; |
1008 | |
1009 | /// \return The maximum interleave factor that any transform should try to |
1010 | /// perform for this target. This number depends on the level of parallelism |
1011 | /// and the number of execution units in the CPU. |
1012 | unsigned getMaxInterleaveFactor(unsigned VF) const; |
1013 | |
1014 | /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2. |
1015 | static OperandValueKind getOperandInfo(const Value *V, |
1016 | OperandValueProperties &OpProps); |
1017 | |
1018 | /// This is an approximation of reciprocal throughput of a math/logic op. |
1019 | /// A higher cost indicates less expected throughput. |
1020 | /// From Agner Fog's guides, reciprocal throughput is "the average number of |
1021 | /// clock cycles per instruction when the instructions are not part of a |
1022 | /// limiting dependency chain." |
1023 | /// Therefore, costs should be scaled to account for multiple execution units |
1024 | /// on the target that can process this type of instruction. For example, if |
1025 | /// there are 5 scalar integer units and 2 vector integer units that can |
1026 | /// calculate an 'add' in a single cycle, this model should indicate that the |
1027 | /// cost of the vector add instruction is 2.5 times the cost of the scalar |
1028 | /// add instruction. |
1029 | /// \p Args is an optional argument which holds the instruction operands |
1030 | /// values so the TTI can analyze those values searching for special |
1031 | /// cases or optimizations based on those values. |
1032 | /// \p CxtI is the optional original context instruction, if one exists, to |
1033 | /// provide even more information. |
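  ///
  /// For example (illustrative; Ctx is an LLVMContext in scope), the cost of
  /// a <4 x i32> add whose second operand is a uniform power-of-two constant
  /// could be queried as:
  /// \code
  ///   InstructionCost C = TTI.getArithmeticInstrCost(
  ///       Instruction::Add, FixedVectorType::get(Type::getInt32Ty(Ctx), 4),
  ///       TargetTransformInfo::TCK_RecipThroughput,
  ///       TargetTransformInfo::OK_AnyValue,
  ///       TargetTransformInfo::OK_UniformConstantValue,
  ///       TargetTransformInfo::OP_None, TargetTransformInfo::OP_PowerOf2);
  /// \endcode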
1034 | InstructionCost getArithmeticInstrCost( |
1035 | unsigned Opcode, Type *Ty, |
1036 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
1037 | OperandValueKind Opd1Info = OK_AnyValue, |
1038 | OperandValueKind Opd2Info = OK_AnyValue, |
1039 | OperandValueProperties Opd1PropInfo = OP_None, |
1040 | OperandValueProperties Opd2PropInfo = OP_None, |
1041 | ArrayRef<const Value *> Args = ArrayRef<const Value *>(), |
1042 | const Instruction *CxtI = nullptr) const; |
1043 | |
1044 | /// \return The cost of a shuffle instruction of kind Kind and of type Tp. |
1045 | /// The exact mask may be passed as Mask, or else the array will be empty. |
1046 | /// The index and subtype parameters are used by the subvector insertion and |
1047 | /// extraction shuffle kinds to show the insert/extract point and the type of |
1048 | /// the subvector being inserted/extracted. |
1049 | /// NOTE: For subvector extractions Tp represents the source type. |
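  ///
  /// For example (illustrative; Ctx is an LLVMContext in scope), the cost of
  /// broadcasting lane 0 of a <4 x float> vector could be queried as:
  /// \code
  ///   InstructionCost C = TTI.getShuffleCost(
  ///       TargetTransformInfo::SK_Broadcast,
  ///       FixedVectorType::get(Type::getFloatTy(Ctx), 4));
  /// \endcode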
1050 | InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, |
1051 | ArrayRef<int> Mask = None, int Index = 0, |
1052 | VectorType *SubTp = nullptr) const; |
1053 | |
1054 | /// Represents a hint about the context in which a cast is used. |
1055 | /// |
1056 | /// For zext/sext, the context of the cast is the operand, which must be a |
  /// load of some kind. For trunc, the context of the cast is the single
1058 | /// user of the instruction, which must be a store of some kind. |
1059 | /// |
1060 | /// This enum allows the vectorizer to give getCastInstrCost an idea of the |
1061 | /// type of cast it's dealing with, as not every cast is equal. For instance, |
1062 | /// the zext of a load may be free, but the zext of an interleaving load can |
  /// be (very) expensive!
1064 | /// |
1065 | /// See \c getCastContextHint to compute a CastContextHint from a cast |
1066 | /// Instruction*. Callers can use it if they don't need to override the |
1067 | /// context and just want it to be calculated from the instruction. |
1068 | /// |
1069 | /// FIXME: This handles the types of load/store that the vectorizer can |
1070 | /// produce, which are the cases where the context instruction is most |
1071 | /// likely to be incorrect. There are other situations where that can happen |
1072 | /// too, which might be handled here but in the long run a more general |
  /// solution of costing multiple instructions at the same time may be better.
1074 | enum class CastContextHint : uint8_t { |
1075 | None, ///< The cast is not used with a load/store of any kind. |
1076 | Normal, ///< The cast is used with a normal load/store. |
1077 | Masked, ///< The cast is used with a masked load/store. |
1078 | GatherScatter, ///< The cast is used with a gather/scatter. |
1079 | Interleave, ///< The cast is used with an interleaved load/store. |
1080 | Reversed, ///< The cast is used with a reversed load/store. |
1081 | }; |
1082 | |
1083 | /// Calculates a CastContextHint from \p I. |
1084 | /// This should be used by callers of getCastInstrCost if they wish to |
1085 | /// determine the context from some instruction. |
1086 | /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr, |
1087 | /// or if it's another type of cast. |
1088 | static CastContextHint getCastContextHint(const Instruction *I); |
1089 | |
1090 | /// \return The expected cost of cast instructions, such as bitcast, trunc, |
1091 | /// zext, etc. If there is an existing instruction that holds Opcode, it |
1092 | /// may be passed in the 'I' parameter. |
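  ///
  /// For example (illustrative), given an existing zero-extend instruction
  /// *Ext from SrcTy to DstTy, its cost in the context it is actually used in
  /// could be queried as:
  /// \code
  ///   InstructionCost C = TTI.getCastInstrCost(
  ///       Instruction::ZExt, DstTy, SrcTy,
  ///       TargetTransformInfo::getCastContextHint(Ext),
  ///       TargetTransformInfo::TCK_RecipThroughput, Ext);
  /// \endcode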
1093 | InstructionCost |
1094 | getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, |
1095 | TTI::CastContextHint CCH, |
1096 | TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, |
1097 | const Instruction *I = nullptr) const; |
1098 | |
1099 | /// \return The expected cost of a sign- or zero-extended vector extract. Use |
1100 | /// -1 to indicate that there is no information about the index value. |
  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy,
                                           unsigned Index = -1) const;
1104 | |
1105 | /// \return The expected cost of control-flow related instructions such as |
1106 | /// Phi, Ret, Br, Switch. |
1107 | InstructionCost |
1108 | getCFInstrCost(unsigned Opcode, |
1109 | TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, |
1110 | const Instruction *I = nullptr) const; |
1111 | |
1112 | /// \returns The expected cost of compare and select instructions. If there |
1113 | /// is an existing instruction that holds Opcode, it may be passed in the |
1114 | /// 'I' parameter. The \p VecPred parameter can be used to indicate the select |
1115 | /// is using a compare with the specified predicate as condition. When vector |
1116 | /// types are passed, \p VecPred must be used for all lanes. |
1117 | InstructionCost |
1118 | getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, |
1119 | CmpInst::Predicate VecPred, |
1120 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
1121 | const Instruction *I = nullptr) const; |
1122 | |
1123 | /// \return The expected cost of vector Insert and Extract. |
1124 | /// Use -1 to indicate that there is no information on the index value. |
1125 | InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, |
1126 | unsigned Index = -1) const; |
1127 | |
  /// \return The cost of a replication shuffle that repeats each of the \p VF
  /// elements of type \p EltTy \p ReplicationFactor times.
1130 | /// |
1131 | /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is: |
1132 | /// <0,0,0,1,1,1,2,2,2,3,3,3> |
1133 | InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, |
1134 | int VF, |
1135 | const APInt &DemandedDstElts, |
1136 | TTI::TargetCostKind CostKind); |
1137 | |
1138 | /// \return The cost of Load and Store instructions. |
1139 | InstructionCost |
1140 | getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, |
1141 | unsigned AddressSpace, |
1142 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
1143 | const Instruction *I = nullptr) const; |
1144 | |
1145 | /// \return The cost of VP Load and Store instructions. |
1146 | InstructionCost |
1147 | getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, |
1148 | unsigned AddressSpace, |
1149 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
1150 | const Instruction *I = nullptr) const; |
1151 | |
1152 | /// \return The cost of masked Load and Store instructions. |
1153 | InstructionCost getMaskedMemoryOpCost( |
1154 | unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, |
1155 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; |
1156 | |
  /// \return The cost of a Gather or Scatter operation.
  /// \p Opcode - the kind of memory access, Load or Store
  /// \p DataTy - a vector type of the data to be loaded or stored
  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
  /// \p VariableMask - true when the memory access is predicated with a mask
  ///                   that is not a compile-time constant
  /// \p Alignment - alignment of a single element
1164 | /// \p I - the optional original context instruction, if one exists, e.g. the |
1165 | /// load/store to transform or the call to the gather/scatter intrinsic |
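  ///
  /// For example, costing a gather of a vector value through a vector of
  /// pointers with a non-constant mask might look like (an illustrative
  /// sketch; \c TTI, \c DataTy and \c PtrVal are assumed to exist in the
  /// caller):
  /// \code
  ///   InstructionCost Cost = TTI.getGatherScatterOpCost(
  ///       Instruction::Load, DataTy, PtrVal, /*VariableMask=*/true, Align(4));
  /// \endcode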
1166 | InstructionCost getGatherScatterOpCost( |
1167 | unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, |
1168 | Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
1169 | const Instruction *I = nullptr) const; |
1170 | |
1171 | /// \return The cost of the interleaved memory operation. |
1172 | /// \p Opcode is the memory operation code |
1173 | /// \p VecTy is the vector type of the interleaved access. |
1174 | /// \p Factor is the interleave factor |
1175 | /// \p Indices is the indices for interleaved load members (as interleaved |
1176 | /// load allows gaps) |
1177 | /// \p Alignment is the alignment of the memory operation |
  /// \p AddressSpace is the address space of the pointer.
1179 | /// \p UseMaskForCond indicates if the memory access is predicated. |
1180 | /// \p UseMaskForGaps indicates if gaps should be masked. |
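  ///
  /// For example, costing an interleaved load with factor 2 where both members
  /// of the group are used might look like (an illustrative sketch; \c TTI and
  /// \c WideVecTy are assumed to exist in the caller):
  /// \code
  ///   unsigned Indices[] = {0, 1};
  ///   InstructionCost Cost = TTI.getInterleavedMemoryOpCost(
  ///       Instruction::Load, WideVecTy, /*Factor=*/2, Indices, Align(4),
  ///       /*AddressSpace=*/0);
  /// \endcode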
1181 | InstructionCost getInterleavedMemoryOpCost( |
1182 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, |
1183 | Align Alignment, unsigned AddressSpace, |
1184 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
1185 | bool UseMaskForCond = false, bool UseMaskForGaps = false) const; |
1186 | |
  /// A helper function to determine whether a reduction with the given set of
  /// FastMathFlags \p FMF must be performed as an ordered reduction.
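  ///
  /// For example, an FP add reduction whose flags do not allow reassociation
  /// must be evaluated in lane order:
  /// \code
  ///   requiresOrderedReduction(FastMathFlags()); // true ('reassoc' not set)
  ///   requiresOrderedReduction(None);            // false (e.g. integer add)
  /// \endcode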
1189 | static bool requiresOrderedReduction(Optional<FastMathFlags> FMF) { |
1190 | return FMF != None && !(*FMF).allowReassoc(); |
1191 | } |
1192 | |
1193 | /// Calculate the cost of vector reduction intrinsics. |
1194 | /// |
1195 | /// This is the cost of reducing the vector value of type \p Ty to a scalar |
1196 | /// value using the operation denoted by \p Opcode. The FastMathFlags |
1197 | /// parameter \p FMF indicates what type of reduction we are performing: |
1198 | /// 1. Tree-wise. This is the typical 'fast' reduction performed that |
1199 | /// involves successively splitting a vector into half and doing the |
1200 | /// operation on the pair of halves until you have a scalar value. For |
1201 | /// example: |
1202 | /// (v0, v1, v2, v3) |
1203 | /// ((v0+v2), (v1+v3), undef, undef) |
1204 | /// ((v0+v2+v1+v3), undef, undef, undef) |
1205 | /// This is the default behaviour for integer operations, whereas for |
1206 | /// floating point we only do this if \p FMF indicates that |
1207 | /// reassociation is allowed. |
1208 | /// 2. Ordered. For a vector with N elements this involves performing N |
1209 | /// operations in lane order, starting with an initial scalar value, i.e. |
1210 | /// result = InitVal + v0 |
1211 | /// result = result + v1 |
1212 | /// result = result + v2 |
1213 | /// result = result + v3 |
1214 | /// This is only the case for FP operations and when reassociation is not |
1215 | /// allowed. |
1216 | /// |
1217 | InstructionCost getArithmeticReductionCost( |
1218 | unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF, |
1219 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; |
1220 | |
  /// \returns The cost of a vector min/max reduction of type \p Ty, where
  /// \p CondTy is the type of the compare condition and \p IsUnsigned selects
  /// an unsigned rather than a signed integer min/max for integer elements.
  InstructionCost getMinMaxReductionCost(
      VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1224 | |
1225 | /// Calculate the cost of an extended reduction pattern, similar to |
1226 | /// getArithmeticReductionCost of an Add reduction with an extension and |
  /// optional multiply. This is the cost of:
  /// ResTy vecreduce.add(ext(Ty A)), or if IsMLA flag is set then:
  /// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B))). The reduction happens
1230 | /// on a VectorType with ResTy elements and Ty lanes. |
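  ///
  /// For example, the cost of summing the products of two sign-extended
  /// vectors of i16 into an i32 result could be queried as follows (an
  /// illustrative sketch; \c TTI and the LLVMContext \c Ctx are assumed to
  /// exist in the caller):
  /// \code
  ///   auto *VecTy = FixedVectorType::get(Type::getInt16Ty(Ctx), 8);
  ///   InstructionCost Cost = TTI.getExtendedAddReductionCost(
  ///       /*IsMLA=*/true, /*IsUnsigned=*/false,
  ///       /*ResTy=*/Type::getInt32Ty(Ctx), VecTy);
  /// \endcode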
1231 | InstructionCost getExtendedAddReductionCost( |
1232 | bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, |
1233 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; |
1234 | |
1235 | /// \returns The cost of Intrinsic instructions. Analyses the real arguments. |
1236 | /// Three cases are handled: 1. scalar instruction 2. vector instruction |
1237 | /// 3. scalar instruction which is to be vectorized. |
1238 | InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, |
1239 | TTI::TargetCostKind CostKind) const; |
1240 | |
1241 | /// \returns The cost of Call instructions. |
1242 | InstructionCost getCallInstrCost( |
1243 | Function *F, Type *RetTy, ArrayRef<Type *> Tys, |
1244 | TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const; |
1245 | |
1246 | /// \returns The number of pieces into which the provided type must be |
1247 | /// split during legalization. Zero is returned when the answer is unknown. |
1248 | unsigned getNumberOfParts(Type *Tp) const; |
1249 | |
1250 | /// \returns The cost of the address computation. For most targets this can be |
1251 | /// merged into the instruction indexing mode. Some targets might want to |
1252 | /// distinguish between address computation for memory operations on vector |
1253 | /// types and scalar types. Such targets should override this function. |
  /// The 'SE' parameter holds a pointer to the scalar evolution object, which
  /// is used to obtain the step value of the pointer in the case of a constant
  /// stride. The 'Ptr' parameter holds the SCEV of the access pointer.
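  ///
  /// For example (an illustrative sketch; \c TTI, \c AccessTy, \c SE and
  /// \c PtrSCEV are assumed to exist in the caller):
  /// \code
  ///   InstructionCost AddrCost =
  ///       TTI.getAddressComputationCost(AccessTy, &SE, PtrSCEV);
  /// \endcode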
1257 | InstructionCost getAddressComputationCost(Type *Ty, |
1258 | ScalarEvolution *SE = nullptr, |
1259 | const SCEV *Ptr = nullptr) const; |
1260 | |
1261 | /// \returns The cost, if any, of keeping values of the given types alive |
1262 | /// over a callsite. |
1263 | /// |
1264 | /// Some types may require the use of register classes that do not have |
1265 | /// any callee-saved registers, so would require a spill and fill. |
1266 | InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const; |
1267 | |
1268 | /// \returns True if the intrinsic is a supported memory intrinsic. Info |
  /// will contain additional information - whether the intrinsic may read
  /// or write memory, whether it is volatile, and the pointer it operates
  /// on. Info is undefined if false is returned.
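  ///
  /// For example (an illustrative sketch; \c TTI and the IntrinsicInst pointer
  /// \c II are assumed to exist in the caller):
  /// \code
  ///   MemIntrinsicInfo Info;
  ///   if (TTI.getTgtMemIntrinsic(II, Info) && Info.isUnordered()) {
  ///     // The intrinsic behaves like an unordered load/store of Info.PtrVal.
  ///   }
  /// \endcode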
1272 | bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const; |
1273 | |
1274 | /// \returns The maximum element size, in bytes, for an element |
1275 | /// unordered-atomic memory intrinsic. |
1276 | unsigned getAtomicMemIntrinsicMaxElementSize() const; |
1277 | |
1278 | /// \returns A value which is the result of the given memory intrinsic. New |
1279 | /// instructions may be created to extract the result from the given intrinsic |
1280 | /// memory operation. Returns nullptr if the target cannot create a result |
1281 | /// from the given intrinsic. |
1282 | Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, |
1283 | Type *ExpectedType) const; |
1284 | |
1285 | /// \returns The type to use in a loop expansion of a memcpy call. |
1286 | Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, |
1287 | unsigned SrcAddrSpace, unsigned DestAddrSpace, |
1288 | unsigned SrcAlign, unsigned DestAlign) const; |
1289 | |
1290 | /// \param[out] OpsOut The operand types to copy RemainingBytes of memory. |
1291 | /// \param RemainingBytes The number of bytes to copy. |
1292 | /// |
1293 | /// Calculates the operand types to use when copying \p RemainingBytes of |
1294 | /// memory, where source and destination alignments are \p SrcAlign and |
1295 | /// \p DestAlign respectively. |
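  ///
  /// For example, a memcpy expansion might query the residual operand types
  /// left over after the main copy loop (an illustrative sketch; \c TTI and
  /// \c Ctx are assumed to exist and the numeric values are placeholders):
  /// \code
  ///   SmallVector<Type *, 4> ResidualTys;
  ///   TTI.getMemcpyLoopResidualLoweringType(ResidualTys, Ctx,
  ///                                         /*RemainingBytes=*/7,
  ///                                         /*SrcAddrSpace=*/0,
  ///                                         /*DestAddrSpace=*/0,
  ///                                         /*SrcAlign=*/4, /*DestAlign=*/4);
  /// \endcode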
1296 | void getMemcpyLoopResidualLoweringType( |
1297 | SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context, |
1298 | unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, |
1299 | unsigned SrcAlign, unsigned DestAlign) const; |
1300 | |
1301 | /// \returns True if the two functions have compatible attributes for inlining |
1302 | /// purposes. |
1303 | bool areInlineCompatible(const Function *Caller, |
1304 | const Function *Callee) const; |
1305 | |
1306 | /// \returns True if the caller and callee agree on how \p Types will be |
  /// passed to or returned from the callee.
1309 | /// \param Types List of types to check. |
1310 | bool areTypesABICompatible(const Function *Caller, const Function *Callee, |
1311 | const ArrayRef<Type *> &Types) const; |
1312 | |
1313 | /// The type of load/store indexing. |
1314 | enum MemIndexedMode { |
1315 | MIM_Unindexed, ///< No indexing. |
1316 | MIM_PreInc, ///< Pre-incrementing. |
1317 | MIM_PreDec, ///< Pre-decrementing. |
1318 | MIM_PostInc, ///< Post-incrementing. |
1319 | MIM_PostDec ///< Post-decrementing. |
1320 | }; |
1321 | |
1322 | /// \returns True if the specified indexed load for the given type is legal. |
1323 | bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const; |
1324 | |
1325 | /// \returns True if the specified indexed store for the given type is legal. |
1326 | bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const; |
1327 | |
1328 | /// \returns The bitwidth of the largest vector type that should be used to |
1329 | /// load/store in the given address space. |
1330 | unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const; |
1331 | |
1332 | /// \returns True if the load instruction is legal to vectorize. |
1333 | bool isLegalToVectorizeLoad(LoadInst *LI) const; |
1334 | |
1335 | /// \returns True if the store instruction is legal to vectorize. |
1336 | bool isLegalToVectorizeStore(StoreInst *SI) const; |
1337 | |
1338 | /// \returns True if it is legal to vectorize the given load chain. |
1339 | bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, |
1340 | unsigned AddrSpace) const; |
1341 | |
1342 | /// \returns True if it is legal to vectorize the given store chain. |
1343 | bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, |
1344 | unsigned AddrSpace) const; |
1345 | |
1346 | /// \returns True if it is legal to vectorize the given reduction kind. |
1347 | bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, |
1348 | ElementCount VF) const; |
1349 | |
1350 | /// \returns True if the given type is supported for scalable vectors |
1351 | bool isElementTypeLegalForScalableVector(Type *Ty) const; |
1352 | |
  /// \returns The new vector factor value if the target doesn't support \p
  /// ChainSizeInBytes loads or has a better vector factor.
1355 | unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, |
1356 | unsigned ChainSizeInBytes, |
1357 | VectorType *VecTy) const; |
1358 | |
  /// \returns The new vector factor value if the target doesn't support \p
  /// ChainSizeInBytes stores or has a better vector factor.
1361 | unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, |
1362 | unsigned ChainSizeInBytes, |
1363 | VectorType *VecTy) const; |
1364 | |
1365 | /// Flags describing the kind of vector reduction. |
1366 | struct ReductionFlags { |
1367 | ReductionFlags() = default; |
    bool IsMaxOp =
        false; ///< If the op is a min/max kind, true if it's a max operation.
1370 | bool IsSigned = false; ///< Whether the operation is a signed int reduction. |
1371 | bool NoNaN = |
1372 | false; ///< If op is an fp min/max, whether NaNs may be present. |
1373 | }; |
1374 | |
  /// \returns True if the target prefers in-loop reductions for the given
  /// \p Opcode and \p Ty.
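  ///
  /// For example (an illustrative sketch; \c TTI and \c Ty are assumed to
  /// exist in the caller):
  /// \code
  ///   TTI::ReductionFlags Flags;
  ///   Flags.IsSigned = true;
  ///   bool InLoop = TTI.preferInLoopReduction(Instruction::Add, Ty, Flags);
  /// \endcode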
1376 | bool preferInLoopReduction(unsigned Opcode, Type *Ty, |
1377 | ReductionFlags Flags) const; |
1378 | |
  /// \returns True if the target prefers the reduction select to be kept in
  /// the loop when tail folding, i.e.
1381 | /// loop: |
1382 | /// p = phi (0, s) |
1383 | /// a = add (p, x) |
1384 | /// s = select (mask, a, p) |
1385 | /// vecreduce.add(s) |
1386 | /// |
1387 | /// As opposed to the normal scheme of p = phi (0, a) which allows the select |
1388 | /// to be pulled out of the loop. If the select(.., add, ..) can be predicated |
1389 | /// by the target, this can lead to cleaner code generation. |
1390 | bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, |
1391 | ReductionFlags Flags) const; |
1392 | |
1393 | /// \returns True if the target wants to expand the given reduction intrinsic |
1394 | /// into a shuffle sequence. |
1395 | bool shouldExpandReduction(const IntrinsicInst *II) const; |
1396 | |
1397 | /// \returns the size cost of rematerializing a GlobalValue address relative |
1398 | /// to a stack reload. |
1399 | unsigned getGISelRematGlobalCost() const; |
1400 | |
1401 | /// \returns True if the target supports scalable vectors. |
1402 | bool supportsScalableVectors() const; |
1403 | |
1404 | /// \return true when scalable vectorization is preferred. |
1405 | bool enableScalableVectorization() const; |
1406 | |
1407 | /// \name Vector Predication Information |
1408 | /// @{ |
  /// Whether the target supports the %evl parameter of VP intrinsics
  /// efficiently in hardware, for the given opcode and type/alignment (see
  /// LLVM Language Reference - "Vector Predication Intrinsics").
1412 | /// Use of %evl is discouraged when that is not the case. |
1413 | bool hasActiveVectorLength(unsigned Opcode, Type *DataType, |
1414 | Align Alignment) const; |
1415 | |
1416 | struct VPLegalization { |
1417 | enum VPTransform { |
1418 | // keep the predicating parameter |
1419 | Legal = 0, |
1420 | // where legal, discard the predicate parameter |
1421 | Discard = 1, |
1422 | // transform into something else that is also predicating |
1423 | Convert = 2 |
1424 | }; |
1425 | |
1426 | // How to transform the EVL parameter. |
1427 | // Legal: keep the EVL parameter as it is. |
1428 | // Discard: Ignore the EVL parameter where it is safe to do so. |
1429 | // Convert: Fold the EVL into the mask parameter. |
1430 | VPTransform EVLParamStrategy; |
1431 | |
1432 | // How to transform the operator. |
1433 | // Legal: The target supports this operator. |
1434 | // Convert: Convert this to a non-VP operation. |
1435 | // The 'Discard' strategy is invalid. |
1436 | VPTransform OpStrategy; |
1437 | |
1438 | bool shouldDoNothing() const { |
1439 | return (EVLParamStrategy == Legal) && (OpStrategy == Legal); |
1440 | } |
1441 | VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy) |
1442 | : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {} |
1443 | }; |
1444 | |
1445 | /// \returns How the target needs this vector-predicated operation to be |
1446 | /// transformed. |
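  ///
  /// For example, a VP expansion pass could consult the returned strategy to
  /// decide whether a vector-predicated intrinsic needs rewriting (an
  /// illustrative sketch; \c TTI and \c VPI are assumed to exist in the
  /// caller):
  /// \code
  ///   VPLegalization VPL = TTI.getVPLegalizationStrategy(VPI);
  ///   if (VPL.shouldDoNothing())
  ///     return; // Both the EVL parameter and the operation are legal as-is.
  /// \endcode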
1447 | VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const; |
1448 | /// @} |
1449 | |
1450 | /// @} |
1451 | |
1452 | private: |
  /// Estimate the latency of the specified instruction.
1454 | /// Returns 1 as the default value. |
1455 | InstructionCost getInstructionLatency(const Instruction *I) const; |
1456 | |
1457 | /// Returns the expected throughput cost of the instruction. |
1458 | /// Returns -1 if the cost is unknown. |
1459 | InstructionCost getInstructionThroughput(const Instruction *I) const; |
1460 | |
1461 | /// The abstract base class used to type erase specific TTI |
1462 | /// implementations. |
1463 | class Concept; |
1464 | |
1465 | /// The template model for the base class which wraps a concrete |
1466 | /// implementation in a type erased interface. |
1467 | template <typename T> class Model; |
1468 | |
1469 | std::unique_ptr<Concept> TTIImpl; |
1470 | }; |
1471 | |
1472 | class TargetTransformInfo::Concept { |
1473 | public: |
1474 | virtual ~Concept() = 0; |
1475 | virtual const DataLayout &getDataLayout() const = 0; |
1476 | virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, |
1477 | ArrayRef<const Value *> Operands, |
1478 | TTI::TargetCostKind CostKind) = 0; |
1479 | virtual unsigned getInliningThresholdMultiplier() = 0; |
1480 | virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0; |
1481 | virtual int getInlinerVectorBonusPercent() = 0; |
1482 | virtual InstructionCost getMemcpyCost(const Instruction *I) = 0; |
1483 | virtual unsigned |
1484 | getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, |
1485 | ProfileSummaryInfo *PSI, |
1486 | BlockFrequencyInfo *BFI) = 0; |
1487 | virtual InstructionCost getUserCost(const User *U, |
1488 | ArrayRef<const Value *> Operands, |
1489 | TargetCostKind CostKind) = 0; |
1490 | virtual BranchProbability getPredictableBranchThreshold() = 0; |
1491 | virtual bool hasBranchDivergence() = 0; |
1492 | virtual bool useGPUDivergenceAnalysis() = 0; |
1493 | virtual bool isSourceOfDivergence(const Value *V) = 0; |
1494 | virtual bool isAlwaysUniform(const Value *V) = 0; |
1495 | virtual unsigned getFlatAddressSpace() = 0; |
1496 | virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, |
1497 | Intrinsic::ID IID) const = 0; |
1498 | virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0; |
1499 | virtual bool |
1500 | canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0; |
1501 | virtual unsigned getAssumedAddrSpace(const Value *V) const = 0; |
1502 | virtual std::pair<const Value *, unsigned> |
1503 | getPredicatedAddrSpace(const Value *V) const = 0; |
1504 | virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, |
1505 | Value *OldV, |
1506 | Value *NewV) const = 0; |
1507 | virtual bool isLoweredToCall(const Function *F) = 0; |
  virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                       UnrollingPreferences &UP,
                                       OptimizationRemarkEmitter *ORE) = 0;
1511 | virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE, |
1512 | PeelingPreferences &PP) = 0; |
1513 | virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, |
1514 | AssumptionCache &AC, |
1515 | TargetLibraryInfo *LibInfo, |
1516 | HardwareLoopInfo &HWLoopInfo) = 0; |
1517 | virtual bool |
1518 | preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, |
1519 | AssumptionCache &AC, TargetLibraryInfo *TLI, |
1520 | DominatorTree *DT, const LoopAccessInfo *LAI) = 0; |
1521 | virtual bool emitGetActiveLaneMask() = 0; |
1522 | virtual Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, |
1523 | IntrinsicInst &II) = 0; |
1524 | virtual Optional<Value *> |
1525 | simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, |
1526 | APInt DemandedMask, KnownBits &Known, |
1527 | bool &KnownBitsComputed) = 0; |
1528 | virtual Optional<Value *> simplifyDemandedVectorEltsIntrinsic( |
1529 | InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, |
1530 | APInt &UndefElts2, APInt &UndefElts3, |
1531 | std::function<void(Instruction *, unsigned, APInt, APInt &)> |
1532 | SimplifyAndSetOp) = 0; |
1533 | virtual bool isLegalAddImmediate(int64_t Imm) = 0; |
1534 | virtual bool isLegalICmpImmediate(int64_t Imm) = 0; |
1535 | virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, |
1536 | int64_t BaseOffset, bool HasBaseReg, |
1537 | int64_t Scale, unsigned AddrSpace, |
1538 | Instruction *I) = 0; |
1539 | virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, |
1540 | TargetTransformInfo::LSRCost &C2) = 0; |
1541 | virtual bool isNumRegsMajorCostOfLSR() = 0; |
1542 | virtual bool isProfitableLSRChainElement(Instruction *I) = 0; |
1543 | virtual bool canMacroFuseCmp() = 0; |
1544 | virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, |
1545 | LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, |
1546 | TargetLibraryInfo *LibInfo) = 0; |
1547 | virtual AddressingModeKind |
1548 | getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const = 0; |
1549 | virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0; |
1550 | virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0; |
1551 | virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0; |
1552 | virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0; |
1553 | virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0; |
1554 | virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0; |
1555 | virtual bool forceScalarizeMaskedGather(VectorType *DataType, |
1556 | Align Alignment) = 0; |
1557 | virtual bool forceScalarizeMaskedScatter(VectorType *DataType, |
1558 | Align Alignment) = 0; |
1559 | virtual bool isLegalMaskedCompressStore(Type *DataType) = 0; |
1560 | virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0; |
1561 | virtual bool enableOrderedReductions() = 0; |
1562 | virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0; |
1563 | virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0; |
1564 | virtual bool prefersVectorizedAddressing() = 0; |
1565 | virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, |
1566 | int64_t BaseOffset, |
1567 | bool HasBaseReg, int64_t Scale, |
1568 | unsigned AddrSpace) = 0; |
1569 | virtual bool LSRWithInstrQueries() = 0; |
1570 | virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0; |
1571 | virtual bool isProfitableToHoist(Instruction *I) = 0; |
1572 | virtual bool useAA() = 0; |
1573 | virtual bool isTypeLegal(Type *Ty) = 0; |
1574 | virtual InstructionCost getRegUsageForType(Type *Ty) = 0; |
1575 | virtual bool shouldBuildLookupTables() = 0; |
1576 | virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0; |
1577 | virtual bool shouldBuildRelLookupTables() = 0; |
1578 | virtual bool useColdCCForColdCall(Function &F) = 0; |
  virtual InstructionCost getScalarizationOverhead(VectorType *Ty,
                                                   const APInt &DemandedElts,
                                                   bool Insert,
                                                   bool Extract) = 0;
1583 | virtual InstructionCost |
1584 | getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, |
1585 | ArrayRef<Type *> Tys) = 0; |
1586 | virtual bool supportsEfficientVectorElementLoadStore() = 0; |
1587 | virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0; |
1588 | virtual MemCmpExpansionOptions |
1589 | enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0; |
1590 | virtual bool enableInterleavedAccessVectorization() = 0; |
1591 | virtual bool enableMaskedInterleavedAccessVectorization() = 0; |
1592 | virtual bool isFPVectorizationPotentiallyUnsafe() = 0; |
1593 | virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, |
1594 | unsigned BitWidth, |
1595 | unsigned AddressSpace, |
1596 | Align Alignment, |
1597 | bool *Fast) = 0; |
1598 | virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0; |
1599 | virtual bool haveFastSqrt(Type *Ty) = 0; |
1600 | virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0; |
1601 | virtual InstructionCost getFPOpCost(Type *Ty) = 0; |
1602 | virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, |
1603 | const APInt &Imm, Type *Ty) = 0; |
1604 | virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, |
1605 | TargetCostKind CostKind) = 0; |
1606 | virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, |
1607 | const APInt &Imm, Type *Ty, |
1608 | TargetCostKind CostKind, |
1609 | Instruction *Inst = nullptr) = 0; |
1610 | virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, |
1611 | const APInt &Imm, Type *Ty, |
1612 | TargetCostKind CostKind) = 0; |
1613 | virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0; |
1614 | virtual unsigned getRegisterClassForType(bool Vector, |
1615 | Type *Ty = nullptr) const = 0; |
1616 | virtual const char *getRegisterClassName(unsigned ClassID) const = 0; |
1617 | virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0; |
1618 | virtual unsigned getMinVectorRegisterBitWidth() const = 0; |
1619 | virtual Optional<unsigned> getMaxVScale() const = 0; |
1620 | virtual Optional<unsigned> getVScaleForTuning() const = 0; |
1621 | virtual bool shouldMaximizeVectorBandwidth() const = 0; |
1622 | virtual ElementCount getMinimumVF(unsigned ElemWidth, |
1623 | bool IsScalable) const = 0; |
1624 | virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0; |
  virtual bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1627 | virtual unsigned getCacheLineSize() const = 0; |
1628 | virtual Optional<unsigned> getCacheSize(CacheLevel Level) const = 0; |
1629 | virtual Optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0; |
1630 | |
1631 | /// \return How much before a load we should place the prefetch |
1632 | /// instruction. This is currently measured in number of |
1633 | /// instructions. |
1634 | virtual unsigned getPrefetchDistance() const = 0; |
1635 | |
1636 | /// \return Some HW prefetchers can handle accesses up to a certain |
1637 | /// constant stride. This is the minimum stride in bytes where it |
1638 | /// makes sense to start adding SW prefetches. The default is 1, |
1639 | /// i.e. prefetch with any stride. Sometimes prefetching is beneficial |
1640 | /// even below the HW prefetcher limit, and the arguments provided are |
1641 | /// meant to serve as a basis for deciding this for a particular loop. |
1642 | virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, |
1643 | unsigned NumStridedMemAccesses, |
1644 | unsigned NumPrefetches, |
1645 | bool HasCall) const = 0; |
1646 | |
1647 | /// \return The maximum number of iterations to prefetch ahead. If |
1648 | /// the required number of iterations is more than this number, no |
1649 | /// prefetching is performed. |
1650 | virtual unsigned getMaxPrefetchIterationsAhead() const = 0; |
1651 | |
1652 | /// \return True if prefetching should also be done for writes. |
1653 | virtual bool enableWritePrefetching() const = 0; |
1654 | |
1655 | virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0; |
1656 | virtual InstructionCost getArithmeticInstrCost( |
1657 | unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, |
1658 | OperandValueKind Opd1Info, OperandValueKind Opd2Info, |
1659 | OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo, |
1660 | ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0; |
1661 | virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, |
1662 | ArrayRef<int> Mask, int Index, |
1663 | VectorType *SubTp) = 0; |
1664 | virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, |
1665 | Type *Src, CastContextHint CCH, |
1666 | TTI::TargetCostKind CostKind, |
1667 | const Instruction *I) = 0; |
  virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                                   VectorType *VecTy,
                                                   unsigned Index) = 0;
1671 | virtual InstructionCost getCFInstrCost(unsigned Opcode, |
1672 | TTI::TargetCostKind CostKind, |
1673 | const Instruction *I = nullptr) = 0; |
1674 | virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, |
1675 | Type *CondTy, |
1676 | CmpInst::Predicate VecPred, |
1677 | TTI::TargetCostKind CostKind, |
1678 | const Instruction *I) = 0; |
1679 | virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, |
1680 | unsigned Index) = 0; |
1681 | |
1682 | virtual InstructionCost |
1683 | getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, |
1684 | const APInt &DemandedDstElts, |
1685 | TTI::TargetCostKind CostKind) = 0; |
1686 | |
1687 | virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, |
1688 | Align Alignment, |
1689 | unsigned AddressSpace, |
1690 | TTI::TargetCostKind CostKind, |
1691 | const Instruction *I) = 0; |
1692 | virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, |
1693 | Align Alignment, |
1694 | unsigned AddressSpace, |
1695 | TTI::TargetCostKind CostKind, |
1696 | const Instruction *I) = 0; |
1697 | virtual InstructionCost |
1698 | getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, |
1699 | unsigned AddressSpace, |
1700 | TTI::TargetCostKind CostKind) = 0; |
1701 | virtual InstructionCost |
1702 | getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, |
1703 | bool VariableMask, Align Alignment, |
1704 | TTI::TargetCostKind CostKind, |
1705 | const Instruction *I = nullptr) = 0; |
1706 | |
1707 | virtual InstructionCost getInterleavedMemoryOpCost( |
1708 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, |
1709 | Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, |
1710 | bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0; |
1711 | virtual InstructionCost |
1712 | getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, |
1713 | Optional<FastMathFlags> FMF, |
1714 | TTI::TargetCostKind CostKind) = 0; |
1715 | virtual InstructionCost |
1716 | getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, |
1717 | TTI::TargetCostKind CostKind) = 0; |
1718 | virtual InstructionCost getExtendedAddReductionCost( |
1719 | bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, |
1720 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0; |
1721 | virtual InstructionCost |
1722 | getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, |
1723 | TTI::TargetCostKind CostKind) = 0; |
1724 | virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy, |
1725 | ArrayRef<Type *> Tys, |
1726 | TTI::TargetCostKind CostKind) = 0; |
1727 | virtual unsigned getNumberOfParts(Type *Tp) = 0; |
1728 | virtual InstructionCost |
1729 | getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr) = 0; |
1730 | virtual InstructionCost |
1731 | getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0; |
1732 | virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, |
1733 | MemIntrinsicInfo &Info) = 0; |
1734 | virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0; |
1735 | virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, |
1736 | Type *ExpectedType) = 0; |
1737 | virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, |
1738 | unsigned SrcAddrSpace, |
1739 | unsigned DestAddrSpace, |
1740 | unsigned SrcAlign, |
1741 | unsigned DestAlign) const = 0; |
1742 | virtual void getMemcpyLoopResidualLoweringType( |
1743 | SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context, |
1744 | unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, |
1745 | unsigned SrcAlign, unsigned DestAlign) const = 0; |
1746 | virtual bool areInlineCompatible(const Function *Caller, |
1747 | const Function *Callee) const = 0; |
1748 | virtual bool areTypesABICompatible(const Function *Caller, |
1749 | const Function *Callee, |
1750 | const ArrayRef<Type *> &Types) const = 0; |
1751 | virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0; |
1752 | virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0; |
1753 | virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0; |
1754 | virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0; |
1755 | virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0; |
1756 | virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, |
1757 | Align Alignment, |
1758 | unsigned AddrSpace) const = 0; |
1759 | virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, |
1760 | Align Alignment, |
1761 | unsigned AddrSpace) const = 0; |
1762 | virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, |
1763 | ElementCount VF) const = 0; |
1764 | virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0; |
1765 | virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, |
1766 | unsigned ChainSizeInBytes, |
1767 | VectorType *VecTy) const = 0; |
1768 | virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, |
1769 | unsigned ChainSizeInBytes, |
1770 | VectorType *VecTy) const = 0; |
1771 | virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty, |
1772 | ReductionFlags) const = 0; |
1773 | virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, |
1774 | ReductionFlags) const = 0; |
1775 | virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0; |
1776 | virtual unsigned getGISelRematGlobalCost() const = 0; |
1777 | virtual bool enableScalableVectorization() const = 0; |
1778 | virtual bool supportsScalableVectors() const = 0; |
1779 | virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType, |
1780 | Align Alignment) const = 0; |
1781 | virtual InstructionCost getInstructionLatency(const Instruction *I) = 0; |
1782 | virtual VPLegalization |
1783 | getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0; |
1784 | }; |
1785 | |
1786 | template <typename T> |
1787 | class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { |
1788 | T Impl; |
1789 | |
1790 | public: |
1791 | Model(T Impl) : Impl(std::move(Impl)) {} |
1792 | ~Model() override = default; |
1793 | |
1794 | const DataLayout &getDataLayout() const override { |
1795 | return Impl.getDataLayout(); |
1796 | } |
1797 | |
1798 | InstructionCost |
1799 | getGEPCost(Type *PointeeType, const Value *Ptr, |
1800 | ArrayRef<const Value *> Operands, |
1801 | TargetTransformInfo::TargetCostKind CostKind) override { |
1802 | return Impl.getGEPCost(PointeeType, Ptr, Operands, CostKind); |
1803 | } |
1804 | unsigned getInliningThresholdMultiplier() override { |
1805 | return Impl.getInliningThresholdMultiplier(); |
1806 | } |
1807 | unsigned adjustInliningThreshold(const CallBase *CB) override { |
1808 | return Impl.adjustInliningThreshold(CB); |
1809 | } |
1810 | int getInlinerVectorBonusPercent() override { |
1811 | return Impl.getInlinerVectorBonusPercent(); |
1812 | } |
1813 | InstructionCost getMemcpyCost(const Instruction *I) override { |
1814 | return Impl.getMemcpyCost(I); |
1815 | } |
1816 | InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands, |
1817 | TargetCostKind CostKind) override { |
1818 | return Impl.getUserCost(U, Operands, CostKind); |
1819 | } |
1820 | BranchProbability getPredictableBranchThreshold() override { |
1821 | return Impl.getPredictableBranchThreshold(); |
1822 | } |
1823 | bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); } |
1824 | bool useGPUDivergenceAnalysis() override { |
1825 | return Impl.useGPUDivergenceAnalysis(); |
1826 | } |
1827 | bool isSourceOfDivergence(const Value *V) override { |
1828 | return Impl.isSourceOfDivergence(V); |
1829 | } |
1830 | |
1831 | bool isAlwaysUniform(const Value *V) override { |
1832 | return Impl.isAlwaysUniform(V); |
1833 | } |
1834 | |
1835 | unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); } |
1836 | |
1837 | bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, |
1838 | Intrinsic::ID IID) const override { |
1839 | return Impl.collectFlatAddressOperands(OpIndexes, IID); |
1840 | } |
1841 | |
1842 | bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override { |
1843 | return Impl.isNoopAddrSpaceCast(FromAS, ToAS); |
1844 | } |
1845 | |
1846 | bool |
1847 | canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override { |
1848 | return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS); |
1849 | } |
1850 | |
1851 | unsigned getAssumedAddrSpace(const Value *V) const override { |
1852 | return Impl.getAssumedAddrSpace(V); |
1853 | } |
1854 | |
1855 | std::pair<const Value *, unsigned> |
1856 | getPredicatedAddrSpace(const Value *V) const override { |
1857 | return Impl.getPredicatedAddrSpace(V); |
1858 | } |
1859 | |
1860 | Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, |
1861 | Value *NewV) const override { |
1862 | return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV); |
1863 | } |
1864 | |
1865 | bool isLoweredToCall(const Function *F) override { |
1866 | return Impl.isLoweredToCall(F); |
1867 | } |
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) override {
1871 | return Impl.getUnrollingPreferences(L, SE, UP, ORE); |
1872 | } |
1873 | void getPeelingPreferences(Loop *L, ScalarEvolution &SE, |
1874 | PeelingPreferences &PP) override { |
1875 | return Impl.getPeelingPreferences(L, SE, PP); |
1876 | } |
1877 | bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, |
1878 | AssumptionCache &AC, TargetLibraryInfo *LibInfo, |
1879 | HardwareLoopInfo &HWLoopInfo) override { |
1880 | return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo); |
1881 | } |
1882 | bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, |
1883 | AssumptionCache &AC, TargetLibraryInfo *TLI, |
1884 | DominatorTree *DT, |
1885 | const LoopAccessInfo *LAI) override { |
1886 | return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI); |
1887 | } |
1888 | bool emitGetActiveLaneMask() override { |
1889 | return Impl.emitGetActiveLaneMask(); |
1890 | } |
1891 | Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, |
1892 | IntrinsicInst &II) override { |
1893 | return Impl.instCombineIntrinsic(IC, II); |
1894 | } |
1895 | Optional<Value *> |
1896 | simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, |
1897 | APInt DemandedMask, KnownBits &Known, |
1898 | bool &KnownBitsComputed) override { |
1899 | return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known, |
1900 | KnownBitsComputed); |
1901 | } |
1902 | Optional<Value *> simplifyDemandedVectorEltsIntrinsic( |
1903 | InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, |
1904 | APInt &UndefElts2, APInt &UndefElts3, |
1905 | std::function<void(Instruction *, unsigned, APInt, APInt &)> |
1906 | SimplifyAndSetOp) override { |
1907 | return Impl.simplifyDemandedVectorEltsIntrinsic( |
1908 | IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3, |
1909 | SimplifyAndSetOp); |
1910 | } |
1911 | bool isLegalAddImmediate(int64_t Imm) override { |
1912 | return Impl.isLegalAddImmediate(Imm); |
1913 | } |
1914 | bool isLegalICmpImmediate(int64_t Imm) override { |
1915 | return Impl.isLegalICmpImmediate(Imm); |
1916 | } |
1917 | bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, |
1918 | bool HasBaseReg, int64_t Scale, unsigned AddrSpace, |
1919 | Instruction *I) override { |
1920 | return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, |
1921 | AddrSpace, I); |
1922 | } |
1923 | bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, |
1924 | TargetTransformInfo::LSRCost &C2) override { |
1925 | return Impl.isLSRCostLess(C1, C2); |
1926 | } |
1927 | bool isNumRegsMajorCostOfLSR() override { |
1928 | return Impl.isNumRegsMajorCostOfLSR(); |
1929 | } |
1930 | bool isProfitableLSRChainElement(Instruction *I) override { |
1931 | return Impl.isProfitableLSRChainElement(I); |
1932 | } |
1933 | bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); } |
1934 | bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, |
1935 | DominatorTree *DT, AssumptionCache *AC, |
1936 | TargetLibraryInfo *LibInfo) override { |
1937 | return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo); |
1938 | } |
1939 | AddressingModeKind |
1940 | getPreferredAddressingMode(const Loop *L, |
1941 | ScalarEvolution *SE) const override { |
1942 | return Impl.getPreferredAddressingMode(L, SE); |
1943 | } |
1944 | bool isLegalMaskedStore(Type *DataType, Align Alignment) override { |
1945 | return Impl.isLegalMaskedStore(DataType, Alignment); |
1946 | } |
1947 | bool isLegalMaskedLoad(Type *DataType, Align Alignment) override { |
1948 | return Impl.isLegalMaskedLoad(DataType, Alignment); |
1949 | } |
1950 | bool isLegalNTStore(Type *DataType, Align Alignment) override { |
1951 | return Impl.isLegalNTStore(DataType, Alignment); |
1952 | } |
1953 | bool isLegalNTLoad(Type *DataType, Align Alignment) override { |
1954 | return Impl.isLegalNTLoad(DataType, Alignment); |
1955 | } |
1956 | bool isLegalMaskedScatter(Type *DataType, Align Alignment) override { |
1957 | return Impl.isLegalMaskedScatter(DataType, Alignment); |
1958 | } |
1959 | bool isLegalMaskedGather(Type *DataType, Align Alignment) override { |
1960 | return Impl.isLegalMaskedGather(DataType, Alignment); |
1961 | } |
1962 | bool forceScalarizeMaskedGather(VectorType *DataType, |
1963 | Align Alignment) override { |
1964 | return Impl.forceScalarizeMaskedGather(DataType, Alignment); |
1965 | } |
1966 | bool forceScalarizeMaskedScatter(VectorType *DataType, |
1967 | Align Alignment) override { |
1968 | return Impl.forceScalarizeMaskedScatter(DataType, Alignment); |
1969 | } |
1970 | bool isLegalMaskedCompressStore(Type *DataType) override { |
1971 | return Impl.isLegalMaskedCompressStore(DataType); |
1972 | } |
1973 | bool isLegalMaskedExpandLoad(Type *DataType) override { |
1974 | return Impl.isLegalMaskedExpandLoad(DataType); |
1975 | } |
1976 | bool enableOrderedReductions() override { |
1977 | return Impl.enableOrderedReductions(); |
1978 | } |
1979 | bool hasDivRemOp(Type *DataType, bool IsSigned) override { |
1980 | return Impl.hasDivRemOp(DataType, IsSigned); |
1981 | } |
1982 | bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override { |
1983 | return Impl.hasVolatileVariant(I, AddrSpace); |
1984 | } |
1985 | bool prefersVectorizedAddressing() override { |
1986 | return Impl.prefersVectorizedAddressing(); |
1987 | } |
1988 | InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, |
1989 | int64_t BaseOffset, bool HasBaseReg, |
1990 | int64_t Scale, |
1991 | unsigned AddrSpace) override { |
1992 | return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, |
1993 | AddrSpace); |
1994 | } |
1995 | bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); } |
1996 | bool isTruncateFree(Type *Ty1, Type *Ty2) override { |
1997 | return Impl.isTruncateFree(Ty1, Ty2); |
1998 | } |
1999 | bool isProfitableToHoist(Instruction *I) override { |
2000 | return Impl.isProfitableToHoist(I); |
2001 | } |
2002 | bool useAA() override { return Impl.useAA(); } |
2003 | bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); } |
2004 | InstructionCost getRegUsageForType(Type *Ty) override { |
2005 | return Impl.getRegUsageForType(Ty); |
2006 | } |
2007 | bool shouldBuildLookupTables() override { |
2008 | return Impl.shouldBuildLookupTables(); |
2009 | } |
2010 | bool shouldBuildLookupTablesForConstant(Constant *C) override { |
2011 | return Impl.shouldBuildLookupTablesForConstant(C); |
2012 | } |
2013 | bool shouldBuildRelLookupTables() override { |
2014 | return Impl.shouldBuildRelLookupTables(); |
2015 | } |
2016 | bool useColdCCForColdCall(Function &F) override { |
2017 | return Impl.useColdCCForColdCall(F); |
2018 | } |
2019 | |
2020 | InstructionCost getScalarizationOverhead(VectorType *Ty, |
2021 | const APInt &DemandedElts, |
                                           bool Insert, bool Extract) override {
2023 | return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract); |
2024 | } |
2025 | InstructionCost |
2026 | getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, |
2027 | ArrayRef<Type *> Tys) override { |
2028 | return Impl.getOperandsScalarizationOverhead(Args, Tys); |
2029 | } |
2030 | |
2031 | bool supportsEfficientVectorElementLoadStore() override { |
2032 | return Impl.supportsEfficientVectorElementLoadStore(); |
2033 | } |
2034 | |
2035 | bool enableAggressiveInterleaving(bool LoopHasReductions) override { |
2036 | return Impl.enableAggressiveInterleaving(LoopHasReductions); |
2037 | } |
2038 | MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, |
2039 | bool IsZeroCmp) const override { |
2040 | return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp); |
2041 | } |
2042 | bool enableInterleavedAccessVectorization() override { |
2043 | return Impl.enableInterleavedAccessVectorization(); |
2044 | } |
2045 | bool enableMaskedInterleavedAccessVectorization() override { |
2046 | return Impl.enableMaskedInterleavedAccessVectorization(); |
2047 | } |
2048 | bool isFPVectorizationPotentiallyUnsafe() override { |
2049 | return Impl.isFPVectorizationPotentiallyUnsafe(); |
2050 | } |
2051 | bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, |
2052 | unsigned AddressSpace, Align Alignment, |
2053 | bool *Fast) override { |
2054 | return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace, |
2055 | Alignment, Fast); |
2056 | } |
2057 | PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override { |
2058 | return Impl.getPopcntSupport(IntTyWidthInBit); |
2059 | } |
2060 | bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); } |
2061 | |
2062 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override { |
2063 | return Impl.isFCmpOrdCheaperThanFCmpZero(Ty); |
2064 | } |
2065 | |
2066 | InstructionCost getFPOpCost(Type *Ty) override { |
2067 | return Impl.getFPOpCost(Ty); |
2068 | } |
2069 | |
2070 | InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, |
2071 | const APInt &Imm, Type *Ty) override { |
2072 | return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty); |
2073 | } |
2074 | InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, |
2075 | TargetCostKind CostKind) override { |
2076 | return Impl.getIntImmCost(Imm, Ty, CostKind); |
2077 | } |
2078 | InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, |
2079 | const APInt &Imm, Type *Ty, |
2080 | TargetCostKind CostKind, |
2081 | Instruction *Inst = nullptr) override { |
2082 | return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst); |
2083 | } |
2084 | InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, |
2085 | const APInt &Imm, Type *Ty, |
2086 | TargetCostKind CostKind) override { |
2087 | return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind); |
2088 | } |
2089 | unsigned getNumberOfRegisters(unsigned ClassID) const override { |
2090 | return Impl.getNumberOfRegisters(ClassID); |
2091 | } |
2092 | unsigned getRegisterClassForType(bool Vector, |
2093 | Type *Ty = nullptr) const override { |
2094 | return Impl.getRegisterClassForType(Vector, Ty); |
2095 | } |
2096 | const char *getRegisterClassName(unsigned ClassID) const override { |
2097 | return Impl.getRegisterClassName(ClassID); |
2098 | } |
2099 | TypeSize getRegisterBitWidth(RegisterKind K) const override { |
2100 | return Impl.getRegisterBitWidth(K); |
2101 | } |
2102 | unsigned getMinVectorRegisterBitWidth() const override { |
2103 | return Impl.getMinVectorRegisterBitWidth(); |
2104 | } |
2105 | Optional<unsigned> getMaxVScale() const override { |
2106 | return Impl.getMaxVScale(); |
2107 | } |
2108 | Optional<unsigned> getVScaleForTuning() const override { |
2109 | return Impl.getVScaleForTuning(); |
2110 | } |
2111 | bool shouldMaximizeVectorBandwidth() const override { |
2112 | return Impl.shouldMaximizeVectorBandwidth(); |
2113 | } |
2114 | ElementCount getMinimumVF(unsigned ElemWidth, |
2115 | bool IsScalable) const override { |
2116 | return Impl.getMinimumVF(ElemWidth, IsScalable); |
2117 | } |
2118 | unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override { |
2119 | return Impl.getMaximumVF(ElemWidth, Opcode); |
2120 | } |
2121 | bool shouldConsiderAddressTypePromotion( |
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
2123 | return Impl.shouldConsiderAddressTypePromotion( |
2124 | I, AllowPromotionWithoutCommonHeader); |
2125 | } |
2126 | unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); } |
2127 | Optional<unsigned> getCacheSize(CacheLevel Level) const override { |
2128 | return Impl.getCacheSize(Level); |
2129 | } |
2130 | Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override { |
2131 | return Impl.getCacheAssociativity(Level); |
2132 | } |
2133 | |
2134 | /// Return the preferred prefetch distance in terms of instructions. |
2135 | /// |
2136 | unsigned getPrefetchDistance() const override { |
2137 | return Impl.getPrefetchDistance(); |
2138 | } |
2139 | |
2140 | /// Return the minimum stride necessary to trigger software |
2141 | /// prefetching. |
2142 | /// |
2143 | unsigned getMinPrefetchStride(unsigned NumMemAccesses, |
2144 | unsigned NumStridedMemAccesses, |
2145 | unsigned NumPrefetches, |
2146 | bool HasCall) const override { |
2147 | return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses, |
2148 | NumPrefetches, HasCall); |
2149 | } |
2150 | |
2151 | /// Return the maximum prefetch distance in terms of loop |
2152 | /// iterations. |
2153 | /// |
2154 | unsigned getMaxPrefetchIterationsAhead() const override { |
2155 | return Impl.getMaxPrefetchIterationsAhead(); |
2156 | } |
2157 | |
2158 | /// \return True if prefetching should also be done for writes. |
2159 | bool enableWritePrefetching() const override { |
2160 | return Impl.enableWritePrefetching(); |
2161 | } |
2162 | |
2163 | unsigned getMaxInterleaveFactor(unsigned VF) override { |
2164 | return Impl.getMaxInterleaveFactor(VF); |
2165 | } |
2166 | unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, |
2167 | unsigned &JTSize, |
2168 | ProfileSummaryInfo *PSI, |
2169 | BlockFrequencyInfo *BFI) override { |
2170 | return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI); |
2171 | } |
2172 | InstructionCost getArithmeticInstrCost( |
2173 | unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, |
2174 | OperandValueKind Opd1Info, OperandValueKind Opd2Info, |
2175 | OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo, |
2176 | ArrayRef<const Value *> Args, |
2177 | const Instruction *CxtI = nullptr) override { |
2178 | return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info, |
2179 | Opd1PropInfo, Opd2PropInfo, Args, CxtI); |
2180 | } |
2181 | InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, |
2182 | ArrayRef<int> Mask, int Index, |
2183 | VectorType *SubTp) override { |
2184 | return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp); |
2185 | } |
2186 | InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, |
2187 | CastContextHint CCH, |
2188 | TTI::TargetCostKind CostKind, |
2189 | const Instruction *I) override { |
2190 | return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); |
2191 | } |
  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy,
                                           unsigned Index) override {
2195 | return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index); |
2196 | } |
2197 | InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, |
2198 | const Instruction *I = nullptr) override { |
2199 | return Impl.getCFInstrCost(Opcode, CostKind, I); |
2200 | } |
2201 | InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, |
2202 | CmpInst::Predicate VecPred, |
2203 | TTI::TargetCostKind CostKind, |
2204 | const Instruction *I) override { |
2205 | return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); |
2206 | } |
2207 | InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, |
2208 | unsigned Index) override { |
2209 | return Impl.getVectorInstrCost(Opcode, Val, Index); |
2210 | } |
2211 | InstructionCost |
2212 | getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, |
2213 | const APInt &DemandedDstElts, |
2214 | TTI::TargetCostKind CostKind) override { |
2215 | return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF, |
2216 | DemandedDstElts, CostKind); |
2217 | } |
2218 | InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, |
2219 | unsigned AddressSpace, |
2220 | TTI::TargetCostKind CostKind, |
2221 | const Instruction *I) override { |
2222 | return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, |
2223 | CostKind, I); |
2224 | } |
2225 | InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, |
2226 | unsigned AddressSpace, |
2227 | TTI::TargetCostKind CostKind, |
2228 | const Instruction *I) override { |
2229 | return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace, |
2230 | CostKind, I); |
2231 | } |
2232 | InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, |
2233 | Align Alignment, unsigned AddressSpace, |
2234 | TTI::TargetCostKind CostKind) override { |
2235 | return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, |
2236 | CostKind); |
2237 | } |
2238 | InstructionCost |
2239 | getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, |
2240 | bool VariableMask, Align Alignment, |
2241 | TTI::TargetCostKind CostKind, |
2242 | const Instruction *I = nullptr) override { |
2243 | return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, |
2244 | Alignment, CostKind, I); |
2245 | } |
2246 | InstructionCost getInterleavedMemoryOpCost( |
2247 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, |
2248 | Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, |
2249 | bool UseMaskForCond, bool UseMaskForGaps) override { |
2250 | return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, |
2251 | Alignment, AddressSpace, CostKind, |
2252 | UseMaskForCond, UseMaskForGaps); |
2253 | } |
2254 | InstructionCost |
2255 | getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, |
2256 | Optional<FastMathFlags> FMF, |
2257 | TTI::TargetCostKind CostKind) override { |
2258 | return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind); |
2259 | } |
2260 | InstructionCost |
2261 | getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, |
2262 | TTI::TargetCostKind CostKind) override { |
2263 | return Impl.getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind); |
2264 | } |
2265 | InstructionCost getExtendedAddReductionCost( |
2266 | bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, |
2267 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) override { |
2268 | return Impl.getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, Ty, |
2269 | CostKind); |
2270 | } |
2271 | InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, |
2272 | TTI::TargetCostKind CostKind) override { |
2273 | return Impl.getIntrinsicInstrCost(ICA, CostKind); |
2274 | } |
2275 | InstructionCost getCallInstrCost(Function *F, Type *RetTy, |
2276 | ArrayRef<Type *> Tys, |
2277 | TTI::TargetCostKind CostKind) override { |
2278 | return Impl.getCallInstrCost(F, RetTy, Tys, CostKind); |
2279 | } |
2280 | unsigned getNumberOfParts(Type *Tp) override { |
2281 | return Impl.getNumberOfParts(Tp); |
2282 | } |
2283 | InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, |
2284 | const SCEV *Ptr) override { |
2285 | return Impl.getAddressComputationCost(Ty, SE, Ptr); |
2286 | } |
2287 | InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override { |
2288 | return Impl.getCostOfKeepingLiveOverCall(Tys); |
2289 | } |
2290 | bool getTgtMemIntrinsic(IntrinsicInst *Inst, |
2291 | MemIntrinsicInfo &Info) override { |
2292 | return Impl.getTgtMemIntrinsic(Inst, Info); |
2293 | } |
2294 | unsigned getAtomicMemIntrinsicMaxElementSize() const override { |
2295 | return Impl.getAtomicMemIntrinsicMaxElementSize(); |
2296 | } |
2297 | Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, |
2298 | Type *ExpectedType) override { |
2299 | return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); |
2300 | } |
2301 | Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, |
2302 | unsigned SrcAddrSpace, unsigned DestAddrSpace, |
2303 | unsigned SrcAlign, |
2304 | unsigned DestAlign) const override { |
2305 | return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace, |
2306 | DestAddrSpace, SrcAlign, DestAlign); |
2307 | } |
2308 | void getMemcpyLoopResidualLoweringType( |
2309 | SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context, |
2310 | unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, |
2311 | unsigned SrcAlign, unsigned DestAlign) const override { |
2312 | Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes, |
2313 | SrcAddrSpace, DestAddrSpace, |
2314 | SrcAlign, DestAlign); |
2315 | } |
2316 | bool areInlineCompatible(const Function *Caller, |
2317 | const Function *Callee) const override { |
2318 | return Impl.areInlineCompatible(Caller, Callee); |
2319 | } |
2320 | bool areTypesABICompatible(const Function *Caller, const Function *Callee, |
2321 | const ArrayRef<Type *> &Types) const override { |
2322 | return Impl.areTypesABICompatible(Caller, Callee, Types); |
2323 | } |
2324 | bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override { |
2325 | return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout()); |
2326 | } |
2327 | bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override { |
2328 | return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout()); |
2329 | } |
2330 | unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override { |
2331 | return Impl.getLoadStoreVecRegBitWidth(AddrSpace); |
2332 | } |
2333 | bool isLegalToVectorizeLoad(LoadInst *LI) const override { |
2334 | return Impl.isLegalToVectorizeLoad(LI); |
2335 | } |
2336 | bool isLegalToVectorizeStore(StoreInst *SI) const override { |
2337 | return Impl.isLegalToVectorizeStore(SI); |
2338 | } |
2339 | bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, |
2340 | unsigned AddrSpace) const override { |
2341 | return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, |
2342 | AddrSpace); |
2343 | } |
2344 | bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, |
2345 | unsigned AddrSpace) const override { |
2346 | return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment, |
2347 | AddrSpace); |
2348 | } |
2349 | bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, |
2350 | ElementCount VF) const override { |
2351 | return Impl.isLegalToVectorizeReduction(RdxDesc, VF); |
2352 | } |
2353 | bool isElementTypeLegalForScalableVector(Type *Ty) const override { |
2354 | return Impl.isElementTypeLegalForScalableVector(Ty); |
2355 | } |
2356 | unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, |
2357 | unsigned ChainSizeInBytes, |
2358 | VectorType *VecTy) const override { |
2359 | return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy); |
2360 | } |
2361 | unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, |
2362 | unsigned ChainSizeInBytes, |
2363 | VectorType *VecTy) const override { |
2364 | return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy); |
2365 | } |
2366 | bool preferInLoopReduction(unsigned Opcode, Type *Ty, |
2367 | ReductionFlags Flags) const override { |
2368 | return Impl.preferInLoopReduction(Opcode, Ty, Flags); |
2369 | } |
2370 | bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, |
2371 | ReductionFlags Flags) const override { |
2372 | return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags); |
2373 | } |
2374 | bool shouldExpandReduction(const IntrinsicInst *II) const override { |
2375 | return Impl.shouldExpandReduction(II); |
2376 | } |
2377 | |
2378 | unsigned getGISelRematGlobalCost() const override { |
2379 | return Impl.getGISelRematGlobalCost(); |
2380 | } |
2381 | |
2382 | bool supportsScalableVectors() const override { |
2383 | return Impl.supportsScalableVectors(); |
2384 | } |
2385 | |
2386 | bool enableScalableVectorization() const override { |
2387 | return Impl.enableScalableVectorization(); |
2388 | } |
2389 | |
2390 | bool hasActiveVectorLength(unsigned Opcode, Type *DataType, |
2391 | Align Alignment) const override { |
2392 | return Impl.hasActiveVectorLength(Opcode, DataType, Alignment); |
2393 | } |
2394 | |
2395 | InstructionCost getInstructionLatency(const Instruction *I) override { |
2396 | return Impl.getInstructionLatency(I); |
2397 | } |
2398 | |
2399 | VPLegalization |
2400 | getVPLegalizationStrategy(const VPIntrinsic &PI) const override { |
2401 | return Impl.getVPLegalizationStrategy(PI); |
2402 | } |
2403 | }; |
2404 | |
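// Out-of-line definition of the type-erasing constructor: the concrete target
// implementation is copied into a Model<T> and stored behind the polymorphic
// Concept interface held in TTIImpl.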
2405 | template <typename T> |
2406 | TargetTransformInfo::TargetTransformInfo(T Impl) |
2407 | : TTIImpl(new Model<T>(Impl)) {} |
2408 | |
2409 | /// Analysis pass providing the \c TargetTransformInfo. |
2410 | /// |
2411 | /// The core idea of the TargetIRAnalysis is to expose an interface through |
2412 | /// which LLVM targets can analyze and provide information about the middle |
2413 | /// end's target-independent IR. This supports use cases such as target-aware |
2414 | /// cost modeling of IR constructs. |
2415 | /// |
/// This is a function analysis because much of the cost modeling for targets
/// is done in a subtarget-specific way, and LLVM supports compiling different
/// functions targeting different subtargets in order to support runtime
/// dispatch according to the observed subtarget.
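///
/// A minimal sketch of how a new-pass-manager client typically obtains the
/// result (assuming a populated FunctionAnalysisManager FAM and a Function F;
/// the query made on the result is illustrative only):
/// \code
///   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
///   bool UseScalableVFs = TTI.supportsScalableVectors();
/// \endcode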
2420 | class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> { |
2421 | public: |
2422 | typedef TargetTransformInfo Result; |
2423 | |
2424 | /// Default construct a target IR analysis. |
2425 | /// |
2426 | /// This will use the module's datalayout to construct a baseline |
2427 | /// conservative TTI result. |
2428 | TargetIRAnalysis(); |
2429 | |
  /// Construct an IR analysis pass around a target-provided callback.
2431 | /// |
2432 | /// The callback will be called with a particular function for which the TTI |
2433 | /// is needed and must return a TTI object for that function. |
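  ///
  /// A minimal sketch of providing such a callback (MyTargetMachine and its
  /// getTargetTransformInfo hook are hypothetical stand-ins for a target's
  /// actual TargetMachine subclass):
  /// \code
  ///   MyTargetMachine &TM = ...;
  ///   TargetIRAnalysis TIRA(
  ///       [&TM](const Function &F) { return TM.getTargetTransformInfo(F); });
  /// \endcode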
2434 | TargetIRAnalysis(std::function<Result(const Function &)> TTICallback); |
2435 | |
2436 | // Value semantics. We spell out the constructors for MSVC. |
2437 | TargetIRAnalysis(const TargetIRAnalysis &Arg) |
2438 | : TTICallback(Arg.TTICallback) {} |
2439 | TargetIRAnalysis(TargetIRAnalysis &&Arg) |
2440 | : TTICallback(std::move(Arg.TTICallback)) {} |
2441 | TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) { |
2442 | TTICallback = RHS.TTICallback; |
2443 | return *this; |
2444 | } |
2445 | TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) { |
2446 | TTICallback = std::move(RHS.TTICallback); |
2447 | return *this; |
2448 | } |
2449 | |
2450 | Result run(const Function &F, FunctionAnalysisManager &); |
2451 | |
2452 | private: |
2453 | friend AnalysisInfoMixin<TargetIRAnalysis>; |
2454 | static AnalysisKey Key; |
2455 | |
2456 | /// The callback used to produce a result. |
2457 | /// |
2458 | /// We use a completely opaque callback so that targets can provide whatever |
2459 | /// mechanism they desire for constructing the TTI for a given function. |
2460 | /// |
2461 | /// FIXME: Should we really use std::function? It's relatively inefficient. |
2462 | /// It might be possible to arrange for even stateful callbacks to outlive |
2463 | /// the analysis and thus use a function_ref which would be lighter weight. |
2464 | /// This may also be less error prone as the callback is likely to reference |
2465 | /// the external TargetMachine, and that reference needs to never dangle. |
2466 | std::function<Result(const Function &)> TTICallback; |
2467 | |
2468 | /// Helper function used as the callback in the default constructor. |
2469 | static Result getDefaultTTI(const Function &F); |
2470 | }; |
2471 | |
2472 | /// Wrapper pass for TargetTransformInfo. |
2473 | /// |
/// This pass can be constructed from a TTI object, which it stores internally
/// and makes available to the passes that query it.
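///
/// A minimal sketch of how a legacy pass typically queries it, assuming the
/// pass declared the dependency in getAnalysisUsage via
/// AU.addRequired<TargetTransformInfoWrapperPass>():
/// \code
///   TargetTransformInfo &TTI =
///       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
/// \endcode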
2476 | class TargetTransformInfoWrapperPass : public ImmutablePass { |
2477 | TargetIRAnalysis TIRA; |
2478 | Optional<TargetTransformInfo> TTI; |
2479 | |
2480 | virtual void anchor(); |
2481 | |
2482 | public: |
2483 | static char ID; |
2484 | |
  /// We must provide a default constructor for the pass, but it should
  /// never be used.
2487 | /// |
2488 | /// Use the constructor below or call one of the creation routines. |
2489 | TargetTransformInfoWrapperPass(); |
2490 | |
2491 | explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA); |
2492 | |
2493 | TargetTransformInfo &getTTI(const Function &F); |
2494 | }; |
2495 | |
2496 | /// Create an analysis pass wrapper around a TTI object. |
2497 | /// |
2498 | /// This analysis pass just holds the TTI instance and makes it available to |
2499 | /// clients. |
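///
/// A minimal sketch of adding the wrapper to a legacy pipeline (assuming a
/// legacy::PassManager PM and a TargetMachine TM):
/// \code
///   PM.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis()));
/// \endcode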
2500 | ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA); |
2501 | |
2502 | } // namespace llvm |
2503 | |
2504 | #endif |
2505 | |