1 | //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===// |
2 | // |
3 | // The LLVM Compiler Infrastructure |
4 | // |
5 | // This file is distributed under the University of Illinois Open Source |
6 | // License. See LICENSE.TXT for details. |
7 | // |
8 | //===----------------------------------------------------------------------===// |
9 | /// \file |
10 | /// This pass exposes codegen information to IR-level passes. Every |
11 | /// transformation that uses codegen information is broken into three parts: |
12 | /// 1. The IR-level analysis pass. |
13 | /// 2. The IR-level transformation interface which provides the needed |
14 | /// information. |
15 | /// 3. Codegen-level implementation which uses target-specific hooks. |
16 | /// |
17 | /// This file defines #2, which is the interface that IR-level transformations |
18 | /// use for querying the codegen. |
19 | /// |
20 | //===----------------------------------------------------------------------===// |
21 | |
22 | #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H |
23 | #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H |
24 | |
25 | #include "llvm/ADT/Optional.h" |
26 | #include "llvm/IR/Operator.h" |
27 | #include "llvm/IR/PassManager.h" |
28 | #include "llvm/Pass.h" |
29 | #include "llvm/Support/AtomicOrdering.h" |
30 | #include "llvm/Support/DataTypes.h" |
31 | #include <functional> |
32 | |
33 | namespace llvm { |
34 | |
35 | namespace Intrinsic { |
36 | enum ID : unsigned; |
37 | } |
38 | |
39 | class Function; |
40 | class GlobalValue; |
41 | class IntrinsicInst; |
42 | class LoadInst; |
43 | class Loop; |
44 | class SCEV; |
45 | class ScalarEvolution; |
46 | class StoreInst; |
47 | class SwitchInst; |
48 | class Type; |
49 | class User; |
50 | class Value; |
51 | |
52 | /// Information about a load/store intrinsic defined by the target. |
53 | struct MemIntrinsicInfo { |
54 | /// This is the pointer that the intrinsic is loading from or storing to. |
55 | /// If this is non-null, then analysis/optimization passes can assume that |
56 | /// this intrinsic is functionally equivalent to a load/store from this |
57 | /// pointer. |
58 | Value *PtrVal = nullptr; |
59 | |
60 | // Ordering for atomic operations. |
61 | AtomicOrdering Ordering = AtomicOrdering::NotAtomic; |
62 | |
  // The same Id is set by the target for corresponding load/store intrinsics.
64 | unsigned short MatchingId = 0; |
65 | |
66 | bool ReadMem = false; |
67 | bool WriteMem = false; |
68 | bool IsVolatile = false; |
69 | |
70 | bool isUnordered() const { |
71 | return (Ordering == AtomicOrdering::NotAtomic || |
72 | Ordering == AtomicOrdering::Unordered) && !IsVolatile; |
73 | } |
74 | }; |
75 | |
76 | /// This pass provides access to the codegen interfaces that are needed |
77 | /// for IR-level transformations. |
78 | class TargetTransformInfo { |
79 | public: |
80 | /// Construct a TTI object using a type implementing the \c Concept |
81 | /// API below. |
82 | /// |
83 | /// This is used by targets to construct a TTI wrapping their target-specific |
  /// implementation that encodes appropriate costs for their target.
85 | template <typename T> TargetTransformInfo(T Impl); |
86 | |
87 | /// Construct a baseline TTI object using a minimal implementation of |
88 | /// the \c Concept API below. |
89 | /// |
90 | /// The TTI implementation will reflect the information in the DataLayout |
91 | /// provided if non-null. |
92 | explicit TargetTransformInfo(const DataLayout &DL); |
93 | |
94 | // Provide move semantics. |
95 | TargetTransformInfo(TargetTransformInfo &&Arg); |
96 | TargetTransformInfo &operator=(TargetTransformInfo &&RHS); |
97 | |
98 | // We need to define the destructor out-of-line to define our sub-classes |
99 | // out-of-line. |
100 | ~TargetTransformInfo(); |
101 | |
102 | /// Handle the invalidation of this information. |
103 | /// |
104 | /// When used as a result of \c TargetIRAnalysis this method will be called |
105 | /// when the function this was computed for changes. When it returns false, |
106 | /// the information is preserved across those changes. |
107 | bool invalidate(Function &, const PreservedAnalyses &, |
108 | FunctionAnalysisManager::Invalidator &) { |
109 | // FIXME: We should probably in some way ensure that the subtarget |
110 | // information for a function hasn't changed. |
111 | return false; |
112 | } |
113 | |
114 | /// \name Generic Target Information |
115 | /// @{ |
116 | |
117 | /// The kind of cost model. |
118 | /// |
119 | /// There are several different cost models that can be customized by the |
120 | /// target. The normalization of each cost model may be target specific. |
121 | enum TargetCostKind { |
122 | TCK_RecipThroughput, ///< Reciprocal throughput. |
    TCK_Latency,         ///< The latency of the instruction.
124 | TCK_CodeSize ///< Instruction code size. |
125 | }; |
126 | |
127 | /// Query the cost of a specified instruction. |
128 | /// |
129 | /// Clients should use this interface to query the cost of an existing |
130 | /// instruction. The instruction must have a valid parent (basic block). |
131 | /// |
132 | /// Note, this method does not cache the cost calculation and it |
133 | /// can be expensive in some cases. |
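  ///
  /// For example (an illustrative sketch; \c TTI and \c I are assumed to be
  /// a TargetTransformInfo reference and a pointer to an existing
  /// instruction in the caller):
  /// \code
  ///   int Throughput =
  ///       TTI.getInstructionCost(I, TargetTransformInfo::TCK_RecipThroughput);
  ///   int Size = TTI.getInstructionCost(I, TargetTransformInfo::TCK_CodeSize);
  /// \endcode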
134 | int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const { |
    switch (kind) {
136 | case TCK_RecipThroughput: |
137 | return getInstructionThroughput(I); |
138 | |
139 | case TCK_Latency: |
140 | return getInstructionLatency(I); |
141 | |
142 | case TCK_CodeSize: |
143 | return getUserCost(I); |
144 | } |
    llvm_unreachable("Unknown instruction cost kind");
146 | } |
147 | |
148 | /// Underlying constants for 'cost' values in this interface. |
149 | /// |
150 | /// Many APIs in this interface return a cost. This enum defines the |
151 | /// fundamental values that should be used to interpret (and produce) those |
152 | /// costs. The costs are returned as an int rather than a member of this |
153 | /// enumeration because it is expected that the cost of one IR instruction |
154 | /// may have a multiplicative factor to it or otherwise won't fit directly |
155 | /// into the enum. Moreover, it is common to sum or average costs which works |
156 | /// better as simple integral values. Thus this enum only provides constants. |
157 | /// Also note that the returned costs are signed integers to make it natural |
158 | /// to add, subtract, and test with zero (a common boundary condition). It is |
159 | /// not expected that 2^32 is a realistic cost to be modeling at any point. |
160 | /// |
161 | /// Note that these costs should usually reflect the intersection of code-size |
162 | /// cost and execution cost. A free instruction is typically one that folds |
163 | /// into another instruction. For example, reg-to-reg moves can often be |
164 | /// skipped by renaming the registers in the CPU, but they still are encoded |
165 | /// and thus wouldn't be considered 'free' here. |
166 | enum TargetCostConstants { |
167 | TCC_Free = 0, ///< Expected to fold away in lowering. |
168 | TCC_Basic = 1, ///< The cost of a typical 'add' instruction. |
169 | TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86. |
170 | }; |
171 | |
172 | /// Estimate the cost of a specific operation when lowered. |
173 | /// |
174 | /// Note that this is designed to work on an arbitrary synthetic opcode, and |
175 | /// thus work for hypothetical queries before an instruction has even been |
176 | /// formed. However, this does *not* work for GEPs, and must not be called |
177 | /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as |
  /// analyzing a GEP's cost requires more information.
179 | /// |
180 | /// Typically only the result type is required, and the operand type can be |
181 | /// omitted. However, if the opcode is one of the cast instructions, the |
182 | /// operand type is required. |
183 | /// |
184 | /// The returned cost is defined in terms of \c TargetCostConstants, see its |
185 | /// comments for a detailed explanation of the cost values. |
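  ///
  /// For example (illustrative; \c TTI and \c Ctx are an assumed
  /// TargetTransformInfo reference and LLVMContext):
  /// \code
  ///   // Cost of a hypothetical 64-bit multiply, before any IR is formed.
  ///   int Cost = TTI.getOperationCost(Instruction::Mul,
  ///                                   Type::getInt64Ty(Ctx));
  /// \endcode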
186 | int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const; |
187 | |
188 | /// Estimate the cost of a GEP operation when lowered. |
189 | /// |
190 | /// The contract for this function is the same as \c getOperationCost except |
191 | /// that it supports an interface that provides extra information specific to |
192 | /// the GEP operation. |
193 | int getGEPCost(Type *PointeeType, const Value *Ptr, |
194 | ArrayRef<const Value *> Operands) const; |
195 | |
  /// Estimate the cost of an EXT operation when lowered.
197 | /// |
198 | /// The contract for this function is the same as \c getOperationCost except |
199 | /// that it supports an interface that provides extra information specific to |
200 | /// the EXT operation. |
201 | int getExtCost(const Instruction *I, const Value *Src) const; |
202 | |
203 | /// Estimate the cost of a function call when lowered. |
204 | /// |
205 | /// The contract for this is the same as \c getOperationCost except that it |
206 | /// supports an interface that provides extra information specific to call |
207 | /// instructions. |
208 | /// |
209 | /// This is the most basic query for estimating call cost: it only knows the |
210 | /// function type and (potentially) the number of arguments at the call site. |
211 | /// The latter is only interesting for varargs function types. |
212 | int getCallCost(FunctionType *FTy, int NumArgs = -1) const; |
213 | |
214 | /// Estimate the cost of calling a specific function when lowered. |
215 | /// |
216 | /// This overload adds the ability to reason about the particular function |
217 | /// being called in the event it is a library call with special lowering. |
218 | int getCallCost(const Function *F, int NumArgs = -1) const; |
219 | |
220 | /// Estimate the cost of calling a specific function when lowered. |
221 | /// |
222 | /// This overload allows specifying a set of candidate argument values. |
223 | int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const; |
224 | |
225 | /// \returns A value by which our inlining threshold should be multiplied. |
226 | /// This is primarily used to bump up the inlining threshold wholesale on |
227 | /// targets where calls are unusually expensive. |
228 | /// |
229 | /// TODO: This is a rather blunt instrument. Perhaps altering the costs of |
230 | /// individual classes of instructions would be better. |
231 | unsigned getInliningThresholdMultiplier() const; |
232 | |
233 | /// Estimate the cost of an intrinsic when lowered. |
234 | /// |
235 | /// Mirrors the \c getCallCost method but uses an intrinsic identifier. |
236 | int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, |
237 | ArrayRef<Type *> ParamTys) const; |
238 | |
239 | /// Estimate the cost of an intrinsic when lowered. |
240 | /// |
241 | /// Mirrors the \c getCallCost method but uses an intrinsic identifier. |
242 | int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, |
243 | ArrayRef<const Value *> Arguments) const; |
244 | |
  /// \return The estimated number of case clusters when lowering \p SI.
  /// \p JTSize is set to the size of the jump table only when \p SI is
  /// suitable for lowering as a jump table.
248 | unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, |
249 | unsigned &JTSize) const; |
250 | |
251 | /// Estimate the cost of a given IR user when lowered. |
252 | /// |
253 | /// This can estimate the cost of either a ConstantExpr or Instruction when |
254 | /// lowered. It has two primary advantages over the \c getOperationCost and |
255 | /// \c getGEPCost above, and one significant disadvantage: it can only be |
256 | /// used when the IR construct has already been formed. |
257 | /// |
258 | /// The advantages are that it can inspect the SSA use graph to reason more |
259 | /// accurately about the cost. For example, all-constant-GEPs can often be |
260 | /// folded into a load or other instruction, but if they are used in some |
261 | /// other context they may not be folded. This routine can distinguish such |
262 | /// cases. |
263 | /// |
  /// \p Operands is a list of operands which can be a result of
  /// transformations of the current operands. The number of operands on the
  /// list must equal the number of current operands the IR user has, and
  /// their order on the list must match the order of the current operands.
269 | /// |
270 | /// The returned cost is defined in terms of \c TargetCostConstants, see its |
271 | /// comments for a detailed explanation of the cost values. |
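  ///
  /// For example (illustrative; \c TTI and \c BB are an assumed
  /// TargetTransformInfo reference and basic block), summing the cost of a
  /// block with the single-argument helper below:
  /// \code
  ///   int Total = 0;
  ///   for (const Instruction &Inst : BB)
  ///     Total += TTI.getUserCost(&Inst);
  /// \endcode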
272 | int getUserCost(const User *U, ArrayRef<const Value *> Operands) const; |
273 | |
274 | /// This is a helper function which calls the two-argument getUserCost |
275 | /// with \p Operands which are the current operands U has. |
276 | int getUserCost(const User *U) const { |
277 | SmallVector<const Value *, 4> Operands(U->value_op_begin(), |
278 | U->value_op_end()); |
279 | return getUserCost(U, Operands); |
280 | } |
281 | |
282 | /// Return true if branch divergence exists. |
283 | /// |
284 | /// Branch divergence has a significantly negative impact on GPU performance |
285 | /// when threads in the same wavefront take different paths due to conditional |
286 | /// branches. |
287 | bool hasBranchDivergence() const; |
288 | |
289 | /// Returns whether V is a source of divergence. |
290 | /// |
291 | /// This function provides the target-dependent information for |
  /// the target-independent LegacyDivergenceAnalysis. The analysis first
  /// builds the dependency graph, and then runs the reachability algorithm
  /// starting with the sources of divergence.
295 | bool isSourceOfDivergence(const Value *V) const; |
296 | |
  /// Returns true for the target-specific set of operations which produce a
  /// uniform result even when taking non-uniform arguments.
  bool isAlwaysUniform(const Value *V) const;
301 | |
302 | /// Returns the address space ID for a target's 'flat' address space. Note |
303 | /// this is not necessarily the same as addrspace(0), which LLVM sometimes |
304 | /// refers to as the generic address space. The flat address space is a |
  /// generic address space that can be used to access multiple segments of
  /// memory with different address spaces. Access of a memory location
  /// through a pointer with this address space is expected to be legal but
  /// slower compared to the same memory location accessed through a pointer
  /// with a different address space.
  ///
311 | /// This is for targets with different pointer representations which can |
312 | /// be converted with the addrspacecast instruction. If a pointer is converted |
313 | /// to this address space, optimizations should attempt to replace the access |
314 | /// with the source address space. |
315 | /// |
316 | /// \returns ~0u if the target does not have such a flat address space to |
317 | /// optimize away. |
318 | unsigned getFlatAddressSpace() const; |
319 | |
320 | /// Test whether calls to a function lower to actual program function |
321 | /// calls. |
322 | /// |
323 | /// The idea is to test whether the program is likely to require a 'call' |
324 | /// instruction or equivalent in order to call the given function. |
325 | /// |
  /// FIXME: It's not clear that this is a good or useful query API. Clients
  /// should probably move to simpler cost metrics using the above.
328 | /// Alternatively, we could split the cost interface into distinct code-size |
329 | /// and execution-speed costs. This would allow modelling the core of this |
330 | /// query more accurately as a call is a single small instruction, but |
331 | /// incurs significant execution cost. |
332 | bool isLoweredToCall(const Function *F) const; |
333 | |
334 | struct LSRCost { |
335 | /// TODO: Some of these could be merged. Also, a lexical ordering |
336 | /// isn't always optimal. |
337 | unsigned Insns; |
338 | unsigned NumRegs; |
339 | unsigned AddRecCost; |
340 | unsigned NumIVMuls; |
341 | unsigned NumBaseAdds; |
342 | unsigned ImmCost; |
343 | unsigned SetupCost; |
344 | unsigned ScaleCost; |
345 | }; |
346 | |
347 | /// Parameters that control the generic loop unrolling transformation. |
348 | struct UnrollingPreferences { |
349 | /// The cost threshold for the unrolled loop. Should be relative to the |
350 | /// getUserCost values returned by this API, and the expectation is that |
351 | /// the unrolled loop's instructions when run through that interface should |
352 | /// not exceed this cost. However, this is only an estimate. Also, specific |
353 | /// loops may be unrolled even with a cost above this threshold if deemed |
354 | /// profitable. Set this to UINT_MAX to disable the loop body cost |
355 | /// restriction. |
356 | unsigned Threshold; |
357 | /// If complete unrolling will reduce the cost of the loop, we will boost |
358 | /// the Threshold by a certain percent to allow more aggressive complete |
359 | /// unrolling. This value provides the maximum boost percentage that we |
360 | /// can apply to Threshold (The value should be no less than 100). |
361 | /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost, |
362 | /// MaxPercentThresholdBoost / 100) |
363 | /// E.g. if complete unrolling reduces the loop execution time by 50% |
364 | /// then we boost the threshold by the factor of 2x. If unrolling is not |
365 | /// expected to reduce the running time, then we do not increase the |
366 | /// threshold. |
367 | unsigned MaxPercentThresholdBoost; |
368 | /// The cost threshold for the unrolled loop when optimizing for size (set |
369 | /// to UINT_MAX to disable). |
370 | unsigned OptSizeThreshold; |
371 | /// The cost threshold for the unrolled loop, like Threshold, but used |
372 | /// for partial/runtime unrolling (set to UINT_MAX to disable). |
373 | unsigned PartialThreshold; |
374 | /// The cost threshold for the unrolled loop when optimizing for size, like |
375 | /// OptSizeThreshold, but used for partial/runtime unrolling (set to |
376 | /// UINT_MAX to disable). |
377 | unsigned PartialOptSizeThreshold; |
378 | /// A forced unrolling factor (the number of concatenated bodies of the |
379 | /// original loop in the unrolled loop body). When set to 0, the unrolling |
380 | /// transformation will select an unrolling factor based on the current cost |
381 | /// threshold and other factors. |
382 | unsigned Count; |
    /// A forced peeling factor (the number of bodies of the original loop
384 | /// that should be peeled off before the loop body). When set to 0, the |
385 | /// unrolling transformation will select a peeling factor based on profile |
386 | /// information and other factors. |
387 | unsigned PeelCount; |
388 | /// Default unroll count for loops with run-time trip count. |
389 | unsigned DefaultUnrollRuntimeCount; |
    /// Set the maximum unrolling factor. The unrolling factor may be selected
    /// using the appropriate cost threshold, but may not exceed this number
    /// (set to UINT_MAX to disable). This does not apply in cases where the
    /// loop is being fully unrolled.
394 | unsigned MaxCount; |
395 | /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but |
396 | /// applies even if full unrolling is selected. This allows a target to fall |
397 | /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount. |
398 | unsigned FullUnrollMaxCount; |
    /// The number of instructions optimized when the "back edge" becomes a
    /// "fall through" in the unrolled loop.
    /// For now we count a conditional branch on a backedge and a comparison
    /// feeding it.
403 | unsigned BEInsns; |
404 | /// Allow partial unrolling (unrolling of loops to expand the size of the |
405 | /// loop body, not only to eliminate small constant-trip-count loops). |
406 | bool Partial; |
407 | /// Allow runtime unrolling (unrolling of loops to expand the size of the |
408 | /// loop body even when the number of loop iterations is not known at |
409 | /// compile time). |
410 | bool Runtime; |
411 | /// Allow generation of a loop remainder (extra iterations after unroll). |
412 | bool AllowRemainder; |
413 | /// Allow emitting expensive instructions (such as divisions) when computing |
414 | /// the trip count of a loop for runtime unrolling. |
415 | bool AllowExpensiveTripCount; |
    /// Apply unrolling to any kind of loop
    /// (mainly to loops that fail runtime unrolling).
418 | bool Force; |
419 | /// Allow using trip count upper bound to unroll loops. |
420 | bool UpperBound; |
421 | /// Allow peeling off loop iterations for loops with low dynamic tripcount. |
422 | bool AllowPeeling; |
423 | /// Allow unrolling of all the iterations of the runtime loop remainder. |
424 | bool UnrollRemainder; |
425 | /// Allow unroll and jam. Used to enable unroll and jam for the target. |
426 | bool UnrollAndJam; |
427 | /// Threshold for unroll and jam, for inner loop size. The 'Threshold' |
428 | /// value above is used during unroll and jam for the outer loop size. |
429 | /// This value is used in the same manner to limit the size of the inner |
430 | /// loop. |
431 | unsigned UnrollAndJamInnerLoopThreshold; |
432 | }; |
433 | |
434 | /// Get target-customized preferences for the generic loop unrolling |
435 | /// transformation. The caller will initialize UP with the current |
436 | /// target-independent defaults. |
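  ///
  /// For example, a hypothetical target implementation (\c MyTTIImpl is not
  /// a real class; this is an illustrative sketch only) might adjust the
  /// defaults like this:
  /// \code
  ///   void MyTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
  ///                                           TTI::UnrollingPreferences &UP) {
  ///     UP.Partial = true; // Allow partial unrolling on this target.
  ///     UP.MaxCount = 4;   // But never unroll by more than a factor of 4.
  ///   }
  /// \endcode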
437 | void getUnrollingPreferences(Loop *L, ScalarEvolution &, |
438 | UnrollingPreferences &UP) const; |
439 | |
440 | /// @} |
441 | |
442 | /// \name Scalar Target Information |
443 | /// @{ |
444 | |
445 | /// Flags indicating the kind of support for population count. |
446 | /// |
447 | /// Compared to the SW implementation, HW support is supposed to |
448 | /// significantly boost the performance when the population is dense, and it |
  /// may or may not degrade performance if the population is sparse. HW
  /// support is considered "Fast" if it can outperform, or is on a par with,
  /// the SW implementation when the population is sparse; otherwise, it is
  /// considered "Slow".
453 | enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware }; |
454 | |
  /// Return true if the specified immediate is a legal add immediate, that
456 | /// is the target has add instructions which can add a register with the |
457 | /// immediate without having to materialize the immediate into a register. |
458 | bool isLegalAddImmediate(int64_t Imm) const; |
459 | |
  /// Return true if the specified immediate is a legal icmp immediate,
461 | /// that is the target has icmp instructions which can compare a register |
462 | /// against the immediate without having to materialize the immediate into a |
463 | /// register. |
464 | bool isLegalICmpImmediate(int64_t Imm) const; |
465 | |
466 | /// Return true if the addressing mode represented by AM is legal for |
467 | /// this target, for a load/store of the specified type. |
468 | /// The type may be VoidTy, in which case only return true if the addressing |
469 | /// mode is legal for a load/store of any legal type. |
470 | /// If target returns true in LSRWithInstrQueries(), I may be valid. |
471 | /// TODO: Handle pre/postinc as well. |
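  ///
  /// For example (illustrative; \c TTI and \c Ctx assumed), testing a
  /// reg + 4*reg mode for an i32 access:
  /// \code
  ///   bool Legal = TTI.isLegalAddressingMode(Type::getInt32Ty(Ctx),
  ///                                          /*BaseGV=*/nullptr,
  ///                                          /*BaseOffset=*/0,
  ///                                          /*HasBaseReg=*/true,
  ///                                          /*Scale=*/4);
  /// \endcode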
472 | bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, |
473 | bool HasBaseReg, int64_t Scale, |
474 | unsigned AddrSpace = 0, |
475 | Instruction *I = nullptr) const; |
476 | |
  /// Return true if LSR cost of C1 is lower than C2.
478 | bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, |
479 | TargetTransformInfo::LSRCost &C2) const; |
480 | |
481 | /// Return true if the target can fuse a compare and branch. |
482 | /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost |
483 | /// calculation for the instructions in a loop. |
484 | bool canMacroFuseCmp() const; |
485 | |
  /// \return True if LSR should make efforts to create/preserve post-inc
487 | /// addressing mode expressions. |
488 | bool shouldFavorPostInc() const; |
489 | |
  /// Return true if the target supports masked load/store.
  /// AVX2 and AVX-512 targets allow masks for consecutive load and store.
492 | bool isLegalMaskedStore(Type *DataType) const; |
493 | bool isLegalMaskedLoad(Type *DataType) const; |
494 | |
  /// Return true if the target supports masked gather/scatter.
  /// AVX-512 fully supports gather and scatter for vectors with 32- and
  /// 64-bit scalar types.
498 | bool isLegalMaskedScatter(Type *DataType) const; |
499 | bool isLegalMaskedGather(Type *DataType) const; |
500 | |
501 | /// Return true if the target has a unified operation to calculate division |
502 | /// and remainder. If so, the additional implicit multiplication and |
503 | /// subtraction required to calculate a remainder from division are free. This |
504 | /// can enable more aggressive transformations for division and remainder than |
505 | /// would typically be allowed using throughput or size cost models. |
506 | bool hasDivRemOp(Type *DataType, bool IsSigned) const; |
507 | |
508 | /// Return true if the given instruction (assumed to be a memory access |
509 | /// instruction) has a volatile variant. If that's the case then we can avoid |
510 | /// addrspacecast to generic AS for volatile loads/stores. Default |
511 | /// implementation returns false, which prevents address space inference for |
512 | /// volatile loads/stores. |
513 | bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const; |
514 | |
515 | /// Return true if target doesn't mind addresses in vectors. |
516 | bool prefersVectorizedAddressing() const; |
517 | |
518 | /// Return the cost of the scaling factor used in the addressing |
519 | /// mode represented by AM for this target, for a load/store |
520 | /// of the specified type. |
521 | /// If the AM is supported, the return value must be >= 0. |
522 | /// If the AM is not supported, it returns a negative value. |
523 | /// TODO: Handle pre/postinc as well. |
524 | int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, |
525 | bool HasBaseReg, int64_t Scale, |
526 | unsigned AddrSpace = 0) const; |
527 | |
528 | /// Return true if the loop strength reduce pass should make |
529 | /// Instruction* based TTI queries to isLegalAddressingMode(). This is |
530 | /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned |
531 | /// immediate offset and no index register. |
532 | bool LSRWithInstrQueries() const; |
533 | |
534 | /// Return true if it's free to truncate a value of type Ty1 to type |
  /// Ty2. e.g. On x86 it's free to truncate an i32 value in register EAX to i16
536 | /// by referencing its sub-register AX. |
537 | bool isTruncateFree(Type *Ty1, Type *Ty2) const; |
538 | |
  /// Return true if it is profitable to hoist an instruction in the
  /// then/else blocks to before the if.
541 | bool isProfitableToHoist(Instruction *I) const; |
542 | |
  /// Return true if the target wants alias analysis to be used during code
  /// generation (e.g. during instruction scheduling).
  bool useAA() const;
544 | |
545 | /// Return true if this type is legal. |
546 | bool isTypeLegal(Type *Ty) const; |
547 | |
548 | /// Returns the target's jmp_buf alignment in bytes. |
549 | unsigned getJumpBufAlignment() const; |
550 | |
551 | /// Returns the target's jmp_buf size in bytes. |
552 | unsigned getJumpBufSize() const; |
553 | |
554 | /// Return true if switches should be turned into lookup tables for the |
555 | /// target. |
556 | bool shouldBuildLookupTables() const; |
557 | |
558 | /// Return true if switches should be turned into lookup tables |
559 | /// containing this constant value for the target. |
560 | bool shouldBuildLookupTablesForConstant(Constant *C) const; |
561 | |
  /// Return true if the input function, which is cold at all call sites,
  /// should use the coldcc calling convention.
564 | bool useColdCCForColdCall(Function &F) const; |
565 | |
  /// Estimate the overhead of scalarizing an instruction. \p Insert and
  /// \p Extract are set if the result needs to be inserted and/or extracted
  /// from vectors.
  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
567 | |
  /// Estimate the overhead of scalarizing an instruction's unique
  /// non-constant operands. The types of the arguments are ordinarily
  /// scalar, in which case the costs are multiplied with VF.
  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) const;
570 | |
571 | /// If target has efficient vector element load/store instructions, it can |
572 | /// return true here so that insertion/extraction costs are not added to |
573 | /// the scalarization cost of a load/store. |
574 | bool supportsEfficientVectorElementLoadStore() const; |
575 | |
576 | /// Don't restrict interleaved unrolling to small loops. |
577 | bool enableAggressiveInterleaving(bool LoopHasReductions) const; |
578 | |
579 | /// If not nullptr, enable inline expansion of memcmp. IsZeroCmp is |
580 | /// true if this is the expansion of memcmp(p1, p2, s) == 0. |
581 | struct MemCmpExpansionOptions { |
582 | // The list of available load sizes (in bytes), sorted in decreasing order. |
583 | SmallVector<unsigned, 8> LoadSizes; |
584 | // Set to true to allow overlapping loads. For example, 7-byte compares can |
585 | // be done with two 4-byte compares instead of 4+2+1-byte compares. This |
586 | // requires all loads in LoadSizes to be doable in an unaligned way. |
587 | bool AllowOverlappingLoads = false; |
588 | }; |
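  /// For example, a hypothetical target implementation (\c MyTTIImpl is not
  /// a real class, and the usual \c TTI typedef for TargetTransformInfo is
  /// assumed) that supports unaligned 8/4/2/1-byte loads might return:
  /// \code
  ///   const TTI::MemCmpExpansionOptions *
  ///   MyTTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
  ///     static const TTI::MemCmpExpansionOptions Options = [] {
  ///       TTI::MemCmpExpansionOptions O;
  ///       for (unsigned Size : {8, 4, 2, 1})
  ///         O.LoadSizes.push_back(Size);
  ///       return O;
  ///     }();
  ///     return &Options;
  ///   }
  /// \endcode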
589 | const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const; |
590 | |
591 | /// Enable matching of interleaved access groups. |
592 | bool enableInterleavedAccessVectorization() const; |
593 | |
  /// Enable matching of interleaved access groups that contain predicated
  /// accesses or gaps and therefore must be vectorized using masked vector
  /// loads/stores.
597 | bool enableMaskedInterleavedAccessVectorization() const; |
598 | |
599 | /// Indicate that it is potentially unsafe to automatically vectorize |
600 | /// floating-point operations because the semantics of vector and scalar |
  /// floating-point operations may differ. For example, ARM NEON v7 SIMD math
602 | /// does not support IEEE-754 denormal numbers, while depending on the |
603 | /// platform, scalar floating-point math does. |
604 | /// This applies to floating-point math operations and calls, not memory |
605 | /// operations, shuffles, or casts. |
606 | bool isFPVectorizationPotentiallyUnsafe() const; |
607 | |
608 | /// Determine if the target supports unaligned memory accesses. |
609 | bool allowsMisalignedMemoryAccesses(LLVMContext &Context, |
610 | unsigned BitWidth, unsigned AddressSpace = 0, |
611 | unsigned Alignment = 1, |
612 | bool *Fast = nullptr) const; |
613 | |
614 | /// Return hardware support for population count. |
615 | PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; |
616 | |
617 | /// Return true if the hardware has a fast square-root instruction. |
618 | bool haveFastSqrt(Type *Ty) const; |
619 | |
620 | /// Return true if it is faster to check if a floating-point value is NaN |
621 | /// (or not-NaN) versus a comparison against a constant FP zero value. |
622 | /// Targets should override this if materializing a 0.0 for comparison is |
623 | /// generally as cheap as checking for ordered/unordered. |
624 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const; |
625 | |
626 | /// Return the expected cost of supporting the floating point operation |
627 | /// of the specified type. |
628 | int getFPOpCost(Type *Ty) const; |
629 | |
630 | /// Return the expected cost of materializing for the given integer |
631 | /// immediate of the specified type. |
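  ///
  /// For example (illustrative; \c TTI and \c Ctx assumed):
  /// \code
  ///   APInt Imm(/*numBits=*/32, 0xDEADBEEF);
  ///   int Cost = TTI.getIntImmCost(Imm, Type::getInt32Ty(Ctx));
  /// \endcode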
632 | int getIntImmCost(const APInt &Imm, Type *Ty) const; |
633 | |
634 | /// Return the expected cost of materialization for the given integer |
635 | /// immediate of the specified type for a given instruction. The cost can be |
636 | /// zero if the immediate can be folded into the specified instruction. |
637 | int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, |
638 | Type *Ty) const; |
639 | int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, |
640 | Type *Ty) const; |
641 | |
  /// Return the expected cost for the given integer when optimizing
  /// for size. This is different than the other integer immediate cost
  /// functions in that it is subtarget agnostic. This is useful when you e.g.
  /// target one ISA such as AArch32 but smaller encodings could be possible
  /// with another such as Thumb. This return value is used as a penalty when
  /// the total cost for a constant is calculated (the bigger the cost, the
  /// more beneficial constant hoisting is).
649 | int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, |
650 | Type *Ty) const; |
651 | /// @} |
652 | |
653 | /// \name Vector Target Information |
654 | /// @{ |
655 | |
656 | /// The various kinds of shuffle patterns for vector queries. |
657 | enum ShuffleKind { |
658 | SK_Broadcast, ///< Broadcast element 0 to all other elements. |
659 | SK_Reverse, ///< Reverse the order of the vector. |
660 | SK_Select, ///< Selects elements from the corresponding lane of |
661 | ///< either source operand. This is equivalent to a |
662 | ///< vector select with a constant condition operand. |
663 | SK_Transpose, ///< Transpose two vectors. |
664 | SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset. |
    SK_ExtractSubvector,///< ExtractSubvector. Index indicates start offset.
666 | SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one |
667 | ///< with any shuffle mask. |
668 | SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any |
669 | ///< shuffle mask. |
670 | }; |
671 | |
672 | /// Additional information about an operand's possible values. |
673 | enum OperandValueKind { |
674 | OK_AnyValue, // Operand can have any value. |
675 | OK_UniformValue, // Operand is uniform (splat of a value). |
676 | OK_UniformConstantValue, // Operand is uniform constant. |
677 | OK_NonUniformConstantValue // Operand is a non uniform constant value. |
678 | }; |
679 | |
680 | /// Additional properties of an operand's values. |
681 | enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 }; |
682 | |
683 | /// \return The number of scalar or vector registers that the target has. |
684 | /// If 'Vectors' is true, it returns the number of vector registers. If it is |
685 | /// set to false, it returns the number of scalar registers. |
686 | unsigned getNumberOfRegisters(bool Vector) const; |
687 | |
688 | /// \return The width of the largest scalar or vector register type. |
689 | unsigned getRegisterBitWidth(bool Vector) const; |
690 | |
691 | /// \return The width of the smallest vector register type. |
692 | unsigned getMinVectorRegisterBitWidth() const; |
693 | |
694 | /// \return True if the vectorization factor should be chosen to |
695 | /// make the vector of the smallest element type match the size of a |
696 | /// vector register. For wider element types, this could result in |
697 | /// creating vectors that span multiple vector registers. |
698 | /// If false, the vectorization factor will be chosen based on the |
699 | /// size of the widest element type. |
700 | bool shouldMaximizeVectorBandwidth(bool OptSize) const; |
701 | |
702 | /// \return The minimum vectorization factor for types of given element |
  /// bit width, or 0 if there is no minimum VF. The returned value only
704 | /// applies when shouldMaximizeVectorBandwidth returns true. |
705 | unsigned getMinimumVF(unsigned ElemWidth) const; |
706 | |
707 | /// \return True if it should be considered for address type promotion. |
708 | /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is |
709 | /// profitable without finding other extensions fed by the same input. |
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
712 | |
713 | /// \return The size of a cache line in bytes. |
714 | unsigned getCacheLineSize() const; |
715 | |
716 | /// The possible cache levels |
717 | enum class CacheLevel { |
718 | L1D, // The L1 data cache |
719 | L2D, // The L2 data cache |
720 | |
721 | // We currently do not model L3 caches, as their sizes differ widely between |
722 | // microarchitectures. Also, we currently do not have a use for L3 cache |
723 | // size modeling yet. |
724 | }; |
725 | |
726 | /// \return The size of the cache level in bytes, if available. |
727 | llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const; |
728 | |
729 | /// \return The associativity of the cache level, if available. |
730 | llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const; |
731 | |
732 | /// \return How much before a load we should place the prefetch instruction. |
733 | /// This is currently measured in number of instructions. |
734 | unsigned getPrefetchDistance() const; |
735 | |
736 | /// \return Some HW prefetchers can handle accesses up to a certain constant |
737 | /// stride. This is the minimum stride in bytes where it makes sense to start |
738 | /// adding SW prefetches. The default is 1, i.e. prefetch with any stride. |
739 | unsigned getMinPrefetchStride() const; |
740 | |
741 | /// \return The maximum number of iterations to prefetch ahead. If the |
742 | /// required number of iterations is more than this number, no prefetching is |
743 | /// performed. |
744 | unsigned getMaxPrefetchIterationsAhead() const; |
745 | |
746 | /// \return The maximum interleave factor that any transform should try to |
747 | /// perform for this target. This number depends on the level of parallelism |
748 | /// and the number of execution units in the CPU. |
749 | unsigned getMaxInterleaveFactor(unsigned VF) const; |
750 | |
751 | /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2. |
752 | static OperandValueKind getOperandInfo(Value *V, |
753 | OperandValueProperties &OpProps); |
754 | |
755 | /// This is an approximation of reciprocal throughput of a math/logic op. |
756 | /// A higher cost indicates less expected throughput. |
757 | /// From Agner Fog's guides, reciprocal throughput is "the average number of |
758 | /// clock cycles per instruction when the instructions are not part of a |
759 | /// limiting dependency chain." |
760 | /// Therefore, costs should be scaled to account for multiple execution units |
761 | /// on the target that can process this type of instruction. For example, if |
762 | /// there are 5 scalar integer units and 2 vector integer units that can |
763 | /// calculate an 'add' in a single cycle, this model should indicate that the |
764 | /// cost of the vector add instruction is 2.5 times the cost of the scalar |
765 | /// add instruction. |
766 | /// \p Args is an optional argument which holds the instruction operands |
767 | /// values so the TTI can analyze those values searching for special |
768 | /// cases or optimizations based on those values. |
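  ///
  /// For example (illustrative; \c TTI and \c Ctx assumed), the cost of a
  /// <4 x i32> add:
  /// \code
  ///   Type *VecTy = VectorType::get(Type::getInt32Ty(Ctx), 4);
  ///   int Cost = TTI.getArithmeticInstrCost(Instruction::Add, VecTy);
  /// \endcode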
769 | int getArithmeticInstrCost( |
770 | unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue, |
771 | OperandValueKind Opd2Info = OK_AnyValue, |
772 | OperandValueProperties Opd1PropInfo = OP_None, |
773 | OperandValueProperties Opd2PropInfo = OP_None, |
774 | ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const; |
775 | |
776 | /// \return The cost of a shuffle instruction of kind Kind and of type Tp. |
777 | /// The index and subtype parameters are used by the subvector insertion and |
778 | /// extraction shuffle kinds to show the insert/extract point and the type of |
779 | /// the subvector being inserted/extracted. |
780 | /// NOTE: For subvector extractions Tp represents the source type. |
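  ///
  /// For example (illustrative; \c TTI and a vector type \c VecTy assumed),
  /// the cost of reversing the elements of a vector:
  /// \code
  ///   int Cost = TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VecTy);
  /// \endcode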
781 | int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0, |
782 | Type *SubTp = nullptr) const; |
783 | |
784 | /// \return The expected cost of cast instructions, such as bitcast, trunc, |
785 | /// zext, etc. If there is an existing instruction that holds Opcode, it |
786 | /// may be passed in the 'I' parameter. |
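  ///
  /// For example (illustrative; \c TTI and \c Ctx assumed), the cost of
  /// zero-extending an i8 to i32:
  /// \code
  ///   int Cost = TTI.getCastInstrCost(Instruction::ZExt,
  ///                                   Type::getInt32Ty(Ctx),
  ///                                   Type::getInt8Ty(Ctx));
  /// \endcode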
787 | int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, |
788 | const Instruction *I = nullptr) const; |
789 | |
790 | /// \return The expected cost of a sign- or zero-extended vector extract. Use |
791 | /// -1 to indicate that there is no information about the index value. |
  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                               unsigned Index = -1) const;
794 | |
795 | /// \return The expected cost of control-flow related instructions such as |
796 | /// Phi, Ret, Br. |
797 | int getCFInstrCost(unsigned Opcode) const; |
798 | |
799 | /// \returns The expected cost of compare and select instructions. If there |
800 | /// is an existing instruction that holds Opcode, it may be passed in the |
801 | /// 'I' parameter. |
802 | int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, |
803 | Type *CondTy = nullptr, const Instruction *I = nullptr) const; |
804 | |
805 | /// \return The expected cost of vector Insert and Extract. |
806 | /// Use -1 to indicate that there is no information on the index value. |
807 | int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const; |
808 | |
809 | /// \return The cost of Load and Store instructions. |
810 | int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, |
811 | unsigned AddressSpace, const Instruction *I = nullptr) const; |
812 | |
813 | /// \return The cost of masked Load and Store instructions. |
814 | int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, |
815 | unsigned AddressSpace) const; |
816 | |
  /// \return The cost of a Gather or Scatter operation.
  /// \p Opcode - the kind of memory access, Load or Store
  /// \p DataTy - a vector type of the data to be loaded or stored
  /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
  /// \p VariableMask - true when the memory access is predicated with a mask
  ///                   that is not a compile-time constant
  /// \p Alignment - alignment of a single element
824 | int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, |
825 | bool VariableMask, unsigned Alignment) const; |
826 | |
827 | /// \return The cost of the interleaved memory operation. |
828 | /// \p Opcode is the memory operation code |
829 | /// \p VecTy is the vector type of the interleaved access. |
830 | /// \p Factor is the interleave factor |
831 | /// \p Indices is the indices for interleaved load members (as interleaved |
832 | /// load allows gaps) |
833 | /// \p Alignment is the alignment of the memory operation |
834 | /// \p AddressSpace is address space of the pointer. |
835 | /// \p UseMaskForCond indicates if the memory access is predicated. |
836 | /// \p UseMaskForGaps indicates if gaps should be masked. |
837 | int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, |
838 | ArrayRef<unsigned> Indices, unsigned Alignment, |
839 | unsigned AddressSpace, |
840 | bool UseMaskForCond = false, |
841 | bool UseMaskForGaps = false) const; |
842 | |
843 | /// Calculate the cost of performing a vector reduction. |
844 | /// |
845 | /// This is the cost of reducing the vector value of type \p Ty to a scalar |
846 | /// value using the operation denoted by \p Opcode. The form of the reduction |
847 | /// can either be a pairwise reduction or a reduction that splits the vector |
848 | /// at every reduction level. |
849 | /// |
850 | /// Pairwise: |
851 | /// (v0, v1, v2, v3) |
852 | /// ((v0+v1), (v2+v3), undef, undef) |
853 | /// Split: |
854 | /// (v0, v1, v2, v3) |
855 | /// ((v0+v2), (v1+v3), undef, undef) |
856 | int getArithmeticReductionCost(unsigned Opcode, Type *Ty, |
857 | bool IsPairwiseForm) const; |
858 | int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm, |
859 | bool IsUnsigned) const; |
860 | |
861 | /// \returns The cost of Intrinsic instructions. Analyses the real arguments. |
862 | /// Three cases are handled: 1. scalar instruction 2. vector instruction |
863 | /// 3. scalar instruction which is to be vectorized with VF. |
864 | int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, |
865 | ArrayRef<Value *> Args, FastMathFlags FMF, |
866 | unsigned VF = 1) const; |
867 | |
868 | /// \returns The cost of Intrinsic instructions. Types analysis only. |
869 | /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the |
870 | /// arguments and the return value will be computed based on types. |
871 | int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, |
872 | ArrayRef<Type *> Tys, FastMathFlags FMF, |
873 | unsigned ScalarizationCostPassed = UINT_MAX) const; |
874 | |
875 | /// \returns The cost of Call instructions. |
876 | int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const; |
877 | |
878 | /// \returns The number of pieces into which the provided type must be |
879 | /// split during legalization. Zero is returned when the answer is unknown. |
880 | unsigned getNumberOfParts(Type *Tp) const; |
881 | |
882 | /// \returns The cost of the address computation. For most targets this can be |
883 | /// merged into the instruction indexing mode. Some targets might want to |
884 | /// distinguish between address computation for memory operations on vector |
885 | /// types and scalar types. Such targets should override this function. |
  /// The 'SE' parameter holds a pointer to the scalar evolution object which
  /// is used in order to get the Ptr step value in the case of a constant
  /// stride. The 'Ptr' parameter holds the SCEV of the access pointer.
889 | int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr, |
890 | const SCEV *Ptr = nullptr) const; |
891 | |
892 | /// \returns The cost, if any, of keeping values of the given types alive |
893 | /// over a callsite. |
894 | /// |
895 | /// Some types may require the use of register classes that do not have |
896 | /// any callee-saved registers, so would require a spill and fill. |
897 | unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const; |
898 | |
  /// \returns True if the intrinsic is a supported memory intrinsic. Info
  /// will contain additional information: whether the intrinsic may read
  /// or write memory, its volatility, and the pointer. Info is undefined
  /// if false is returned.
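  ///
  /// For example (illustrative; \c TTI and \c II assumed, and
  /// \c analyzeLoadFrom is a hypothetical helper):
  /// \code
  ///   MemIntrinsicInfo Info;
  ///   if (TTI.getTgtMemIntrinsic(II, Info) && Info.ReadMem && Info.PtrVal)
  ///     analyzeLoadFrom(Info.PtrVal); // Treat like a load from Info.PtrVal.
  /// \endcode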
903 | bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const; |
904 | |
905 | /// \returns The maximum element size, in bytes, for an element |
906 | /// unordered-atomic memory intrinsic. |
907 | unsigned getAtomicMemIntrinsicMaxElementSize() const; |
908 | |
909 | /// \returns A value which is the result of the given memory intrinsic. New |
910 | /// instructions may be created to extract the result from the given intrinsic |
911 | /// memory operation. Returns nullptr if the target cannot create a result |
912 | /// from the given intrinsic. |
913 | Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, |
914 | Type *ExpectedType) const; |
915 | |
916 | /// \returns The type to use in a loop expansion of a memcpy call. |
917 | Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, |
918 | unsigned SrcAlign, unsigned DestAlign) const; |
919 | |
920 | /// \param[out] OpsOut The operand types to copy RemainingBytes of memory. |
921 | /// \param RemainingBytes The number of bytes to copy. |
922 | /// |
923 | /// Calculates the operand types to use when copying \p RemainingBytes of |
924 | /// memory, where source and destination alignments are \p SrcAlign and |
925 | /// \p DestAlign respectively. |
926 | void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut, |
927 | LLVMContext &Context, |
928 | unsigned RemainingBytes, |
929 | unsigned SrcAlign, |
930 | unsigned DestAlign) const; |
931 | |
932 | /// \returns True if the two functions have compatible attributes for inlining |
933 | /// purposes. |
934 | bool areInlineCompatible(const Function *Caller, |
935 | const Function *Callee) const; |
936 | |
937 | /// \returns True if the caller and callee agree on how \p Args will be passed |
938 | /// to the callee. |
939 | /// \param[out] Args The list of compatible arguments. The implementation may |
940 | /// filter out any incompatible args from this list. |
941 | bool areFunctionArgsABICompatible(const Function *Caller, |
942 | const Function *Callee, |
943 | SmallPtrSetImpl<Argument *> &Args) const; |
944 | |
945 | /// The type of load/store indexing. |
946 | enum MemIndexedMode { |
947 | MIM_Unindexed, ///< No indexing. |
948 | MIM_PreInc, ///< Pre-incrementing. |
949 | MIM_PreDec, ///< Pre-decrementing. |
950 | MIM_PostInc, ///< Post-incrementing. |
951 | MIM_PostDec ///< Post-decrementing. |
952 | }; |
953 | |
954 | /// \returns True if the specified indexed load for the given type is legal. |
955 | bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const; |
956 | |
957 | /// \returns True if the specified indexed store for the given type is legal. |
958 | bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const; |
959 | |
960 | /// \returns The bitwidth of the largest vector type that should be used to |
961 | /// load/store in the given address space. |
962 | unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const; |
963 | |
964 | /// \returns True if the load instruction is legal to vectorize. |
965 | bool isLegalToVectorizeLoad(LoadInst *LI) const; |
966 | |
967 | /// \returns True if the store instruction is legal to vectorize. |
968 | bool isLegalToVectorizeStore(StoreInst *SI) const; |
969 | |
970 | /// \returns True if it is legal to vectorize the given load chain. |
971 | bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, |
972 | unsigned Alignment, |
973 | unsigned AddrSpace) const; |
974 | |
975 | /// \returns True if it is legal to vectorize the given store chain. |
976 | bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, |
977 | unsigned Alignment, |
978 | unsigned AddrSpace) const; |
979 | |
  /// \returns The new vector factor value if the target doesn't support \p
  /// ChainSizeInBytes loads or has a better vector factor.
982 | unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, |
983 | unsigned ChainSizeInBytes, |
984 | VectorType *VecTy) const; |
985 | |
  /// \returns The new vector factor value if the target doesn't support \p
  /// ChainSizeInBytes stores or has a better vector factor.
988 | unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, |
989 | unsigned ChainSizeInBytes, |
990 | VectorType *VecTy) const; |
991 | |
992 | /// Flags describing the kind of vector reduction. |
993 | struct ReductionFlags { |
994 | ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {} |
    bool IsMaxOp;  ///< If the op is a min/max kind, true if it's a max operation.
996 | bool IsSigned; ///< Whether the operation is a signed int reduction. |
997 | bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present. |
998 | }; |
999 | |
1000 | /// \returns True if the target wants to handle the given reduction idiom in |
1001 | /// the intrinsics form instead of the shuffle form. |
1002 | bool useReductionIntrinsic(unsigned Opcode, Type *Ty, |
1003 | ReductionFlags Flags) const; |
1004 | |
1005 | /// \returns True if the target wants to expand the given reduction intrinsic |
1006 | /// into a shuffle sequence. |
1007 | bool shouldExpandReduction(const IntrinsicInst *II) const; |
1008 | /// @} |
1009 | |
1010 | private: |
  /// Estimate the latency of the specified instruction.
1012 | /// Returns 1 as the default value. |
1013 | int getInstructionLatency(const Instruction *I) const; |
1014 | |
1015 | /// Returns the expected throughput cost of the instruction. |
1016 | /// Returns -1 if the cost is unknown. |
1017 | int getInstructionThroughput(const Instruction *I) const; |
1018 | |
1019 | /// The abstract base class used to type erase specific TTI |
1020 | /// implementations. |
1021 | class Concept; |
1022 | |
1023 | /// The template model for the base class which wraps a concrete |
1024 | /// implementation in a type erased interface. |
1025 | template <typename T> class Model; |
1026 | |
1027 | std::unique_ptr<Concept> TTIImpl; |
1028 | }; |
1029 | |
1030 | class TargetTransformInfo::Concept { |
1031 | public: |
1032 | virtual ~Concept() = 0; |
1033 | virtual const DataLayout &getDataLayout() const = 0; |
1034 | virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0; |
1035 | virtual int getGEPCost(Type *PointeeType, const Value *Ptr, |
1036 | ArrayRef<const Value *> Operands) = 0; |
1037 | virtual int getExtCost(const Instruction *I, const Value *Src) = 0; |
1038 | virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0; |
1039 | virtual int getCallCost(const Function *F, int NumArgs) = 0; |
1040 | virtual int getCallCost(const Function *F, |
1041 | ArrayRef<const Value *> Arguments) = 0; |
1042 | virtual unsigned getInliningThresholdMultiplier() = 0; |
1043 | virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, |
1044 | ArrayRef<Type *> ParamTys) = 0; |
1045 | virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, |
1046 | ArrayRef<const Value *> Arguments) = 0; |
1047 | virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, |
1048 | unsigned &JTSize) = 0; |
1049 | virtual int |
1050 | getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0; |
1051 | virtual bool hasBranchDivergence() = 0; |
1052 | virtual bool isSourceOfDivergence(const Value *V) = 0; |
1053 | virtual bool isAlwaysUniform(const Value *V) = 0; |
1054 | virtual unsigned getFlatAddressSpace() = 0; |
1055 | virtual bool isLoweredToCall(const Function *F) = 0; |
1056 | virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &, |
1057 | UnrollingPreferences &UP) = 0; |
1058 | virtual bool isLegalAddImmediate(int64_t Imm) = 0; |
1059 | virtual bool isLegalICmpImmediate(int64_t Imm) = 0; |
1060 | virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, |
1061 | int64_t BaseOffset, bool HasBaseReg, |
1062 | int64_t Scale, |
1063 | unsigned AddrSpace, |
1064 | Instruction *I) = 0; |
1065 | virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, |
1066 | TargetTransformInfo::LSRCost &C2) = 0; |
1067 | virtual bool canMacroFuseCmp() = 0; |
1068 | virtual bool shouldFavorPostInc() const = 0; |
1069 | virtual bool isLegalMaskedStore(Type *DataType) = 0; |
1070 | virtual bool isLegalMaskedLoad(Type *DataType) = 0; |
1071 | virtual bool isLegalMaskedScatter(Type *DataType) = 0; |
1072 | virtual bool isLegalMaskedGather(Type *DataType) = 0; |
1073 | virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0; |
1074 | virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0; |
1075 | virtual bool prefersVectorizedAddressing() = 0; |
1076 | virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, |
1077 | int64_t BaseOffset, bool HasBaseReg, |
1078 | int64_t Scale, unsigned AddrSpace) = 0; |
1079 | virtual bool LSRWithInstrQueries() = 0; |
1080 | virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0; |
1081 | virtual bool isProfitableToHoist(Instruction *I) = 0; |
1082 | virtual bool useAA() = 0; |
1083 | virtual bool isTypeLegal(Type *Ty) = 0; |
1084 | virtual unsigned getJumpBufAlignment() = 0; |
1085 | virtual unsigned getJumpBufSize() = 0; |
1086 | virtual bool shouldBuildLookupTables() = 0; |
1087 | virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0; |
1088 | virtual bool useColdCCForColdCall(Function &F) = 0; |
1089 | virtual unsigned |
  getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
1091 | virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, |
1092 | unsigned VF) = 0; |
1093 | virtual bool supportsEfficientVectorElementLoadStore() = 0; |
1094 | virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0; |
1095 | virtual const MemCmpExpansionOptions *enableMemCmpExpansion( |
1096 | bool IsZeroCmp) const = 0; |
1097 | virtual bool enableInterleavedAccessVectorization() = 0; |
1098 | virtual bool enableMaskedInterleavedAccessVectorization() = 0; |
1099 | virtual bool isFPVectorizationPotentiallyUnsafe() = 0; |
1100 | virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, |
1101 | unsigned BitWidth, |
1102 | unsigned AddressSpace, |
1103 | unsigned Alignment, |
1104 | bool *Fast) = 0; |
1105 | virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0; |
1106 | virtual bool haveFastSqrt(Type *Ty) = 0; |
1107 | virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0; |
1108 | virtual int getFPOpCost(Type *Ty) = 0; |
1109 | virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, |
1110 | Type *Ty) = 0; |
1111 | virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0; |
1112 | virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, |
1113 | Type *Ty) = 0; |
1114 | virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, |
1115 | Type *Ty) = 0; |
1116 | virtual unsigned getNumberOfRegisters(bool Vector) = 0; |
1117 | virtual unsigned getRegisterBitWidth(bool Vector) const = 0; |
1118 | virtual unsigned getMinVectorRegisterBitWidth() = 0; |
1119 | virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0; |
1120 | virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0; |
virtual bool shouldConsiderAddressTypePromotion(
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1123 | virtual unsigned getCacheLineSize() = 0; |
1124 | virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0; |
1125 | virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0; |
1126 | virtual unsigned getPrefetchDistance() = 0; |
1127 | virtual unsigned getMinPrefetchStride() = 0; |
1128 | virtual unsigned getMaxPrefetchIterationsAhead() = 0; |
virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                                  unsigned &JTSize) = 0;
1130 | virtual unsigned |
1131 | getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, |
1132 | OperandValueKind Opd2Info, |
1133 | OperandValueProperties Opd1PropInfo, |
1134 | OperandValueProperties Opd2PropInfo, |
1135 | ArrayRef<const Value *> Args) = 0; |
1136 | virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, |
1137 | Type *SubTp) = 0; |
1138 | virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, |
1139 | const Instruction *I) = 0; |
virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                     VectorType *VecTy, unsigned Index) = 0;
1142 | virtual int getCFInstrCost(unsigned Opcode) = 0; |
1143 | virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, |
1144 | Type *CondTy, const Instruction *I) = 0; |
1145 | virtual int getVectorInstrCost(unsigned Opcode, Type *Val, |
1146 | unsigned Index) = 0; |
1147 | virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, |
1148 | unsigned AddressSpace, const Instruction *I) = 0; |
1149 | virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, |
1150 | unsigned Alignment, |
1151 | unsigned AddressSpace) = 0; |
1152 | virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, |
1153 | Value *Ptr, bool VariableMask, |
1154 | unsigned Alignment) = 0; |
1155 | virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, |
1156 | unsigned Factor, |
1157 | ArrayRef<unsigned> Indices, |
1158 | unsigned Alignment, |
1159 | unsigned AddressSpace, |
1160 | bool UseMaskForCond = false, |
1161 | bool UseMaskForGaps = false) = 0; |
1162 | virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty, |
1163 | bool IsPairwiseForm) = 0; |
1164 | virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy, |
1165 | bool IsPairwiseForm, bool IsUnsigned) = 0; |
1166 | virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, |
1167 | ArrayRef<Type *> Tys, FastMathFlags FMF, |
1168 | unsigned ScalarizationCostPassed) = 0; |
1169 | virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, |
1170 | ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0; |
1171 | virtual int getCallInstrCost(Function *F, Type *RetTy, |
1172 | ArrayRef<Type *> Tys) = 0; |
1173 | virtual unsigned getNumberOfParts(Type *Tp) = 0; |
1174 | virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, |
1175 | const SCEV *Ptr) = 0; |
1176 | virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0; |
1177 | virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, |
1178 | MemIntrinsicInfo &Info) = 0; |
1179 | virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0; |
1180 | virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, |
1181 | Type *ExpectedType) = 0; |
1182 | virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, |
1183 | unsigned SrcAlign, |
1184 | unsigned DestAlign) const = 0; |
1185 | virtual void getMemcpyLoopResidualLoweringType( |
1186 | SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context, |
1187 | unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0; |
1188 | virtual bool areInlineCompatible(const Function *Caller, |
1189 | const Function *Callee) const = 0; |
1190 | virtual bool |
1191 | areFunctionArgsABICompatible(const Function *Caller, const Function *Callee, |
1192 | SmallPtrSetImpl<Argument *> &Args) const = 0; |
1193 | virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0; |
virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1195 | virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0; |
1196 | virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0; |
1197 | virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0; |
1198 | virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, |
1199 | unsigned Alignment, |
1200 | unsigned AddrSpace) const = 0; |
1201 | virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, |
1202 | unsigned Alignment, |
1203 | unsigned AddrSpace) const = 0; |
1204 | virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, |
1205 | unsigned ChainSizeInBytes, |
1206 | VectorType *VecTy) const = 0; |
1207 | virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, |
1208 | unsigned ChainSizeInBytes, |
1209 | VectorType *VecTy) const = 0; |
1210 | virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty, |
1211 | ReductionFlags) const = 0; |
1212 | virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0; |
1213 | virtual int getInstructionLatency(const Instruction *I) = 0; |
1214 | }; |
1215 | |
1216 | template <typename T> |
1217 | class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { |
1218 | T Impl; |
1219 | |
1220 | public: |
1221 | Model(T Impl) : Impl(std::move(Impl)) {} |
1222 | ~Model() override {} |
1223 | |
1224 | const DataLayout &getDataLayout() const override { |
1225 | return Impl.getDataLayout(); |
1226 | } |
1227 | |
1228 | int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override { |
1229 | return Impl.getOperationCost(Opcode, Ty, OpTy); |
1230 | } |
1231 | int getGEPCost(Type *PointeeType, const Value *Ptr, |
1232 | ArrayRef<const Value *> Operands) override { |
1233 | return Impl.getGEPCost(PointeeType, Ptr, Operands); |
1234 | } |
1235 | int getExtCost(const Instruction *I, const Value *Src) override { |
1236 | return Impl.getExtCost(I, Src); |
1237 | } |
1238 | int getCallCost(FunctionType *FTy, int NumArgs) override { |
1239 | return Impl.getCallCost(FTy, NumArgs); |
1240 | } |
1241 | int getCallCost(const Function *F, int NumArgs) override { |
1242 | return Impl.getCallCost(F, NumArgs); |
1243 | } |
1244 | int getCallCost(const Function *F, |
1245 | ArrayRef<const Value *> Arguments) override { |
1246 | return Impl.getCallCost(F, Arguments); |
1247 | } |
1248 | unsigned getInliningThresholdMultiplier() override { |
1249 | return Impl.getInliningThresholdMultiplier(); |
1250 | } |
1251 | int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, |
1252 | ArrayRef<Type *> ParamTys) override { |
1253 | return Impl.getIntrinsicCost(IID, RetTy, ParamTys); |
1254 | } |
1255 | int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, |
1256 | ArrayRef<const Value *> Arguments) override { |
1257 | return Impl.getIntrinsicCost(IID, RetTy, Arguments); |
1258 | } |
1259 | int getUserCost(const User *U, ArrayRef<const Value *> Operands) override { |
1260 | return Impl.getUserCost(U, Operands); |
1261 | } |
1262 | bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); } |
1263 | bool isSourceOfDivergence(const Value *V) override { |
1264 | return Impl.isSourceOfDivergence(V); |
1265 | } |
1266 | |
1267 | bool isAlwaysUniform(const Value *V) override { |
1268 | return Impl.isAlwaysUniform(V); |
1269 | } |
1270 | |
1271 | unsigned getFlatAddressSpace() override { |
1272 | return Impl.getFlatAddressSpace(); |
1273 | } |
1274 | |
1275 | bool isLoweredToCall(const Function *F) override { |
1276 | return Impl.isLoweredToCall(F); |
1277 | } |
1278 | void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, |
1279 | UnrollingPreferences &UP) override { |
1280 | return Impl.getUnrollingPreferences(L, SE, UP); |
1281 | } |
1282 | bool isLegalAddImmediate(int64_t Imm) override { |
1283 | return Impl.isLegalAddImmediate(Imm); |
1284 | } |
1285 | bool isLegalICmpImmediate(int64_t Imm) override { |
1286 | return Impl.isLegalICmpImmediate(Imm); |
1287 | } |
1288 | bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, |
1289 | bool HasBaseReg, int64_t Scale, |
1290 | unsigned AddrSpace, |
1291 | Instruction *I) override { |
1292 | return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, |
1293 | Scale, AddrSpace, I); |
1294 | } |
1295 | bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, |
1296 | TargetTransformInfo::LSRCost &C2) override { |
1297 | return Impl.isLSRCostLess(C1, C2); |
1298 | } |
1299 | bool canMacroFuseCmp() override { |
1300 | return Impl.canMacroFuseCmp(); |
1301 | } |
1302 | bool shouldFavorPostInc() const override { |
1303 | return Impl.shouldFavorPostInc(); |
1304 | } |
1305 | bool isLegalMaskedStore(Type *DataType) override { |
1306 | return Impl.isLegalMaskedStore(DataType); |
1307 | } |
1308 | bool isLegalMaskedLoad(Type *DataType) override { |
1309 | return Impl.isLegalMaskedLoad(DataType); |
1310 | } |
1311 | bool isLegalMaskedScatter(Type *DataType) override { |
1312 | return Impl.isLegalMaskedScatter(DataType); |
1313 | } |
1314 | bool isLegalMaskedGather(Type *DataType) override { |
1315 | return Impl.isLegalMaskedGather(DataType); |
1316 | } |
1317 | bool hasDivRemOp(Type *DataType, bool IsSigned) override { |
1318 | return Impl.hasDivRemOp(DataType, IsSigned); |
1319 | } |
1320 | bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override { |
1321 | return Impl.hasVolatileVariant(I, AddrSpace); |
1322 | } |
1323 | bool prefersVectorizedAddressing() override { |
1324 | return Impl.prefersVectorizedAddressing(); |
1325 | } |
1326 | int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, |
1327 | bool HasBaseReg, int64_t Scale, |
1328 | unsigned AddrSpace) override { |
1329 | return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, |
1330 | Scale, AddrSpace); |
1331 | } |
1332 | bool LSRWithInstrQueries() override { |
1333 | return Impl.LSRWithInstrQueries(); |
1334 | } |
1335 | bool isTruncateFree(Type *Ty1, Type *Ty2) override { |
1336 | return Impl.isTruncateFree(Ty1, Ty2); |
1337 | } |
1338 | bool isProfitableToHoist(Instruction *I) override { |
1339 | return Impl.isProfitableToHoist(I); |
1340 | } |
1341 | bool useAA() override { return Impl.useAA(); } |
1342 | bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); } |
1343 | unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); } |
1344 | unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); } |
1345 | bool shouldBuildLookupTables() override { |
1346 | return Impl.shouldBuildLookupTables(); |
1347 | } |
1348 | bool shouldBuildLookupTablesForConstant(Constant *C) override { |
1349 | return Impl.shouldBuildLookupTablesForConstant(C); |
1350 | } |
1351 | bool useColdCCForColdCall(Function &F) override { |
1352 | return Impl.useColdCCForColdCall(F); |
1353 | } |
1354 | |
unsigned getScalarizationOverhead(Type *Ty, bool Insert,
                                  bool Extract) override {
1357 | return Impl.getScalarizationOverhead(Ty, Insert, Extract); |
1358 | } |
1359 | unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, |
1360 | unsigned VF) override { |
1361 | return Impl.getOperandsScalarizationOverhead(Args, VF); |
1362 | } |
1363 | |
1364 | bool supportsEfficientVectorElementLoadStore() override { |
1365 | return Impl.supportsEfficientVectorElementLoadStore(); |
1366 | } |
1367 | |
1368 | bool enableAggressiveInterleaving(bool LoopHasReductions) override { |
1369 | return Impl.enableAggressiveInterleaving(LoopHasReductions); |
1370 | } |
1371 | const MemCmpExpansionOptions *enableMemCmpExpansion( |
1372 | bool IsZeroCmp) const override { |
1373 | return Impl.enableMemCmpExpansion(IsZeroCmp); |
1374 | } |
1375 | bool enableInterleavedAccessVectorization() override { |
1376 | return Impl.enableInterleavedAccessVectorization(); |
1377 | } |
1378 | bool enableMaskedInterleavedAccessVectorization() override { |
1379 | return Impl.enableMaskedInterleavedAccessVectorization(); |
1380 | } |
1381 | bool isFPVectorizationPotentiallyUnsafe() override { |
1382 | return Impl.isFPVectorizationPotentiallyUnsafe(); |
1383 | } |
1384 | bool allowsMisalignedMemoryAccesses(LLVMContext &Context, |
1385 | unsigned BitWidth, unsigned AddressSpace, |
1386 | unsigned Alignment, bool *Fast) override { |
1387 | return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace, |
1388 | Alignment, Fast); |
1389 | } |
1390 | PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override { |
1391 | return Impl.getPopcntSupport(IntTyWidthInBit); |
1392 | } |
1393 | bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); } |
1394 | |
1395 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override { |
1396 | return Impl.isFCmpOrdCheaperThanFCmpZero(Ty); |
1397 | } |
1398 | |
1399 | int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); } |
1400 | |
1401 | int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, |
1402 | Type *Ty) override { |
1403 | return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty); |
1404 | } |
1405 | int getIntImmCost(const APInt &Imm, Type *Ty) override { |
1406 | return Impl.getIntImmCost(Imm, Ty); |
1407 | } |
1408 | int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, |
1409 | Type *Ty) override { |
1410 | return Impl.getIntImmCost(Opc, Idx, Imm, Ty); |
1411 | } |
1412 | int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, |
1413 | Type *Ty) override { |
1414 | return Impl.getIntImmCost(IID, Idx, Imm, Ty); |
1415 | } |
1416 | unsigned getNumberOfRegisters(bool Vector) override { |
1417 | return Impl.getNumberOfRegisters(Vector); |
1418 | } |
1419 | unsigned getRegisterBitWidth(bool Vector) const override { |
1420 | return Impl.getRegisterBitWidth(Vector); |
1421 | } |
1422 | unsigned getMinVectorRegisterBitWidth() override { |
1423 | return Impl.getMinVectorRegisterBitWidth(); |
1424 | } |
1425 | bool shouldMaximizeVectorBandwidth(bool OptSize) const override { |
1426 | return Impl.shouldMaximizeVectorBandwidth(OptSize); |
1427 | } |
1428 | unsigned getMinimumVF(unsigned ElemWidth) const override { |
1429 | return Impl.getMinimumVF(ElemWidth); |
1430 | } |
bool shouldConsiderAddressTypePromotion(
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
1433 | return Impl.shouldConsiderAddressTypePromotion( |
1434 | I, AllowPromotionWithoutCommonHeader); |
1435 | } |
1436 | unsigned getCacheLineSize() override { |
1437 | return Impl.getCacheLineSize(); |
1438 | } |
1439 | llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override { |
1440 | return Impl.getCacheSize(Level); |
1441 | } |
1442 | llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override { |
1443 | return Impl.getCacheAssociativity(Level); |
1444 | } |
1445 | unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); } |
1446 | unsigned getMinPrefetchStride() override { |
1447 | return Impl.getMinPrefetchStride(); |
1448 | } |
1449 | unsigned getMaxPrefetchIterationsAhead() override { |
1450 | return Impl.getMaxPrefetchIterationsAhead(); |
1451 | } |
1452 | unsigned getMaxInterleaveFactor(unsigned VF) override { |
1453 | return Impl.getMaxInterleaveFactor(VF); |
1454 | } |
1455 | unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, |
1456 | unsigned &JTSize) override { |
1457 | return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize); |
1458 | } |
1459 | unsigned |
1460 | getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, |
1461 | OperandValueKind Opd2Info, |
1462 | OperandValueProperties Opd1PropInfo, |
1463 | OperandValueProperties Opd2PropInfo, |
1464 | ArrayRef<const Value *> Args) override { |
1465 | return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, |
1466 | Opd1PropInfo, Opd2PropInfo, Args); |
1467 | } |
1468 | int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, |
1469 | Type *SubTp) override { |
1470 | return Impl.getShuffleCost(Kind, Tp, Index, SubTp); |
1471 | } |
1472 | int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, |
1473 | const Instruction *I) override { |
1474 | return Impl.getCastInstrCost(Opcode, Dst, Src, I); |
1475 | } |
int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
1477 | unsigned Index) override { |
1478 | return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index); |
1479 | } |
1480 | int getCFInstrCost(unsigned Opcode) override { |
1481 | return Impl.getCFInstrCost(Opcode); |
1482 | } |
1483 | int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, |
1484 | const Instruction *I) override { |
1485 | return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I); |
1486 | } |
1487 | int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override { |
1488 | return Impl.getVectorInstrCost(Opcode, Val, Index); |
1489 | } |
1490 | int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, |
1491 | unsigned AddressSpace, const Instruction *I) override { |
1492 | return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); |
1493 | } |
1494 | int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, |
1495 | unsigned AddressSpace) override { |
1496 | return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); |
1497 | } |
1498 | int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, |
1499 | Value *Ptr, bool VariableMask, |
1500 | unsigned Alignment) override { |
1501 | return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, |
1502 | Alignment); |
1503 | } |
1504 | int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, |
1505 | ArrayRef<unsigned> Indices, unsigned Alignment, |
1506 | unsigned AddressSpace, bool UseMaskForCond, |
1507 | bool UseMaskForGaps) override { |
1508 | return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, |
1509 | Alignment, AddressSpace, |
1510 | UseMaskForCond, UseMaskForGaps); |
1511 | } |
1512 | int getArithmeticReductionCost(unsigned Opcode, Type *Ty, |
1513 | bool IsPairwiseForm) override { |
1514 | return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm); |
1515 | } |
1516 | int getMinMaxReductionCost(Type *Ty, Type *CondTy, |
1517 | bool IsPairwiseForm, bool IsUnsigned) override { |
1518 | return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned); |
1519 | } |
1520 | int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys, |
1521 | FastMathFlags FMF, unsigned ScalarizationCostPassed) override { |
1522 | return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF, |
1523 | ScalarizationCostPassed); |
1524 | } |
1525 | int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, |
1526 | ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override { |
1527 | return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF); |
1528 | } |
1529 | int getCallInstrCost(Function *F, Type *RetTy, |
1530 | ArrayRef<Type *> Tys) override { |
1531 | return Impl.getCallInstrCost(F, RetTy, Tys); |
1532 | } |
1533 | unsigned getNumberOfParts(Type *Tp) override { |
1534 | return Impl.getNumberOfParts(Tp); |
1535 | } |
1536 | int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, |
1537 | const SCEV *Ptr) override { |
1538 | return Impl.getAddressComputationCost(Ty, SE, Ptr); |
1539 | } |
1540 | unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override { |
1541 | return Impl.getCostOfKeepingLiveOverCall(Tys); |
1542 | } |
1543 | bool getTgtMemIntrinsic(IntrinsicInst *Inst, |
1544 | MemIntrinsicInfo &Info) override { |
1545 | return Impl.getTgtMemIntrinsic(Inst, Info); |
1546 | } |
1547 | unsigned getAtomicMemIntrinsicMaxElementSize() const override { |
1548 | return Impl.getAtomicMemIntrinsicMaxElementSize(); |
1549 | } |
1550 | Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, |
1551 | Type *ExpectedType) override { |
1552 | return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); |
1553 | } |
1554 | Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, |
1555 | unsigned SrcAlign, |
1556 | unsigned DestAlign) const override { |
1557 | return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign); |
1558 | } |
1559 | void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut, |
1560 | LLVMContext &Context, |
1561 | unsigned RemainingBytes, |
1562 | unsigned SrcAlign, |
1563 | unsigned DestAlign) const override { |
1564 | Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes, |
1565 | SrcAlign, DestAlign); |
1566 | } |
1567 | bool areInlineCompatible(const Function *Caller, |
1568 | const Function *Callee) const override { |
1569 | return Impl.areInlineCompatible(Caller, Callee); |
1570 | } |
1571 | bool areFunctionArgsABICompatible( |
1572 | const Function *Caller, const Function *Callee, |
1573 | SmallPtrSetImpl<Argument *> &Args) const override { |
1574 | return Impl.areFunctionArgsABICompatible(Caller, Callee, Args); |
1575 | } |
1576 | bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override { |
1577 | return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout()); |
1578 | } |
1579 | bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override { |
1580 | return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout()); |
1581 | } |
1582 | unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override { |
1583 | return Impl.getLoadStoreVecRegBitWidth(AddrSpace); |
1584 | } |
1585 | bool isLegalToVectorizeLoad(LoadInst *LI) const override { |
1586 | return Impl.isLegalToVectorizeLoad(LI); |
1587 | } |
1588 | bool isLegalToVectorizeStore(StoreInst *SI) const override { |
1589 | return Impl.isLegalToVectorizeStore(SI); |
1590 | } |
1591 | bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, |
1592 | unsigned Alignment, |
1593 | unsigned AddrSpace) const override { |
1594 | return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, |
1595 | AddrSpace); |
1596 | } |
1597 | bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, |
1598 | unsigned Alignment, |
1599 | unsigned AddrSpace) const override { |
1600 | return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment, |
1601 | AddrSpace); |
1602 | } |
1603 | unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, |
1604 | unsigned ChainSizeInBytes, |
1605 | VectorType *VecTy) const override { |
1606 | return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy); |
1607 | } |
1608 | unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, |
1609 | unsigned ChainSizeInBytes, |
1610 | VectorType *VecTy) const override { |
1611 | return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy); |
1612 | } |
1613 | bool useReductionIntrinsic(unsigned Opcode, Type *Ty, |
1614 | ReductionFlags Flags) const override { |
1615 | return Impl.useReductionIntrinsic(Opcode, Ty, Flags); |
1616 | } |
1617 | bool shouldExpandReduction(const IntrinsicInst *II) const override { |
1618 | return Impl.shouldExpandReduction(II); |
1619 | } |
1620 | int getInstructionLatency(const Instruction *I) override { |
1621 | return Impl.getInstructionLatency(I); |
1622 | } |
1623 | }; |
1624 | |
1625 | template <typename T> |
1626 | TargetTransformInfo::TargetTransformInfo(T Impl) |
1627 | : TTIImpl(new Model<T>(Impl)) {} |
1628 | |
1629 | /// Analysis pass providing the \c TargetTransformInfo. |
1630 | /// |
1631 | /// The core idea of the TargetIRAnalysis is to expose an interface through |
1632 | /// which LLVM targets can analyze and provide information about the middle |
1633 | /// end's target-independent IR. This supports use cases such as target-aware |
1634 | /// cost modeling of IR constructs. |
1635 | /// |
/// This is a function analysis because much of the cost modeling for targets
/// is done in a subtarget-specific way. LLVM supports compiling different
/// functions in a module for different subtargets, enabling runtime dispatch
/// according to the observed subtarget.
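///
/// As an illustrative sketch only (following the new pass manager's
/// registration conventions), a client might register this analysis and then
/// query it per function:
/// \code
///   FunctionAnalysisManager FAM;
///   FAM.registerPass([] { return TargetIRAnalysis(); });
///   // ... after registering any other required analyses:
///   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
/// \endcode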
1640 | class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> { |
1641 | public: |
1642 | typedef TargetTransformInfo Result; |
1643 | |
1644 | /// Default construct a target IR analysis. |
1645 | /// |
1646 | /// This will use the module's datalayout to construct a baseline |
1647 | /// conservative TTI result. |
1648 | TargetIRAnalysis(); |
1649 | |
/// Construct an IR analysis pass around a target-provided callback.
1651 | /// |
1652 | /// The callback will be called with a particular function for which the TTI |
1653 | /// is needed and must return a TTI object for that function. |
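///
/// A minimal sketch of such a callback (a real target would typically build
/// the result from its subtarget information rather than from the DataLayout
/// alone):
/// \code
///   TargetIRAnalysis TIRA([](const Function &F) {
///     return TargetTransformInfo(F.getParent()->getDataLayout());
///   });
/// \endcode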
1654 | TargetIRAnalysis(std::function<Result(const Function &)> TTICallback); |
1655 | |
1656 | // Value semantics. We spell out the constructors for MSVC. |
1657 | TargetIRAnalysis(const TargetIRAnalysis &Arg) |
1658 | : TTICallback(Arg.TTICallback) {} |
1659 | TargetIRAnalysis(TargetIRAnalysis &&Arg) |
1660 | : TTICallback(std::move(Arg.TTICallback)) {} |
1661 | TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) { |
1662 | TTICallback = RHS.TTICallback; |
1663 | return *this; |
1664 | } |
1665 | TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) { |
1666 | TTICallback = std::move(RHS.TTICallback); |
1667 | return *this; |
1668 | } |
1669 | |
1670 | Result run(const Function &F, FunctionAnalysisManager &); |
1671 | |
1672 | private: |
1673 | friend AnalysisInfoMixin<TargetIRAnalysis>; |
1674 | static AnalysisKey Key; |
1675 | |
1676 | /// The callback used to produce a result. |
1677 | /// |
1678 | /// We use a completely opaque callback so that targets can provide whatever |
1679 | /// mechanism they desire for constructing the TTI for a given function. |
1680 | /// |
1681 | /// FIXME: Should we really use std::function? It's relatively inefficient. |
1682 | /// It might be possible to arrange for even stateful callbacks to outlive |
1683 | /// the analysis and thus use a function_ref which would be lighter weight. |
1684 | /// This may also be less error prone as the callback is likely to reference |
1685 | /// the external TargetMachine, and that reference needs to never dangle. |
1686 | std::function<Result(const Function &)> TTICallback; |
1687 | |
1688 | /// Helper function used as the callback in the default constructor. |
1689 | static Result getDefaultTTI(const Function &F); |
1690 | }; |
1691 | |
1692 | /// Wrapper pass for TargetTransformInfo. |
1693 | /// |
1694 | /// This pass can be constructed from a TTI object which it stores internally |
1695 | /// and is queried by passes. |
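///
/// A typical query from a legacy pass, sketched under the assumption that the
/// pass has declared its dependency on this wrapper in getAnalysisUsage:
/// \code
///   TargetTransformInfo &TTI =
///       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
/// \endcode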
1696 | class TargetTransformInfoWrapperPass : public ImmutablePass { |
1697 | TargetIRAnalysis TIRA; |
1698 | Optional<TargetTransformInfo> TTI; |
1699 | |
1700 | virtual void anchor(); |
1701 | |
1702 | public: |
1703 | static char ID; |
1704 | |
1705 | /// We must provide a default constructor for the pass but it should |
1706 | /// never be used. |
1707 | /// |
1708 | /// Use the constructor below or call one of the creation routines. |
1709 | TargetTransformInfoWrapperPass(); |
1710 | |
1711 | explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA); |
1712 | |
1713 | TargetTransformInfo &getTTI(const Function &F); |
1714 | }; |
1715 | |
1716 | /// Create an analysis pass wrapper around a TTI object. |
1717 | /// |
1718 | /// This analysis pass just holds the TTI instance and makes it available to |
1719 | /// clients. |
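///
/// For example, a driver building a legacy pipeline might add it as follows
/// (a sketch assuming a valid TargetMachine *TM whose getTargetIRAnalysis()
/// hook produces the analysis):
/// \code
///   legacy::PassManager PM;
///   PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
/// \endcode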
1720 | ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA); |
1721 | |
1722 | } // End llvm namespace |
1723 | |
1724 | #endif |
1725 | |