1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one |
3 | * or more contributor license agreements. See the NOTICE file |
4 | * distributed with this work for additional information |
5 | * regarding copyright ownership. The ASF licenses this file |
6 | * to you under the Apache License, Version 2.0 (the |
7 | * "License"); you may not use this file except in compliance |
8 | * with the License. You may obtain a copy of the License at |
9 | * |
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
11 | * |
12 | * Unless required by applicable law or agreed to in writing, |
13 | * software distributed under the License is distributed on an |
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
15 | * KIND, either express or implied. See the License for the |
16 | * specific language governing permissions and limitations |
17 | * under the License. |
18 | */ |
19 | |
20 | /*! |
21 | * \file tvm/tir/builtin.h |
22 | * \brief TIR builtin intrinsics. |
23 | * |
24 | * TIR builtin intrinsics are stored as tvm:Op. |
25 | * They are processed in the same way as we process Ops. |
26 | * |
27 | * It is not necessary to create a function for every Op, |
28 | * as we can obtain them through Op::Get. |
29 | * |
30 | * This file contains the most commonly used intrinsics or |
31 | * those that have special semantics and need compiler support. |
32 | */ |
33 | #ifndef TVM_TIR_BUILTIN_H_ |
34 | #define TVM_TIR_BUILTIN_H_ |
35 | |
36 | #include <tvm/ir/op.h> |
37 | #include <tvm/tir/expr.h> |
38 | |
39 | namespace tvm { |
40 | namespace tir { |
41 | |
42 | /*! \brief Collection of builtin intrinsics as ops */ |
namespace builtin {
/*!
 * \brief Return value.
 */
TVM_DLL const Op& ret();
/*!
 * \brief Reinterpret the value using the target type.
 */
TVM_DLL const Op& reinterpret();

/*!
 * \brief Marks a condition is likely going to happen.
 */
TVM_DLL const Op& likely();

/*!
 * \brief Bitwise and operator.
 */
TVM_DLL const Op& bitwise_and();

/*!
 * \brief Bitwise or operator.
 */
TVM_DLL const Op& bitwise_or();

/*!
 * \brief Bitwise xor operator.
 */
TVM_DLL const Op& bitwise_xor();

/*!
 * \brief Bitwise not operator.
 */
TVM_DLL const Op& bitwise_not();

/*!
 * \brief Left shift
 */
TVM_DLL const Op& shift_left();

/*!
 * \brief Right shift
 */
TVM_DLL const Op& shift_right();

/*!
 * \brief See pseudo code
 *
 * Construct a big uint that may not be representable by int64
 *
 * Expr large_uint_imm(uint32_t v0, uint32_t v1) {
 *   return (v1 << 32) | v0;
 * }
 */
TVM_DLL const Op& large_uint_imm();

/*!
 * \brief Execute a multiplication between two Q-numbers x and y
 * followed by a right shift s
 * The default rounding rule is to the nearest value, rounding half up
 * (i.e., round(x.1) = x and round (x.5) = x+1)
 */
TVM_DLL const Op& q_multiply_shift();

/*!
 * \brief Returns the address of an element in the buffer (see pseudocode below).
 *
 * The number of indices should match the dimensionality of the buffer
 * being accessed.  If this operation occurs after buffer flattening,
 * the number of indices must be supported by the target (i.e. N>1
 * only on targets that support non-flat memory buffers).
 *
 * Handle address_of(BufferLoad *op) {
 *   return &op->buffer_var[op->indices[0], op->indices[1], ..., op->indices[N-1]];
 * }
 */
TVM_DLL const Op& address_of();

/*!
 * \brief Same as select, used for unsafe memory access.
 *
 * Type tvm_if_then_else(cond, a, b) {
 *   return cond ? a : b;
 * }
 */
TVM_DLL const Op& if_then_else();

/*!
 * \brief See pseudo code
 *
 * bool isnullptr(void* handle) {
 *   return handle == nullptr
 * }
 */
TVM_DLL const Op& isnullptr();

/*!
 * \brief Check if value is nan
 */
TVM_DLL const Op& isnan();

/*!
 * \brief Popcount
 */
TVM_DLL const Op& popcount();

/*!
 * \brief Fused multiply add
 *
 * Type fma(a, b, c) {
 *   return a * b + c;
 * }
 */
TVM_DLL const Op& fma();

/*!
 * \brief Call an extern C function with given name
 *        and signature from the types of args in the runtime environment.
 *
 * Type call_extern(name, args...) {
 *    return dlsym(name)(args...);
 * }
 *
 * \note This intrinsic does not provide any type checking,
 *       and is mainly used for backward compatibility reasons.
 *       Always consider using pre-registered and typed tvm::Op first.
 */
TVM_DLL const Op& call_extern();

/*!
 * \brief Call a pure extern C function with given name
 *        and signature from the types of args in the runtime environment.
 *
 * Type call_pure_extern(name, args...) {
 *    return dlsym(name)(args...);
 * }
 *
 * \note This intrinsic does not provide any type checking,
 *       and is mainly used for backward compatibility reasons.
 *       Always consider using pre-registered and typed tvm::Op first.
 */
TVM_DLL const Op& call_pure_extern();

/*!
 * \brief Call an LLVM intrinsic with a given intrinsic id
 *        and signature from the types of args in the runtime environment.
 *
 * Type call_llvm_intrin(intrin_id, args...) {
 *    return dlsym(name)(args...);
 * }
 *
 * \note This op does not provide any type checking.
 */
TVM_DLL const Op& call_llvm_intrin();

/*!
 * \brief Call an LLVM pure intrinsic with a given intrinsic id
 *        and signature from the types of args in the runtime environment.
 *
 * Type call_llvm_pure_intrin(intrin_id, args...) {
 *    return dlsym(name)(args...);
 * }
 *
 * \note This op does not provide any type checking.
 */
TVM_DLL const Op& call_llvm_pure_intrin();

/*!
 * \brief Call an SPIRV pure GLSL450 intrinsic.
 *
 * Type call_spirv_pure_glsl450(intrin_id, args...) {
 *    return dlsym(name)(args...);
 * }
 *
 * \note This op does not provide any type checking.
 */
TVM_DLL const Op& call_spirv_pure_glsl450();

// TODO(tvm-team) revisit the builtins below
// some of them can simply become ops with special codegen attr.
/*!
 * \brief Prefetch a cacheline
 */
TVM_DLL const Op& prefetch();

/*!
 * \brief Get head access address with memory access pattern info.
 *
 *  This operator also marks range of the memory access
 *  The offset and extent are in unit of the DType(including vectorization factor).
 *  rw_mask is a bit_mask setting whether the access is a read(1) or write(2).
 *  The access is assumed to happen in the current expression.
 *
 *  PtrType tvm_access_ptr(Expr dtype, DType* data,
 *                         int offset, int extent,
 *                         int rw_mask) {
 *    // DType == dtype.type();
 *    return &data[offset];
 *  }
 */
TVM_DLL const Op& tvm_access_ptr();

/*!
 * \brief Create a function local static handle that initializes to nullptr.
 *  It can be used to cache function local static resources.
 */
TVM_DLL const Op& tvm_static_handle();

/*!
 * \brief Return a unique context id, used for hint of workspace separation.
 *  Different context id guarantees not having overlapping workspace.
 */
TVM_DLL const Op& tvm_context_id();

/*!
 * \brief tvm_tuple is not an actual function and cannot codegen.
 *  It is used to represent tuple structure in value field of AttrStmt,
 *  for the sake of giving hint to optimization.
 *
 *  Handle tvm_tuple(value0, value1, ..., value_n);
 */
TVM_DLL const Op& tvm_tuple();

/*!
 * \brief See pseudo code
 *
 *  Type tvm_struct_get(StructType* arr, int index, int field_id) {
 *     return arr[index]->field;
 *  }
 * \sa TVMStructFieldKind
 */
TVM_DLL const Op& tvm_struct_get();

/*!
 * \brief See pseudo code
 *
 *  Handle tvm_struct_set(StructType* arr, int index, int field_id, value) {
 *     arr[index]->field = value;
 *  }
 * \sa TVMStructFieldKind
 */
TVM_DLL const Op& tvm_struct_set();

/*!
 * \brief See pseudo code
 * Type lookup_param(String param_name) {
 *     return __tvm_param__param_name;
 * }
 */
TVM_DLL const Op& lookup_param();

/*!
 * \brief See pseudo code
 *
 *  void tvm_throw_last_error() {
 *    throw TVMGetLastError();
 *  }
 */
TVM_DLL const Op& tvm_throw_last_error();

/*!
 * \brief See pseudo code
 *
 *  dtype in {shape, array, arg_value, arg_tcode}
 *
 *  Handle tvm_stack_alloca(string dtype, int num) {
 *     return new on stack dtype[num];
 *  }
 */
TVM_DLL const Op& tvm_stack_alloca();

/*!
 * \brief Allocate a shape tuple on stack, return the handle.
 *
 *  Handle tvm_stack_make_shape(list args) {
 *     ret = alloca stack int64_t[len(args)];
 *     for i in range(len(args)):
 *        ret[i] = args[i]
 *     return &ret[0];
 *  }
 */
TVM_DLL const Op& tvm_stack_make_shape();

/*!
 * \brief Allocate a NDArray(DLTensor) on stack, return the handle.
 *
 *  Type tvm_stack_make_array(Expr data,
 *                            Expr shape,
 *                            Expr strides,
 *                            Expr ndim,
 *                            Expr dtype,
 *                            Expr elem_offset) {
 *     ret = alloca stack DLTensor();
 *     ret->data = data;
 *     ret->shape = shape;
 *     ret->strides = strides != 0 ? strides : nullptr;
 *     ret->ndim = ndim;
 *     ret->dtype = dtype.type();
 *     ret->byte_offset = elem_offset * sizeof(dtype);
 *     return ret;
 *  }
 */
TVM_DLL const Op& tvm_stack_make_array();

/*!
 * \brief See pseudo code
 *
 *  return_type tvm_call_packed(name, TVMValue* args) {
 *     TVMValue ret_value;
 *     int ret_code;
 *     ModuleNode* env = GetCurrentEnv();
 *     const PackedFunc* f = env->GetFuncFromEnv(name);
 *     (*f)(args, type_code_of(args), len(args), &ret_value, &ret_code);
 *     // return type can be int, float, handle.
 *     return cast(return_type, ret_value.v_return_type);
 *  }
 */
TVM_DLL const Op& tvm_call_packed();

/*!
 * \brief See pseudo code
 *
 *  return_type tvm_call_cpacked(fname, TVMValue* args) {
 *     int ret_code;
 *     TVMValue ret_value;
 *     (*fname)(args, type_code_of(args), len(args), &ret_value, &ret_code);
 *     return cast(return_type, ret_value.v_return_type);
 *  }
 */
TVM_DLL const Op& tvm_call_cpacked();

/*!
 * \brief See pseudo code
 *
 *  return_type tvm_call_trace_packed(name, TVMValue* args) {
 *     ModuleNode* env = GetCurrentEnv();
 *     const PackedFunc* f = env->GetFuncFromEnv(name);
 *     (*f)(args, type_code_of(args), len(args));
 *     // return type can be int, float, handle.
 *     return cast(return_type, ret_value.v_return_type);
 *  }
 */
TVM_DLL const Op& tvm_call_trace_packed();

/*!
 * \brief Checks the return value of another call is correct or returns a given value.
 *
 * \note This is meant to serve a specific case for AOT code generator whilst this
 *       cannot be fully represented in TIR.
 *
 * Type tvm_check_return(expected, return_unexpected, nested_call) {
 *     if (nested_call() != expected) {
 *         return return_unexpected;
 *     }
 * }
 */
TVM_DLL const Op& tvm_check_return();

/*!
 * \brief See pseudo code
 *  Mark the content as thread local context, can get optimized
 *  by only call the call once at thread start.
 *
 *  Do not allow nesting(getting a thread context from another).
 *
 *  Handle tvm_thread_context(Expr call) {
 *     return call;
 *  }
 */
TVM_DLL const Op& tvm_thread_context();

/*!
 * \brief Lowered version of call packed, the space of value and
 *  type codes are explicitly allocated.
 *
 *  return_type tvm_call_packed_lowered(name,
 *                                      TVMValue* value_stack,
 *                                      int* tcode_stack,
 *                                      int begin,
 *                                      int end) {
 *     ModuleNode* env = GetCurrentEnv();
 *     const PackedFunc* f = env->GetFuncFromEnv(name);
 *     f->CallPacked(TVMArgs(value_stack[begin:end],
 *                           tcode_stack[begin:end]),
 *                   TVMRetValue(value_stack + end, tcode_stack + end));
 *     // return type can be int, float, handle.
 *     return cast(return_type, load_return_from(tcode_stack + end))
 *  }
 */
TVM_DLL const Op& tvm_call_packed_lowered();

/*!
 * \brief Lowered version of call c-packed, the space of value and
 *  type codes are explicitly allocated.
 *
 *  int tvm_call_cpacked_lowered(fname,
 *                               TVMValue* value_stack,
 *                               int* tcode_stack,
 *                               int begin,
 *                               int end) {
 *     fname(TVMArgs(value_stack[begin:end], tcode_stack[begin:end]),
 *           TVMRetValue(value_stack + end, tcode_stack + end));
 *  }
 */
TVM_DLL const Op& tvm_call_cpacked_lowered();

/*!
 * \brief Lowered version of trace intrinsic, the space of value and
 *  type codes are explicitly allocated. The return value is the
 *  (end - 1) value on the stack.
 *
 *  return_type tvm_call_trace_packed_lowered(name,
 *                                            TVMValue* value_stack,
 *                                            int* tcode_stack,
 *                                            int begin,
 *                                            int end) {
 *     ModuleNode* env = GetCurrentEnv();
 *     const PackedFunc* f = env->GetFuncFromEnv(name);
 *     f->CallPacked(TVMArgs(value_stack[begin:end],
 *                           tcode_stack[begin:end]),
 *                   TVMRetValue(value_stack + end, tcode_stack + end));
 *     // return type can be int, float, handle.
 *     return cast(return_type, load_return_from(tcode_stack + end))
 *  }
 */
TVM_DLL const Op& tvm_call_trace_packed_lowered();

/*!
 * \brief See pseudo code
 *
 *  int tvm_storage_sync(std::string storage_scope) {
 *     __sync(storage_scope);
 *     return 0;
 *  }
 */
TVM_DLL const Op& tvm_storage_sync();

/*!
 * \brief See pseudo code
 *
 *  Type tvm_warp_shuffle(mask, Type value, warp_id, width, warp_size) {
 *    return (value passed in by warp indicated by this_warp_id);
 *  }
 *
 *  Type tvm_warp_shuffle_up(mask, Type value, offset, width, warp_size) {
 *    return (value passed in by warp indicated by this_warp_id - offset);
 *  }
 *
 *  Type tvm_warp_shuffle_down(mask, Type value, offset, width, warp_size) {
 *    return (value passed in by warp indicated by this_warp_id + offset);
 *  }
 *
 *  unsigned tvm_warp_activemask() {
 *    return (32-bit mask of currently active threads in the calling warp);
 *  }
 *
 *  Parameter warp_id indicates the source thread ID in a warp.
 *
 *  Parameter offset indicates the relative distance to this_warp_id.
 *
 *  Parameter width indicates the number of threads involved in one
 *  shuffle. See CUDA document for __shfl_sync, __shfl_up_sync,
 *  __shfl_down_sync and __activemask.
 *
 *  Parameter warp_size is the size of a warp, which helps a backend
 *  to determine whether the width parameter is legal.
 *
 */
TVM_DLL const Op& tvm_warp_shuffle();
TVM_DLL const Op& tvm_warp_shuffle_up();
TVM_DLL const Op& tvm_warp_shuffle_down();
TVM_DLL const Op& tvm_warp_activemask();

/*!
 * \brief Initialize the global barrier.
 *  Call this at beginning of kernels that need global barrier.
 */
TVM_DLL const Op& tvm_global_barrier_kinit();

/*!
 * \brief See pseudo code
 *
 *  void tvm_thread_allreduce(UIntImm size, Expr source0, ..., Expr cond,
 *                            Var reduce_temp0, .., Var thread_idx1, ...) {
 *     // constraint by the other thread_idx remain the same.
 *     // reduce_temp is used to save intermediate result.
 *     reduce_temp0, ... = reduce(combiner, source0, ..., cond
 *       over [thread_idx1, thread_idx2] passed by any caller)
 *  }
 */
TVM_DLL const Op& tvm_thread_allreduce();

// TODO(tvm-team) TensorCore specific intrinsics should be directly registered under
//                cuda. namespace and used through op.
/*!
 * \brief tvm intrinsic for tensor core load operators.
 *
 *  void tvm_load_matrix_sync(Var fragment, UIntImm m, UIntImm n, UIntImm k,
 *                            Expr index, Expr buffer_ptr, Expr stride,
 *                            StringImm layout) {
 *    // m, n, k are the shape of wmma fragment.
 *    // Determine fragment layout(column-major or row major) by layout.
 *    // fragments must be in 'wmma.matrix_a' or 'wmma.matrix_b' scope.
 *    nvcuda::wmma::load_matrix_sync(fragment[index], buffer_ptr, stride);
 *  }
 */
TVM_DLL const Op& tvm_load_matrix_sync();

/*!
 * \brief tvm intrinsic for tensor core mma_sync operators.
 *
 *  void tvm_mma_sync(Var fragment_d, Expr index_d,
 *                    Var fragment_a, Expr index_a,
 *                    Var fragment_b, Expr index_b,
 *                    Var fragment_c, Expr index_c) {
 *    nvcuda::wmma::mma_sync(fragment_d[index_d], fragment_a[index_a],
 *                           fragment_b[index_b], fragment_c[index_c]);
 *  }
 */
TVM_DLL const Op& tvm_mma_sync();

/*!
 * \brief tvm intrinsic for tensor core bmma_sync operators.
 *
 *  void tvm_bmma_sync(Var fragment_d, Expr index_d,
 *                     Var fragment_a, Expr index_a,
 *                     Var fragment_b, Expr index_b,
 *                     Var fragment_c, Expr index_c) {
 *    nvcuda::wmma::bmma_sync(fragment_d[index_d], fragment_a[index_a],
 *                            fragment_b[index_b], fragment_c[index_c]);
 *  }
 */
TVM_DLL const Op& tvm_bmma_sync();

/*!
 * \brief tvm intrinsic for tensor core fill_fragment operators.
 *
 *  void tvm_fill_fragment(Var fragment, UIntImm m, UIntImm n, UIntImm k,
 *                         Expr index, Expr value) {
 *    // m, n, k are the shape of wmma fragment
 *    // fragments must be in 'wmma.accumulator' scope.
 *    nvcuda::wmma::fill_fragment(fragment[index], value);
 *  }
 */
TVM_DLL const Op& tvm_fill_fragment();

/*!
 * \brief tvm intrinsic for tensor core store operators.
 *
 *  void tvm_store_matrix_sync(Var fragment, UIntImm m, UIntImm n, UIntImm k,
 *                             Expr index, Expr buffer_ptr, Expr stride,
 *                             StringImm layout) {
 *    // m, n, k are the shape of wmma fragment
 *    // fragments must be in 'wmma.accumulator' scope.
 *    nvcuda::wmma::store_matrix_sync(fragment[index], buffer_ptr, stride, layout);
 *  }
 */
TVM_DLL const Op& tvm_store_matrix_sync();

/*!
 * \brief tvm intrinsic for ptx tensor core mma instructions.
 *
 *  void ptx_mma(StringImm shape, StringImm A_layout, StringImm B_layout,
 *               StringImm A_dtype, StringImm B_dtype, StringImm C_dtype,
 *               Var multiplicand_a, Expr a_index,
 *               Var multiplicand_b, Expr b_index,
 *               Var accumulator, Expr c_index, bool saturate);
 */
TVM_DLL const Op& ptx_mma();

/*!
 * \brief tvm intrinsic for sparse tensor core ptx instructions.
 *
 *  void ptx_mma_sp(StringImm shape, StringImm A_layout, StringImm B_layout,
 *                  StringImm A_dtype, StringImm B_dtype, StringImm C_dtype,
 *                  Var multiplicand_a, Expr a_index,
 *                  Var multiplicand_b, Expr b_index,
 *                  Var accumulator, Expr c_index,
 *                  Var metadata, Expr meta_index,
 *                  Var sparse_selector, bool saturate);
 */
TVM_DLL const Op& ptx_mma_sp();

/*!
 * \brief tvm intrinsic for ptx load matrix from shared memory.
 *
 *  void ptx_ldmatrix(Bool trans, IntImm num, StringImm type,
 *                    Var local_ptr, Expr local_offset,
 *                    Var smem_ptr, Expr smem_offset);
 */
TVM_DLL const Op& ptx_ldmatrix();

/*!
 * \brief tvm intrinsics for ptx async copy from global to shared memory
 *
 *  void ptx_cp_async(Var shared_ptr, Expr shared_offset, Var global_ptr, Expr global_offset, size_t
 * bytes);
 *
 */
TVM_DLL const Op& ptx_cp_async();

/*!
 * \brief tvm intrinsics for ptx async copy commit and wait.
 *
 *  void ptx_commit_group();
 *  void ptx_wait_group(int num);
 *
 */
TVM_DLL const Op& ptx_commit_group();
TVM_DLL const Op& ptx_wait_group();

/*!
 * \brief tvm intrinsic for storing the result of PTX MMA into a destination pointer.
 *        For example, if each thread in a warp of size 32 has 4 elements from the result of
 *        m16xn8xk16 MMA in its registers, this intrinsic can be used to store the result in a
 *        16x8 region in shared or global memory.
 *
 *        There is no real PTX instruction that does that, but we want to hide details of
 *        complex index manipulation behind this intrinsic to simplify TIR lowering passes (e.g.
 *        LowerWarpMemory).
 *
 * void mma_store(IntImm m, IntImm n, Var dst_ptr, Var src_ptr, Expr src_offset, Var dst_stride);
 */
TVM_DLL const Op& mma_store();

/*!
 * \brief tvm intrinsic for zero-initializing an MMA accumulation register.
 *        For example, if each thread in a warp of size 32 has 8 elements from the A matrix in
 *        m16xn8xk16 MMA in its registers, this intrinsic can be used to zero-initialize its
 *        4 accumulation registers.
 *
 *        There is no real PTX instruction that does that, but we introduce this intrinsic for the
 *        same reason as mma_store above.
 *
 * void mma_fill(IntImm local_size, Var local_ptr, Expr offset);
 */
TVM_DLL const Op& mma_fill();

// TODO(tvm-team) replace the usage of the vector operations by Shuffle.
/*!
 * \brief Get the high level half of the vector
 */
TVM_DLL const Op& vectorhigh();

/*!
 * \brief Get the low-level half of the vector
 */
TVM_DLL const Op& vectorlow();

/*!
 * \brief Concat two vectors.
 */
TVM_DLL const Op& vectorcombine();

/*!
 * \brief atomic add instruction, corresponding e.g. to atomicAdd in CUDA
 */
TVM_DLL const Op& atomic_add();
/*!
 * \brief Create an Nd memory allocation with storage scope
 */
TVM_DLL const Op& nd_mem_alloc_with_scope();

/*!
 * \brief Store to texture 2d memory
 */
TVM_DLL const Op& texture2d_store();

/*!
 * \brief Load from texture 2d memory
 */
TVM_DLL const Op& texture2d_load();

/*!
 * \brief Initiate a non-blocking DMA copy from source to destination
 */
TVM_DLL const Op& dma_copy();

/*!
 * \brief Wait until the number of DMAs in flight is less than or equal to some maximum
 */
TVM_DLL const Op& dma_wait();

/*!
 * \brief Provide a true statement that can be used for simplifications
 *
 * Compile-time representation of known constraints about function
 * inputs.  This assumption is removed when lowering, and does not
 * occur in codegen.
 */
TVM_DLL const Op& assume();

/*!
 * \brief Returns an initialized but arbitrary value
 *
 * Compile-time representation of memory locations whose values may be
 * altered as a result of optimizations.
 */
TVM_DLL const Op& undef();

/*!
 * \brief Profiling intrinsic
 */
TVM_DLL const Op& start_profile_intrinsic();

/*!
 * \brief Profiling intrinsic
 */
TVM_DLL const Op& end_profile_intrinsic();

/*! \brief The kind of structure field info used in intrinsic */
enum TVMStructFieldKind : int {
  // array head address
  kArrAddr,
  kArrData,
  kArrShape,
  kArrStrides,
  kArrNDim,
  kArrTypeCode,
  kArrTypeBits,
  kArrTypeLanes,
  kArrByteOffset,
  kArrDeviceId,
  kArrDeviceType,
  // sentinel: upper bound of the array field kinds above
  kArrKindBound_,
  // TVMValue field
  kTVMValueContent,
  // sentinel: upper bound of the TVMValue field kinds
  kTVMValueKindBound_
};
}  // namespace builtin
773 | } // namespace tir |
774 | } // namespace tvm |
775 | #endif // TVM_TIR_BUILTIN_H_ |
776 | |