#pragma once

// Use relative path here for runtime compilation
#include "taichi/inc/constants.h"

#if defined(TI_RUNTIME_HOST)
// Host-only std headers used by the member functions below; keep them out of
// the device-side runtime compilation path.
#include <array>
#include <cstdint>
#include <vector>
#endif

#if defined(TI_RUNTIME_HOST)
namespace taichi::lang {
#endif

struct LLVMRuntime;
struct DeviceAllocation;
// "RuntimeContext" holds necessary data for kernel body execution, such as a
// pointer to the LLVMRuntime struct, kernel arguments, and the thread id (if
// on CPU).
struct RuntimeContext {
  enum class DevAllocType : int8_t {
    kNone = 0,
    kNdarray = 1,
    kTexture = 2,
    kRWTexture = 3
  };

  LLVMRuntime *runtime{nullptr};
  // args can contain:
  // - primitive types
  // - raw ptrs: for external arrays or torch-based ndarrays
  // - DeviceAllocation*: for taichi ndarrays
  uint64 args[taichi_max_num_args_total];
  uint64 grad_args[taichi_max_num_args_total];
  int32 extra_args[taichi_max_num_args_extra][taichi_max_num_indices];
  int32 cpu_thread_id;

  bool has_grad[taichi_max_num_args_total];

  // Note that I've tried to group `array_runtime_sizes` and
  // `device_allocation_type` into a small struct. However, doing so caused
  // some test cases to get stuck.

  // `array_runtime_sizes[i]` records the runtime size of the i-th array
  // argument.
  uint64 array_runtime_sizes[taichi_max_num_args_total]{0};
  // `device_allocation_type[i]` is set iff the i-th arg is a
  // `DeviceAllocation*`; otherwise it stays DevAllocType::kNone.
  DevAllocType device_allocation_type[taichi_max_num_args_total]{
      DevAllocType::kNone};
  // We moved the pointer to the result buffer from LLVMRuntime to
  // RuntimeContext because each real function needs a place to store its
  // result, while LLVMRuntime is shared among functions. Each function gets
  // its own RuntimeContext, and therefore its own result buffer.
  uint64 *result_buffer;

  static constexpr size_t extra_args_size = sizeof(extra_args);

#if defined(TI_RUNTIME_HOST)
  template <typename T>
  T get_arg(int i) {
    return taichi_union_cast_with_different_sizes<T>(args[i]);
  }

  template <typename T>
  T get_grad_arg(int i) {
    return taichi_union_cast_with_different_sizes<T>(grad_args[i]);
  }

  uint64 get_arg_as_uint64(int i) {
    return args[i];
  }

  template <typename T>
  void set_arg(int i, T v) {
    args[i] = taichi_union_cast_with_different_sizes<uint64>(v);
    set_array_device_allocation_type(i, DevAllocType::kNone);
  }
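
  // Illustrative round trip, a sketch rather than a real call site; `ctx`
  // below is a hypothetical host-side RuntimeContext:
  //
  //   RuntimeContext ctx{};
  //   ctx.set_arg<float>(0, 3.0f);      // bit-casts 3.0f into args[0] and
  //                                     // marks arg 0 as DevAllocType::kNone
  //   float x = ctx.get_arg<float>(0);  // bit-casts back; x == 3.0f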

  template <typename T>
  void set_grad_arg(int i, T v) {
    grad_args[i] = taichi_union_cast_with_different_sizes<uint64>(v);
  }

  void set_array_runtime_size(int i, uint64 size) {
    this->array_runtime_sizes[i] = size;
  }

  void set_array_device_allocation_type(int i, DevAllocType usage) {
    this->device_allocation_type[i] = usage;
  }

  template <typename T>
  T get_ret(int i) {
    return taichi_union_cast_with_different_sizes<T>(result_buffer[i]);
  }
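
  // Illustrative host-side sketch of reading back a return value; the result
  // storage and `launch` below are hypothetical stand-ins:
  //
  //   uint64 results[64] = {0};  // hypothetical backing store
  //   RuntimeContext ctx{};
  //   ctx.result_buffer = results;
  //   launch(&ctx);  // hypothetical launcher; the kernel fills result_buffer
  //   float r = ctx.get_ret<float>(0);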

  void set_arg_texture(int arg_id, intptr_t alloc_ptr) {
    args[arg_id] = taichi_union_cast_with_different_sizes<uint64>(alloc_ptr);
    set_array_device_allocation_type(arg_id, DevAllocType::kTexture);
  }

  void set_arg_rw_texture(int arg_id,
                          intptr_t alloc_ptr,
                          const std::array<int, 3> &shape) {
    args[arg_id] = taichi_union_cast_with_different_sizes<uint64>(alloc_ptr);
    set_array_device_allocation_type(arg_id, DevAllocType::kRWTexture);
    TI_ASSERT(shape.size() <= taichi_max_num_indices);
    for (size_t i = 0; i < shape.size(); i++) {
      extra_args[arg_id][i] = shape[i];
    }
  }
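
  // Illustrative usage; `tex_alloc` stands in for a caller-owned
  // DeviceAllocation pointer (hypothetical):
  //
  //   ctx.set_arg_rw_texture(/*arg_id=*/1,
  //                          reinterpret_cast<intptr_t>(tex_alloc),
  //                          /*shape=*/{128, 128, 1});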

  void set_arg_external_array(int arg_id,
                              uintptr_t ptr,
                              uint64 size,
                              const std::vector<int64> &shape) {
    set_arg(arg_id, ptr);
    set_array_runtime_size(arg_id, size);
    set_array_device_allocation_type(arg_id, DevAllocType::kNone);
    TI_ASSERT(shape.size() <= taichi_max_num_indices);
    for (size_t i = 0; i < shape.size(); ++i) {
      // Shapes are stored as int32 in `extra_args`.
      extra_args[arg_id][i] = shape[i];
    }
  }
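
  // Illustrative usage for a caller-owned host buffer viewed as a (16, 16)
  // array; `host_buf` is hypothetical, and `size` here assumes the
  // element-count convention that set_arg_ndarray below uses for
  // array_runtime_sizes:
  //
  //   ctx.set_arg_external_array(/*arg_id=*/0,
  //                              reinterpret_cast<uintptr_t>(host_buf),
  //                              /*size=*/16 * 16,
  //                              /*shape=*/{16, 16});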

  void set_arg_ndarray(int arg_id,
                       intptr_t devalloc_ptr,
                       const std::vector<int> &shape,
                       bool has_grad = false,
                       intptr_t devalloc_ptr_grad = 0) {
    // Set has_grad value
    this->has_grad[arg_id] = has_grad;

    // Set args[arg_id] value
    args[arg_id] = taichi_union_cast_with_different_sizes<uint64>(devalloc_ptr);

    // Set grad_args[arg_id] value
    if (has_grad) {
      grad_args[arg_id] =
          taichi_union_cast_with_different_sizes<uint64>(devalloc_ptr_grad);
    }

    // Set device allocation type and runtime size
    set_array_device_allocation_type(arg_id, DevAllocType::kNdarray);
    TI_ASSERT(shape.size() <= taichi_max_num_indices);
    size_t total_size = 1;
    for (size_t i = 0; i < shape.size(); i++) {
      extra_args[arg_id][i] = shape[i];
      total_size *= shape[i];
    }
    set_array_runtime_size(arg_id, total_size);
  }
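
  // Illustrative usage binding a 4x4 ndarray together with its gradient;
  // `alloc` and `grad_alloc` stand in for caller-owned DeviceAllocation
  // pointers (hypothetical):
  //
  //   ctx.set_arg_ndarray(/*arg_id=*/0,
  //                       reinterpret_cast<intptr_t>(alloc),
  //                       /*shape=*/{4, 4},
  //                       /*has_grad=*/true,
  //                       reinterpret_cast<intptr_t>(grad_alloc));
  //
  //   // Afterwards extra_args[0][0..1] hold {4, 4} and
  //   // array_runtime_sizes[0] == 16.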
#endif
};

#if defined(TI_RUNTIME_HOST)
}  // namespace taichi::lang
#endif