1 | #pragma once |
2 | |
3 | // Use relative path here for runtime compilation |
4 | #include "taichi/inc/constants.h" |
5 | |
6 | #if defined(TI_RUNTIME_HOST) |
7 | namespace taichi::lang { |
8 | #endif |
9 | |
10 | struct LLVMRuntime; |
11 | struct DeviceAllocation; |
12 | // "RuntimeContext" holds necessary data for kernel body execution, such as a |
13 | // pointer to the LLVMRuntime struct, kernel arguments, and the thread id (if on |
14 | // CPU). |
15 | struct RuntimeContext { |
16 | enum class DevAllocType : int8_t { |
17 | kNone = 0, |
18 | kNdarray = 1, |
19 | kTexture = 2, |
20 | kRWTexture = 3 |
21 | }; |
22 | |
23 | LLVMRuntime *runtime{nullptr}; |
24 | // args can contain: |
25 | // - primitive_types |
26 | // - raw ptrs: for external array, or torch-based ndarray |
27 | // - DeviceAllocation*: for taichi ndaray |
28 | uint64 args[taichi_max_num_args_total]; |
29 | uint64 grad_args[taichi_max_num_args_total]; |
30 | int32 [taichi_max_num_args_extra][taichi_max_num_indices]; |
31 | int32 cpu_thread_id; |
32 | |
33 | bool has_grad[taichi_max_num_args_total]; |
34 | |
35 | // Note that I've tried to group `array_runtime_size` and |
36 | // `is_device_allocations` into a small struct. However, it caused some test |
37 | // cases to stuck. |
38 | |
39 | // `array_runtime_size` records the runtime size of the |
40 | // corresponding array arguments. |
41 | uint64 array_runtime_sizes[taichi_max_num_args_total]{0}; |
42 | // `device_allocation_type` is set iff i-th arg is a `DeviceAllocation*`, |
43 | // otherwise it is set to DevAllocType::kNone |
44 | DevAllocType device_allocation_type[taichi_max_num_args_total]{ |
45 | DevAllocType::kNone}; |
46 | // We move the pointer of result buffer from LLVMRuntime to RuntimeContext |
47 | // because each real function need a place to store its result, but |
48 | // LLVMRuntime is shared among functions. So we moved the pointer to |
49 | // RuntimeContext which each function have one. |
50 | uint64 *result_buffer; |
51 | |
52 | static constexpr size_t = sizeof(extra_args); |
53 | |
54 | #if defined(TI_RUNTIME_HOST) |
55 | template <typename T> |
56 | T get_arg(int i) { |
57 | return taichi_union_cast_with_different_sizes<T>(args[i]); |
58 | } |
59 | |
60 | template <typename T> |
61 | T get_grad_arg(int i) { |
62 | return taichi_union_cast_with_different_sizes<T>(grad_args[i]); |
63 | } |
64 | |
65 | uint64 get_arg_as_uint64(int i) { |
66 | return args[i]; |
67 | } |
68 | |
69 | template <typename T> |
70 | void set_arg(int i, T v) { |
71 | args[i] = taichi_union_cast_with_different_sizes<uint64>(v); |
72 | set_array_device_allocation_type(i, DevAllocType::kNone); |
73 | } |
74 | |
75 | template <typename T> |
76 | void set_grad_arg(int i, T v) { |
77 | grad_args[i] = taichi_union_cast_with_different_sizes<uint64>(v); |
78 | } |
79 | |
80 | void set_array_runtime_size(int i, uint64 size) { |
81 | this->array_runtime_sizes[i] = size; |
82 | } |
83 | |
84 | void set_array_device_allocation_type(int i, DevAllocType usage) { |
85 | this->device_allocation_type[i] = usage; |
86 | } |
87 | |
88 | template <typename T> |
89 | T get_ret(int i) { |
90 | return taichi_union_cast_with_different_sizes<T>(result_buffer[i]); |
91 | } |
92 | |
93 | void set_arg_texture(int arg_id, intptr_t alloc_ptr) { |
94 | args[arg_id] = taichi_union_cast_with_different_sizes<uint64>(alloc_ptr); |
95 | set_array_device_allocation_type(arg_id, DevAllocType::kTexture); |
96 | } |
97 | |
98 | void set_arg_rw_texture(int arg_id, |
99 | intptr_t alloc_ptr, |
100 | const std::array<int, 3> &shape) { |
101 | args[arg_id] = taichi_union_cast_with_different_sizes<uint64>(alloc_ptr); |
102 | set_array_device_allocation_type(arg_id, DevAllocType::kRWTexture); |
103 | TI_ASSERT(shape.size() <= taichi_max_num_indices); |
104 | for (int i = 0; i < shape.size(); i++) { |
105 | extra_args[arg_id][i] = shape[i]; |
106 | } |
107 | } |
108 | |
109 | void set_arg_external_array(int arg_id, |
110 | uintptr_t ptr, |
111 | uint64 size, |
112 | const std::vector<int64> &shape) { |
113 | set_arg(arg_id, ptr); |
114 | set_array_runtime_size(arg_id, size); |
115 | set_array_device_allocation_type(arg_id, |
116 | RuntimeContext::DevAllocType::kNone); |
117 | for (uint64 i = 0; i < shape.size(); ++i) { |
118 | extra_args[arg_id][i] = shape[i]; |
119 | } |
120 | } |
121 | |
122 | void set_arg_ndarray(int arg_id, |
123 | intptr_t devalloc_ptr, |
124 | const std::vector<int> &shape, |
125 | bool has_grad = false, |
126 | intptr_t devalloc_ptr_grad = 0) { |
127 | // Set has_grad value |
128 | this->has_grad[arg_id] = has_grad; |
129 | |
130 | // Set args[arg_id] value |
131 | args[arg_id] = taichi_union_cast_with_different_sizes<uint64>(devalloc_ptr); |
132 | |
133 | // Set grad_args[arg_id] value |
134 | if (has_grad) { |
135 | grad_args[arg_id] = |
136 | taichi_union_cast_with_different_sizes<uint64>(devalloc_ptr_grad); |
137 | } |
138 | |
139 | // Set device allocation type and runtime size |
140 | set_array_device_allocation_type(arg_id, DevAllocType::kNdarray); |
141 | TI_ASSERT(shape.size() <= taichi_max_num_indices); |
142 | size_t total_size = 1; |
143 | for (int i = 0; i < shape.size(); i++) { |
144 | extra_args[arg_id][i] = shape[i]; |
145 | total_size *= shape[i]; |
146 | } |
147 | set_array_runtime_size(arg_id, total_size); |
148 | } |
149 | #endif |
150 | }; |
151 | |
152 | #if defined(TI_RUNTIME_HOST) |
153 | } // namespace taichi::lang |
154 | #endif |
155 | |