#include <cstring>
#include <numeric>

#include "taichi/program/ndarray.h"
#include "taichi/program/program.h"

#ifdef TI_WITH_LLVM
#include "taichi/runtime/llvm/llvm_context.h"
#include "taichi/runtime/program_impls/llvm/llvm_program.h"
#endif

namespace taichi::lang {

namespace {

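// Flattens a multi-dimensional index into a linear offset, assuming a
// row-major (C-style) layout. For example, with shapes = {3, 4} and
// indices = {1, 2}, the result is 1 * 4 + 2 = 6.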
size_t flatten_index(const std::vector<int> &shapes,
                     const std::vector<int> &indices) {
  TI_ASSERT(shapes.size() == indices.size());
  if (indices.size() == 1) {
    return indices[0];
  } else {
    size_t ind = indices[0];
    for (size_t i = 1; i < indices.size(); i++) {
      ind = ind * shapes[i] + indices[i];
    }
    return ind;
  }
}
}  // namespace

Ndarray::Ndarray(Program *prog,
                 const DataType type,
                 const std::vector<int> &shape_,
                 ExternalArrayLayout layout_)
    : dtype(type),
      shape(shape_),
      layout(layout_),
      nelement_(std::accumulate(std::begin(shape_),
                                std::end(shape_),
                                1,
                                std::multiplies<>())),
      element_size_(data_type_size(dtype)),
      prog_(prog) {
  // The data shape and the element shape are concatenated differently
  // depending on the layout, so the combined total_shape_ comes in handy.
  total_shape_ = shape;
  auto element_shape = data_type_shape(dtype);
  if (layout == ExternalArrayLayout::kAOS) {
    total_shape_.insert(total_shape_.end(), element_shape.begin(),
                        element_shape.end());
  } else if (layout == ExternalArrayLayout::kSOA) {
    total_shape_.insert(total_shape_.begin(), element_shape.begin(),
                        element_shape.end());
  }
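  // For example, with shape = {2, 3} and element_shape = {4}, AOS yields
  // total_shape_ = {2, 3, 4} while SOA yields total_shape_ = {4, 2, 3}.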
  auto total_num_scalar =
      std::accumulate(std::begin(total_shape_), std::end(total_shape_), 1LL,
                      std::multiplies<>());
  if (total_num_scalar > std::numeric_limits<int>::max()) {
    TI_WARN(
        "Ndarray index might exceed the int32 boundary, but int64 indexing "
        "is not supported yet.");
  }
  ndarray_alloc_ = prog->allocate_memory_ndarray(nelement_ * element_size_,
                                                 prog->result_buffer);
}

Ndarray::Ndarray(DeviceAllocation &devalloc,
                 const DataType type,
                 const std::vector<int> &shape_,
                 ExternalArrayLayout layout_)
    : ndarray_alloc_(devalloc),
      dtype(type),
      shape(shape_),
      layout(layout_),
      nelement_(std::accumulate(std::begin(shape_),
                                std::end(shape_),
                                1,
                                std::multiplies<>())),
      element_size_(data_type_size(dtype)) {
  // If element_shape is specified but the layout is not, default to AOS.
  // Note: the parameters are named shape_/layout_ so that the assignment
  // below updates the layout *member* rather than a shadowing parameter.
  auto element_shape = data_type_shape(dtype);
  if (!element_shape.empty() && layout == ExternalArrayLayout::kNull) {
    layout = ExternalArrayLayout::kAOS;
  }
  // The data shape and the element shape are concatenated differently
  // depending on the layout, so the combined total_shape_ comes in handy.
  total_shape_ = shape;
  if (layout == ExternalArrayLayout::kAOS) {
    total_shape_.insert(total_shape_.end(), element_shape.begin(),
                        element_shape.end());
  } else if (layout == ExternalArrayLayout::kSOA) {
    total_shape_.insert(total_shape_.begin(), element_shape.begin(),
                        element_shape.end());
  }
  auto total_num_scalar =
      std::accumulate(std::begin(total_shape_), std::end(total_shape_), 1LL,
                      std::multiplies<>());
  if (total_num_scalar > std::numeric_limits<int>::max()) {
    TI_WARN(
        "Ndarray index might exceed the int32 boundary, but int64 indexing "
        "is not supported yet.");
  }
}

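// Convenience constructor: wraps a primitive `type` and `element_shape` into
// a TensorType dtype, then delegates to the constructor above.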
Ndarray::Ndarray(DeviceAllocation &devalloc,
                 const DataType type,
                 const std::vector<int> &shape,
                 const std::vector<int> &element_shape,
                 ExternalArrayLayout layout)
    : Ndarray(devalloc,
              TypeFactory::create_tensor_type(element_shape, type),
              shape,
              layout) {
  TI_ASSERT(type->is<PrimitiveType>());
}

Ndarray::~Ndarray() {
  if (prog_) {
    // prog_->flush();
    ndarray_alloc_.device->dealloc_memory(ndarray_alloc_);
  }
}

intptr_t Ndarray::get_device_allocation_ptr_as_int() const {
  // A Taichi-owned ndarray's pointer refers to its |DeviceAllocation| on the
  // specified device. Note that a torch-based ndarray's pointer is a raw
  // pointer, but we'll get rid of that soon.
  return reinterpret_cast<intptr_t>(&ndarray_alloc_);
}

DeviceAllocation Ndarray::get_device_allocation() const {
  return ndarray_alloc_;
}

std::vector<int> Ndarray::get_element_shape() const {
  return data_type_shape(dtype);
}

DataType Ndarray::get_element_data_type() const {
  if (dtype->is<TensorType>()) {
    return dtype->cast<TensorType>()->get_element_type();
  }
  return dtype;
}

std::size_t Ndarray::get_element_size() const {
  return element_size_;
}

std::size_t Ndarray::get_nelement() const {
  return nelement_;
}

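// Reads a single scalar back to the host through a small host-readable
// staging buffer: copy device -> staging, map, then memcpy into the result.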
TypedConstant Ndarray::read(const std::vector<int> &I) const {
  prog_->synchronize();
  size_t index = flatten_index(total_shape_, I);
  size_t size = data_type_size(get_element_data_type());
  taichi::lang::Device::AllocParams alloc_params;
  alloc_params.host_write = false;
  alloc_params.host_read = true;
  alloc_params.size = size;
  alloc_params.usage = taichi::lang::AllocUsage::Storage;
  auto staging_buf_ =
      this->ndarray_alloc_.device->allocate_memory_unique(alloc_params);
  staging_buf_->device->memcpy_internal(
      staging_buf_->get_ptr(),
      this->ndarray_alloc_.get_ptr(/*offset=*/index * size), size);

  char *device_arr_ptr{nullptr};
  TI_ASSERT(staging_buf_->device->map(
                *staging_buf_, (void **)&device_arr_ptr) == RhiResult::success);

  TypedConstant data(get_element_data_type());
  std::memcpy(&data.value_bits, device_arr_ptr, size);
  staging_buf_->device->unmap(*staging_buf_);
  return data;
}

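// Writes a single value of type T through a host-writable staging buffer:
// map, store the value, unmap, then copy staging -> device at the flattened
// offset. Instantiated below by write_int() / write_float().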
template <typename T>
void Ndarray::write(const std::vector<int> &I, T val) const {
  size_t index = flatten_index(total_shape_, I);
  size_t size_ = sizeof(T);
  taichi::lang::Device::AllocParams alloc_params;
  alloc_params.host_write = true;
  alloc_params.host_read = false;
  alloc_params.size = size_;
  alloc_params.usage = taichi::lang::AllocUsage::Storage;
  auto staging_buf_ =
      this->ndarray_alloc_.device->allocate_memory_unique(alloc_params);

  T *device_arr_ptr{nullptr};
  TI_ASSERT(staging_buf_->device->map(
                *staging_buf_, (void **)&device_arr_ptr) == RhiResult::success);

  TI_ASSERT(device_arr_ptr);
  device_arr_ptr[0] = val;

  staging_buf_->device->unmap(*staging_buf_);
  staging_buf_->device->memcpy_internal(
      this->ndarray_alloc_.get_ptr(index * sizeof(T)), staging_buf_->get_ptr(),
      size_);

  prog_->synchronize();
}

int64 Ndarray::read_int(const std::vector<int> &i) {
  return read(i).val_int();
}

uint64 Ndarray::read_uint(const std::vector<int> &i) {
  return read(i).val_uint();
}

float64 Ndarray::read_float(const std::vector<int> &i) {
  return read(i).val_float();
}

void Ndarray::write_int(const std::vector<int> &i, int64 val) {
  write<int>(i, val);
}

void Ndarray::write_float(const std::vector<int> &i, float64 val) {
  write<float>(i, val);
}

}  // namespace taichi::lang