#include <cstring>
#include <limits>
#include <numeric>

#include "taichi/program/ndarray.h"
#include "taichi/program/program.h"

#ifdef TI_WITH_LLVM
#include "taichi/runtime/llvm/llvm_context.h"
#include "taichi/runtime/program_impls/llvm/llvm_program.h"
#endif
10 | |
11 | namespace taichi::lang { |
12 | |
13 | namespace { |
14 | |
15 | size_t flatten_index(const std::vector<int> &shapes, |
16 | const std::vector<int> &indices) { |
17 | TI_ASSERT(shapes.size() == indices.size()); |
18 | if (indices.size() == 1) { |
19 | return indices[0]; |
20 | } else { |
21 | size_t ind = indices[0]; |
22 | for (int i = 1; i < indices.size(); i++) { |
23 | ind = ind * shapes[i] + indices[i]; |
24 | } |
25 | return ind; |
26 | } |
27 | } |
28 | } // namespace |
29 | |
30 | Ndarray::Ndarray(Program *prog, |
31 | const DataType type, |
32 | const std::vector<int> &shape_, |
33 | ExternalArrayLayout layout_) |
34 | : dtype(type), |
35 | shape(shape_), |
36 | layout(layout_), |
37 | nelement_(std::accumulate(std::begin(shape_), |
38 | std::end(shape_), |
39 | 1, |
40 | std::multiplies<>())), |
41 | element_size_(data_type_size(dtype)), |
42 | prog_(prog) { |
43 | // Now that we have two shapes which may be concatenated differently |
44 | // depending on layout, total_shape_ comes handy. |
45 | total_shape_ = shape; |
46 | auto element_shape = data_type_shape(dtype); |
47 | if (layout == ExternalArrayLayout::kAOS) { |
48 | total_shape_.insert(total_shape_.end(), element_shape.begin(), |
49 | element_shape.end()); |
50 | } else if (layout == ExternalArrayLayout::kSOA) { |
51 | total_shape_.insert(total_shape_.begin(), element_shape.begin(), |
52 | element_shape.end()); |
53 | } |
54 | auto total_num_scalar = |
55 | std::accumulate(std::begin(total_shape_), std::end(total_shape_), 1LL, |
56 | std::multiplies<>()); |
57 | if (total_num_scalar > std::numeric_limits<int>::max()) { |
58 | TI_WARN( |
59 | "Ndarray index might be out of int32 boundary but int64 indexing is " |
60 | "not supported yet." ); |
61 | } |
62 | ndarray_alloc_ = prog->allocate_memory_ndarray(nelement_ * element_size_, |
63 | prog->result_buffer); |
64 | } |
65 | |
66 | Ndarray::Ndarray(DeviceAllocation &devalloc, |
67 | const DataType type, |
68 | const std::vector<int> &shape, |
69 | ExternalArrayLayout layout) |
70 | : ndarray_alloc_(devalloc), |
71 | dtype(type), |
72 | shape(shape), |
73 | layout(layout), |
74 | nelement_(std::accumulate(std::begin(shape), |
75 | std::end(shape), |
76 | 1, |
77 | std::multiplies<>())), |
78 | element_size_(data_type_size(dtype)) { |
79 | // When element_shape is specified but layout is not, default layout is AOS. |
80 | auto element_shape = data_type_shape(dtype); |
81 | if (!element_shape.empty() && layout == ExternalArrayLayout::kNull) { |
82 | layout = ExternalArrayLayout::kAOS; |
83 | } |
84 | // Now that we have two shapes which may be concatenated differently |
85 | // depending on layout, total_shape_ comes handy. |
86 | total_shape_ = shape; |
87 | if (layout == ExternalArrayLayout::kAOS) { |
88 | total_shape_.insert(total_shape_.end(), element_shape.begin(), |
89 | element_shape.end()); |
90 | } else if (layout == ExternalArrayLayout::kSOA) { |
91 | total_shape_.insert(total_shape_.begin(), element_shape.begin(), |
92 | element_shape.end()); |
93 | } |
94 | auto total_num_scalar = |
95 | std::accumulate(std::begin(total_shape_), std::end(total_shape_), 1LL, |
96 | std::multiplies<>()); |
97 | if (total_num_scalar > std::numeric_limits<int>::max()) { |
98 | TI_WARN( |
99 | "Ndarray index might be out of int32 boundary but int64 indexing is " |
100 | "not supported yet." ); |
101 | } |
102 | } |
103 | |
// Convenience overload: wraps |devalloc| as an ndarray whose elements are
// tensors of |element_shape| scalars of primitive |type|, by synthesizing a
// TensorType dtype and delegating to the main devalloc constructor.
Ndarray::Ndarray(DeviceAllocation &devalloc,
                 const DataType type,
                 const std::vector<int> &shape,
                 const std::vector<int> &element_shape,
                 ExternalArrayLayout layout)
    : Ndarray(devalloc,
              TypeFactory::create_tensor_type(element_shape, type),
              shape,
              layout) {
  // Only a scalar (primitive) element type may be wrapped into a
  // TensorType here; nested tensor dtypes are rejected.
  TI_ASSERT(type->is<PrimitiveType>());
}
115 | |
Ndarray::~Ndarray() {
  // Deallocate only when this ndarray owns its storage, i.e. it was built
  // via the Program-based constructor (prog_ set). Ndarrays wrapping an
  // externally-provided DeviceAllocation leave ownership to the caller.
  if (prog_) {
    // prog_->flush();
    ndarray_alloc_.device->dealloc_memory(ndarray_alloc_);
  }
}
122 | |
// Returns the address of this ndarray's DeviceAllocation handle as an
// integer. The value is only valid while this Ndarray object is alive.
intptr_t Ndarray::get_device_allocation_ptr_as_int() const {
  // taichi's own ndarray's ptr points to its |DeviceAllocation| on the
  // specified device. Note that torch-based ndarray's ptr is a raw ptr but
  // we'll get rid of it soon.
  return reinterpret_cast<intptr_t>(&ndarray_alloc_);
}
129 | |
// Returns a copy of the DeviceAllocation handle backing this ndarray.
DeviceAllocation Ndarray::get_device_allocation() const {
  return ndarray_alloc_;
}
133 | |
// Returns the per-element tensor shape encoded in dtype (empty for scalar
// dtypes).
std::vector<int> Ndarray::get_element_shape() const {
  return data_type_shape(dtype);
}
137 | |
138 | DataType Ndarray::get_element_data_type() const { |
139 | if (dtype->is<TensorType>()) { |
140 | return dtype->cast<TensorType>()->get_element_type(); |
141 | } |
142 | return dtype; |
143 | } |
144 | |
// Returns the size in bytes of one element (data_type_size(dtype)).
std::size_t Ndarray::get_element_size() const {
  return element_size_;
}
148 | |
// Returns the number of elements, i.e. the product of the outer shape dims
// (element-shape dims are not counted).
std::size_t Ndarray::get_nelement() const {
  return nelement_;
}
152 | |
153 | TypedConstant Ndarray::read(const std::vector<int> &I) const { |
154 | prog_->synchronize(); |
155 | size_t index = flatten_index(total_shape_, I); |
156 | size_t size = data_type_size(get_element_data_type()); |
157 | taichi::lang::Device::AllocParams alloc_params; |
158 | alloc_params.host_write = false; |
159 | alloc_params.host_read = true; |
160 | alloc_params.size = size; |
161 | alloc_params.usage = taichi::lang::AllocUsage::Storage; |
162 | auto staging_buf_ = |
163 | this->ndarray_alloc_.device->allocate_memory_unique(alloc_params); |
164 | staging_buf_->device->memcpy_internal( |
165 | staging_buf_->get_ptr(), |
166 | this->ndarray_alloc_.get_ptr(/*offset=*/index * size), size); |
167 | |
168 | char *device_arr_ptr{nullptr}; |
169 | TI_ASSERT(staging_buf_->device->map( |
170 | *staging_buf_, (void **)&device_arr_ptr) == RhiResult::success); |
171 | |
172 | TypedConstant data(get_element_data_type()); |
173 | std::memcpy(&data.value_bits, device_arr_ptr, size); |
174 | staging_buf_->device->unmap(*staging_buf_); |
175 | return data; |
176 | } |
177 | |
178 | template <typename T> |
179 | void Ndarray::write(const std::vector<int> &I, T val) const { |
180 | size_t index = flatten_index(total_shape_, I); |
181 | size_t size_ = sizeof(T); |
182 | taichi::lang::Device::AllocParams alloc_params; |
183 | alloc_params.host_write = true; |
184 | alloc_params.host_read = false; |
185 | alloc_params.size = size_; |
186 | alloc_params.usage = taichi::lang::AllocUsage::Storage; |
187 | auto staging_buf_ = |
188 | this->ndarray_alloc_.device->allocate_memory_unique(alloc_params); |
189 | |
190 | T *device_arr_ptr{nullptr}; |
191 | TI_ASSERT(staging_buf_->device->map( |
192 | *staging_buf_, (void **)&device_arr_ptr) == RhiResult::success); |
193 | |
194 | TI_ASSERT(device_arr_ptr); |
195 | device_arr_ptr[0] = val; |
196 | |
197 | staging_buf_->device->unmap(*staging_buf_); |
198 | staging_buf_->device->memcpy_internal( |
199 | this->ndarray_alloc_.get_ptr(index * sizeof(T)), staging_buf_->get_ptr(), |
200 | size_); |
201 | |
202 | prog_->synchronize(); |
203 | } |
204 | |
// Reads the element at multi-index |i| interpreted as a signed integer.
int64 Ndarray::read_int(const std::vector<int> &i) {
  return read(i).val_int();
}
208 | |
// Reads the element at multi-index |i| interpreted as an unsigned integer.
uint64 Ndarray::read_uint(const std::vector<int> &i) {
  return read(i).val_uint();
}
212 | |
// Reads the element at multi-index |i| interpreted as a floating-point value.
float64 Ndarray::read_float(const std::vector<int> &i) {
  return read(i).val_float();
}
216 | |
// Writes |val| at multi-index |i| as a 32-bit int.
// NOTE(review): |val| is narrowed from int64 to int before being written,
// and only sizeof(int) bytes are stored — 64-bit integer element types
// would be truncated here; confirm this is intended.
void Ndarray::write_int(const std::vector<int> &i, int64 val) {
  write<int>(i, val);
}
220 | |
// Writes |val| at multi-index |i| as a 32-bit float.
// NOTE(review): |val| is narrowed from float64 to float before being
// written, and only sizeof(float) bytes are stored — float64 element types
// would lose precision here; confirm this is intended.
void Ndarray::write_float(const std::vector<int> &i, float64 val) {
  write<float>(i, val);
}
224 | |
225 | } // namespace taichi::lang |
226 | |