#include <taichi/rhi/device.h>

#include <cstring>  // memcpy, used by upload_data / readback_data

#if TI_WITH_VULKAN
#include <taichi/rhi/vulkan/vulkan_device.h>
#include <taichi/rhi/interop/vulkan_cpu_interop.h>
#if TI_WITH_LLVM
#include <taichi/rhi/cpu/cpu_device.h>
#endif
#if TI_WITH_CUDA
#include <taichi/rhi/cuda/cuda_device.h>
#include <taichi/rhi/interop/vulkan_cuda_interop.h>
#endif  // TI_WITH_CUDA
#endif  // TI_WITH_VULKAN

namespace taichi::lang {

DeviceAllocationGuard::~DeviceAllocationGuard() {
  device->dealloc_memory(*this);
}

DeviceImageGuard::~DeviceImageGuard() {
  dynamic_cast<GraphicsDevice *>(device)->destroy_image(*this);
}

DevicePtr DeviceAllocation::get_ptr(uint64_t offset) const {
  return DevicePtr{{device, alloc_id}, offset};
}

Device::MemcpyCapability Device::check_memcpy_capability(DevicePtr dst,
                                                         DevicePtr src,
                                                         uint64_t size) {
  if (dst.device == src.device) {
    return Device::MemcpyCapability::Direct;
  }

#if TI_WITH_VULKAN
#if TI_WITH_LLVM
  if (dynamic_cast<vulkan::VulkanDevice *>(dst.device) &&
      dynamic_cast<cpu::CpuDevice *>(src.device)) {
    // TODO: support direct copy if dst itself supports host write.
    return Device::MemcpyCapability::RequiresStagingBuffer;
  } else if (dynamic_cast<cpu::CpuDevice *>(dst.device) &&
             dynamic_cast<vulkan::VulkanDevice *>(src.device)) {
    return Device::MemcpyCapability::RequiresStagingBuffer;
  }
#endif
#if TI_WITH_CUDA
  if (dynamic_cast<vulkan::VulkanDevice *>(dst.device) &&
      dynamic_cast<cuda::CudaDevice *>(src.device)) {
    // FIXME: Direct copy is not always possible: the Vulkan buffer must be
    // allocated with export_sharing enabled; otherwise a staging buffer is
    // required.
    return Device::MemcpyCapability::Direct;
  } else if (dynamic_cast<cuda::CudaDevice *>(dst.device) &&
             dynamic_cast<vulkan::VulkanDevice *>(src.device)) {
    return Device::MemcpyCapability::Direct;
  }
#endif  // TI_WITH_CUDA
#endif  // TI_WITH_VULKAN
  return Device::MemcpyCapability::RequiresHost;
}
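
// Usage sketch (editor's illustration, not part of the upstream file): the
// capability query above is meant to select one of the copy entry points
// defined below. `staging` and `host_buffer` are assumed to be provisioned by
// the caller, and the entry points are treated as static members of `Device`,
// consistent with how they are defined here (none of them touch `this`).
//
//   void copy_between_devices(DevicePtr dst,
//                             DevicePtr src,
//                             uint64_t size,
//                             DevicePtr staging,
//                             void *host_buffer) {
//     switch (Device::check_memcpy_capability(dst, src, size)) {
//       case Device::MemcpyCapability::Direct:
//         Device::memcpy_direct(dst, src, size);
//         break;
//       case Device::MemcpyCapability::RequiresStagingBuffer:
//         Device::memcpy_via_staging(dst, staging, src, size);
//         break;
//       case Device::MemcpyCapability::RequiresHost:
//         Device::memcpy_via_host(dst, host_buffer, src, size);
//         break;
//     }
//   }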

void Device::memcpy_direct(DevicePtr dst, DevicePtr src, uint64_t size) {
  // Intra-device copy
  if (dst.device == src.device) {
    dst.device->memcpy_internal(dst, src, size);
    return;
  }
#if TI_WITH_VULKAN && TI_WITH_LLVM
  // Direct cross-device copy (CPU -> Vulkan)
  else if (dynamic_cast<vulkan::VulkanDevice *>(dst.device) &&
           dynamic_cast<cpu::CpuDevice *>(src.device)) {
    memcpy_cpu_to_vulkan(dst, src, size);
    return;
  }
#endif
#if TI_WITH_VULKAN && TI_WITH_CUDA
  if (dynamic_cast<vulkan::VulkanDevice *>(dst.device) &&
      dynamic_cast<cuda::CudaDevice *>(src.device)) {
    memcpy_cuda_to_vulkan(dst, src, size);
    return;
  } else if (dynamic_cast<cuda::CudaDevice *>(dst.device) &&
             dynamic_cast<vulkan::VulkanDevice *>(src.device)) {
    memcpy_vulkan_to_cuda(dst, src, size);
    return;
  }
#endif
  TI_NOT_IMPLEMENTED;
}

void Device::memcpy_via_staging(DevicePtr dst,
                                DevicePtr staging,
                                DevicePtr src,
                                uint64_t size) {
  // Inter-device copy through a caller-provided staging buffer
#if defined(TI_WITH_VULKAN) && defined(TI_WITH_LLVM)
  if (dynamic_cast<vulkan::VulkanDevice *>(dst.device) &&
      dynamic_cast<cpu::CpuDevice *>(src.device)) {
    memcpy_cpu_to_vulkan_via_staging(dst, staging, src, size);
    return;
  }
#endif

  TI_NOT_IMPLEMENTED;
}

void Device::memcpy_via_host(DevicePtr dst,
                             void *host_buffer,
                             DevicePtr src,
                             uint64_t size) {
  TI_NOT_IMPLEMENTED;
}

void GraphicsDevice::image_transition(DeviceAllocation img,
                                      ImageLayout old_layout,
                                      ImageLayout new_layout) {
  Stream *stream = get_graphics_stream();
  auto [cmd_list, res] = stream->new_command_list_unique();
  TI_ASSERT(res == RhiResult::success);
  cmd_list->image_transition(img, old_layout, new_layout);
  stream->submit_synced(cmd_list.get());
}

void GraphicsDevice::buffer_to_image(DeviceAllocation dst_img,
                                     DevicePtr src_buf,
                                     ImageLayout img_layout,
                                     const BufferImageCopyParams &params) {
  Stream *stream = get_graphics_stream();
  auto [cmd_list, res] = stream->new_command_list_unique();
  TI_ASSERT(res == RhiResult::success);
  cmd_list->buffer_to_image(dst_img, src_buf, img_layout, params);
  stream->submit_synced(cmd_list.get());
}

void GraphicsDevice::image_to_buffer(DevicePtr dst_buf,
                                     DeviceAllocation src_img,
                                     ImageLayout img_layout,
                                     const BufferImageCopyParams &params) {
  Stream *stream = get_graphics_stream();
  auto [cmd_list, res] = stream->new_command_list_unique();
  TI_ASSERT(res == RhiResult::success);
  cmd_list->image_to_buffer(dst_buf, src_img, img_layout, params);
  stream->submit_synced(cmd_list.get());
}

RhiResult Device::upload_data(DevicePtr *device_ptr,
                              const void **data,
                              size_t *size,
                              int num_alloc) noexcept {
  if (!device_ptr || !data || !size) {
    return RhiResult::invalid_usage;
  }

  // Copy each source buffer into a host-visible staging allocation.
  std::vector<DeviceAllocationUnique> stagings;
  for (int i = 0; i < num_alloc; i++) {
    if (device_ptr[i].device != this || !data[i]) {
      return RhiResult::invalid_usage;
    }
    DeviceAllocationUnique staging = this->allocate_memory_unique(
        {size[i], /*host_write=*/true, /*host_read=*/false,
         /*export_sharing=*/false, AllocUsage::Upload});

    void *mapped{nullptr};
    RhiResult res = this->map(*staging, &mapped);
    if (res != RhiResult::success) {
      return res;
    }
    memcpy(mapped, data[i], size[i]);
    this->unmap(*staging);

    stagings.push_back(std::move(staging));
  }

  // Record one buffer copy per allocation and submit synchronously, so the
  // staging buffers can be released when this function returns.
  Stream *s = this->get_compute_stream();
  auto [cmdlist, res] = s->new_command_list_unique();
  if (res != RhiResult::success) {
    return res;
  }
  for (int i = 0; i < num_alloc; i++) {
    cmdlist->buffer_copy(device_ptr[i], stagings[i]->get_ptr(0), size[i]);
  }
  s->submit_synced(cmdlist.get());

  return RhiResult::success;
}
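
// Usage sketch (editor's illustration): uploading a small host array into an
// existing allocation. `device` and `alloc` are assumed to be a valid
// Device * and a DeviceAllocation of at least sizeof(data) bytes owned by the
// caller.
//
//   float data[4] = {0.f, 1.f, 2.f, 3.f};
//   DevicePtr dst = alloc.get_ptr(0);
//   const void *src = data;
//   size_t size = sizeof(data);
//   RhiResult res = device->upload_data(&dst, &src, &size, /*num_alloc=*/1);
//   TI_ASSERT(res == RhiResult::success);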

RhiResult Device::readback_data(
    DevicePtr *device_ptr,
    void **data,
    size_t *size,
    int num_alloc,
    const std::vector<StreamSemaphore> &wait_sema) noexcept {
  if (!device_ptr || !data || !size) {
    return RhiResult::invalid_usage;
  }

  Stream *s = this->get_compute_stream();
  auto [cmdlist, res] = s->new_command_list_unique();
  if (res != RhiResult::success) {
    return res;
  }

  // Record a copy from each source allocation into a host-readable staging
  // buffer.
  std::vector<DeviceAllocationUnique> stagings;
  for (int i = 0; i < num_alloc; i++) {
    if (device_ptr[i].device != this || !data[i]) {
      return RhiResult::invalid_usage;
    }
    DeviceAllocationUnique staging = this->allocate_memory_unique(
        {size[i], /*host_write=*/false, /*host_read=*/true,
         /*export_sharing=*/false, AllocUsage::None});

    cmdlist->buffer_copy(staging->get_ptr(0), device_ptr[i], size[i]);
    stagings.push_back(std::move(staging));
  }
  // Wait on the provided semaphores (if any) and block until the copies
  // complete before mapping the staging buffers.
  s->submit_synced(cmdlist.get(), wait_sema);

  for (int i = 0; i < num_alloc; i++) {
    void *mapped{nullptr};
    RhiResult res = this->map(*stagings[i], &mapped);
    if (res != RhiResult::success) {
      return res;
    }
    memcpy(data[i], mapped, size[i]);
    this->unmap(*stagings[i]);
  }

  return RhiResult::success;
}
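
// Usage sketch (editor's illustration): reading the same allocation back into
// host memory, mirroring the upload sketch above. An empty semaphore list is
// passed, i.e. the readback does not wait on any other stream.
//
//   float out[4] = {};
//   DevicePtr src_ptr = alloc.get_ptr(0);
//   void *dst_host = out;
//   size_t size = sizeof(out);
//   RhiResult res =
//       device->readback_data(&src_ptr, &dst_host, &size, /*num_alloc=*/1, {});
//   TI_ASSERT(res == RhiResult::success);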

}  // namespace taichi::lang