1 | #pragma once |
2 | #include <vector> |
3 | #include <set> |
4 | |
5 | #include "taichi/common/core.h" |
6 | #include "taichi/rhi/cuda/cuda_driver.h" |
7 | #include "taichi/rhi/cuda/cuda_caching_allocator.h" |
8 | #include "taichi/rhi/cuda/cuda_context.h" |
9 | #include "taichi/rhi/llvm/llvm_device.h" |
10 | |
11 | namespace taichi::lang { |
12 | namespace cuda { |
13 | |
14 | class CudaPipeline : public Pipeline { |
15 | public: |
16 | ~CudaPipeline() override { |
17 | } |
18 | }; |
19 | |
20 | class CudaCommandList : public CommandList { |
21 | public: |
22 | ~CudaCommandList() override { |
23 | } |
24 | |
25 | void bind_pipeline(Pipeline *p) noexcept override{TI_NOT_IMPLEMENTED}; |
26 | RhiResult bind_shader_resources(ShaderResourceSet *res, |
27 | int set_index = 0) noexcept final{ |
28 | TI_NOT_IMPLEMENTED}; |
29 | RhiResult bind_raster_resources(RasterResources *res) noexcept final{ |
30 | TI_NOT_IMPLEMENTED}; |
31 | void buffer_barrier(DevicePtr ptr, |
32 | size_t size) noexcept override{TI_NOT_IMPLEMENTED}; |
33 | void buffer_barrier(DeviceAllocation alloc) noexcept override{ |
34 | TI_NOT_IMPLEMENTED}; |
35 | void memory_barrier() noexcept override{TI_NOT_IMPLEMENTED}; |
36 | void buffer_copy(DevicePtr dst, DevicePtr src, size_t size) noexcept override{ |
37 | TI_NOT_IMPLEMENTED}; |
38 | void buffer_fill(DevicePtr ptr, size_t size, uint32_t data) noexcept override{ |
39 | TI_NOT_IMPLEMENTED}; |
40 | RhiResult dispatch(uint32_t x, |
41 | uint32_t y = 1, |
42 | uint32_t z = 1) noexcept override{TI_NOT_IMPLEMENTED}; |
43 | }; |
44 | |
45 | class CudaStream : public Stream { |
46 | public: |
47 | ~CudaStream() override{}; |
48 | |
49 | RhiResult new_command_list(CommandList **out_cmdlist) noexcept final{ |
50 | TI_NOT_IMPLEMENTED}; |
51 | StreamSemaphore submit(CommandList *cmdlist, |
52 | const std::vector<StreamSemaphore> &wait_semaphores = |
53 | {}) override{TI_NOT_IMPLEMENTED}; |
54 | StreamSemaphore submit_synced( |
55 | CommandList *cmdlist, |
56 | const std::vector<StreamSemaphore> &wait_semaphores = {}) override{ |
57 | TI_NOT_IMPLEMENTED}; |
58 | |
59 | void command_sync() override{TI_NOT_IMPLEMENTED}; |
60 | }; |
61 | |
// RHI device implementation for the CUDA backend, layered on LlvmDevice.
// The memory-management surface (allocate/dealloc, upload/readback, map/unmap
// by allocation, import, memcpy) is implemented out-of-line; the graphics-style
// entry points (resource sets, pipelines, streams, map-by-pointer) are stubs
// that trip TI_NOT_IMPLEMENTED.
class CudaDevice : public LlvmDevice {
 public:
  // Book-keeping record for one allocation made through this device.
  // Stored in `allocations_`, indexed by DeviceAllocation::alloc_id.
  struct AllocInfo {
    void *ptr{nullptr};       // raw device pointer of the allocation
    size_t size{0};           // size of the allocation in bytes
    bool is_imported{false};  // set for memory wrapped via import_memory()
    /* Note: Memory allocation in CUDA device.
     * CudaDevice can use either its own cuda malloc mechanism via
     * `allocate_memory` or the preallocated memory managed by Llvmprogramimpl
     * via `allocate_memory_runtime`. The `use_preallocated` is used to track
     * this option. For now, we keep both options and the preallocated method is
     * used by default for CUDA backend. The `use_cached` is to enable/disable
     * the caching behavior in `allocate_memory_runtime`. Later it should be
     * always enabled, for now we keep both options to allow a scenario when
     * using preallocated memory while disabling the caching behavior.
     * */
    bool use_preallocated{true};
    bool use_cached{false};
    void *mapped{nullptr};  // host-visible pointer while mapped — presumably
                            // set by map() and cleared by unmap(); confirm in
                            // the .cpp
  };

  // Looks up the AllocInfo record for `handle`.
  AllocInfo get_alloc_info(const DeviceAllocation handle);

  ~CudaDevice() override{};

  // Allocates device memory directly (see AllocInfo note above).
  DeviceAllocation allocate_memory(const AllocParams &params) override;
  // Allocates from the runtime's preallocated pool (optionally cached).
  DeviceAllocation allocate_memory_runtime(
      const LlvmRuntimeAllocParams &params) override;
  void dealloc_memory(DeviceAllocation handle) override;

  // Copies `num_alloc` host buffers (`data`/`size` arrays) to device pointers.
  RhiResult upload_data(DevicePtr *device_ptr,
                        const void **data,
                        size_t *size,
                        int num_alloc = 1) noexcept override;

  // Copies `num_alloc` device regions back to host buffers, optionally
  // waiting on `wait_sema` first.
  RhiResult readback_data(
      DevicePtr *device_ptr,
      void **data,
      size_t *size,
      int num_alloc = 1,
      const std::vector<StreamSemaphore> &wait_sema = {}) noexcept override;

  ShaderResourceSet *create_resource_set() final{TI_NOT_IMPLEMENTED};

  RhiResult create_pipeline(Pipeline **out_pipeline,
                            const PipelineSourceDesc &src,
                            std::string name,
                            PipelineCache *cache) noexcept final {
    TI_NOT_IMPLEMENTED;
  }

  // Reads back slot `i` of the kernel result buffer as a uint64.
  uint64 fetch_result_uint64(int i, uint64 *result_buffer) override;

  // Sub-range mapping is not supported; use map(DeviceAllocation, ...).
  RhiResult map_range(DevicePtr ptr, uint64_t size, void **mapped_ptr) final {
    TI_NOT_IMPLEMENTED;
  }
  RhiResult map(DeviceAllocation alloc, void **mapped_ptr) final;

  void unmap(DevicePtr ptr) final{TI_NOT_IMPLEMENTED};
  void unmap(DeviceAllocation alloc) final;

  // Device-to-device copy of `size` bytes.
  void memcpy_internal(DevicePtr dst, DevicePtr src, uint64_t size) override;

  // Wraps externally-owned device memory as a DeviceAllocation
  // (marked is_imported; see AllocInfo).
  DeviceAllocation import_memory(void *ptr, size_t size);

  Stream *get_compute_stream() override{TI_NOT_IMPLEMENTED};

  void wait_idle() override{TI_NOT_IMPLEMENTED};

 private:
  // One AllocInfo per allocation ever made; alloc_id indexes this vector.
  std::vector<AllocInfo> allocations_;
  // Rejects handles whose id falls outside `allocations_`. Note this only
  // checks the id range, not whether the allocation is still live.
  void validate_device_alloc(const DeviceAllocation alloc) {
    if (allocations_.size() <= alloc.alloc_id) {
      TI_ERROR("invalid DeviceAllocation" );
    }
  }
  // Lazily-created allocator backing the `use_cached` path.
  std::unique_ptr<CudaCachingAllocator> caching_allocator_{nullptr};
};
140 | |
141 | } // namespace cuda |
142 | |
143 | } // namespace taichi::lang |
144 | |