1 | #pragma once |
2 | |
#include <assert.h>

#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <limits>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "taichi/common/logging.h"

#include "taichi/rhi/device_capability.h"
#include "taichi/rhi/arch.h"
12 | |
13 | namespace taichi::lang { |
14 | |
// Status codes returned by RHI operations.
enum class RhiResult {
  success = 0,
  error = -1,          // Unspecified failure
  invalid_usage = -2,  // Arguments or state violate the API contract
  not_supported = -3,  // Operation not supported by this backend / device
  out_of_memory = -4,  // Device or host allocation failed
};
22 | |
// Sentinel size meaning "the rest of the buffer"; APIs taking a size clamp
// it to the underlying buffer's size (see e.g. CommandList::buffer_fill).
constexpr size_t kBufferSizeEntireSize = std::numeric_limits<size_t>::max();
24 | |
// Generates bitwise `|` and `&` operators for a flag-style enum class, plus
// an `operator&&` that tests whether two flag sets intersect (non-zero AND).
// NOTE(review): overloading `&&` loses built-in short-circuit semantics and
// is unconventional — confirm before reusing this pattern elsewhere.
#define MAKE_ENUM_FLAGS(name)                  \
  inline name operator|(name a, name b) {      \
    return static_cast<name>(int(a) | int(b)); \
  }                                            \
  inline name operator&(name a, name b) {      \
    return static_cast<name>(int(a) & int(b)); \
  }                                            \
  inline bool operator&&(name a, name b) { return (int(a) & int(b)) != 0; }
33 | |
// Blending equation used to combine source and destination terms.
enum class BlendOp : uint32_t {
  add,
  subtract,
  reverse_subtract,
  min,
  max
};
35 | |
// Blending factors applied to the source / destination terms of the blend
// equation (see BlendOp).
enum class BlendFactor : uint32_t {
  zero,
  one,
  src_color,
  one_minus_src_color,
  dst_color,
  one_minus_dst_color,
  src_alpha,
  one_minus_src_alpha,
  dst_alpha,
  one_minus_dst_alpha
};
48 | |
49 | class Device; |
50 | struct DeviceAllocation; |
51 | struct DevicePtr; |
52 | |
// TODO: Figure out how to support images. A temporary solution is to have all
// opaque types such as images work as an allocation
55 | using DeviceAllocationId = uint64_t; |
56 | |
57 | struct TI_DLL_EXPORT DeviceAllocation { |
58 | Device *device{nullptr}; |
59 | DeviceAllocationId alloc_id{0}; |
60 | // TODO: Shall we include size here? |
61 | |
62 | DevicePtr get_ptr(uint64_t offset = 0) const; |
63 | |
64 | bool operator==(const DeviceAllocation &other) const { |
65 | return other.device == device && other.alloc_id == alloc_id; |
66 | } |
67 | |
68 | bool operator!=(const DeviceAllocation &other) const { |
69 | return !(*this == other); |
70 | } |
71 | }; |
72 | |
73 | struct TI_DLL_EXPORT DeviceAllocationGuard : public DeviceAllocation { |
74 | explicit DeviceAllocationGuard(DeviceAllocation alloc) |
75 | : DeviceAllocation(alloc) { |
76 | } |
77 | DeviceAllocationGuard(const DeviceAllocationGuard &) = delete; |
78 | ~DeviceAllocationGuard(); |
79 | }; |
80 | |
81 | using DeviceAllocationUnique = std::unique_ptr<DeviceAllocationGuard>; |
82 | |
83 | struct TI_DLL_EXPORT DeviceImageGuard : public DeviceAllocation { |
84 | explicit DeviceImageGuard(DeviceAllocation alloc) : DeviceAllocation(alloc) { |
85 | } |
86 | DeviceImageGuard(const DeviceAllocationGuard &) = delete; |
87 | ~DeviceImageGuard(); |
88 | }; |
89 | |
90 | using DeviceImageUnique = std::unique_ptr<DeviceImageGuard>; |
91 | |
92 | struct TI_DLL_EXPORT DevicePtr : public DeviceAllocation { |
93 | uint64_t offset{0}; |
94 | |
95 | bool operator==(const DevicePtr &other) const { |
96 | return other.device == device && other.alloc_id == alloc_id && |
97 | other.offset == offset; |
98 | } |
99 | |
100 | bool operator!=(const DevicePtr &other) const { |
101 | return !(*this == other); |
102 | } |
103 | }; |
104 | |
// "Null" handles: device == nullptr, alloc_id == 0 (and offset == 0).
constexpr DeviceAllocation kDeviceNullAllocation{};
constexpr DevicePtr kDeviceNullPtr{};

// TODO: fill this with the required options
struct ImageSamplerConfig {};
110 | |
111 | // A set of shader resources (that is bound at once) |
class TI_DLL_EXPORT ShaderResourceSet {
 public:
  virtual ~ShaderResourceSet() = default;

  /**
   * Bind a RW subregion of a buffer resource (StorageBuffer / SSBO)
   * @params[in] binding The binding index of the resource
   * @params[in] ptr The Device Pointer that is going to be bound
   * @params[in] size The size of the bound region of the buffer
   */
  virtual ShaderResourceSet &rw_buffer(uint32_t binding,
                                       DevicePtr ptr,
                                       size_t size) = 0;

  /**
   * Bind an entire RW buffer resource (StorageBuffer / SSBO)
   * @params[in] binding The binding index of the resource
   * @params[in] alloc The Device Allocation that is going to be bound
   */
  virtual ShaderResourceSet &rw_buffer(uint32_t binding,
                                       DeviceAllocation alloc) = 0;

  /**
   * Bind a read-only subregion of a buffer resource (Constants / UBO)
   * @params[in] binding The binding index of the resource
   * @params[in] ptr The Device Pointer that is going to be bound
   * @params[in] size The size of the bound region of the buffer
   */
  virtual ShaderResourceSet &buffer(uint32_t binding,
                                    DevicePtr ptr,
                                    size_t size) = 0;

  /**
   * Bind an entire read-only buffer resource (Constants / UBO)
   * @params[in] binding The binding index of the resource
   * @params[in] alloc The Device Allocation that is going to be bound
   */
  virtual ShaderResourceSet &buffer(uint32_t binding,
                                    DeviceAllocation alloc) = 0;

  /**
   * Bind a read-only image resource (SRV / Texture)
   * Default implementation is unimplemented (compute-only backends).
   * @params[in] binding The binding index of the resource
   * @params[in] alloc The Device Allocation that is going to be bound
   * @params[in] sampler_config The texture sampling configuration
   */
  virtual ShaderResourceSet &image(uint32_t binding,
                                   DeviceAllocation alloc,
                                   ImageSamplerConfig sampler_config) {
    TI_NOT_IMPLEMENTED;
  }

  /**
   * Bind a RW image resource (UAV / Storage Image)
   * Default implementation is unimplemented (compute-only backends).
   * @params binding The binding index of the resource
   * @params alloc The Device Allocation that is going to be bound
   * @params lod The image level-of-detail (mip level) to bind
   */
  virtual ShaderResourceSet &rw_image(uint32_t binding,
                                      DeviceAllocation alloc,
                                      int lod) {
    TI_NOT_IMPLEMENTED
  }
};
175 | |
176 | // A set of states / resources for rasterization |
class TI_DLL_EXPORT RasterResources {
 public:
  virtual ~RasterResources() = default;

  /**
   * Set a vertex buffer for the rasterization
   * Default implementation is unimplemented (compute-only backends).
   * @params ptr The Device Pointer to the vertices data
   * @params binding The binding index of the vertex buffer
   */
  virtual RasterResources &vertex_buffer(DevicePtr ptr, uint32_t binding = 0) {
    TI_NOT_IMPLEMENTED
  }

  /**
   * Set an index buffer for the rasterization
   * Default implementation is unimplemented (compute-only backends).
   * @params ptr The Device Pointer to the index data
   * @params index_width The index data width (in bits).
   *                     index_width = 32 -> uint32 index
   *                     index_width = 16 -> uint16 index
   */
  virtual RasterResources &index_buffer(DevicePtr ptr, size_t index_width) {
    TI_NOT_IMPLEMENTED
  }
};
201 | |
// The representation format of a pipeline's program source.
enum class PipelineSourceType {
  spirv_binary,
  spirv_src,
  glsl_src,
  hlsl_src,
  dxil_binary,
  llvm_ir_src,
  llvm_ir_binary,
  metal_src,
  metal_ir
};

// The shader stage a pipeline source targets.
// (The "tesselation" spelling is kept as-is: renaming the enumerators would
// break existing callers.)
enum class PipelineStageType {
  compute,
  fragment,
  vertex,
  tesselation_control,
  tesselation_eval,
  geometry,
  raytracing
};
223 | |
// Primitive topology used for draw calls.
// FIXME: Drop the plural form?
enum class TopologyType : int { Triangles = 0, Lines = 1, Points = 2 };

// How rasterized polygons are rendered (filled, outlined, or as points).
enum class PolygonMode : int {
  Fill = 0,
  Line = 1,
  Point = 2,
};

// Buffer/texel element formats. Enumerators are generated from
// "taichi/inc/rhi_constants.inc.h" via the PER_BUFFER_FORMAT X-macro.
enum class BufferFormat : uint32_t {
#define PER_BUFFER_FORMAT(x) x,
#include "taichi/inc/rhi_constants.inc.h"
#undef PER_BUFFER_FORMAT
};
238 | |
239 | class TI_DLL_EXPORT Pipeline { |
240 | public: |
241 | virtual ~Pipeline() { |
242 | } |
243 | }; |
244 | |
245 | using UPipeline = std::unique_ptr<Pipeline>; |
246 | |
// Image dimensionality. Enumerators are generated from
// "taichi/inc/rhi_constants.inc.h" via the PER_IMAGE_DIMENSION X-macro.
enum class ImageDimension {
#define PER_IMAGE_DIMENSION(x) x,
#include "taichi/inc/rhi_constants.inc.h"
#undef PER_IMAGE_DIMENSION
};

// Image layout / usage states (used for layout transitions). Enumerators are
// generated from "taichi/inc/rhi_constants.inc.h" via PER_IMAGE_LAYOUT.
enum class ImageLayout {
#define PER_IMAGE_LAYOUT(x) x,
#include "taichi/inc/rhi_constants.inc.h"
#undef PER_IMAGE_LAYOUT
};
258 | |
// Parameters for buffer <-> image copy commands.
// NOTE(review): the field set appears to mirror Vulkan's VkBufferImageCopy —
// confirm exact semantics against the backend implementations.
struct BufferImageCopyParams {
  uint32_t buffer_row_length{0};
  uint32_t buffer_image_height{0};
  uint32_t image_mip_level{0};
  // Texel offset of the copied region within the image.
  struct {
    uint32_t x{0};
    uint32_t y{0};
    uint32_t z{0};
  } image_offset;
  // Extent (in texels) of the copied region.
  struct {
    uint32_t x{1};
    uint32_t y{1};
    uint32_t z{1};
  } image_extent;
  uint32_t image_base_layer{0};
  uint32_t image_layer_count{1};
  uint32_t image_aspect_flag{1};
};

// Extent of an image-to-image copy / blit region.
struct ImageCopyParams {
  uint32_t width{1};
  uint32_t height{1};
  uint32_t depth{1};
};
283 | |
// A recorded sequence of device commands, created from and submitted to a
// Stream (see Stream::new_command_list / Stream::submit).
class TI_DLL_EXPORT CommandList {
 public:
  virtual ~CommandList() {
  }

  /**
   * Bind a pipeline to the command list.
   * Doing so resets all bound resources.
   * @params[in] pipeline The pipeline to be bound
   */
  virtual void bind_pipeline(Pipeline *p) noexcept = 0;

  /**
   * Bind a ShaderResourceSet to a set index.
   * - If the set index is already bound, the previous binding will be
   * overwritten.
   * - A set index can only be bound with a single ShaderResourceSet.
   * - If the input set is empty, this command is a no-op.
   * @params[in] res The ShaderResourceSet to be bound.
   * @params[in] set_index The index the resources will be bound to.
   * @return The binding result code
   *         `success` If the binding succeeded
   *         `invalid_usage` If `res` is incompatible with current pipeline
   *         `not_supported` If some bindings are not supported by the backend
   *         `out_of_memory` If binding failed due to OOM conditions
   *         `error` If binding failed due to other reasons
   */
  virtual RhiResult bind_shader_resources(ShaderResourceSet *res,
                                          int set_index = 0) noexcept = 0;

  /**
   * Bind RasterResources to the command list.
   * - If the input resource is empty, this command is a no-op.
   * @params res The RasterResources to be bound.
   * @return The binding result code
   *         `success` If the binding succeeded
   *         `invalid_usage` If `res` is incompatible with current pipeline
   *         `not_supported` If some bindings are not supported by the backend
   *         `error` If binding failed due to other reasons
   */
  virtual RhiResult bind_raster_resources(RasterResources *res) noexcept = 0;

  /**
   * Insert a memory barrier into the command list.
   * The barrier affects a continuous region of memory.
   * Changes to memory before the barrier will be visible to accesses after the
   * barrier (API command ordering). i.e. Commands later than this barrier will
   * see the changes made by commands before this barrier.
   * This barrier is limited in scope to the Stream that the command list is
   * submitted to. Other Streams or Devices may not observe this barrier.
   * @params[in] ptr The pointer to the start of the region
   * @params[in] size The size of the memory region.
   *                  Size is clamped to the underlying buffer size.
   */
  virtual void buffer_barrier(DevicePtr ptr, size_t size) noexcept = 0;

  /**
   * Insert a memory barrier into the command list.
   * The barrier affects an entire buffer.
   * Behaviour is the same as `buffer_barrier(DevicePtr, size_t)`
   * @params[in] alloc The memory allocation of this barrier
   */
  virtual void buffer_barrier(DeviceAllocation alloc) noexcept = 0;

  /**
   * Insert a memory barrier into the command list.
   * The barrier affects all global memory.
   * Behaviour is the same as `buffer_barrier(DevicePtr, size_t)`
   */
  virtual void memory_barrier() noexcept = 0;

  /**
   * Insert a buffer copy operation into the command list.
   * @params[in] src The source Device Pointer
   * @params[in] dst The destination Device Pointer
   * @params[in] size The size of the region to be copied.
   *                  The size will be clamped to the minimum between
   *                  `dst.size - dst.offset` and `src.size - src.offset`
   */
  virtual void buffer_copy(DevicePtr dst,
                           DevicePtr src,
                           size_t size) noexcept = 0;

  /**
   * Insert a memory region fill operation into the command list
   * The memory region will be filled with the given (bit precise) value.
   * - (Encouraged behavior) If the `data` is 0, the underlying API might
   * provide a faster code path.
   * - (Encouraged behavior) If the `size` is -1 (max of size_t) the underlying
   * API might provide a faster code path.
   * @params[in] ptr The start of the memory region.
   * - ptr.offset will be aligned down to a multiple of 4 bytes.
   * @params[in] size The size of the region.
   * - The size will be clamped to the underlying buffer's size.
   * @params[in] data The 32-bit pattern the region is filled with.
   */
  virtual void buffer_fill(DevicePtr ptr,
                           size_t size,
                           uint32_t data) noexcept = 0;

  /**
   * Enqueues a compute operation with {X, Y, Z} amount of workgroups.
   * The block size / workgroup size is pre-determined within the pipeline.
   * - This is only valid if the pipeline has a predetermined block size
   * - This API has a device-dependent variable max values for X, Y, Z
   * - The currently bound pipeline will be dispatched
   * - The enqueued operation starts in CommandList API ordering.
   * - The enqueued operation may end out-of-order, but it respects barriers
   * @params[in] x The number of workgroups in X dimension
   * @params[in] y The number of workgroups in Y dimension
   * @params[in] z The number of workgroups in Z dimension
   * @return The status of this operation
   * - `success` if the operation is successful
   * - `invalid_operation` if the current pipeline has variable block size
   *   (NOTE(review): `invalid_operation` is not an RhiResult enumerator;
   *   presumably `invalid_usage` is meant — confirm)
   * - `not_supported` if the requested X, Y, or Z is not supported
   */
  virtual RhiResult dispatch(uint32_t x,
                             uint32_t y = 1,
                             uint32_t z = 1) noexcept = 0;

  // Grid / block shape for the dynamic-block-size dispatch overload below.
  struct ComputeSize {
    uint32_t x{0};
    uint32_t y{0};
    uint32_t z{0};
  };

  /**
   * Enqueues a compute operation with `grid_size` amount of threads.
   * The workgroup size is dynamic and specified through `block_size`
   * - This is only valid if the pipeline has a predetermined block size
   * - This API has a device-dependent variable max values for `grid_size`
   * - This API has a device-dependent supported values for `block_size`
   * - The currently bound pipeline will be dispatched
   * - The enqueued operation starts in CommandList API ordering.
   * - The enqueued operation may end out-of-order, but it respects barriers
   * NOTE(review): the "predetermined block size" validity note and the
   * `invalid_operation` return note below appear copy-pasted from the
   * fixed-block-size overload above — confirm intended semantics.
   * @params[in] grid_size The number of threads dispatch
   * @params[in] block_size The shape of each block / workgroup / threadsgroup
   * @return The status of this operation
   * - `success` if the operation is successful
   * - `invalid_operation` if the current pipeline has variable block size
   * - `not_supported` if the requested sizes are not supported
   * - `error` if the operation failed due to other reasons
   */
  virtual RhiResult dispatch(ComputeSize grid_size,
                             ComputeSize block_size) noexcept {
    return RhiResult::not_supported;
  }

  // Profiler support: mark the start of a named profiling scope (no-op by
  // default).
  virtual void begin_profiler_scope(const std::string &kernel_name) {
  }

  // Mark the end of the most recently opened profiling scope (no-op by
  // default).
  virtual void end_profiler_scope() {
  }

  // These are not implemented in compute only device
  // Begin a render pass over the viewport rectangle (x0, y0)-(x1, y1) with
  // the given color / depth attachments and per-attachment clear settings.
  virtual void begin_renderpass(int x0,
                                int y0,
                                int x1,
                                int y1,
                                uint32_t num_color_attachments,
                                DeviceAllocation *color_attachments,
                                bool *color_clear,
                                std::vector<float> *clear_colors,
                                DeviceAllocation *depth_attachment,
                                bool depth_clear) {
    TI_NOT_IMPLEMENTED
  }
  virtual void end_renderpass() {
    TI_NOT_IMPLEMENTED
  }
  // Non-indexed, non-instanced draw.
  virtual void draw(uint32_t num_verticies, uint32_t start_vertex = 0) {
    TI_NOT_IMPLEMENTED
  }
  // Non-indexed, instanced draw.
  virtual void draw_instance(uint32_t num_verticies,
                             uint32_t num_instances,
                             uint32_t start_vertex = 0,
                             uint32_t start_instance = 0) {
    TI_NOT_IMPLEMENTED
  }
  virtual void set_line_width(float width) {
    TI_NOT_IMPLEMENTED
  }
  // Indexed draw using the bound index buffer.
  virtual void draw_indexed(uint32_t num_indicies,
                            uint32_t start_vertex = 0,
                            uint32_t start_index = 0) {
    TI_NOT_IMPLEMENTED
  }
  // Indexed, instanced draw.
  virtual void draw_indexed_instance(uint32_t num_indicies,
                                     uint32_t num_instances,
                                     uint32_t start_vertex = 0,
                                     uint32_t start_index = 0,
                                     uint32_t start_instance = 0) {
    TI_NOT_IMPLEMENTED
  }
  // Transition an image between layouts (see ImageLayout).
  virtual void image_transition(DeviceAllocation img,
                                ImageLayout old_layout,
                                ImageLayout new_layout) {
    TI_NOT_IMPLEMENTED
  }
  // Copy buffer contents into an image (see BufferImageCopyParams).
  virtual void buffer_to_image(DeviceAllocation dst_img,
                               DevicePtr src_buf,
                               ImageLayout img_layout,
                               const BufferImageCopyParams &params) {
    TI_NOT_IMPLEMENTED
  }
  // Copy image contents into a buffer (see BufferImageCopyParams).
  virtual void image_to_buffer(DevicePtr dst_buf,
                               DeviceAllocation src_img,
                               ImageLayout img_layout,
                               const BufferImageCopyParams &params) {
    TI_NOT_IMPLEMENTED
  }
  // Image-to-image copy of a region (extents must match).
  virtual void copy_image(DeviceAllocation dst_img,
                          DeviceAllocation src_img,
                          ImageLayout dst_img_layout,
                          ImageLayout src_img_layout,
                          const ImageCopyParams &params) {
    TI_NOT_IMPLEMENTED
  }
  // Image-to-image blit (copy with possible scaling/filtering).
  virtual void blit_image(DeviceAllocation dst_img,
                          DeviceAllocation src_img,
                          ImageLayout dst_img_layout,
                          ImageLayout src_img_layout,
                          const ImageCopyParams &params) {
    TI_NOT_IMPLEMENTED
  }
};
511 | |
// Describes one blob of pipeline source code/binary plus the shader stage it
// targets (see Device::create_pipeline).
struct PipelineSourceDesc {
  PipelineSourceType type;
  const void *data{nullptr};  // Pointer to the source bytes (not owned)
  size_t size{0};             // Size of `data` in bytes
  PipelineStageType stage{PipelineStageType::compute};
};
518 | |
// FIXME: this probably isn't backend-neutral enough
// Usage flags for buffer allocations; combinable via the operators generated
// by MAKE_ENUM_FLAGS below.
enum class AllocUsage : int {
  None = 0,
  Storage = 1,
  Uniform = 2,
  Vertex = 4,
  Index = 8,
  Upload = 16,
};

MAKE_ENUM_FLAGS(AllocUsage)
530 | |
531 | class TI_DLL_EXPORT StreamSemaphoreObject { |
532 | public: |
533 | virtual ~StreamSemaphoreObject() { |
534 | } |
535 | }; |
536 | |
537 | using StreamSemaphore = std::shared_ptr<StreamSemaphoreObject>; |
538 | |
// An ordered queue of work on a Device. Command lists are allocated from a
// stream and submitted back to it for execution.
class TI_DLL_EXPORT Stream {
 public:
  virtual ~Stream() {
  }

  /**
   * Allocates a new CommandList object from the stream.
   * @params[out] out_cmdlist The allocated command list.
   * @return The status of this operation.
   * - `success` If allocation succeeded.
   * - `out_of_memory` If allocation failed due to lack of device or host
   * memory.
   */
  virtual RhiResult new_command_list(CommandList **out_cmdlist) noexcept = 0;

  // Convenience wrapper around `new_command_list`: returns the command list
  // owned by a unique_ptr together with the result code.
  inline std::pair<std::unique_ptr<CommandList>, RhiResult>
  new_command_list_unique() {
    CommandList *cmdlist{nullptr};
    RhiResult res = this->new_command_list(&cmdlist);
    return std::make_pair(std::unique_ptr<CommandList>(cmdlist), res);
  }

  // Submit a command list for execution after the given semaphores are
  // signaled; returns a semaphore associated with this submission.
  virtual StreamSemaphore submit(
      CommandList *cmdlist,
      const std::vector<StreamSemaphore> &wait_semaphores = {}) = 0;
  // Same as `submit`, but additionally synchronized.
  // NOTE(review): exact sync semantics (blocking vs. fenced) are
  // backend-defined — confirm against implementations.
  virtual StreamSemaphore submit_synced(
      CommandList *cmdlist,
      const std::vector<StreamSemaphore> &wait_semaphores = {}) = 0;

  // Wait until previously submitted work on this stream has completed.
  virtual void command_sync() = 0;
};
570 | |
// Opaque cache object backends may use to speed up pipeline creation across
// runs (see Device::create_pipeline_cache / Device::create_pipeline).
class TI_DLL_EXPORT PipelineCache {
 public:
  virtual ~PipelineCache() = default;

  /**
   * Get the pointer to the raw data of the cache.
   * - Can return `nullptr` if cache is invalid or empty.
   */
  virtual void *data() noexcept {
    return nullptr;
  }

  /**
   * Get the size of the cache (in bytes).
   */
  virtual size_t size() const noexcept {
    return 0;
  }
};

using UPipelineCache = std::unique_ptr<PipelineCache>;
592 | |
// The base interface of a compute/graphics device. One concrete subclass per
// backend; owns the device capability configuration.
class TI_DLL_EXPORT Device {
  DeviceCapabilityConfig caps_{};

 public:
  virtual ~Device(){};

  // Parameters for allocate_memory().
  struct AllocParams {
    uint64_t size{0};
    bool host_write{false};      // Host (CPU) will write to this memory
    bool host_read{false};       // Host (CPU) will read from this memory
    bool export_sharing{false};  // Allocation may be exported (see share_to)
    AllocUsage usage{AllocUsage::Storage};
  };

  // Allocate a block of device memory described by `params`.
  virtual DeviceAllocation allocate_memory(const AllocParams &params) = 0;

  // Free an allocation previously obtained from allocate_memory().
  virtual void dealloc_memory(DeviceAllocation handle) = 0;

  virtual uint64_t get_memory_physical_pointer(DeviceAllocation handle) {
    // FIXME: (penguinliong) This method reports the actual device memory
    // address, it's used for bindless (like argument buffer on Metal). If the
    // backend doesn't have access to physical memory address, it should return
    // null and it depends on the backend implementation to use the address in
    // argument binders.
    return 0;
  }

  /**
   * Create a Pipeline Cache, which accelerates backend API's pipeline
   * creation.
   * @params[out] out_cache The created pipeline cache.
   * - If operation failed this will be set to `nullptr`
   * @params[in] initial_size Size of the initial data, can be 0.
   * @params[in] initial_data The initial data, can be nullptr.
   * - This data can be used to load back the cache from previous invocations.
   * - The backend API may ignore this data or deem it incompatible.
   * @return The status of this operation.
   * - `success` if the pipeline cache is created successfully.
   * - `out_of_memory` if operation failed due to lack of device or host memory.
   * - `error` if operation failed due to other errors.
   */
  virtual RhiResult create_pipeline_cache(
      PipelineCache **out_cache,
      size_t initial_size = 0,
      const void *initial_data = nullptr) noexcept {
    *out_cache = nullptr;
    return RhiResult::not_supported;
  }

  // Convenience wrapper: returns the cache owned by a unique_ptr together
  // with the result code.
  inline std::pair<UPipelineCache, RhiResult> create_pipeline_cache_unique(
      size_t initial_size = 0,
      const void *initial_data = nullptr) noexcept {
    PipelineCache *cache{nullptr};
    RhiResult res =
        this->create_pipeline_cache(&cache, initial_size, initial_data);
    return std::make_pair(UPipelineCache(cache), res);
  }

  /**
   * Create a Pipeline. A Pipeline is a program that can be dispatched into a
   * stream through a command list.
   * @params[out] out_pipeline The created pipeline.
   * @params[in] src The source description of the pipeline.
   * @params[in] name The name of such pipeline, for debug purposes.
   * @params[in] cache The pipeline cache to use, can be nullptr.
   * @return The status of this operation.
   * - `success` if the pipeline is created successfully.
   * - `out_of_memory` if operation failed due to lack of device or host memory.
   * - `invalid_usage` if the specified source is incompatible or invalid.
   * - `not_supported` if the pipeline uses features the device can't support.
   * - `error` if the operation failed due to other reasons.
   */
  virtual RhiResult create_pipeline(
      Pipeline **out_pipeline,
      const PipelineSourceDesc &src,
      std::string name = "Pipeline" ,
      PipelineCache *cache = nullptr) noexcept = 0;

  // Convenience wrapper: returns the pipeline owned by a unique_ptr together
  // with the result code.
  inline std::pair<UPipeline, RhiResult> create_pipeline_unique(
      const PipelineSourceDesc &src,
      std::string name = "Pipeline" ,
      PipelineCache *cache = nullptr) noexcept {
    Pipeline *pipeline{nullptr};
    RhiResult res = this->create_pipeline(&pipeline, src, name, cache);
    return std::make_pair(UPipeline(pipeline), res);
  }

  // Allocate memory wrapped in an RAII guard that frees it on destruction.
  std::unique_ptr<DeviceAllocationGuard> allocate_memory_unique(
      const AllocParams &params) {
    return std::make_unique<DeviceAllocationGuard>(
        this->allocate_memory(params));
  }

  /**
   * Upload data to device allocations immediately.
   * - This is a synchronous operation, function returns when upload is complete
   * - The host data pointers must be valid and large enough for the size of the
   * copy, otherwise this function might segfault
   * - `device_ptr`, `data`, and `size` must contain `num_alloc` number of valid
   * values
   * @params[in] device_ptr The array of destination device pointers.
   * @params[in] data The array of source host pointers.
   * @params[in] size The array of sizes of data/copy.
   * @params[in] num_alloc The number of uploads to perform.
   * @return The status of this operation
   * - `success` if the upload is successful.
   * - `out_of_memory` if operation failed due to lack of device or host memory.
   * - `invalid_usage` if the specified source is incompatible or invalid.
   * - `error` if the operation failed due to other reasons.
   */
  virtual RhiResult upload_data(DevicePtr *device_ptr,
                                const void **data,
                                size_t *size,
                                int num_alloc = 1) noexcept;

  /**
   * Read data from device allocations back to host immediately.
   * - This is a synchronous operation, function returns when readback is
   * complete
   * - The host data pointers must be valid and large enough for the size of the
   * copy, otherwise this function might segfault
   * - `device_ptr`, `data`, and `size` must contain `num_alloc` number of valid
   * values
   * @params[in] device_ptr The array of source device pointers.
   * @params[in] data The array of destination host pointers.
   * @params[in] size The array of sizes of data/copy.
   * @params[in] num_alloc The number of readbacks to perform.
   * @params[in] wait_sema The semaphores to wait for before the copy is
   * initiated.
   * @return The status of this operation
   * - `success` if the readback is successful.
   * - `out_of_memory` if operation failed due to lack of device or host memory.
   * - `invalid_usage` if the specified source is incompatible or invalid.
   * - `error` if the operation failed due to other reasons.
   */
  virtual RhiResult readback_data(
      DevicePtr *device_ptr,
      void **data,
      size_t *size,
      int num_alloc = 1,
      const std::vector<StreamSemaphore> &wait_sema = {}) noexcept;

  // Fetch the i-th uint64 from a backend result buffer.
  // NOTE(review): semantics of `i` / `result_buffer` are backend-defined —
  // confirm against implementations.
  virtual uint64_t fetch_result_uint64(int i, uint64_t *result_buffer) {
    TI_NOT_IMPLEMENTED
  }

  // Each thread will acquire its own stream
  virtual Stream *get_compute_stream() = 0;

  // Wait for all tasks to complete (task from all streams)
  virtual void wait_idle() = 0;

  /**
   * Create a new shader resource set
   * @return The new shader resource set pointer
   */
  virtual ShaderResourceSet *create_resource_set() = 0;

  /**
   * Create a new shader resource set (wrapped in unique ptr)
   * @return The new shader resource set unique pointer
   */
  inline std::unique_ptr<ShaderResourceSet> create_resource_set_unique() {
    return std::unique_ptr<ShaderResourceSet>(this->create_resource_set());
  }

  /**
   * Map a range within a DeviceAllocation memory into host address space.
   *
   * @param[in] ptr The Device Pointer to map.
   * @param[in] size The size of the mapped region.
   * @param[out] mapped_ptr Outputs the pointer to the mapped region.
   * @return The result status.
   *         `success` when the mapping is successful.
   *         `invalid_usage` when the memory is not host visible.
   *         `invalid_usage` when trying to map the memory multiple times.
   *         `invalid_usage` when `ptr.offset + size` is out-of-bounds.
   *         `error` when the mapping failed for other reasons.
   */
  virtual RhiResult map_range(DevicePtr ptr,
                              uint64_t size,
                              void **mapped_ptr) = 0;

  /**
   * Map an entire DeviceAllocation into host address space.
   * @param[in] alloc The Device Allocation to map.
   * @param[out] mapped_ptr Outputs the pointer to the mapped region.
   * @return The result status.
   *         `success` when the mapping is successful.
   *         `invalid_usage` when the memory is not host visible.
   *         `invalid_usage` when trying to map the memory multiple times.
   *         `error` when the mapping failed for other reasons.
   */
  virtual RhiResult map(DeviceAllocation alloc, void **mapped_ptr) = 0;

  /**
   * Unmap a previously mapped DevicePtr or DeviceAllocation.
   * @param[in] ptr The DevicePtr to unmap.
   */
  virtual void unmap(DevicePtr ptr) = 0;

  /**
   * Unmap a previously mapped DevicePtr or DeviceAllocation.
   * @param[in] alloc The DeviceAllocation to unmap
   */
  virtual void unmap(DeviceAllocation alloc) = 0;

  // Directly share memory in the form of alias
  static DeviceAllocation share_to(DeviceAllocation *alloc, Device *target);

  // Strictly intra device copy (synced)
  virtual void memcpy_internal(DevicePtr dst, DevicePtr src, uint64_t size) = 0;

  // Copy memory inter or intra devices (synced)
  enum class MemcpyCapability { Direct, RequiresStagingBuffer, RequiresHost };

  // Determine which copy path is available between `src` and `dst`.
  static MemcpyCapability check_memcpy_capability(DevicePtr dst,
                                                  DevicePtr src,
                                                  uint64_t size);

  // Copy when check_memcpy_capability() reported `Direct`.
  static void memcpy_direct(DevicePtr dst, DevicePtr src, uint64_t size);

  // Copy via an intermediate device staging buffer (`RequiresStagingBuffer`).
  static void memcpy_via_staging(DevicePtr dst,
                                 DevicePtr staging,
                                 DevicePtr src,
                                 uint64_t size);

  // Copy via an intermediate host buffer (`RequiresHost`).
  static void memcpy_via_host(DevicePtr dst,
                              void *host_buffer,
                              DevicePtr src,
                              uint64_t size);

  // Get all supported capabilities of the current created device.
  virtual Arch arch() const = 0;
  inline const DeviceCapabilityConfig &get_caps() const {
    return caps_;
  }
  inline void set_caps(DeviceCapabilityConfig &&caps) {
    caps_ = std::move(caps);
  }

  // Profiler support
  virtual void profiler_sync() {
  }

  virtual size_t profiler_get_sampler_count() {
    return 0;
  }

  virtual std::vector<std::pair<std::string, double>>
  profiler_flush_sampled_time() {
    return std::vector<std::pair<std::string, double>>();
  }
};
848 | |
// A presentable surface (swapchain abstraction) tied to a window or native
// surface handle (see SurfaceConfig).
class TI_DLL_EXPORT Surface {
 public:
  virtual ~Surface() {
  }

  // Acquire the next swapchain image; returns a semaphore associated with
  // the acquisition.
  virtual StreamSemaphore acquire_next_image() = 0;
  // The image to render to for the current frame.
  virtual DeviceAllocation get_target_image() = 0;
  // Present the current target image after `wait_semaphores` are signaled.
  virtual void present_image(
      const std::vector<StreamSemaphore> &wait_semaphores = {}) = 0;
  // Current surface size as {width, height}.
  virtual std::pair<uint32_t, uint32_t> get_size() = 0;
  // Number of images in the swapchain.
  virtual int get_image_count() = 0;
  // Pixel format of the swapchain images.
  virtual BufferFormat image_format() = 0;
  virtual void resize(uint32_t width, uint32_t height) = 0;
  // NOTE(review): direction/ownership of `depth_alloc` vs. the returned
  // allocation is not clear from this header — confirm in implementations.
  virtual DeviceAllocation get_depth_data(DeviceAllocation &depth_alloc) = 0;
  virtual DeviceAllocation get_image_data() {
    TI_NOT_IMPLEMENTED
  }
};
867 | |
// Describes one vertex buffer binding slot of a raster pipeline.
struct VertexInputBinding {
  uint32_t binding{0};   // Binding slot index.
  size_t stride{0};      // Byte stride between consecutive elements.
  bool instance{false};  // True: step per-instance; false: step per-vertex.
};
873 | |
// Describes one vertex attribute read from a VertexInputBinding.
struct VertexInputAttribute {
  uint32_t location{0};  // Shader input location.
  uint32_t binding{0};   // Which VertexInputBinding this attribute reads from.
  // NOTE(review): no default value — callers must always set `format`.
  BufferFormat format;
  uint32_t offset{0};    // Byte offset of the attribute within an element.
};
880 | |
// Creation parameters for a Surface (see GraphicsDevice::create_surface).
struct SurfaceConfig {
  // VSync:
  // - true: will attempt to wait for V-Blank
  // - when adaptive is true: when supported, if a V-Blank is missed, instead of
  //   waiting, a tearing may appear, reduces overall latency
  bool vsync{false};
  bool adaptive{true};
  // Opaque platform window handle; interpretation is backend-specific.
  void *window_handle{nullptr};
  uint32_t width{1};
  uint32_t height{1};
  // Opaque pre-created native surface handle, if the caller already has one.
  void *native_surface_handle{nullptr};
};
893 | |
// Bitmask describing how an image allocation may be used.
// NOTE: kept hand-rolled (instead of MAKE_ENUM_FLAGS) because `operator&`
// here intentionally yields bool for direct use in conditions.
enum class ImageAllocUsage : int {
  None = 0,
  Storage = 1,
  Sampled = 2,
  Attachment = 4,
};
// Combine two usage flags into a single mask.
inline ImageAllocUsage operator|(ImageAllocUsage lhs, ImageAllocUsage rhs) {
  const int combined = static_cast<int>(lhs) | static_cast<int>(rhs);
  return static_cast<ImageAllocUsage>(combined);
}
// True when the two masks share at least one flag.
inline bool operator&(ImageAllocUsage lhs, ImageAllocUsage rhs) {
  return (static_cast<int>(lhs) & static_cast<int>(rhs)) != 0;
}
907 | |
// Creation parameters for an image allocation (see create_image).
struct ImageParams {
  // NOTE(review): `dimension` and `format` have no defaults — always set them.
  ImageDimension dimension;
  BufferFormat format;
  ImageLayout initial_layout{ImageLayout::undefined};
  uint32_t x{1};  // Width.
  uint32_t y{1};  // Height (1 for 1D images).
  uint32_t z{1};  // Depth (1 for 1D/2D images).
  bool export_sharing{false};  // Allow sharing/export of the backing memory.
  // Default allows all three usages: storage, sampled, and attachment.
  ImageAllocUsage usage{ImageAllocUsage::Storage | ImageAllocUsage::Sampled |
                        ImageAllocUsage::Attachment};
};
919 | |
// One blend equation: op(src * src_factor, dst * dst_factor).
// Defaults implement classic alpha blending:
//   result = src * src_alpha + dst * (1 - src_alpha).
struct BlendFunc {
  BlendOp op{BlendOp::add};
  BlendFactor src_factor{BlendFactor::src_alpha};
  BlendFactor dst_factor{BlendFactor::one_minus_src_alpha};
};
925 | |
// Blending configuration for one color attachment.
struct BlendingParams {
  bool enable{true};
  BlendFunc color;  // Blend function for the color (RGB) channels.
  BlendFunc alpha;  // Blend function for the alpha channel.
};
931 | |
// Fixed-function state of a raster pipeline.
struct RasterParams {
  TopologyType prim_topology{TopologyType::Triangles};
  PolygonMode polygon_mode{PolygonMode::Fill};
  bool front_face_cull{false};
  bool back_face_cull{false};
  bool depth_test{false};
  bool depth_write{false};
  // Per-attachment blending. NOTE(review): semantics of an empty list are
  // backend-defined — confirm with pipeline creation code.
  std::vector<BlendingParams> blending{};
};
941 | |
/**
 * A Device that additionally supports rasterization: graphics pipelines,
 * presentable surfaces, images, and image <-> buffer transfers.
 */
class TI_DLL_EXPORT GraphicsDevice : public Device {
 public:
  /**
   * Create a rasterization pipeline from the given shader sources.
   * @param[in] src Pipeline shader stage sources.
   * @param[in] raster_params Fixed-function raster state.
   * @param[in] vertex_inputs Vertex buffer binding descriptions.
   * @param[in] vertex_attrs Vertex attribute layout within those bindings.
   * @param[in] name Debug name for the pipeline.
   * @return The new Pipeline, owned by the caller.
   */
  virtual std::unique_ptr<Pipeline> create_raster_pipeline(
      const std::vector<PipelineSourceDesc> &src,
      const RasterParams &raster_params,
      const std::vector<VertexInputBinding> &vertex_inputs,
      const std::vector<VertexInputAttribute> &vertex_attrs,
      std::string name = "Pipeline" ) = 0;

  // The stream on which graphics command lists execute.
  virtual Stream *get_graphics_stream() = 0;

  /**
   * Create a new raster resources set.
   * @return The new RasterResources pointer (ownership passes to the caller)
   */
  virtual RasterResources *create_raster_resources() = 0;

  /**
   * Create a new raster resources set (wrapped in unique ptr)
   * @return The new RasterResources unique pointer
   */
  inline std::unique_ptr<RasterResources> create_raster_resources_unique() {
    return std::unique_ptr<RasterResources>(this->create_raster_resources());
  }

  // Create a presentable surface (see SurfaceConfig).
  virtual std::unique_ptr<Surface> create_surface(
      const SurfaceConfig &config) = 0;
  // You are not expected to call this directly. If you want to use this image
  // in a taichi kernel, you usually want to create the image via
  // `GfxRuntime::create_image`. `GfxRuntime` is available in `ProgramImpl`
  // of GPU backends.
  virtual DeviceAllocation create_image(const ImageParams &params) = 0;
  // Guard-wrapped variant of `create_image`; the DeviceImageGuard presumably
  // destroys the image when it goes out of scope — see its definition.
  inline DeviceImageUnique create_image_unique(const ImageParams &params) {
    return std::make_unique<DeviceImageGuard>(this->create_image(params));
  }
  virtual void destroy_image(DeviceAllocation handle) = 0;

  // Image layout transition and image <-> buffer copies. These are non-pure
  // virtuals with shared default implementations defined out-of-line;
  // backends may override them.
  virtual void image_transition(DeviceAllocation img,
                                ImageLayout old_layout,
                                ImageLayout new_layout);
  virtual void buffer_to_image(DeviceAllocation dst_img,
                               DevicePtr src_buf,
                               ImageLayout img_layout,
                               const BufferImageCopyParams &params);
  virtual void image_to_buffer(DevicePtr dst_buf,
                               DeviceAllocation src_img,
                               ImageLayout img_layout,
                               const BufferImageCopyParams &params);
};
991 | |
992 | } // namespace taichi::lang |
993 | |