1#include <iostream>
2#include <stdexcept>
3#include <string>
4#include <unordered_set>
5#include <unordered_map>
6#include <vector>
7#include <array>
8#include <set>
9
10#include "taichi/rhi/vulkan/vulkan_common.h"
11#include "taichi/rhi/vulkan/vulkan_utils.h"
12#include "taichi/rhi/vulkan/vulkan_loader.h"
13#include "taichi/rhi/vulkan/vulkan_device.h"
14
15#include "spirv_reflect.h"
16
17namespace taichi::lang {
18namespace vulkan {
19
20using namespace rhi_impl;
21
22const BidirMap<BufferFormat, VkFormat> buffer_format_map = {
23 {BufferFormat::r8, VK_FORMAT_R8_UNORM},
24 {BufferFormat::rg8, VK_FORMAT_R8G8_UNORM},
25 {BufferFormat::rgba8, VK_FORMAT_R8G8B8A8_UNORM},
26 {BufferFormat::rgba8srgb, VK_FORMAT_R8G8B8A8_SRGB},
27 {BufferFormat::bgra8, VK_FORMAT_B8G8R8A8_UNORM},
28 {BufferFormat::bgra8srgb, VK_FORMAT_B8G8R8A8_SRGB},
29 {BufferFormat::r8u, VK_FORMAT_R8_UINT},
30 {BufferFormat::rg8u, VK_FORMAT_R8G8_UINT},
31 {BufferFormat::rgba8u, VK_FORMAT_R8G8B8A8_UINT},
32 {BufferFormat::r8i, VK_FORMAT_R8_SINT},
33 {BufferFormat::rg8i, VK_FORMAT_R8G8_SINT},
34 {BufferFormat::rgba8i, VK_FORMAT_R8G8B8A8_SINT},
35 {BufferFormat::r16, VK_FORMAT_R16_UNORM},
36 {BufferFormat::rg16, VK_FORMAT_R16G16_UNORM},
37 {BufferFormat::rgb16, VK_FORMAT_R16G16B16_UNORM},
38 {BufferFormat::rgba16, VK_FORMAT_R16G16B16A16_UNORM},
39 {BufferFormat::r16u, VK_FORMAT_R16_UNORM},
40 {BufferFormat::rg16u, VK_FORMAT_R16G16_UNORM},
41 {BufferFormat::rgb16u, VK_FORMAT_R16G16B16_UNORM},
42 {BufferFormat::rgba16u, VK_FORMAT_R16G16B16A16_UNORM},
43 {BufferFormat::r16i, VK_FORMAT_R16_SINT},
44 {BufferFormat::rg16i, VK_FORMAT_R16G16_SINT},
45 {BufferFormat::rgb16i, VK_FORMAT_R16G16B16_SINT},
46 {BufferFormat::rgba16i, VK_FORMAT_R16G16B16A16_SINT},
47 {BufferFormat::r16f, VK_FORMAT_R16_SFLOAT},
48 {BufferFormat::rg16f, VK_FORMAT_R16G16_SFLOAT},
49 {BufferFormat::rgb16f, VK_FORMAT_R16G16B16_SFLOAT},
50 {BufferFormat::rgba16f, VK_FORMAT_R16G16B16A16_SFLOAT},
51 {BufferFormat::r32u, VK_FORMAT_R32_UINT},
52 {BufferFormat::rg32u, VK_FORMAT_R32G32_UINT},
53 {BufferFormat::rgb32u, VK_FORMAT_R32G32B32_UINT},
54 {BufferFormat::rgba32u, VK_FORMAT_R32G32B32A32_UINT},
55 {BufferFormat::r32i, VK_FORMAT_R32_SINT},
56 {BufferFormat::rg32i, VK_FORMAT_R32G32_SINT},
57 {BufferFormat::rgb32i, VK_FORMAT_R32G32B32_SINT},
58 {BufferFormat::rgba32i, VK_FORMAT_R32G32B32A32_SINT},
59 {BufferFormat::r32f, VK_FORMAT_R32_SFLOAT},
60 {BufferFormat::rg32f, VK_FORMAT_R32G32_SFLOAT},
61 {BufferFormat::rgb32f, VK_FORMAT_R32G32B32_SFLOAT},
62 {BufferFormat::rgba32f, VK_FORMAT_R32G32B32A32_SFLOAT},
63 {BufferFormat::depth16, VK_FORMAT_D16_UNORM},
64 {BufferFormat::depth24stencil8, VK_FORMAT_D24_UNORM_S8_UINT},
65 {BufferFormat::depth32f, VK_FORMAT_D32_SFLOAT}};
66
67RhiReturn<VkFormat> buffer_format_ti_to_vk(BufferFormat f) {
68 if (!buffer_format_map.exists(f)) {
69 RHI_LOG_ERROR("BufferFormat cannot be mapped to vk");
70 return {RhiResult::not_supported, VK_FORMAT_UNDEFINED};
71 }
72 return {RhiResult::success, buffer_format_map.at(f)};
73}
74
75RhiReturn<BufferFormat> buffer_format_vk_to_ti(VkFormat f) {
76 if (!buffer_format_map.exists(f)) {
77 RHI_LOG_ERROR("VkFormat cannot be mapped to ti");
78 return {RhiResult::not_supported, BufferFormat::unknown};
79 }
80 return {RhiResult::success, buffer_format_map.backend2rhi.at(f)};
81}
82
// Bidirectional mapping between RHI ImageLayout and VkImageLayout.
// NOTE: several RHI layouts share a VkImageLayout (e.g. shader_write and
// shader_read_write both map to VK_IMAGE_LAYOUT_GENERAL), so the reverse
// direction is ambiguous for those values; only the forward direction is
// used here (image_layout_ti_to_vk).
const BidirMap<ImageLayout, VkImageLayout> image_layout_map = {
    {ImageLayout::undefined, VK_IMAGE_LAYOUT_UNDEFINED},
    {ImageLayout::shader_read, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL},
    {ImageLayout::shader_write, VK_IMAGE_LAYOUT_GENERAL},
    {ImageLayout::shader_read_write, VK_IMAGE_LAYOUT_GENERAL},
    {ImageLayout::color_attachment, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL},
    {ImageLayout::color_attachment_read,
     VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL},
    {ImageLayout::depth_attachment, VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL},
    {ImageLayout::depth_attachment_read,
     VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL},
    {ImageLayout::transfer_dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL},
    {ImageLayout::transfer_src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL},
    {ImageLayout::present_src, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR}};
97
98VkImageLayout image_layout_ti_to_vk(ImageLayout layout) {
99 if (!image_layout_map.exists(layout)) {
100 RHI_LOG_ERROR("ImageLayout cannot be mapped to vk");
101 return VK_IMAGE_LAYOUT_UNDEFINED;
102 }
103 return image_layout_map.at(layout);
104}
105
// Bidirectional mapping between RHI BlendOp and VkBlendOp.
const BidirMap<BlendOp, VkBlendOp> blend_op_map = {
    {BlendOp::add, VK_BLEND_OP_ADD},
    {BlendOp::subtract, VK_BLEND_OP_SUBTRACT},
    {BlendOp::reverse_subtract, VK_BLEND_OP_REVERSE_SUBTRACT},
    {BlendOp::min, VK_BLEND_OP_MIN},
    {BlendOp::max, VK_BLEND_OP_MAX}};
112
113RhiReturn<VkBlendOp> blend_op_ti_to_vk(BlendOp op) {
114 if (!blend_op_map.exists(op)) {
115 RHI_LOG_ERROR("BlendOp cannot be mapped to vk");
116 return {RhiResult::not_supported, VK_BLEND_OP_ADD};
117 }
118 return {RhiResult::success, blend_op_map.at(op)};
119}
120
// Bidirectional mapping between RHI BlendFactor and VkBlendFactor.
const BidirMap<BlendFactor, VkBlendFactor> blend_factor_map = {
    {BlendFactor::zero, VK_BLEND_FACTOR_ZERO},
    {BlendFactor::one, VK_BLEND_FACTOR_ONE},
    {BlendFactor::src_color, VK_BLEND_FACTOR_SRC_COLOR},
    {BlendFactor::one_minus_src_color, VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR},
    {BlendFactor::dst_color, VK_BLEND_FACTOR_DST_COLOR},
    {BlendFactor::one_minus_dst_color, VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR},
    {BlendFactor::src_alpha, VK_BLEND_FACTOR_SRC_ALPHA},
    {BlendFactor::one_minus_src_alpha, VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA},
    {BlendFactor::dst_alpha, VK_BLEND_FACTOR_DST_ALPHA},
    {BlendFactor::one_minus_dst_alpha, VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA},
};
133
134RhiReturn<VkBlendFactor> blend_factor_ti_to_vk(BlendFactor factor) {
135 if (!blend_factor_map.exists(factor)) {
136 RHI_LOG_ERROR("BlendFactor cannot be mapped to vk");
137 return {RhiResult::not_supported, VK_BLEND_FACTOR_ONE};
138 }
139 return {RhiResult::success, blend_factor_map.at(factor)};
140}
141
// Wraps a VkPipelineCache for `device`. `initial_data` (may be null) seeds
// the cache with a previously serialized blob of `initial_size` bytes.
VulkanPipelineCache::VulkanPipelineCache(VulkanDevice *device,
                                         size_t initial_size,
                                         const void *initial_data)
    : device_(device) {
  cache_ = vkapi::create_pipeline_cache(device_->vk_device(), 0, initial_size,
                                        initial_data);
}
149
150VulkanPipelineCache ::~VulkanPipelineCache() {
151}
152
153void *VulkanPipelineCache::data() noexcept {
154 try {
155 data_shadow_.resize(size());
156 size_t size = 0;
157 vkGetPipelineCacheData(device_->vk_device(), cache_->cache, &size,
158 data_shadow_.data());
159 } catch (std::bad_alloc &) {
160 return nullptr;
161 }
162
163 return data_shadow_.data();
164}
165
// Query the current size (in bytes) of the serialized pipeline cache by
// calling vkGetPipelineCacheData with a null data pointer.
size_t VulkanPipelineCache::size() const noexcept {
  size_t size = 0;
  vkGetPipelineCacheData(device_->vk_device(), cache_->cache, &size, nullptr);
  return size;
}
171
// Build a compute pipeline: reflect descriptor layouts from the SPIR-V,
// compile shader stages, then create the pipeline.
VulkanPipeline::VulkanPipeline(const Params &params)
    : ti_device_(*params.device),
      device_(params.device->vk_device()),
      name_(params.name) {
  create_descriptor_set_layout(params);
  create_shader_stages(params);
  create_pipeline_layout();
  create_compute_pipeline(params);

  // The compute pipeline is fully built at this point, so the shader
  // modules are no longer needed and can be destroyed eagerly.
  for (VkShaderModule shader_module : shader_modules_) {
    vkDestroyShaderModule(device_, shader_module, kNoVkAllocCallbacks);
  }
  shader_modules_.clear();
}
186
// Build a graphics pipeline *template*. The actual VkPipeline objects are
// created lazily per render pass (see graphics_pipeline /
// graphics_pipeline_dynamic), so the shader modules must stay alive and
// are destroyed in the destructor instead of here.
VulkanPipeline::VulkanPipeline(
    const Params &params,
    const RasterParams &raster_params,
    const std::vector<VertexInputBinding> &vertex_inputs,
    const std::vector<VertexInputAttribute> &vertex_attrs)
    : ti_device_(*params.device),
      device_(params.device->vk_device()),
      name_(params.name) {
  this->graphics_pipeline_template_ =
      std::make_unique<GraphicsPipelineTemplate>();

  create_descriptor_set_layout(params);
  create_shader_stages(params);
  create_pipeline_layout();
  create_graphics_pipeline(raster_params, vertex_inputs, vertex_attrs);
}
203
// Destroy any shader modules still alive (graphics pipelines keep them
// around for lazy per-renderpass pipeline creation; compute pipelines have
// already cleared this list in the constructor).
VulkanPipeline::~VulkanPipeline() {
  for (VkShaderModule shader_module : shader_modules_) {
    vkDestroyShaderModule(device_, shader_module, kNoVkAllocCallbacks);
  }
  shader_modules_.clear();
}
210
211VkShaderModule VulkanPipeline::create_shader_module(VkDevice device,
212 const SpirvCodeView &code) {
213 VkShaderModuleCreateInfo create_info{};
214 create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
215 create_info.codeSize = code.size;
216 create_info.pCode = code.data;
217
218 VkShaderModule shader_module;
219 VkResult res = vkCreateShaderModule(device, &create_info, kNoVkAllocCallbacks,
220 &shader_module);
221 RHI_THROW_UNLESS(res == VK_SUCCESS,
222 std::runtime_error("vkCreateShaderModule failed"));
223 return shader_module;
224}
225
226vkapi::IVkPipeline VulkanPipeline::graphics_pipeline(
227 const VulkanRenderPassDesc &renderpass_desc,
228 vkapi::IVkRenderPass renderpass) {
229 if (graphics_pipeline_.find(renderpass) != graphics_pipeline_.end()) {
230 return graphics_pipeline_.at(renderpass);
231 }
232
233 vkapi::IVkPipeline pipeline = vkapi::create_graphics_pipeline(
234 device_, &graphics_pipeline_template_->pipeline_info, renderpass,
235 pipeline_layout_);
236
237 graphics_pipeline_[renderpass] = pipeline;
238
239 return pipeline;
240}
241
// Return the graphics pipeline for dynamic rendering (VK_KHR_dynamic_rendering)
// matching `renderpass_desc`, creating and memoizing it on first use.
vkapi::IVkPipeline VulkanPipeline::graphics_pipeline_dynamic(
    const VulkanRenderPassDesc &renderpass_desc) {
  if (graphics_pipeline_dynamic_.find(renderpass_desc) !=
      graphics_pipeline_dynamic_.end()) {
    return graphics_pipeline_dynamic_.at(renderpass_desc);
  }

  // Collect the VkFormat of every color attachment; the vector must stay
  // alive until create_graphics_pipeline_dynamic returns since
  // rendering_info only stores a pointer into it.
  std::vector<VkFormat> color_attachment_formats;
  for (const auto &color_attachment : renderpass_desc.color_attachments) {
    color_attachment_formats.push_back(color_attachment.first);
  }

  VkPipelineRenderingCreateInfoKHR rendering_info{};
  rendering_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO_KHR;
  rendering_info.pNext = nullptr;
  rendering_info.viewMask = 0;
  rendering_info.colorAttachmentCount =
      renderpass_desc.color_attachments.size();
  rendering_info.pColorAttachmentFormats = color_attachment_formats.data();
  rendering_info.depthAttachmentFormat = renderpass_desc.depth_attachment;
  // Stencil is not used by this backend.
  rendering_info.stencilAttachmentFormat = VK_FORMAT_UNDEFINED;

  vkapi::IVkPipeline pipeline = vkapi::create_graphics_pipeline_dynamic(
      device_, &graphics_pipeline_template_->pipeline_info, &rendering_info,
      pipeline_layout_);

  graphics_pipeline_dynamic_[renderpass_desc] = pipeline;

  return pipeline;
}
272
// Reflect the SPIR-V of every shader stage (via SPIRV-Reflect) to build
// template resource sets (`set_templates_`) and the matching descriptor set
// layouts (`set_layouts_`). For fragment shaders, also sizes the blend
// attachment array from the number of true render targets.
// Throws std::runtime_error / std::invalid_argument on reflection failures
// or non-contiguous set indices.
void VulkanPipeline::create_descriptor_set_layout(const Params &params) {
  for (auto &code_view : params.code) {
    SpvReflectShaderModule module;
    SpvReflectResult result =
        spvReflectCreateShaderModule(code_view.size, code_view.data, &module);
    RHI_THROW_UNLESS(result == SPV_REFLECT_RESULT_SUCCESS,
                     std::runtime_error("spvReflectCreateShaderModule failed"));

    // Two-call enumeration: first get the count, then fill the array.
    uint32_t set_count = 0;
    result = spvReflectEnumerateDescriptorSets(&module, &set_count, nullptr);
    RHI_THROW_UNLESS(result == SPV_REFLECT_RESULT_SUCCESS,
                     std::runtime_error("Failed to enumerate number of sets"));
    std::vector<SpvReflectDescriptorSet *> desc_sets(set_count);
    result = spvReflectEnumerateDescriptorSets(&module, &set_count,
                                               desc_sets.data());
    RHI_THROW_UNLESS(
        result == SPV_REFLECT_RESULT_SUCCESS,
        std::runtime_error("spvReflectEnumerateDescriptorSets failed"));

    for (SpvReflectDescriptorSet *desc_set : desc_sets) {
      uint32_t set_index = desc_set->set;
      // Lazily create one template VulkanResourceSet per set index; sets
      // referenced from multiple stages accumulate into the same template.
      if (set_templates_.find(set_index) == set_templates_.end()) {
        set_templates_.insert({set_index, VulkanResourceSet(&ti_device_)});
      }
      VulkanResourceSet &set = set_templates_.at(set_index);

      // Register each binding with a null resource; the real resources are
      // bound later by the user of the pipeline.
      for (int i = 0; i < desc_set->binding_count; i++) {
        SpvReflectDescriptorBinding *desc_binding = desc_set->bindings[i];

        if (desc_binding->descriptor_type ==
            SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_BUFFER) {
          set.rw_buffer(desc_binding->binding, kDeviceNullPtr, 0);
        } else if (desc_binding->descriptor_type ==
                   SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
          set.buffer(desc_binding->binding, kDeviceNullPtr, 0);
        } else if (desc_binding->descriptor_type ==
                   SPV_REFLECT_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
          set.image(desc_binding->binding, kDeviceNullAllocation, {});
        } else if (desc_binding->descriptor_type ==
                   SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
          set.rw_image(desc_binding->binding, kDeviceNullAllocation, {});
        } else {
          RHI_LOG_ERROR("Unrecognized binding ignored");
        }
      }
    }

    // Handle special vertex shaders stuff
    // if (code_view.stage == VK_SHADER_STAGE_VERTEX_BIT) {
    //   uint32_t attrib_count;
    //   result =
    //       spvReflectEnumerateInputVariables(&module, &attrib_count, nullptr);
    //   RHI_ASSERT(result == SPV_REFLECT_RESULT_SUCCESS);
    //   std::vector<SpvReflectInterfaceVariable *> attribs(attrib_count);
    //   result = spvReflectEnumerateInputVariables(&module, &attrib_count,
    //                                              attribs.data());
    //   RHI_ASSERT(result == SPV_REFLECT_RESULT_SUCCESS);

    //   for (SpvReflectInterfaceVariable *attrib : attribs) {
    //     uint32_t location = attrib->location;
    //     SpvReflectTypeDescription *type = attrib->type_description;
    //     TI_WARN("attrib {}:{}", location, type->type_name);
    //   }
    // }

    // For fragment shaders, count the real color render targets so the
    // blend attachment state array can be sized to match.
    if (code_view.stage == VK_SHADER_STAGE_FRAGMENT_BIT) {
      uint32_t render_target_count = 0;
      result = spvReflectEnumerateOutputVariables(&module, &render_target_count,
                                                  nullptr);
      RHI_THROW_UNLESS(
          result == SPV_REFLECT_RESULT_SUCCESS,
          std::runtime_error("Failed to enumerate number of output vars"));

      std::vector<SpvReflectInterfaceVariable *> variables(render_target_count);
      result = spvReflectEnumerateOutputVariables(&module, &render_target_count,
                                                  variables.data());

      RHI_THROW_UNLESS(
          result == SPV_REFLECT_RESULT_SUCCESS,
          std::runtime_error("spvReflectEnumerateOutputVariables failed"));

      render_target_count = 0;

      for (auto var : variables) {
        // We want to remove auxiliary outputs such as frag depth
        // (non-builtin variables report built_in == -1).
        if (static_cast<int>(var->built_in) == -1) {
          render_target_count++;
        }
      }

      graphics_pipeline_template_->blend_attachments.resize(
          render_target_count);

      // Default: blending disabled, all color channels written.
      VkPipelineColorBlendAttachmentState default_state{};
      default_state.colorWriteMask =
          VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
          VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
      default_state.blendEnable = VK_FALSE;

      std::fill(graphics_pipeline_template_->blend_attachments.begin(),
                graphics_pipeline_template_->blend_attachments.end(),
                default_state);
    }
    spvReflectDestroyShaderModule(&module);
  }

  // A program can have no binding sets at all.
  if (set_templates_.size()) {
    // We need to verify the set layouts are all continous
    uint32_t max_set = 0;
    for (auto &[index, layout_template] : set_templates_) {
      max_set = std::max(index, max_set);
    }
    RHI_THROW_UNLESS(
        max_set + 1 == set_templates_.size(),
        std::invalid_argument("Sets must be continous & start with 0"));

    set_layouts_.resize(set_templates_.size(), nullptr);
    for (auto &[index, layout_template] : set_templates_) {
      set_layouts_[index] = ti_device_.get_desc_set_layout(layout_template);
    }
  }
}
396
397void VulkanPipeline::create_shader_stages(const Params &params) {
398 for (auto &code_view : params.code) {
399 VkPipelineShaderStageCreateInfo &shader_stage_info =
400 shader_stages_.emplace_back();
401
402 VkShaderModule shader_module = create_shader_module(device_, code_view);
403
404 shader_stage_info.sType =
405 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
406 shader_stage_info.stage = code_view.stage;
407 shader_stage_info.module = shader_module;
408 shader_stage_info.pName = "main";
409
410 shader_modules_.push_back(shader_module);
411 }
412}
413
// Create the VkPipelineLayout from the reflected descriptor set layouts.
void VulkanPipeline::create_pipeline_layout() {
  pipeline_layout_ = vkapi::create_pipeline_layout(device_, set_layouts_);
}
417
// Create the compute pipeline from the single compute shader stage
// (shader_stages_[0]), optionally seeded from `params.cache`.
void VulkanPipeline::create_compute_pipeline(const Params &params) {
  char msg_buf[512];
  RHI_DEBUG_SNPRINTF(msg_buf, sizeof(msg_buf), "Compiling Vulkan pipeline %s",
                     params.name.data());
  RHI_LOG_DEBUG(msg_buf);
  pipeline_ = vkapi::create_compute_pipeline(device_, 0, shader_stages_[0],
                                             pipeline_layout_, params.cache);
}
426
// Fill in the graphics pipeline template (vertex input, rasterization,
// blending, etc.). The actual VkPipeline is created later, per render pass.
// NOTE(review): `viewport`/`scissor` are stack locals whose addresses are
// stored in the template's viewport state. Viewport and scissor are set via
// dynamic state at draw time, so these dummy values should never be
// dereferenced after this function returns — confirm dynamic_state_enables
// always contains VK_DYNAMIC_STATE_VIEWPORT/SCISSOR.
void VulkanPipeline::create_graphics_pipeline(
    const RasterParams &raster_params,
    const std::vector<VertexInputBinding> &vertex_inputs,
    const std::vector<VertexInputAttribute> &vertex_attrs) {
  // Use dynamic viewport state. These two are just dummies
  VkViewport viewport{};
  viewport.width = 1;
  viewport.height = 1;
  viewport.x = 0;
  viewport.y = 0;
  viewport.minDepth = 0.0;
  viewport.maxDepth = 1.0;

  VkRect2D scissor{/*offset*/ {0, 0}, /*extent*/ {1, 1}};

  VkPipelineViewportStateCreateInfo &viewport_state =
      graphics_pipeline_template_->viewport_state;
  viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
  viewport_state.viewportCount = 1;
  viewport_state.pViewports = &viewport;
  viewport_state.scissorCount = 1;
  viewport_state.pScissors = &scissor;

  // Translate RHI vertex bindings into Vulkan binding descriptions.
  for (const VertexInputBinding &binding : vertex_inputs) {
    VkVertexInputBindingDescription &desc =
        graphics_pipeline_template_->input_bindings.emplace_back();
    desc.binding = binding.binding;
    desc.stride = binding.stride;
    desc.inputRate = binding.instance ? VK_VERTEX_INPUT_RATE_INSTANCE
                                      : VK_VERTEX_INPUT_RATE_VERTEX;
  }

  // Translate RHI vertex attributes into Vulkan attribute descriptions.
  for (const VertexInputAttribute &attr : vertex_attrs) {
    VkVertexInputAttributeDescription &desc =
        graphics_pipeline_template_->input_attrs.emplace_back();
    desc.binding = attr.binding;
    desc.location = attr.location;
    auto [result, vk_format] = buffer_format_ti_to_vk(attr.format);
    RHI_ASSERT(result == RhiResult::success);
    desc.format = vk_format;
    assert(desc.format != VK_FORMAT_UNDEFINED);
    desc.offset = attr.offset;
  }

  VkPipelineVertexInputStateCreateInfo &vertex_input =
      graphics_pipeline_template_->input;
  vertex_input.sType =
      VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
  vertex_input.pNext = nullptr;
  vertex_input.flags = 0;
  vertex_input.vertexBindingDescriptionCount =
      graphics_pipeline_template_->input_bindings.size();
  vertex_input.pVertexBindingDescriptions =
      graphics_pipeline_template_->input_bindings.data();
  vertex_input.vertexAttributeDescriptionCount =
      graphics_pipeline_template_->input_attrs.size();
  vertex_input.pVertexAttributeDescriptions =
      graphics_pipeline_template_->input_attrs.data();

  VkPipelineInputAssemblyStateCreateInfo &input_assembly =
      graphics_pipeline_template_->input_assembly;
  input_assembly.sType =
      VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
  static const std::unordered_map<TopologyType, VkPrimitiveTopology>
      topo_types = {
          {TopologyType::Triangles, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST},
          {TopologyType::Lines, VK_PRIMITIVE_TOPOLOGY_LINE_LIST},
          {TopologyType::Points, VK_PRIMITIVE_TOPOLOGY_POINT_LIST},
      };
  input_assembly.topology = topo_types.at(raster_params.prim_topology);
  input_assembly.primitiveRestartEnable = VK_FALSE;

  static const std::unordered_map<PolygonMode, VkPolygonMode> polygon_modes = {
      {PolygonMode::Fill, VK_POLYGON_MODE_FILL},
      {PolygonMode::Line, VK_POLYGON_MODE_LINE},
      {PolygonMode::Point, VK_POLYGON_MODE_POINT},
  };

  VkPipelineRasterizationStateCreateInfo &rasterizer =
      graphics_pipeline_template_->rasterizer;
  rasterizer.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
  rasterizer.depthClampEnable = VK_FALSE;
  rasterizer.rasterizerDiscardEnable = VK_FALSE;
  rasterizer.polygonMode = polygon_modes.at(raster_params.polygon_mode);
  rasterizer.lineWidth = 1.0f;
  // Cull mode is a bitmask; front/back culling can be enabled independently.
  rasterizer.cullMode = 0;
  if (raster_params.front_face_cull) {
    rasterizer.cullMode |= VK_CULL_MODE_FRONT_BIT;
  }
  if (raster_params.back_face_cull) {
    rasterizer.cullMode |= VK_CULL_MODE_BACK_BIT;
  }
  rasterizer.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
  rasterizer.depthBiasEnable = VK_FALSE;

  // Multisampling is not used (1 sample).
  VkPipelineMultisampleStateCreateInfo &multisampling =
      graphics_pipeline_template_->multisampling;
  multisampling.sType =
      VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
  multisampling.sampleShadingEnable = VK_FALSE;
  multisampling.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;

  // Reverse-Z style depth test (GREATER_OR_EQUAL); stencil unused.
  VkPipelineDepthStencilStateCreateInfo &depth_stencil =
      graphics_pipeline_template_->depth_stencil;
  depth_stencil.sType =
      VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
  depth_stencil.depthTestEnable = raster_params.depth_test;
  depth_stencil.depthWriteEnable = raster_params.depth_write;
  depth_stencil.depthCompareOp = VK_COMPARE_OP_GREATER_OR_EQUAL;
  depth_stencil.depthBoundsTestEnable = VK_FALSE;
  depth_stencil.stencilTestEnable = VK_FALSE;

  VkPipelineColorBlendStateCreateInfo &color_blending =
      graphics_pipeline_template_->color_blending;
  color_blending.sType =
      VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
  color_blending.logicOpEnable = VK_FALSE;
  color_blending.logicOp = VK_LOGIC_OP_COPY;
  // blend_attachments was sized from the fragment shader's render target
  // count in create_descriptor_set_layout.
  color_blending.attachmentCount =
      graphics_pipeline_template_->blend_attachments.size();
  color_blending.pAttachments =
      graphics_pipeline_template_->blend_attachments.data();
  color_blending.blendConstants[0] = 0.0f;
  color_blending.blendConstants[1] = 0.0f;
  color_blending.blendConstants[2] = 0.0f;
  color_blending.blendConstants[3] = 0.0f;

  // Optional per-attachment blending overrides. If provided, the list must
  // match the number of fragment shader outputs exactly.
  if (raster_params.blending.size()) {
    if (raster_params.blending.size() != color_blending.attachmentCount) {
      std::array<char, 256> buf;
      snprintf(buf.data(), buf.size(),
               "RasterParams::blending (size=%u) must either be zero sized "
               "or match the number of fragment shader outputs (size=%u).",
               uint32_t(raster_params.blending.size()),
               uint32_t(color_blending.attachmentCount));
      RHI_LOG_ERROR(buf.data());
      RHI_ASSERT(false);
    }

    for (int i = 0; i < raster_params.blending.size(); i++) {
      auto &state = graphics_pipeline_template_->blend_attachments[i];
      auto &ti_param = raster_params.blending[i];
      state.blendEnable = ti_param.enable;
      if (ti_param.enable) {
        {
          auto [res, op] = blend_op_ti_to_vk(ti_param.color.op);
          RHI_ASSERT(res == RhiResult::success);
          state.colorBlendOp = op;
        }
        {
          auto [res, factor] = blend_factor_ti_to_vk(ti_param.color.src_factor);
          RHI_ASSERT(res == RhiResult::success);
          state.srcColorBlendFactor = factor;
        }
        {
          auto [res, factor] = blend_factor_ti_to_vk(ti_param.color.dst_factor);
          RHI_ASSERT(res == RhiResult::success);
          state.dstColorBlendFactor = factor;
        }
        {
          auto [res, op] = blend_op_ti_to_vk(ti_param.alpha.op);
          RHI_ASSERT(res == RhiResult::success);
          state.alphaBlendOp = op;
        }
        {
          auto [res, factor] = blend_factor_ti_to_vk(ti_param.alpha.src_factor);
          RHI_ASSERT(res == RhiResult::success);
          state.srcAlphaBlendFactor = factor;
        }
        {
          auto [res, factor] = blend_factor_ti_to_vk(ti_param.alpha.dst_factor);
          RHI_ASSERT(res == RhiResult::success);
          state.dstAlphaBlendFactor = factor;
        }
        state.colorWriteMask =
            VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
            VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
      }
    }
  }

  VkPipelineDynamicStateCreateInfo &dynamic_state =
      graphics_pipeline_template_->dynamic_state;
  dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
  dynamic_state.pNext = nullptr;
  dynamic_state.pDynamicStates =
      graphics_pipeline_template_->dynamic_state_enables.data();
  dynamic_state.dynamicStateCount =
      graphics_pipeline_template_->dynamic_state_enables.size();

  VkGraphicsPipelineCreateInfo &pipeline_info =
      graphics_pipeline_template_->pipeline_info;
  pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
  pipeline_info.stageCount = shader_stages_.size();
  pipeline_info.pStages = shader_stages_.data();
  pipeline_info.pVertexInputState = &vertex_input;
  pipeline_info.pInputAssemblyState = &input_assembly;
  pipeline_info.pViewportState = &viewport_state;
  pipeline_info.pRasterizationState = &rasterizer;
  pipeline_info.pMultisampleState = &multisampling;
  pipeline_info.pDepthStencilState = &depth_stencil;
  pipeline_info.pColorBlendState = &color_blending;
  pipeline_info.pDynamicState = &dynamic_state;
  pipeline_info.renderPass = VK_NULL_HANDLE;  // Filled in later
  pipeline_info.subpass = 0;
  pipeline_info.basePipelineHandle = VK_NULL_HANDLE;
}
634
// Resource set bound to a specific VulkanDevice (non-owning pointer).
VulkanResourceSet::VulkanResourceSet(VulkanDevice *device) : device_(device) {
}
637
// All held resources (buffers, views, samplers, the descriptor set) are
// ref-counted vkapi handles; no manual cleanup needed.
VulkanResourceSet::~VulkanResourceSet() {
}
640
641ShaderResourceSet &VulkanResourceSet::rw_buffer(uint32_t binding,
642 DevicePtr ptr,
643 size_t size) {
644 dirty_ = true;
645
646 vkapi::IVkBuffer buffer =
647 (ptr != kDeviceNullPtr) ? device_->get_vkbuffer(ptr) : nullptr;
648 bindings_[binding] = {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
649 Buffer{buffer, ptr.offset, size}};
650 return *this;
651}
652
653ShaderResourceSet &VulkanResourceSet::rw_buffer(uint32_t binding,
654 DeviceAllocation alloc) {
655 return rw_buffer(binding, alloc.get_ptr(0), VK_WHOLE_SIZE);
656}
657
658ShaderResourceSet &VulkanResourceSet::buffer(uint32_t binding,
659 DevicePtr ptr,
660 size_t size) {
661 dirty_ = true;
662
663 vkapi::IVkBuffer buffer =
664 (ptr != kDeviceNullPtr) ? device_->get_vkbuffer(ptr) : nullptr;
665 bindings_[binding] = {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
666 Buffer{buffer, ptr.offset, size}};
667 return *this;
668}
669
670ShaderResourceSet &VulkanResourceSet::buffer(uint32_t binding,
671 DeviceAllocation alloc) {
672 return buffer(binding, alloc.get_ptr(0), VK_WHOLE_SIZE);
673}
674
// Bind a sampled image (combined image + sampler) at `binding`.
// NOTE(review): `sampler_config` is currently ignored — the sampler is
// always created with fixed linear filtering / repeat addressing. Confirm
// whether the config is meant to drive these parameters.
ShaderResourceSet &VulkanResourceSet::image(uint32_t binding,
                                            DeviceAllocation alloc,
                                            ImageSamplerConfig sampler_config) {
  dirty_ = true;

  vkapi::IVkSampler sampler = nullptr;
  vkapi::IVkImageView view = nullptr;

  // A null allocation installs an empty binding (used for layout templates).
  if (alloc != kDeviceNullAllocation) {
    VkSamplerCreateInfo sampler_info{};
    sampler_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
    sampler_info.magFilter = VK_FILTER_LINEAR;
    sampler_info.minFilter = VK_FILTER_LINEAR;
    sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT;
    sampler_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT;
    sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT;
    sampler_info.anisotropyEnable = VK_FALSE;
    sampler_info.borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK;
    sampler_info.unnormalizedCoordinates = VK_FALSE;
    sampler_info.compareEnable = VK_FALSE;
    sampler_info.compareOp = VK_COMPARE_OP_ALWAYS;
    sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;

    sampler = vkapi::create_sampler(device_->vk_device(), sampler_info);
    view = device_->get_vk_imageview(alloc);
  }

  bindings_[binding] = {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
                        Texture{view, sampler}};

  return *this;
}
707
708ShaderResourceSet &VulkanResourceSet::rw_image(uint32_t binding,
709 DeviceAllocation alloc,
710 int lod) {
711 dirty_ = true;
712
713 vkapi::IVkImageView view = (alloc != kDeviceNullAllocation)
714 ? device_->get_vk_lod_imageview(alloc, lod)
715 : nullptr;
716
717 bindings_[binding] = {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, Image{view}};
718
719 return *this;
720}
721
// Materialize the bindings into a VkDescriptorSet. The set is cached and
// only re-allocated/re-written when a binding changed (`dirty_`) or the
// layout changed. Returns invalid_usage for an empty set.
RhiReturn<vkapi::IVkDescriptorSet> VulkanResourceSet::finalize() {
  if (!dirty_ && set_) {
    // If nothing changed directly return the set
    return {RhiResult::success, set_};
  }

  if (bindings_.size() <= 0) {
    // A set can't be empty
    return {RhiResult::invalid_usage, nullptr};
  }

  vkapi::IVkDescriptorSetLayout new_layout =
      device_->get_desc_set_layout(*this);
  if (new_layout != layout_) {
    // Layout changed, reset `set`
    set_ = nullptr;
    layout_ = new_layout;
  }

  if (!set_) {
    // If set_ is null, create a new one
    auto [status, new_set] = device_->alloc_desc_set(layout_);
    if (status != RhiResult::success) {
      return {status, nullptr};
    }
    set_ = new_set;
  }

  // forward_list is used (instead of vector) so that the addresses of the
  // VkDescriptorBufferInfo/VkDescriptorImageInfo structs stay stable while
  // desc_writes accumulates pointers to them.
  std::forward_list<VkDescriptorBufferInfo> buffer_infos;
  std::forward_list<VkDescriptorImageInfo> image_infos;
  std::vector<VkWriteDescriptorSet> desc_writes;

  // Rebuild the list of objects the set keeps alive.
  set_->ref_binding_objs.clear();

  for (auto &pair : bindings_) {
    uint32_t binding = pair.first;
    VkDescriptorType type = pair.second.type;
    auto &resource = pair.second.res;

    VkWriteDescriptorSet &write = desc_writes.emplace_back();
    write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
    write.pNext = nullptr;
    write.dstSet = set_->set;
    write.dstBinding = binding;
    write.dstArrayElement = 0;
    write.descriptorCount = 1;
    write.descriptorType = type;
    write.pImageInfo = nullptr;
    write.pBufferInfo = nullptr;
    write.pTexelBufferView = nullptr;

    // Dispatch on the variant alternative held by the binding.
    if (Buffer *buf = std::get_if<Buffer>(&resource)) {
      VkDescriptorBufferInfo &buffer_info = buffer_infos.emplace_front();
      buffer_info.buffer = buf->buffer ? buf->buffer->buffer : VK_NULL_HANDLE;
      buffer_info.offset = buf->offset;
      buffer_info.range = buf->size;

      write.pBufferInfo = &buffer_info;
      if (buf->buffer) {
        set_->ref_binding_objs.push_back(buf->buffer);
      }
    } else if (Image *img = std::get_if<Image>(&resource)) {
      VkDescriptorImageInfo &image_info = image_infos.emplace_front();
      image_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
      image_info.imageView = img->view ? img->view->view : VK_NULL_HANDLE;
      image_info.sampler = VK_NULL_HANDLE;

      write.pImageInfo = &image_info;
      if (img->view) {
        set_->ref_binding_objs.push_back(img->view);
      }
    } else if (Texture *tex = std::get_if<Texture>(&resource)) {
      VkDescriptorImageInfo &image_info = image_infos.emplace_front();
      image_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
      image_info.imageView = tex->view ? tex->view->view : VK_NULL_HANDLE;
      image_info.sampler =
          tex->sampler ? tex->sampler->sampler : VK_NULL_HANDLE;

      write.pImageInfo = &image_info;
      if (tex->view) {
        set_->ref_binding_objs.push_back(tex->view);
      }
      if (tex->sampler) {
        set_->ref_binding_objs.push_back(tex->sampler);
      }
    } else {
      RHI_LOG_ERROR("Ignoring unsupported Descriptor Type");
    }
  }

  vkUpdateDescriptorSets(device_->vk_device(), desc_writes.size(),
                         desc_writes.data(), /*descriptorCopyCount=*/0,
                         /*pDescriptorCopies=*/nullptr);

  dirty_ = false;

  return {RhiResult::success, set_};
}
820
821RasterResources &VulkanRasterResources::vertex_buffer(DevicePtr ptr,
822 uint32_t binding) {
823 vkapi::IVkBuffer buffer =
824 (ptr != kDeviceNullPtr) ? device_->get_vkbuffer(ptr) : nullptr;
825 if (buffer == nullptr) {
826 vertex_buffers.erase(binding);
827 } else {
828 vertex_buffers[binding] = {buffer, ptr.offset};
829 }
830 return *this;
831}
832
833RasterResources &VulkanRasterResources::index_buffer(DevicePtr ptr,
834 size_t index_width) {
835 vkapi::IVkBuffer buffer =
836 (ptr != kDeviceNullPtr) ? device_->get_vkbuffer(ptr) : nullptr;
837 if (buffer == nullptr) {
838 index_binding = BufferBinding();
839 index_type = VK_INDEX_TYPE_MAX_ENUM;
840 } else {
841 index_binding = {buffer, ptr.offset};
842 if (index_width == 32) {
843 index_type = VK_INDEX_TYPE_UINT32;
844 } else if (index_width == 16) {
845 index_type = VK_INDEX_TYPE_UINT16;
846 }
847 }
848 return *this;
849}
850
// Wrap a vkapi command buffer and immediately begin recording into it.
// SIMULTANEOUS_USE allows the buffer to be (re)submitted while pending.
VulkanCommandList::VulkanCommandList(VulkanDevice *ti_device,
                                     VulkanStream *stream,
                                     vkapi::IVkCommandBuffer buffer)
    : ti_device_(ti_device),
      stream_(stream),
      device_(ti_device->vk_device()),
      buffer_(buffer) {
  VkCommandBufferBeginInfo info{};
  info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
  info.pNext = nullptr;
  info.pInheritanceInfo = nullptr;
  info.flags = VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;

  vkBeginCommandBuffer(buffer->buffer, &info);
}
866
// No explicit teardown: the command buffer and all referenced objects are
// kept alive via ref-counted vkapi handles held in `buffer_->refs`.
VulkanCommandList::~VulkanCommandList() {
}
869
// Binds a compute or graphics pipeline. For graphics pipelines this also
// (re)establishes the dynamic state (viewport/scissor/line width) based on
// the current renderpass dimensions, so it must be called after
// begin_renderpass(). Re-binding the already-bound pipeline is a no-op.
void VulkanCommandList::bind_pipeline(Pipeline *p) noexcept {
  auto pipeline = static_cast<VulkanPipeline *>(p);

  if (current_pipeline_ == pipeline)
    return;

  if (pipeline->is_graphics()) {
    // Graphics pipelines are specialized against the renderpass; pick the
    // dynamic-rendering variant when the extension is available.
    vkapi::IVkPipeline vk_pipeline =
        ti_device_->vk_caps().dynamic_rendering
            ? pipeline->graphics_pipeline_dynamic(current_renderpass_desc_)
            : pipeline->graphics_pipeline(current_renderpass_desc_,
                                          current_renderpass_);
    vkCmdBindPipeline(buffer_->buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
                      vk_pipeline->pipeline);

    // Full-render-target viewport & scissor; dimensions were recorded by
    // begin_renderpass().
    VkViewport viewport{};
    viewport.width = viewport_width_;
    viewport.height = viewport_height_;
    viewport.x = 0;
    viewport.y = 0;
    viewport.minDepth = 0.0;
    viewport.maxDepth = 1.0;

    VkRect2D scissor{/*offset*/ {0, 0},
                     /*extent*/ {viewport_width_, viewport_height_}};

    vkCmdSetViewport(buffer_->buffer, 0, 1, &viewport);
    vkCmdSetScissor(buffer_->buffer, 0, 1, &scissor);
    vkCmdSetLineWidth(buffer_->buffer, 1.0f);
    // Keep the pipeline alive until the command buffer finishes execution.
    buffer_->refs.push_back(vk_pipeline);
  } else {
    auto vk_pipeline = pipeline->pipeline();
    vkCmdBindPipeline(buffer_->buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                      vk_pipeline->pipeline);
    buffer_->refs.push_back(vk_pipeline);
  }

  current_pipeline_ = pipeline;
}
909
// Finalizes the resource set into a VkDescriptorSet and binds it at
// `set_index` on the currently bound pipeline. Requires bind_pipeline() to
// have been called first. Returns `invalid_usage` when the set layout does
// not match the pipeline's expected layout (the mismatch is dumped to the
// error log for debugging).
RhiResult VulkanCommandList::bind_shader_resources(ShaderResourceSet *res,
                                                   int set_index) noexcept {
  VulkanResourceSet *set = static_cast<VulkanResourceSet *>(res);
  if (set->get_bindings().size() <= 0) {
    // Nothing to bind; treat an empty set as success.
    return RhiResult::success;
  }

  // Builds/updates the underlying VkDescriptorSet (may allocate).
  auto [status, vk_set] = set->finalize();
  if (status != RhiResult::success) {
    return status;
  }

  vkapi::IVkDescriptorSetLayout set_layout = set->get_layout();

  if (current_pipeline_->pipeline_layout()->ref_desc_layouts.empty() ||
      current_pipeline_->pipeline_layout()->ref_desc_layouts[set_index] !=
          set_layout) {
    // WARN: we have a layout mismatch
    RHI_LOG_ERROR("Layout mismatch");

    // Dump both the pipeline's expected bindings and the provided ones so
    // the discrepancy can be diagnosed from the log.
    auto &templates = current_pipeline_->get_resource_set_templates();
    VulkanResourceSet &set_template = templates.at(set_index);

    for (const auto &template_binding : set_template.get_bindings()) {
      char msg[512];
      snprintf(msg, 512, "Template binding %d: (VkDescriptorType) %d",
               template_binding.first, template_binding.second.type);
      RHI_LOG_ERROR(msg);
    }

    for (const auto &binding : set->get_bindings()) {
      char msg[512];
      snprintf(msg, 512, "Binding %d: (VkDescriptorType) %d", binding.first,
               binding.second.type);
      RHI_LOG_ERROR(msg);
    }

    return RhiResult::invalid_usage;
  }

  VkPipelineLayout pipeline_layout =
      current_pipeline_->pipeline_layout()->layout;
  VkPipelineBindPoint bind_point = current_pipeline_->is_graphics()
                                       ? VK_PIPELINE_BIND_POINT_GRAPHICS
                                       : VK_PIPELINE_BIND_POINT_COMPUTE;

  vkCmdBindDescriptorSets(buffer_->buffer, bind_point, pipeline_layout,
                          /*firstSet=*/set_index,
                          /*descriptorSetCount=*/1, &vk_set->set,
                          /*dynamicOffsetCount=*/0,
                          /*pDynamicOffsets=*/nullptr);
  // Keep the descriptor set alive for the lifetime of this command buffer.
  buffer_->refs.push_back(vk_set);

  return RhiResult::success;
}
965
// Binds the index buffer (if any) and all vertex buffers for rasterization.
// Only valid when a graphics pipeline is currently bound; returns
// `not_supported` if the index type was never set to a supported width.
RhiResult VulkanCommandList::bind_raster_resources(
    RasterResources *_res) noexcept {
  VulkanRasterResources *res = static_cast<VulkanRasterResources *>(_res);

  if (!current_pipeline_->is_graphics()) {
    return RhiResult::invalid_usage;
  }

  if (res->index_binding.buffer != nullptr) {
    // We have a valid index buffer
    // VK_INDEX_TYPE_MAX_ENUM is the sentinel for "no/unsupported index type".
    if (res->index_type >= VK_INDEX_TYPE_MAX_ENUM) {
      return RhiResult::not_supported;
    }

    vkapi::IVkBuffer index_buffer = res->index_binding.buffer;
    vkCmdBindIndexBuffer(buffer_->buffer, index_buffer->buffer,
                         res->index_binding.offset, res->index_type);
    buffer_->refs.push_back(index_buffer);
  }

  // Bind each vertex buffer at its recorded binding slot.
  for (auto &[binding, buffer] : res->vertex_buffers) {
    VkDeviceSize offset_vk = buffer.offset;
    vkCmdBindVertexBuffers(buffer_->buffer, binding, 1, &buffer.buffer->buffer,
                           &offset_vk);
    buffer_->refs.push_back(buffer.buffer);
  }

  return RhiResult::success;
}
995
// Inserts a full read/write memory barrier over a buffer range, ordering
// prior transfer/compute accesses against subsequent ones. Out-of-range
// requests are clamped; an offset past the end is a silent no-op.
void VulkanCommandList::buffer_barrier(DevicePtr ptr, size_t size) noexcept {
  auto buffer = ti_device_->get_vkbuffer(ptr);
  size_t buffer_size = ti_device_->get_vkbuffer_size(ptr);

  // Clamp to buffer size
  if (ptr.offset > buffer_size) {
    return;
  }

  // If the range overflows the buffer, barrier the whole remainder.
  if (saturate_uadd<size_t>(ptr.offset, size) > buffer_size) {
    size = VK_WHOLE_SIZE;
  }

  VkBufferMemoryBarrier barrier{};
  barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
  barrier.pNext = nullptr;
  barrier.buffer = buffer->buffer;
  barrier.offset = ptr.offset;
  barrier.size = size;
  barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  // Conservative full src/dst access masks: all transfer & shader R/W.
  barrier.srcAccessMask =
      (VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT |
       VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
  barrier.dstAccessMask =
      (VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT |
       VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);

  vkCmdPipelineBarrier(
      buffer_->buffer,
      /*srcStageMask=*/
      VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
      /*dstStageMask=*/VK_PIPELINE_STAGE_TRANSFER_BIT |
          VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
      /*dependencyFlags=*/0, /*memoryBarrierCount=*/0, nullptr,
      /*bufferMemoryBarrierCount=*/1,
      /*pBufferMemoryBarriers=*/&barrier,
      /*imageMemoryBarrierCount=*/0,
      /*pImageMemoryBarriers=*/nullptr);
  buffer_->refs.push_back(buffer);
}
1037
// Whole-allocation barrier: the max size is clamped to VK_WHOLE_SIZE by the
// DevicePtr overload above.
void VulkanCommandList::buffer_barrier(DeviceAllocation alloc) noexcept {
  buffer_barrier(DevicePtr{alloc, 0}, std::numeric_limits<size_t>::max());
}
1041
// Inserts a global memory barrier ordering all prior transfer/compute
// reads & writes against all subsequent ones.
void VulkanCommandList::memory_barrier() noexcept {
  VkMemoryBarrier barrier{};
  barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
  barrier.pNext = nullptr;
  // Conservative full src/dst access masks: all transfer & shader R/W.
  barrier.srcAccessMask =
      (VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT |
       VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
  barrier.dstAccessMask =
      (VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT |
       VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);

  vkCmdPipelineBarrier(
      buffer_->buffer,
      /*srcStageMask=*/
      VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
      /*dstStageMask=*/VK_PIPELINE_STAGE_TRANSFER_BIT |
          VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
      /*dependencyFlags=*/0, /*memoryBarrierCount=*/1, &barrier,
      /*bufferMemoryBarrierCount=*/0,
      /*pBufferMemoryBarriers=*/nullptr,
      /*imageMemoryBarrierCount=*/0,
      /*pImageMemoryBarriers=*/nullptr);
}
1065
// Records a buffer-to-buffer copy. The requested size is clamped so that
// neither the source nor the destination range runs past its buffer; a
// fully clamped (zero-size) copy is a silent no-op.
void VulkanCommandList::buffer_copy(DevicePtr dst,
                                    DevicePtr src,
                                    size_t size) noexcept {
  size_t src_size = ti_device_->get_vkbuffer_size(src);
  size_t dst_size = ti_device_->get_vkbuffer_size(dst);

  // Clamp to minimum available size
  if (saturate_uadd<size_t>(src.offset, size) > src_size) {
    size = saturate_usub<size_t>(src_size, src.offset);
  }
  if (saturate_uadd<size_t>(dst.offset, size) > dst_size) {
    size = saturate_usub<size_t>(dst_size, dst.offset);
  }

  if (size == 0) {
    return;
  }

  VkBufferCopy copy_region{};
  copy_region.srcOffset = src.offset;
  copy_region.dstOffset = dst.offset;
  copy_region.size = size;

  auto src_buffer = ti_device_->get_vkbuffer(src);
  auto dst_buffer = ti_device_->get_vkbuffer(dst);
  vkCmdCopyBuffer(buffer_->buffer, src_buffer->buffer, dst_buffer->buffer,
                  /*regionCount=*/1, &copy_region);
  // Keep both buffers alive until the command buffer completes.
  buffer_->refs.push_back(src_buffer);
  buffer_->refs.push_back(dst_buffer);
}
1096
1097void VulkanCommandList::buffer_fill(DevicePtr ptr,
1098 size_t size,
1099 uint32_t data) noexcept {
1100 // Align to 4 bytes
1101 ptr.offset = ptr.offset & size_t(-4);
1102
1103 auto buffer = ti_device_->get_vkbuffer(ptr);
1104 size_t buffer_size = ti_device_->get_vkbuffer_size(ptr);
1105
1106 // Check for overflow
1107 if (ptr.offset > buffer_size) {
1108 return;
1109 }
1110
1111 if (saturate_uadd<size_t>(ptr.offset, size) > buffer_size) {
1112 size = VK_WHOLE_SIZE;
1113 }
1114
1115 vkCmdFillBuffer(buffer_->buffer, buffer->buffer, ptr.offset, size, data);
1116 buffer_->refs.push_back(buffer);
1117}
1118
1119RhiResult VulkanCommandList::dispatch(uint32_t x,
1120 uint32_t y,
1121 uint32_t z) noexcept {
1122 auto &dev_props = ti_device_->get_vk_physical_device_props();
1123 if (x > dev_props.limits.maxComputeWorkGroupCount[0] ||
1124 y > dev_props.limits.maxComputeWorkGroupCount[1] ||
1125 z > dev_props.limits.maxComputeWorkGroupCount[2]) {
1126 return RhiResult::not_supported;
1127 }
1128 vkCmdDispatch(buffer_->buffer, x, y, z);
1129 return RhiResult::success;
1130}
1131
// Accessor for the underlying ref-counted Vulkan command buffer handle.
vkapi::IVkCommandBuffer VulkanCommandList::vk_command_buffer() {
  return buffer_;
}
1135
// Begins a renderpass over the rectangle (x0,y0)-(x1,y1) with the given
// color attachments (and optional depth attachment). There are two
// codepaths: VK_KHR_dynamic_rendering when supported, otherwise classic
// VkRenderPass + VkFramebuffer objects fetched from the device caches.
// Also records the renderpass description and viewport size used later by
// bind_pipeline().
//
// @param color_clear per-attachment flag: clear on load vs. preserve.
// @param clear_colors per-attachment RGBA clear values (used when clearing).
// @param depth_clear whether the depth attachment is cleared on load.
void VulkanCommandList::begin_renderpass(int x0,
                                         int y0,
                                         int x1,
                                         int y1,
                                         uint32_t num_color_attachments,
                                         DeviceAllocation *color_attachments,
                                         bool *color_clear,
                                         std::vector<float> *clear_colors,
                                         DeviceAllocation *depth_attachment,
                                         bool depth_clear) {
  VulkanRenderPassDesc &rp_desc = current_renderpass_desc_;
  current_renderpass_desc_.color_attachments.clear();
  rp_desc.clear_depth = depth_clear;

  VkRect2D render_area{/*offset*/ {x0, y0},
                       /*extent*/ {uint32_t(x1 - x0), uint32_t(y1 - y0)}};

  // Remember dimensions so bind_pipeline() can set viewport/scissor.
  viewport_width_ = render_area.extent.width;
  viewport_height_ = render_area.extent.height;

  // Dynamic rendering codepath
  if (ti_device_->vk_caps().dynamic_rendering) {
    current_dynamic_targets_.clear();

    std::vector<VkRenderingAttachmentInfoKHR> color_attachment_infos(
        num_color_attachments);
    for (uint32_t i = 0; i < num_color_attachments; i++) {
      auto [image, view, format] =
          ti_device_->get_vk_image(color_attachments[i]);
      bool clear = color_clear[i];
      rp_desc.color_attachments.emplace_back(format, clear);

      VkRenderingAttachmentInfoKHR &attachment_info = color_attachment_infos[i];
      attachment_info.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR;
      attachment_info.pNext = nullptr;
      attachment_info.imageView = view->view;
      attachment_info.imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
      attachment_info.resolveMode = VK_RESOLVE_MODE_NONE;
      attachment_info.resolveImageView = VK_NULL_HANDLE;
      attachment_info.resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
      attachment_info.loadOp =
          clear ? VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_LOAD;
      attachment_info.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
      if (clear) {
        attachment_info.clearValue.color = {
            {clear_colors[i][0], clear_colors[i][1], clear_colors[i][2],
             clear_colors[i][3]}};
      }

      // Track render targets so end_renderpass() can transition them.
      current_dynamic_targets_.push_back(image);
    }

    VkRenderingInfoKHR render_info{};
    render_info.sType = VK_STRUCTURE_TYPE_RENDERING_INFO_KHR;
    render_info.pNext = nullptr;
    render_info.flags = 0;
    render_info.renderArea = render_area;
    render_info.layerCount = 1;
    render_info.viewMask = 0;
    render_info.colorAttachmentCount = num_color_attachments;
    render_info.pColorAttachments = color_attachment_infos.data();
    render_info.pDepthAttachment = nullptr;
    render_info.pStencilAttachment = nullptr;

    // Must outlive the vkCmdBeginRenderingKHR call below.
    VkRenderingAttachmentInfo depth_attachment_info{};
    if (depth_attachment) {
      auto [image, view, format] = ti_device_->get_vk_image(*depth_attachment);
      rp_desc.depth_attachment = format;

      depth_attachment_info.sType =
          VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR;
      depth_attachment_info.pNext = nullptr;
      depth_attachment_info.imageView = view->view;
      depth_attachment_info.imageLayout =
          VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL;
      depth_attachment_info.resolveMode = VK_RESOLVE_MODE_NONE;
      depth_attachment_info.resolveImageView = VK_NULL_HANDLE;
      depth_attachment_info.resolveImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
      depth_attachment_info.loadOp = depth_clear ? VK_ATTACHMENT_LOAD_OP_CLEAR
                                                 : VK_ATTACHMENT_LOAD_OP_LOAD;
      depth_attachment_info.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
      // Depth clears to 0.0 (reverse-Z style clear value).
      depth_attachment_info.clearValue.depthStencil = {0.0, 0};

      render_info.pDepthAttachment = &depth_attachment_info;

      current_dynamic_targets_.push_back(image);
    } else {
      rp_desc.depth_attachment = VK_FORMAT_UNDEFINED;
    }

    vkCmdBeginRenderingKHR(buffer_->buffer, &render_info);

    return;
  }

  // VkRenderpass & VkFramebuffer codepath
  bool has_depth = false;

  if (depth_attachment) {
    auto [image, view, format] = ti_device_->get_vk_image(*depth_attachment);
    rp_desc.depth_attachment = format;
    has_depth = true;
  } else {
    rp_desc.depth_attachment = VK_FORMAT_UNDEFINED;
  }

  // One clear value per attachment; depth (if present) goes last.
  std::vector<VkClearValue> clear_values(num_color_attachments +
                                         (has_depth ? 1 : 0));

  VulkanFramebufferDesc fb_desc;

  for (uint32_t i = 0; i < num_color_attachments; i++) {
    auto [image, view, format] = ti_device_->get_vk_image(color_attachments[i]);
    rp_desc.color_attachments.emplace_back(format, color_clear[i]);
    fb_desc.attachments.push_back(view);
    clear_values[i].color =
        VkClearColorValue{{clear_colors[i][0], clear_colors[i][1],
                           clear_colors[i][2], clear_colors[i][3]}};
  }

  if (has_depth) {
    auto [depth_image, depth_view, depth_format] =
        ti_device_->get_vk_image(*depth_attachment);
    clear_values[num_color_attachments].depthStencil =
        VkClearDepthStencilValue{0.0, 0};
    fb_desc.attachments.push_back(depth_view);
  }

  // Renderpass & framebuffer objects are cached/deduplicated by the device.
  current_renderpass_ = ti_device_->get_renderpass(rp_desc);

  fb_desc.width = x1 - x0;
  fb_desc.height = y1 - y0;
  fb_desc.renderpass = current_renderpass_;

  current_framebuffer_ = ti_device_->get_framebuffer(fb_desc);

  VkRenderPassBeginInfo begin_info{};
  begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
  begin_info.pNext = nullptr;
  begin_info.renderPass = current_renderpass_->renderpass;
  begin_info.framebuffer = current_framebuffer_->framebuffer;
  begin_info.renderArea = render_area;
  begin_info.clearValueCount = clear_values.size();
  begin_info.pClearValues = clear_values.data();

  vkCmdBeginRenderPass(buffer_->buffer, &begin_info,
                       VK_SUBPASS_CONTENTS_INLINE);
  buffer_->refs.push_back(current_renderpass_);
  buffer_->refs.push_back(current_framebuffer_);
}
1286
// Ends the renderpass started by begin_renderpass(), handling both the
// dynamic-rendering and the classic renderpass codepaths.
void VulkanCommandList::end_renderpass() {
  if (ti_device_->vk_caps().dynamic_rendering) {
    vkCmdEndRenderingKHR(buffer_->buffer);

    // NOTE(review): intentionally disabled (`if (0)`) layout-transition code
    // that would move render targets to PRESENT_SRC after rendering; kept
    // for reference pending the FIXME below.
    if (0) {
      std::vector<VkImageMemoryBarrier> memory_barriers(
          current_dynamic_targets_.size());
      for (int i = 0; i < current_dynamic_targets_.size(); i++) {
        VkImageMemoryBarrier &barrier = memory_barriers[i];
        barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
        barrier.pNext = nullptr;
        barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
        barrier.oldLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
        // FIXME: Change this spec to stay in color attachment
        barrier.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.image = current_dynamic_targets_[i]->image;
        barrier.subresourceRange.aspectMask =
            VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT;
        barrier.subresourceRange.baseMipLevel = 0;
        barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS;
        barrier.subresourceRange.baseArrayLayer = 0;
        barrier.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS;
      }

      vkCmdPipelineBarrier(buffer_->buffer,
                           VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
                           VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                           /*dependencyFlags=*/0, /*memoryBarrierCount=*/0,
                           /*pMemoryBarriers=*/nullptr,
                           /*bufferMemoryBarrierCount=*/0,
                           /*pBufferMemoryBarriers=*/nullptr,
                           /*imageMemoryBarrierCount=*/memory_barriers.size(),
                           /*pImageMemoryBarriers=*/memory_barriers.data());
    }
    current_dynamic_targets_.clear();

    return;
  }

  vkCmdEndRenderPass(buffer_->buffer);

  // Invalidate cached renderpass state until the next begin_renderpass().
  current_renderpass_ = VK_NULL_HANDLE;
  current_framebuffer_ = VK_NULL_HANDLE;
}
1334
// Non-indexed, single-instance draw.
void VulkanCommandList::draw(uint32_t num_verticies, uint32_t start_vertex) {
  vkCmdDraw(buffer_->buffer, num_verticies, /*instanceCount=*/1, start_vertex,
            /*firstInstance=*/0);
}
1339
// Non-indexed, instanced draw.
void VulkanCommandList::draw_instance(uint32_t num_verticies,
                                      uint32_t num_instances,
                                      uint32_t start_vertex,
                                      uint32_t start_instance) {
  vkCmdDraw(buffer_->buffer, num_verticies, num_instances, start_vertex,
            start_instance);
}
1347
// Indexed, single-instance draw. `start_vertex` maps to Vulkan's
// vertexOffset (added to each fetched index).
void VulkanCommandList::draw_indexed(uint32_t num_indicies,
                                     uint32_t start_vertex,
                                     uint32_t start_index) {
  vkCmdDrawIndexed(buffer_->buffer, num_indicies, /*instanceCount=*/1,
                   start_index, start_vertex,
                   /*firstInstance=*/0);
}
1355
// Indexed, instanced draw. `start_vertex` maps to Vulkan's vertexOffset.
void VulkanCommandList::draw_indexed_instance(uint32_t num_indicies,
                                              uint32_t num_instances,
                                              uint32_t start_vertex,
                                              uint32_t start_index,
                                              uint32_t start_instance) {
  vkCmdDrawIndexed(buffer_->buffer, num_indicies, num_instances, start_index,
                   start_vertex, start_instance);
}
1364
1365void VulkanCommandList::image_transition(DeviceAllocation img,
1366 ImageLayout old_layout_,
1367 ImageLayout new_layout_) {
1368 auto [image, view, format] = ti_device_->get_vk_image(img);
1369
1370 VkImageLayout old_layout = image_layout_ti_to_vk(old_layout_);
1371 VkImageLayout new_layout = image_layout_ti_to_vk(new_layout_);
1372
1373 VkImageMemoryBarrier barrier{};
1374 barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
1375 barrier.oldLayout = old_layout;
1376 barrier.newLayout = new_layout;
1377 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1378 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1379 barrier.image = image->image;
1380 barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
1381 barrier.subresourceRange.baseMipLevel = 0;
1382 barrier.subresourceRange.levelCount = 1;
1383 barrier.subresourceRange.baseArrayLayer = 0;
1384 barrier.subresourceRange.layerCount = 1;
1385
1386 VkPipelineStageFlags source_stage;
1387 VkPipelineStageFlags destination_stage;
1388
1389 static std::unordered_map<VkImageLayout, VkPipelineStageFlagBits> stages;
1390 stages[VK_IMAGE_LAYOUT_UNDEFINED] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1391 stages[VK_IMAGE_LAYOUT_GENERAL] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1392 stages[VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL] = VK_PIPELINE_STAGE_TRANSFER_BIT;
1393 stages[VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL] = VK_PIPELINE_STAGE_TRANSFER_BIT;
1394 stages[VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL] =
1395 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1396 stages[VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL] =
1397 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
1398 stages[VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL] =
1399 VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
1400 stages[VK_IMAGE_LAYOUT_PRESENT_SRC_KHR] = VK_PIPELINE_STAGE_TRANSFER_BIT;
1401
1402 static std::unordered_map<VkImageLayout, VkAccessFlagBits> access;
1403 access[VK_IMAGE_LAYOUT_UNDEFINED] = (VkAccessFlagBits)0;
1404 access[VK_IMAGE_LAYOUT_GENERAL] =
1405 VkAccessFlagBits(VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT);
1406 access[VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL] = VK_ACCESS_TRANSFER_WRITE_BIT;
1407 access[VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL] = VK_ACCESS_TRANSFER_READ_BIT;
1408 access[VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL] = VK_ACCESS_MEMORY_READ_BIT;
1409 access[VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL] =
1410 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
1411 access[VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL] =
1412 VkAccessFlagBits(VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
1413 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
1414 access[VK_IMAGE_LAYOUT_PRESENT_SRC_KHR] = VK_ACCESS_MEMORY_READ_BIT;
1415
1416 if (stages.find(old_layout) == stages.end() ||
1417 stages.find(new_layout) == stages.end()) {
1418 throw std::invalid_argument("unsupported layout transition!");
1419 }
1420 source_stage = stages.at(old_layout);
1421 destination_stage = stages.at(new_layout);
1422
1423 if (access.find(old_layout) == access.end() ||
1424 access.find(new_layout) == access.end()) {
1425 throw std::invalid_argument("unsupported layout transition!");
1426 }
1427 barrier.srcAccessMask = access.at(old_layout);
1428 barrier.dstAccessMask = access.at(new_layout);
1429
1430 vkCmdPipelineBarrier(buffer_->buffer, source_stage, destination_stage, 0, 0,
1431 nullptr, 0, nullptr, 1, &barrier);
1432 buffer_->refs.push_back(image);
1433}
1434
1435inline void buffer_image_copy_ti_to_vk(VkBufferImageCopy &copy_info,
1436 size_t offset,
1437 const BufferImageCopyParams &params) {
1438 copy_info.bufferOffset = offset;
1439 copy_info.bufferRowLength = params.buffer_row_length;
1440 copy_info.bufferImageHeight = params.buffer_image_height;
1441 copy_info.imageExtent.width = params.image_extent.x;
1442 copy_info.imageExtent.height = params.image_extent.y;
1443 copy_info.imageExtent.depth = params.image_extent.z;
1444 copy_info.imageOffset.x = params.image_offset.x;
1445 copy_info.imageOffset.y = params.image_offset.y;
1446 copy_info.imageOffset.z = params.image_offset.z;
1447 copy_info.imageSubresource.aspectMask =
1448 params.image_aspect_flag; // FIXME: add option in BufferImageCopyParams
1449 // to support copying depth images
1450 // FIXED: added an option in
1451 // BufferImageCopyParams as image_aspect_flag
1452 // by yuhaoLong(mocki)
1453 copy_info.imageSubresource.baseArrayLayer = params.image_base_layer;
1454 copy_info.imageSubresource.layerCount = params.image_layer_count;
1455 copy_info.imageSubresource.mipLevel = params.image_mip_level;
1456}
1457
// Records a buffer-to-image copy. `img_layout` must match the image's
// actual layout at execution time.
void VulkanCommandList::buffer_to_image(DeviceAllocation dst_img,
                                        DevicePtr src_buf,
                                        ImageLayout img_layout,
                                        const BufferImageCopyParams &params) {
  VkBufferImageCopy copy_info{};
  buffer_image_copy_ti_to_vk(copy_info, src_buf.offset, params);

  auto [image, view, format] = ti_device_->get_vk_image(dst_img);
  auto buffer = ti_device_->get_vkbuffer(src_buf);

  vkCmdCopyBufferToImage(buffer_->buffer, buffer->buffer, image->image,
                         image_layout_ti_to_vk(img_layout), 1, &copy_info);
  // Keep both resources alive until the command buffer completes.
  buffer_->refs.push_back(image);
  buffer_->refs.push_back(buffer);
}
1473
// Records an image-to-buffer copy. `img_layout` must match the image's
// actual layout at execution time.
void VulkanCommandList::image_to_buffer(DevicePtr dst_buf,
                                        DeviceAllocation src_img,
                                        ImageLayout img_layout,
                                        const BufferImageCopyParams &params) {
  VkBufferImageCopy copy_info{};
  buffer_image_copy_ti_to_vk(copy_info, dst_buf.offset, params);

  auto [image, view, format] = ti_device_->get_vk_image(src_img);
  auto buffer = ti_device_->get_vkbuffer(dst_buf);

  vkCmdCopyImageToBuffer(buffer_->buffer, image->image,
                         image_layout_ti_to_vk(img_layout), buffer->buffer, 1,
                         &copy_info);
  // Keep both resources alive until the command buffer completes.
  buffer_->refs.push_back(image);
  buffer_->refs.push_back(buffer);
}
1490
// Records a 1:1 image copy (color aspect, mip 0, single layer); source and
// destination regions must have identical extents.
void VulkanCommandList::copy_image(DeviceAllocation dst_img,
                                   DeviceAllocation src_img,
                                   ImageLayout dst_img_layout,
                                   ImageLayout src_img_layout,
                                   const ImageCopyParams &params) {
  VkImageCopy copy{};
  copy.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  copy.srcSubresource.layerCount = 1;
  copy.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  copy.dstSubresource.layerCount = 1;
  copy.extent.width = params.width;
  copy.extent.height = params.height;
  copy.extent.depth = params.depth;

  auto [dst_vk_image, dst_view, dst_format] = ti_device_->get_vk_image(dst_img);
  auto [src_vk_image, src_view, src_format] = ti_device_->get_vk_image(src_img);

  vkCmdCopyImage(buffer_->buffer, src_vk_image->image,
                 image_layout_ti_to_vk(src_img_layout), dst_vk_image->image,
                 image_layout_ti_to_vk(dst_img_layout), 1, &copy);

  // Keep both images alive until the command buffer completes.
  buffer_->refs.push_back(dst_vk_image);
  buffer_->refs.push_back(src_vk_image);
}
1515
// Records an image blit (color aspect, mip 0, single layer) from origin to
// the given extent on both sides, using nearest-neighbor filtering.
void VulkanCommandList::blit_image(DeviceAllocation dst_img,
                                   DeviceAllocation src_img,
                                   ImageLayout dst_img_layout,
                                   ImageLayout src_img_layout,
                                   const ImageCopyParams &params) {
  VkOffset3D blit_size{/*x*/ int(params.width),
                       /*y*/ int(params.height),
                       /*z*/ int(params.depth)};
  VkImageBlit blit{};
  blit.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  blit.srcSubresource.layerCount = 1;
  // srcOffsets[0]/dstOffsets[0] stay zero-initialized (blit from origin).
  blit.srcOffsets[1] = blit_size;
  blit.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  blit.dstSubresource.layerCount = 1;
  blit.dstOffsets[1] = blit_size;

  auto [dst_vk_image, dst_view, dst_format] = ti_device_->get_vk_image(dst_img);
  auto [src_vk_image, src_view, src_format] = ti_device_->get_vk_image(src_img);

  vkCmdBlitImage(buffer_->buffer, src_vk_image->image,
                 image_layout_ti_to_vk(src_img_layout), dst_vk_image->image,
                 image_layout_ti_to_vk(dst_img_layout), 1, &blit,
                 VK_FILTER_NEAREST);

  // Keep both images alive until the command buffer completes.
  buffer_->refs.push_back(dst_vk_image);
  buffer_->refs.push_back(src_vk_image);
}
1543
// Sets the rasterization line width; silently ignored when the device does
// not support the wideLines feature (only 1.0 is valid there).
void VulkanCommandList::set_line_width(float width) {
  if (ti_device_->vk_caps().wide_line) {
    vkCmdSetLineWidth(buffer_->buffer, width);
  }
}
1549
// Returns the renderpass in effect. With dynamic rendering there is no
// live VkRenderPass, so a compatible one is fetched from the device cache
// (and pinned to this command buffer's lifetime).
vkapi::IVkRenderPass VulkanCommandList::current_renderpass() {
  if (ti_device_->vk_caps().dynamic_rendering) {
    vkapi::IVkRenderPass rp =
        ti_device_->get_renderpass(current_renderpass_desc_);
    buffer_->refs.push_back(rp);
    return rp;
  }
  return current_renderpass_;
}
1559
// Ends command buffer recording (idempotent) and returns the buffer ready
// for queue submission.
vkapi::IVkCommandBuffer VulkanCommandList::finalize() {
  if (!finalized_) {
    vkEndCommandBuffer(buffer_->buffer);
    finalized_ = true;
  }
  return buffer_;
}
1567
// Per-thread stream registry: each recording thread gets its own
// VulkanStream, keyed by thread id.
struct VulkanDevice::ThreadLocalStreams {
  // NOTE(review): `unordered_map` is unqualified here — presumably brought
  // into scope by a using-declaration elsewhere; verify against the headers.
  unordered_map<std::thread::id, std::unique_ptr<VulkanStream>> map;
};
1571
// Constructs the device wrapper with empty per-thread stream maps and a
// baseline capability set (SPIR-V 1.0); real Vulkan handles are installed
// later via init_vulkan_structs().
VulkanDevice::VulkanDevice()
    : compute_streams_(std::make_unique<ThreadLocalStreams>()),
      graphics_streams_(std::make_unique<ThreadLocalStreams>()) {
  DeviceCapabilityConfig caps{};
  caps.set(DeviceCapability::spirv_version, 0x10000);
  set_caps(std::move(caps));
}
1579
1580void VulkanDevice::init_vulkan_structs(Params &params) {
1581 instance_ = params.instance;
1582 device_ = params.device;
1583 physical_device_ = params.physical_device;
1584 compute_queue_ = params.compute_queue;
1585 compute_queue_family_index_ = params.compute_queue_family_index;
1586 graphics_queue_ = params.graphics_queue;
1587 graphics_queue_family_index_ = params.graphics_queue_family_index;
1588
1589 create_vma_allocator();
1590 RHI_ASSERT(new_descriptor_pool() == RhiResult::success &&
1591 "Failed to allocate initial descriptor pool");
1592
1593 vkGetPhysicalDeviceProperties(physical_device_, &vk_device_properties_);
1594}
1595
// Tears down the device. Destruction order matters: GPU work is drained
// first, then allocations, then streams, then cached Vulkan objects, and
// finally the VMA allocators.
VulkanDevice::~VulkanDevice() {
  // Note: Ideally whoever allocated the buffer & image should be responsible
  // for deallocation as well.
  // These manual deallocations work as last resort for the case where we
  // have GGUI window whose lifetime is controlled by Python but
  // shares the same underlying VulkanDevice with Program. In an extreme
  // edge case when Python shuts down and program gets destructed before
  // GGUI Window, buffers and images allocated through GGUI window won't
  // be properly deallocated before VulkanDevice destruction. This isn't
  // the most proper fix but is less intrusive compared to other
  // approaches.
  vkDeviceWaitIdle(device_);

  allocations_.clear();
  image_allocations_.clear();

  // Streams own command pools/buffers; release them before cached objects.
  compute_streams_.reset();
  graphics_streams_.reset();

  framebuffer_pools_.clear();
  renderpass_pools_.clear();
  desc_set_layouts_.clear();
  desc_pool_ = nullptr;

  vmaDestroyAllocator(allocator_);
  vmaDestroyAllocator(allocator_export_);
}
1623
// Creates a pipeline cache, optionally seeded with previously serialized
// data. On allocation failure `*out_cache` is nulled and `out_of_memory`
// is returned.
RhiResult VulkanDevice::create_pipeline_cache(
    PipelineCache **out_cache,
    size_t initial_size,
    const void *initial_data) noexcept {
  try {
    *out_cache = new VulkanPipelineCache(this, initial_size, initial_data);
  } catch (std::bad_alloc &) {
    *out_cache = nullptr;
    return RhiResult::out_of_memory;
  }
  return RhiResult::success;
}
1636
// Creates a compute pipeline from a SPIR-V binary. Only
// spirv_binary/compute sources are accepted. Exceptions from pipeline
// construction are mapped to RhiResult codes; `*out_pipeline` is nulled on
// every failure path.
RhiResult VulkanDevice::create_pipeline(Pipeline **out_pipeline,
                                        const PipelineSourceDesc &src,
                                        std::string name,
                                        PipelineCache *cache) noexcept {
  if (src.type != PipelineSourceType::spirv_binary ||
      src.stage != PipelineStageType::compute) {
    return RhiResult::invalid_usage;
  }

  if (src.data == nullptr || src.size == 0) {
    RHI_LOG_ERROR("pipeline source cannot be empty");
    return RhiResult::invalid_usage;
  }

  SpirvCodeView code;
  code.data = (uint32_t *)src.data;
  code.size = src.size;
  code.stage = VK_SHADER_STAGE_COMPUTE_BIT;

  VulkanPipeline::Params params;
  params.code = {code};
  params.device = this;
  params.name = name;
  params.cache =
      cache ? static_cast<VulkanPipelineCache *>(cache)->vk_pipeline_cache()
            : nullptr;

  // Map construction failures onto RHI result codes.
  try {
    *out_pipeline = new VulkanPipeline(params);
  } catch (std::invalid_argument &e) {
    *out_pipeline = nullptr;
    RHI_LOG_ERROR(e.what());
    return RhiResult::invalid_usage;
  } catch (std::runtime_error &e) {
    *out_pipeline = nullptr;
    RHI_LOG_ERROR(e.what());
    return RhiResult::error;
  } catch (std::bad_alloc &e) {
    *out_pipeline = nullptr;
    RHI_LOG_ERROR(e.what());
    return RhiResult::out_of_memory;
  }

  return RhiResult::success;
}
1682
1683DeviceAllocation VulkanDevice::allocate_memory(const AllocParams &params) {
1684 AllocationInternal &alloc = allocations_.acquire();
1685
1686 RHI_ASSERT(params.size > 0);
1687
1688 VkBufferCreateInfo buffer_info{};
1689 buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
1690 buffer_info.pNext = nullptr;
1691 buffer_info.size = params.size;
1692 // FIXME: How to express this in a backend-neutral way?
1693 buffer_info.usage =
1694 VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
1695 if (params.usage && AllocUsage::Storage) {
1696 buffer_info.usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
1697 }
1698 if (params.usage && AllocUsage::Uniform) {
1699 buffer_info.usage |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
1700 }
1701 if (params.usage && AllocUsage::Vertex) {
1702 buffer_info.usage |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
1703 }
1704 if (params.usage && AllocUsage::Index) {
1705 buffer_info.usage |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
1706 }
1707
1708 uint32_t queue_family_indices[] = {compute_queue_family_index_,
1709 graphics_queue_family_index_};
1710
1711 if (compute_queue_family_index_ == graphics_queue_family_index_) {
1712 buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
1713 } else {
1714 buffer_info.sharingMode = VK_SHARING_MODE_CONCURRENT;
1715 buffer_info.queueFamilyIndexCount = 2;
1716 buffer_info.pQueueFamilyIndices = queue_family_indices;
1717 }
1718
1719 VkExternalMemoryBufferCreateInfo external_mem_buffer_create_info = {};
1720 external_mem_buffer_create_info.sType =
1721 VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO;
1722 external_mem_buffer_create_info.pNext = nullptr;
1723
1724#ifdef _WIN64
1725 external_mem_buffer_create_info.handleTypes =
1726 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
1727#else
1728 external_mem_buffer_create_info.handleTypes =
1729 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
1730#endif
1731
1732 bool export_sharing = params.export_sharing && vk_caps().external_memory;
1733
1734 VmaAllocationCreateInfo alloc_info{};
1735 if (export_sharing) {
1736 alloc_info.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
1737 buffer_info.pNext = &external_mem_buffer_create_info;
1738 }
1739#ifdef __APPLE__
1740 // weird behavior on apple: these flags are needed even if either read or
1741 // write is required
1742 if (params.host_read || params.host_write) {
1743#else
1744 if (params.host_read && params.host_write) {
1745#endif //__APPLE__
1746 // This should be the unified memory on integrated GPUs
1747 alloc_info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1748 VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
1749 alloc_info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
1750#ifdef __APPLE__
1751 // weird behavior on apple: if coherent bit is not set, then the memory
1752 // writes between map() and unmap() cannot be seen by gpu
1753 alloc_info.preferredFlags |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
1754#endif //__APPLE__
1755 } else if (params.host_read) {
1756 alloc_info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
1757 alloc_info.preferredFlags = VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
1758 } else if (params.host_write) {
1759 alloc_info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
1760 if (int(params.usage & AllocUsage::Upload)) {
1761 alloc_info.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
1762 } else {
1763 alloc_info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
1764 }
1765 } else {
1766 alloc_info.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
1767 }
1768
1769 if (get_caps().get(DeviceCapability::spirv_has_physical_storage_buffer) &&
1770 ((alloc_info.usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) ||
1771 (alloc_info.usage &
1772 VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR) ||
1773 (alloc_info.usage &
1774 VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR) ||
1775 (alloc_info.usage & VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR))) {
1776 buffer_info.usage |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR;
1777 }
1778
1779 alloc.buffer = vkapi::create_buffer(
1780 device_, export_sharing ? allocator_export_ : allocator_, &buffer_info,
1781 &alloc_info);
1782 vmaGetAllocationInfo(alloc.buffer->allocator, alloc.buffer->allocation,
1783 &alloc.alloc_info);
1784
1785 if (get_caps().get(DeviceCapability::spirv_has_physical_storage_buffer)) {
1786 VkBufferDeviceAddressInfoKHR info{};
1787 info.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR;
1788 info.buffer = alloc.buffer->buffer;
1789 info.pNext = nullptr;
1790 alloc.addr = vkGetBufferDeviceAddressKHR(device_, &info);
1791 }
1792
1793 return DeviceAllocation{this, (uint64_t)&alloc};
1794}
1795
1796RhiResult VulkanDevice::map_internal(AllocationInternal &alloc_int,
1797 size_t offset,
1798 size_t size,
1799 void **mapped_ptr) {
1800 if (alloc_int.mapped != nullptr) {
1801 RHI_LOG_ERROR("Memory can not be mapped multiple times");
1802 return RhiResult::invalid_usage;
1803 }
1804
1805 if (size != VK_WHOLE_SIZE && alloc_int.alloc_info.size < offset + size) {
1806 RHI_LOG_ERROR("Mapping out of range");
1807 return RhiResult::invalid_usage;
1808 }
1809
1810 VkResult res;
1811 if (alloc_int.buffer->allocator) {
1812 res = vmaMapMemory(alloc_int.buffer->allocator,
1813 alloc_int.buffer->allocation, &alloc_int.mapped);
1814 alloc_int.mapped = (uint8_t *)(alloc_int.mapped) + offset;
1815 } else {
1816 res = vkMapMemory(device_, alloc_int.alloc_info.deviceMemory,
1817 alloc_int.alloc_info.offset + offset, size, 0,
1818 &alloc_int.mapped);
1819 }
1820
1821 if (alloc_int.mapped == nullptr || res == VK_ERROR_MEMORY_MAP_FAILED) {
1822 RHI_LOG_ERROR(
1823 "cannot map memory, potentially because the memory is not "
1824 "accessible from the host: ensure your memory is allocated with "
1825 "`host_read=true` or `host_write=true` (or `host_access=true` in C++ "
1826 "wrapper)");
1827 return RhiResult::invalid_usage;
1828 } else if (res != VK_SUCCESS) {
1829 char msg_buf[256];
1830 snprintf(msg_buf, sizeof(msg_buf),
1831 "failed to map memory for unknown reasons. VkResult = %d", res);
1832 RHI_LOG_ERROR(msg_buf);
1833 return RhiResult::error;
1834 }
1835
1836 *mapped_ptr = alloc_int.mapped;
1837
1838 return RhiResult::success;
1839}
1840
1841void VulkanDevice::dealloc_memory(DeviceAllocation handle) {
1842 allocations_.release(&get_alloc_internal(handle));
1843}
1844
1845ShaderResourceSet *VulkanDevice::create_resource_set() {
1846 return new VulkanResourceSet(this);
1847}
1848
1849RasterResources *VulkanDevice::create_raster_resources() {
1850 return new VulkanRasterResources(this);
1851}
1852
1853uint64_t VulkanDevice::get_memory_physical_pointer(DeviceAllocation handle) {
1854 return uint64_t(get_alloc_internal(handle).addr);
1855}
1856
1857RhiResult VulkanDevice::map_range(DevicePtr ptr,
1858 uint64_t size,
1859 void **mapped_ptr) {
1860 return map_internal(get_alloc_internal(ptr), ptr.offset, size, mapped_ptr);
1861}
1862
1863RhiResult VulkanDevice::map(DeviceAllocation alloc, void **mapped_ptr) {
1864 return map_internal(get_alloc_internal(alloc), 0, VK_WHOLE_SIZE, mapped_ptr);
1865}
1866
1867void VulkanDevice::unmap(DevicePtr ptr) {
1868 return this->VulkanDevice::unmap(DeviceAllocation(ptr));
1869}
1870
1871void VulkanDevice::unmap(DeviceAllocation alloc) {
1872 AllocationInternal &alloc_int = get_alloc_internal(alloc);
1873
1874 if (alloc_int.mapped == nullptr) {
1875 RHI_LOG_ERROR("Unmapping memory that is not mapped");
1876 return;
1877 }
1878
1879 if (alloc_int.buffer->allocator) {
1880 vmaUnmapMemory(alloc_int.buffer->allocator, alloc_int.buffer->allocation);
1881 } else {
1882 vkUnmapMemory(device_, alloc_int.alloc_info.deviceMemory);
1883 }
1884
1885 alloc_int.mapped = nullptr;
1886}
1887
1888void VulkanDevice::memcpy_internal(DevicePtr dst,
1889 DevicePtr src,
1890 uint64_t size) {
1891 // TODO: always create a queue specifically for transfer
1892 Stream *stream = get_compute_stream();
1893 auto [cmd, res] = stream->new_command_list_unique();
1894 TI_ASSERT(res == RhiResult::success);
1895 cmd->buffer_copy(dst, src, size);
1896 stream->submit_synced(cmd.get());
1897}
1898
1899Stream *VulkanDevice::get_compute_stream() {
1900 auto tid = std::this_thread::get_id();
1901 auto &stream_map = compute_streams_->map;
1902 auto iter = stream_map.find(tid);
1903 if (iter == stream_map.end()) {
1904 stream_map[tid] = std::make_unique<VulkanStream>(
1905 *this, compute_queue_, compute_queue_family_index_);
1906 return stream_map.at(tid).get();
1907 }
1908 return iter->second.get();
1909}
1910
1911void VulkanCommandList::begin_profiler_scope(const std::string &kernel_name) {
1912 auto pool = vkapi::create_query_pool(ti_device_->vk_device());
1913 vkCmdResetQueryPool(buffer_->buffer, pool->query_pool, 0, 2);
1914 vkCmdWriteTimestamp(buffer_->buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
1915 pool->query_pool, 0);
1916 ti_device_->profiler_add_sampler(kernel_name, pool);
1917}
1918
1919void VulkanCommandList::end_profiler_scope() {
1920 auto pool = ti_device_->profiler_get_last_query_pool();
1921 vkCmdWriteTimestamp(buffer_->buffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
1922 pool->query_pool, 1);
1923}
1924
1925void VulkanDevice::profiler_sync() {
1926 for (auto &sampler : samplers_) {
1927 auto kernel_name = sampler.first;
1928 auto query_pool = sampler.second->query_pool;
1929
1930 double duration_ms = 0.0;
1931
1932 uint64_t t[2];
1933 vkGetQueryPoolResults(vk_device(), query_pool, 0, 2, sizeof(uint64_t) * 2,
1934 &t, sizeof(uint64_t),
1935 VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
1936 duration_ms = (t[1] - t[0]) * vk_device_properties_.limits.timestampPeriod /
1937 1000000.0;
1938 sampled_records_.push_back(std::make_pair(kernel_name, duration_ms));
1939 }
1940 samplers_.clear();
1941}
1942
1943std::vector<std::pair<std::string, double>>
1944VulkanDevice::profiler_flush_sampled_time() {
1945 auto records_ = sampled_records_;
1946 sampled_records_.clear();
1947 return records_;
1948}
1949
1950Stream *VulkanDevice::get_graphics_stream() {
1951 auto tid = std::this_thread::get_id();
1952 auto &stream_map = graphics_streams_->map;
1953 auto iter = stream_map.find(tid);
1954 if (iter == stream_map.end()) {
1955 stream_map[tid] = std::make_unique<VulkanStream>(
1956 *this, graphics_queue_, graphics_queue_family_index_);
1957 return stream_map.at(tid).get();
1958 }
1959 return iter->second.get();
1960}
1961
1962void VulkanDevice::wait_idle() {
1963 for (auto &[tid, stream] : compute_streams_->map) {
1964 stream->command_sync();
1965 }
1966 for (auto &[tid, stream] : graphics_streams_->map) {
1967 stream->command_sync();
1968 }
1969}
1970
1971RhiResult VulkanStream::new_command_list(CommandList **out_cmdlist) noexcept {
1972 vkapi::IVkCommandBuffer buffer =
1973 vkapi::allocate_command_buffer(command_pool_);
1974
1975 if (buffer == nullptr) {
1976 return RhiResult::out_of_memory;
1977 }
1978
1979 *out_cmdlist = new VulkanCommandList(&device_, this, buffer);
1980 return RhiResult::success;
1981}
1982
// Submits a finalized command list to this stream's queue.
// Waits on `wait_semaphores` (all at ALL_COMMANDS stage) and returns a
// semaphore that signals when this submission completes on the GPU.
StreamSemaphore VulkanStream::submit(
    CommandList *cmdlist_,
    const std::vector<StreamSemaphore> &wait_semaphores) {
  VulkanCommandList *cmdlist = static_cast<VulkanCommandList *>(cmdlist_);
  // Close recording and obtain the submittable command buffer.
  vkapi::IVkCommandBuffer buffer = cmdlist->finalize();

  /*
  if (in_flight_cmdlists_.find(buffer) != in_flight_cmdlists_.end()) {
    TI_ERROR("Can not submit command list that is still in-flight");
    return;
  }
  */

  VkSubmitInfo submit_info{};
  submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
  submit_info.commandBufferCount = 1;
  submit_info.pCommandBuffers = &buffer->buffer;

  std::vector<VkSemaphore> vk_wait_semaphores;
  std::vector<VkPipelineStageFlags> vk_wait_stages;

  for (const StreamSemaphore &sema_ : wait_semaphores) {
    auto sema = std::static_pointer_cast<VulkanStreamSemaphoreObject>(sema_);
    vk_wait_semaphores.push_back(sema->vkapi_ref->semaphore);
    vk_wait_stages.push_back(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
    // Keep the semaphore alive at least as long as this command buffer.
    buffer->refs.push_back(sema->vkapi_ref);
  }

  submit_info.pWaitSemaphores = vk_wait_semaphores.data();
  submit_info.waitSemaphoreCount = vk_wait_semaphores.size();
  submit_info.pWaitDstStageMask = vk_wait_stages.data();

  // Semaphore signaled on completion; handed back to the caller and also
  // referenced by the command buffer to pin its lifetime.
  auto semaphore = vkapi::create_semaphore(buffer->device, 0);
  buffer->refs.push_back(semaphore);

  submit_info.signalSemaphoreCount = 1;
  submit_info.pSignalSemaphores = &semaphore->semaphore;

  // Fence for CPU-side tracking of this submission's completion.
  auto fence = vkapi::create_fence(buffer->device, 0);

  // Resource tracking, check previously submitted commands
  // FIXME: Figure out why it doesn't work
  /*
  std::remove_if(submitted_cmdbuffers_.begin(), submitted_cmdbuffers_.end(),
                 [&](const TrackedCmdbuf &tracked) {
                   // If fence is signaled, cmdbuf has completed
                   VkResult res =
                       vkGetFenceStatus(buffer->device, tracked.fence->fence);
                   return res == VK_SUCCESS;
                 });
  */

  // Track (fence, buffer) so resources stay alive until command_sync().
  submitted_cmdbuffers_.push_back(TrackedCmdbuf{fence, buffer});

  BAIL_ON_VK_BAD_RESULT_NO_RETURN(
      vkQueueSubmit(queue_, /*submitCount=*/1, &submit_info,
                    /*fence=*/fence->fence),
      "failed to submit command buffer");

  return std::make_shared<VulkanStreamSemaphoreObject>(semaphore);
}
2044
2045StreamSemaphore VulkanStream::submit_synced(
2046 CommandList *cmdlist,
2047 const std::vector<StreamSemaphore> &wait_semaphores) {
2048 auto sema = submit(cmdlist, wait_semaphores);
2049 command_sync();
2050 return sema;
2051}
2052
2053void VulkanStream::command_sync() {
2054 vkQueueWaitIdle(queue_);
2055
2056 VkPhysicalDeviceProperties props{};
2057 vkGetPhysicalDeviceProperties(device_.vk_physical_device(), &props);
2058
2059 device_.profiler_sync();
2060
2061 submitted_cmdbuffers_.clear();
2062}
2063
2064std::unique_ptr<Pipeline> VulkanDevice::create_raster_pipeline(
2065 const std::vector<PipelineSourceDesc> &src,
2066 const RasterParams &raster_params,
2067 const std::vector<VertexInputBinding> &vertex_inputs,
2068 const std::vector<VertexInputAttribute> &vertex_attrs,
2069 std::string name) {
2070 VulkanPipeline::Params params;
2071 params.code = {};
2072 params.device = this;
2073 params.name = name;
2074
2075 for (auto &src_desc : src) {
2076 SpirvCodeView &code = params.code.emplace_back();
2077 code.data = (uint32_t *)src_desc.data;
2078 code.size = src_desc.size;
2079 code.stage = VK_SHADER_STAGE_COMPUTE_BIT;
2080 if (src_desc.stage == PipelineStageType::fragment) {
2081 code.stage = VK_SHADER_STAGE_FRAGMENT_BIT;
2082 } else if (src_desc.stage == PipelineStageType::vertex) {
2083 code.stage = VK_SHADER_STAGE_VERTEX_BIT;
2084 } else if (src_desc.stage == PipelineStageType::geometry) {
2085 code.stage = VK_SHADER_STAGE_GEOMETRY_BIT;
2086 } else if (src_desc.stage == PipelineStageType::tesselation_control) {
2087 code.stage = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
2088 } else if (src_desc.stage == PipelineStageType::tesselation_eval) {
2089 code.stage = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
2090 }
2091 }
2092
2093 return std::make_unique<VulkanPipeline>(params, raster_params, vertex_inputs,
2094 vertex_attrs);
2095}
2096
2097std::unique_ptr<Surface> VulkanDevice::create_surface(
2098 const SurfaceConfig &config) {
2099 return std::make_unique<VulkanSurface>(this, config);
2100}
2101
2102std::tuple<VkDeviceMemory, size_t, size_t>
2103VulkanDevice::get_vkmemory_offset_size(const DeviceAllocation &alloc) const {
2104 auto &buffer_alloc = get_alloc_internal(alloc);
2105 return std::make_tuple(buffer_alloc.alloc_info.deviceMemory,
2106 buffer_alloc.alloc_info.offset,
2107 buffer_alloc.alloc_info.size);
2108}
2109
2110vkapi::IVkBuffer VulkanDevice::get_vkbuffer(
2111 const DeviceAllocation &alloc) const {
2112 const AllocationInternal &alloc_int = get_alloc_internal(alloc);
2113
2114 return alloc_int.buffer;
2115}
2116
2117size_t VulkanDevice::get_vkbuffer_size(const DeviceAllocation &alloc) const {
2118 const AllocationInternal &alloc_int = get_alloc_internal(alloc);
2119
2120 return alloc_int.alloc_info.size;
2121}
2122
2123std::tuple<vkapi::IVkImage, vkapi::IVkImageView, VkFormat>
2124VulkanDevice::get_vk_image(const DeviceAllocation &alloc) const {
2125 const ImageAllocInternal &alloc_int = get_image_alloc_internal(alloc);
2126
2127 return std::make_tuple(alloc_int.image, alloc_int.view,
2128 alloc_int.image->format);
2129}
2130
2131vkapi::IVkFramebuffer VulkanDevice::get_framebuffer(
2132 const VulkanFramebufferDesc &desc) {
2133 if (framebuffer_pools_.find(desc) != framebuffer_pools_.end()) {
2134 return framebuffer_pools_.at(desc);
2135 }
2136
2137 vkapi::IVkFramebuffer framebuffer = vkapi::create_framebuffer(
2138 0, desc.renderpass, desc.attachments, desc.width, desc.height, 1);
2139
2140 framebuffer_pools_.insert({desc, framebuffer});
2141
2142 return framebuffer;
2143}
2144
2145DeviceAllocation VulkanDevice::import_vkbuffer(vkapi::IVkBuffer buffer,
2146 size_t size,
2147 VkDeviceMemory memory,
2148 VkDeviceSize offset) {
2149 AllocationInternal &alloc_int = allocations_.acquire();
2150
2151 alloc_int.external = true;
2152 alloc_int.buffer = buffer;
2153 alloc_int.mapped = nullptr;
2154 if (get_caps().get(DeviceCapability::spirv_has_physical_storage_buffer)) {
2155 VkBufferDeviceAddressInfoKHR info{};
2156 info.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO;
2157 info.buffer = buffer->buffer;
2158 info.pNext = nullptr;
2159 alloc_int.addr = vkGetBufferDeviceAddress(device_, &info);
2160 }
2161
2162 alloc_int.alloc_info.size = size;
2163 alloc_int.alloc_info.deviceMemory = memory;
2164 alloc_int.alloc_info.offset = offset;
2165
2166 return DeviceAllocation{this, reinterpret_cast<uint64_t>(&alloc_int)};
2167}
2168
2169DeviceAllocation VulkanDevice::import_vk_image(vkapi::IVkImage image,
2170 vkapi::IVkImageView view,
2171 VkImageLayout layout) {
2172 ImageAllocInternal &alloc_int = image_allocations_.acquire();
2173
2174 alloc_int.external = true;
2175 alloc_int.image = image;
2176 alloc_int.view = view;
2177 alloc_int.view_lods.emplace_back(view);
2178
2179 return DeviceAllocation{this, reinterpret_cast<uint64_t>(&alloc_int)};
2180}
2181
2182vkapi::IVkImageView VulkanDevice::get_vk_imageview(
2183 const DeviceAllocation &alloc) const {
2184 return std::get<1>(get_vk_image(alloc));
2185}
2186
2187vkapi::IVkImageView VulkanDevice::get_vk_lod_imageview(
2188 const DeviceAllocation &alloc,
2189 int lod) const {
2190 return get_image_alloc_internal(alloc).view_lods[lod];
2191}
2192
2193DeviceAllocation VulkanDevice::create_image(const ImageParams &params) {
2194 ImageAllocInternal &alloc = image_allocations_.acquire();
2195
2196 int num_mip_levels = 1;
2197
2198 bool is_depth = params.format == BufferFormat::depth16 ||
2199 params.format == BufferFormat::depth24stencil8 ||
2200 params.format == BufferFormat::depth32f;
2201
2202 VkImageCreateInfo image_info{};
2203 image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
2204 image_info.pNext = nullptr;
2205 if (params.dimension == ImageDimension::d1D) {
2206 image_info.imageType = VK_IMAGE_TYPE_1D;
2207 } else if (params.dimension == ImageDimension::d2D) {
2208 image_info.imageType = VK_IMAGE_TYPE_2D;
2209 } else if (params.dimension == ImageDimension::d3D) {
2210 image_info.imageType = VK_IMAGE_TYPE_3D;
2211 }
2212 image_info.extent.width = params.x;
2213 image_info.extent.height = params.y;
2214 image_info.extent.depth = params.z;
2215 image_info.mipLevels = num_mip_levels;
2216 image_info.arrayLayers = 1;
2217 auto [result, vk_format] = buffer_format_ti_to_vk(params.format);
2218 assert(result == RhiResult::success);
2219 image_info.format = vk_format;
2220 image_info.tiling = VK_IMAGE_TILING_OPTIMAL;
2221 image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
2222 image_info.usage =
2223 VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
2224 if (params.usage & ImageAllocUsage::Sampled) {
2225 image_info.usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
2226 }
2227
2228 if (is_depth) {
2229 if (params.usage & ImageAllocUsage::Storage) {
2230 image_info.usage |= VK_IMAGE_USAGE_STORAGE_BIT;
2231 }
2232 if (params.usage & ImageAllocUsage::Attachment) {
2233 image_info.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
2234 }
2235 } else {
2236 if (params.usage & ImageAllocUsage::Storage) {
2237 image_info.usage |= VK_IMAGE_USAGE_STORAGE_BIT;
2238 }
2239 if (params.usage & ImageAllocUsage::Attachment) {
2240 image_info.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
2241 }
2242 }
2243 image_info.samples = VK_SAMPLE_COUNT_1_BIT;
2244
2245 uint32_t queue_family_indices[] = {compute_queue_family_index_,
2246 graphics_queue_family_index_};
2247
2248 if (compute_queue_family_index_ == graphics_queue_family_index_) {
2249 image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
2250 } else {
2251 image_info.sharingMode = VK_SHARING_MODE_CONCURRENT;
2252 image_info.queueFamilyIndexCount = 2;
2253 image_info.pQueueFamilyIndices = queue_family_indices;
2254 }
2255
2256 bool export_sharing = params.export_sharing && vk_caps_.external_memory;
2257
2258 VkExternalMemoryImageCreateInfo external_mem_image_create_info = {};
2259 if (export_sharing) {
2260 external_mem_image_create_info.sType =
2261 VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO;
2262 external_mem_image_create_info.pNext = nullptr;
2263
2264#ifdef _WIN64
2265 external_mem_image_create_info.handleTypes =
2266 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
2267#else
2268 external_mem_image_create_info.handleTypes =
2269 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
2270#endif
2271 image_info.pNext = &external_mem_image_create_info;
2272 }
2273
2274 VmaAllocationCreateInfo alloc_info{};
2275 if (params.export_sharing) {
2276 alloc_info.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
2277 }
2278 alloc_info.usage = VMA_MEMORY_USAGE_GPU_ONLY;
2279
2280 alloc.image = vkapi::create_image(
2281 device_, export_sharing ? allocator_export_ : allocator_, &image_info,
2282 &alloc_info);
2283 vmaGetAllocationInfo(alloc.image->allocator, alloc.image->allocation,
2284 &alloc.alloc_info);
2285
2286 VkImageViewCreateInfo view_info{};
2287 view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
2288 view_info.pNext = nullptr;
2289 if (params.dimension == ImageDimension::d1D) {
2290 view_info.viewType = VK_IMAGE_VIEW_TYPE_1D;
2291 } else if (params.dimension == ImageDimension::d2D) {
2292 view_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
2293 } else if (params.dimension == ImageDimension::d3D) {
2294 view_info.viewType = VK_IMAGE_VIEW_TYPE_3D;
2295 }
2296 view_info.format = image_info.format;
2297 view_info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
2298 view_info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
2299 view_info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
2300 view_info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
2301 view_info.subresourceRange.aspectMask =
2302 is_depth ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT;
2303 view_info.subresourceRange.baseMipLevel = 0;
2304 view_info.subresourceRange.levelCount = num_mip_levels;
2305 view_info.subresourceRange.baseArrayLayer = 0;
2306 view_info.subresourceRange.layerCount = 1;
2307
2308 alloc.view = vkapi::create_image_view(device_, alloc.image, &view_info);
2309
2310 for (int i = 0; i < num_mip_levels; i++) {
2311 view_info.subresourceRange.baseMipLevel = i;
2312 view_info.subresourceRange.levelCount = 1;
2313 alloc.view_lods.push_back(
2314 vkapi::create_image_view(device_, alloc.image, &view_info));
2315 }
2316
2317 DeviceAllocation handle{this, reinterpret_cast<uint64_t>(&alloc)};
2318
2319 if (params.initial_layout != ImageLayout::undefined) {
2320 image_transition(handle, ImageLayout::undefined, params.initial_layout);
2321 }
2322
2323 return handle;
2324}
2325
2326void VulkanDevice::destroy_image(DeviceAllocation handle) {
2327 image_allocations_.release(&get_image_alloc_internal(handle));
2328}
2329
// Returns a cached render pass matching `desc`, creating it on first use.
// Builds one subpass with all color attachments plus an optional depth
// attachment. Note: `subpass` holds pointers into the local vectors, so the
// statement order below matters (vectors must not change after referencing).
vkapi::IVkRenderPass VulkanDevice::get_renderpass(
    const VulkanRenderPassDesc &desc) {
  if (renderpass_pools_.find(desc) != renderpass_pools_.end()) {
    return renderpass_pools_.at(desc);
  }

  std::vector<VkAttachmentDescription> attachments;
  std::vector<VkAttachmentReference> color_attachments;

  VkAttachmentReference depth_attachment{};

  // Attachment index within `attachments`; colors first, depth last.
  uint32_t i = 0;
  for (auto &[format, clear] : desc.color_attachments) {
    VkAttachmentDescription &description = attachments.emplace_back();
    description.flags = 0;
    description.format = format;
    description.samples = VK_SAMPLE_COUNT_1_BIT;
    // `clear` selects clear-on-load vs preserve previous contents.
    description.loadOp =
        clear ? VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_LOAD;
    description.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
    description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
    description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
    description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;

    // Color targets end up presentable.
    description.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;

    VkAttachmentReference &ref = color_attachments.emplace_back();
    ref.attachment = i;
    ref.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
    i += 1;
  }

  // VK_FORMAT_UNDEFINED signals "no depth attachment".
  if (desc.depth_attachment != VK_FORMAT_UNDEFINED) {
    VkAttachmentDescription &description = attachments.emplace_back();
    description.flags = 0;
    description.format = desc.depth_attachment;
    description.samples = VK_SAMPLE_COUNT_1_BIT;
    description.loadOp = desc.clear_depth ? VK_ATTACHMENT_LOAD_OP_CLEAR
                                          : VK_ATTACHMENT_LOAD_OP_LOAD;
    description.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
    description.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
    description.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
    description.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;

    description.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;

    depth_attachment.attachment = i;
    depth_attachment.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
  }

  VkSubpassDescription subpass{};
  subpass.flags = 0;
  subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
  subpass.inputAttachmentCount = 0;
  subpass.pInputAttachments = nullptr;
  subpass.colorAttachmentCount = color_attachments.size();
  subpass.pColorAttachments = color_attachments.data();
  subpass.pResolveAttachments = nullptr;
  subpass.pDepthStencilAttachment = desc.depth_attachment == VK_FORMAT_UNDEFINED
                                        ? nullptr
                                        : &depth_attachment;
  subpass.preserveAttachmentCount = 0;
  subpass.pPreserveAttachments = nullptr;

  VkRenderPassCreateInfo renderpass_info{};
  renderpass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
  renderpass_info.pNext = nullptr;
  renderpass_info.flags = 0;
  renderpass_info.attachmentCount = attachments.size();
  renderpass_info.pAttachments = attachments.data();
  renderpass_info.subpassCount = 1;
  renderpass_info.pSubpasses = &subpass;
  renderpass_info.dependencyCount = 0;
  renderpass_info.pDependencies = nullptr;

  vkapi::IVkRenderPass renderpass =
      vkapi::create_render_pass(device_, &renderpass_info);

  // Cache for subsequent lookups with the same descriptor.
  renderpass_pools_.insert({desc, renderpass});

  return renderpass;
}
2412
2413vkapi::IVkDescriptorSetLayout VulkanDevice::get_desc_set_layout(
2414 VulkanResourceSet &set) {
2415 if (desc_set_layouts_.find(set) == desc_set_layouts_.end()) {
2416 std::vector<VkDescriptorSetLayoutBinding> bindings;
2417 for (const auto &pair : set.get_bindings()) {
2418 bindings.push_back(VkDescriptorSetLayoutBinding{
2419 /*binding=*/pair.first, pair.second.type, /*descriptorCount=*/1,
2420 VK_SHADER_STAGE_ALL,
2421 /*pImmutableSamplers=*/nullptr});
2422 }
2423
2424 VkDescriptorSetLayoutCreateInfo create_info{};
2425 create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
2426 create_info.pNext = nullptr;
2427 create_info.flags = 0;
2428 create_info.bindingCount = bindings.size();
2429 create_info.pBindings = bindings.data();
2430
2431 auto layout = vkapi::create_descriptor_set_layout(device_, &create_info);
2432 desc_set_layouts_[set] = layout;
2433
2434 return layout;
2435 } else {
2436 return desc_set_layouts_.at(set);
2437 }
2438}
2439
2440RhiReturn<vkapi::IVkDescriptorSet> VulkanDevice::alloc_desc_set(
2441 vkapi::IVkDescriptorSetLayout layout) {
2442 // This returns nullptr if can't allocate (OOM or pool is full)
2443 vkapi::IVkDescriptorSet set =
2444 vkapi::allocate_descriptor_sets(desc_pool_, layout);
2445
2446 if (set == nullptr) {
2447 RhiResult status = new_descriptor_pool();
2448 // Allocating new descriptor pool failed
2449 if (status != RhiResult::success) {
2450 return {status, nullptr};
2451 }
2452 set = vkapi::allocate_descriptor_sets(desc_pool_, layout);
2453 }
2454
2455 return {RhiResult::success, set};
2456}
2457
2458void VulkanDevice::create_vma_allocator() {
2459 VmaAllocatorCreateInfo allocatorInfo = {};
2460 allocatorInfo.vulkanApiVersion = vk_caps().vk_api_version;
2461 allocatorInfo.physicalDevice = physical_device_;
2462 allocatorInfo.device = device_;
2463 allocatorInfo.instance = instance_;
2464
2465 VolkDeviceTable table;
2466 VmaVulkanFunctions vk_vma_functions{nullptr};
2467
2468 volkLoadDeviceTable(&table, device_);
2469 vk_vma_functions.vkGetPhysicalDeviceProperties =
2470 PFN_vkGetPhysicalDeviceProperties(vkGetInstanceProcAddr(
2471 volkGetLoadedInstance(), "vkGetPhysicalDeviceProperties"));
2472 vk_vma_functions.vkGetPhysicalDeviceMemoryProperties =
2473 PFN_vkGetPhysicalDeviceMemoryProperties(vkGetInstanceProcAddr(
2474 volkGetLoadedInstance(), "vkGetPhysicalDeviceMemoryProperties"));
2475 vk_vma_functions.vkAllocateMemory = table.vkAllocateMemory;
2476 vk_vma_functions.vkFreeMemory = table.vkFreeMemory;
2477 vk_vma_functions.vkMapMemory = table.vkMapMemory;
2478 vk_vma_functions.vkUnmapMemory = table.vkUnmapMemory;
2479 vk_vma_functions.vkFlushMappedMemoryRanges = table.vkFlushMappedMemoryRanges;
2480 vk_vma_functions.vkInvalidateMappedMemoryRanges =
2481 table.vkInvalidateMappedMemoryRanges;
2482 vk_vma_functions.vkBindBufferMemory = table.vkBindBufferMemory;
2483 vk_vma_functions.vkBindImageMemory = table.vkBindImageMemory;
2484 vk_vma_functions.vkGetBufferMemoryRequirements =
2485 table.vkGetBufferMemoryRequirements;
2486 vk_vma_functions.vkGetImageMemoryRequirements =
2487 table.vkGetImageMemoryRequirements;
2488 vk_vma_functions.vkCreateBuffer = table.vkCreateBuffer;
2489 vk_vma_functions.vkDestroyBuffer = table.vkDestroyBuffer;
2490 vk_vma_functions.vkCreateImage = table.vkCreateImage;
2491 vk_vma_functions.vkDestroyImage = table.vkDestroyImage;
2492 vk_vma_functions.vkCmdCopyBuffer = table.vkCmdCopyBuffer;
2493 vk_vma_functions.vkGetBufferMemoryRequirements2KHR =
2494 table.vkGetBufferMemoryRequirements2KHR;
2495 vk_vma_functions.vkGetImageMemoryRequirements2KHR =
2496 table.vkGetImageMemoryRequirements2KHR;
2497 vk_vma_functions.vkBindBufferMemory2KHR = table.vkBindBufferMemory2KHR;
2498 vk_vma_functions.vkBindImageMemory2KHR = table.vkBindImageMemory2KHR;
2499 vk_vma_functions.vkGetPhysicalDeviceMemoryProperties2KHR =
2500 (PFN_vkGetPhysicalDeviceMemoryProperties2KHR)(std::max(
2501 vkGetInstanceProcAddr(volkGetLoadedInstance(),
2502 "vkGetPhysicalDeviceMemoryProperties2KHR"),
2503 vkGetInstanceProcAddr(volkGetLoadedInstance(),
2504 "vkGetPhysicalDeviceMemoryProperties2")));
2505 vk_vma_functions.vkGetDeviceBufferMemoryRequirements =
2506 table.vkGetDeviceBufferMemoryRequirements;
2507 vk_vma_functions.vkGetDeviceImageMemoryRequirements =
2508 table.vkGetDeviceImageMemoryRequirements;
2509
2510 allocatorInfo.pVulkanFunctions = &vk_vma_functions;
2511
2512 if (get_caps().get(DeviceCapability::spirv_has_physical_storage_buffer)) {
2513 allocatorInfo.flags |= VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT;
2514 }
2515
2516 vmaCreateAllocator(&allocatorInfo, &allocator_);
2517
2518 VkPhysicalDeviceMemoryProperties properties;
2519 vkGetPhysicalDeviceMemoryProperties(physical_device_, &properties);
2520
2521 std::vector<VkExternalMemoryHandleTypeFlags> flags(
2522 properties.memoryTypeCount);
2523
2524 for (int i = 0; i < properties.memoryTypeCount; i++) {
2525 auto flag = properties.memoryTypes[i].propertyFlags;
2526 if (flag & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {
2527#ifdef _WIN64
2528 flags[i] = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
2529#else
2530 flags[i] = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
2531#endif
2532 } else {
2533 flags[i] = 0;
2534 }
2535 }
2536
2537 allocatorInfo.pTypeExternalMemoryHandleTypes = flags.data();
2538
2539 vmaCreateAllocator(&allocatorInfo, &allocator_export_);
2540}
2541
2542RhiResult VulkanDevice::new_descriptor_pool() {
2543 std::vector<VkDescriptorPoolSize> pool_sizes{
2544 {VK_DESCRIPTOR_TYPE_SAMPLER, 64},
2545 {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 256},
2546 {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 256},
2547 {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 256},
2548 {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, 256},
2549 {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, 256},
2550 {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 256},
2551 {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 512},
2552 {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 128},
2553 {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, 128},
2554 {VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, 128}};
2555 VkDescriptorPoolCreateInfo pool_info = {};
2556 pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
2557 pool_info.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
2558 pool_info.maxSets = 64;
2559 pool_info.poolSizeCount = pool_sizes.size();
2560 pool_info.pPoolSizes = pool_sizes.data();
2561 auto new_desc_pool = vkapi::create_descriptor_pool(device_, &pool_info);
2562
2563 if (!new_desc_pool) {
2564 return RhiResult::out_of_memory;
2565 }
2566
2567 desc_pool_ = new_desc_pool;
2568
2569 return RhiResult::success;
2570}
2571
2572VkPresentModeKHR choose_swap_present_mode(
2573 const std::vector<VkPresentModeKHR> &available_present_modes,
2574 bool vsync,
2575 bool adaptive) {
2576 if (vsync) {
2577 if (adaptive) {
2578 for (const auto &available_present_mode : available_present_modes) {
2579 if (available_present_mode == VK_PRESENT_MODE_FIFO_RELAXED_KHR) {
2580 return available_present_mode;
2581 }
2582 }
2583 }
2584 for (const auto &available_present_mode : available_present_modes) {
2585 if (available_present_mode == VK_PRESENT_MODE_FIFO_KHR) {
2586 return available_present_mode;
2587 }
2588 }
2589 } else {
2590 for (const auto &available_present_mode : available_present_modes) {
2591 if (available_present_mode == VK_PRESENT_MODE_MAILBOX_KHR) {
2592 return available_present_mode;
2593 }
2594 }
2595 for (const auto &available_present_mode : available_present_modes) {
2596 if (available_present_mode == VK_PRESENT_MODE_IMMEDIATE_KHR) {
2597 return available_present_mode;
2598 }
2599 }
2600 }
2601
2602 if (available_present_modes.size() == 0) {
2603 throw std::runtime_error("no avialble present modes");
2604 }
2605
2606 return available_present_modes[0];
2607}
2608
// Creates a presentation surface for `device`. With a window handle, a real
// Vulkan swapchain is built (adopting a pre-existing native VkSurfaceKHR if
// the config supplies one); without a window, two plain rgba8 images serve
// as a headless "swapchain" for offscreen rendering.
VulkanSurface::VulkanSurface(VulkanDevice *device, const SurfaceConfig &config)
    : config_(config), device_(device) {
#ifdef ANDROID
  window_ = (ANativeWindow *)config.window_handle;
#else
  window_ = (GLFWwindow *)config.window_handle;
#endif
  if (window_) {
    if (config.native_surface_handle) {
      // The embedder already created a surface; adopt it as-is.
      surface_ = (VkSurfaceKHR)config.native_surface_handle;
    } else {
#ifdef ANDROID
      VkAndroidSurfaceCreateInfoKHR createInfo{
          .sType = VK_STRUCTURE_TYPE_ANDROID_SURFACE_CREATE_INFO_KHR,
          .pNext = nullptr,
          .flags = 0,
          .window = window_};

      vkCreateAndroidSurfaceKHR(device->vk_instance(), &createInfo, nullptr,
                                &surface_);
#else
      // Tell GLFW not to create an OpenGL context for this window.
      glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API);
      BAIL_ON_VK_BAD_RESULT_NO_RETURN(
          glfwCreateWindowSurface(device->vk_instance(), window_, nullptr,
                                  &surface_),
          "Failed to create window surface ({})");
#endif
    }

    create_swap_chain();

    // Semaphore signaled when the next swapchain image becomes available
    // (waited on by presentation; see acquire_next_image()).
    image_available_ = vkapi::create_semaphore(device->vk_device(), 0);
  } else {
    // Headless mode: no real swapchain. Create two device-owned images that
    // acquire_next_image() alternates between.
    ImageParams params = {ImageDimension::d2D,
                          BufferFormat::rgba8,
                          ImageLayout::present_src,
                          config.width,
                          config.height,
                          1,
                          false};
    // screenshot_image_ = device->create_image(params);
    swapchain_images_.push_back(device->create_image(params));
    swapchain_images_.push_back(device->create_image(params));
    width_ = config.width;
    height_ = config.height;
  }
}
2656
2657void VulkanSurface::create_swap_chain() {
2658 auto choose_surface_format =
2659 [](const std::vector<VkSurfaceFormatKHR> &availableFormats) {
2660 for (const auto &availableFormat : availableFormats) {
2661 if (availableFormat.format == VK_FORMAT_B8G8R8A8_UNORM &&
2662 availableFormat.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) {
2663 return availableFormat;
2664 }
2665 }
2666 return availableFormats[0];
2667 };
2668
2669 VkSurfaceCapabilitiesKHR capabilities;
2670 vkGetPhysicalDeviceSurfaceCapabilitiesKHR(device_->vk_physical_device(),
2671 surface_, &capabilities);
2672
2673 VkBool32 supported = false;
2674 vkGetPhysicalDeviceSurfaceSupportKHR(device_->vk_physical_device(),
2675 device_->graphics_queue_family_index(),
2676 surface_, &supported);
2677
2678 if (!supported) {
2679 RHI_LOG_ERROR("Selected queue does not support presenting");
2680 return;
2681 }
2682
2683 uint32_t formatCount;
2684 vkGetPhysicalDeviceSurfaceFormatsKHR(device_->vk_physical_device(), surface_,
2685 &formatCount, nullptr);
2686 std::vector<VkSurfaceFormatKHR> surface_formats(formatCount);
2687 vkGetPhysicalDeviceSurfaceFormatsKHR(device_->vk_physical_device(), surface_,
2688 &formatCount, surface_formats.data());
2689
2690 VkSurfaceFormatKHR surface_format = choose_surface_format(surface_formats);
2691
2692 uint32_t present_mode_count;
2693 std::vector<VkPresentModeKHR> present_modes;
2694 vkGetPhysicalDeviceSurfacePresentModesKHR(
2695 device_->vk_physical_device(), surface_, &present_mode_count, nullptr);
2696
2697 if (present_mode_count != 0) {
2698 present_modes.resize(present_mode_count);
2699 vkGetPhysicalDeviceSurfacePresentModesKHR(device_->vk_physical_device(),
2700 surface_, &present_mode_count,
2701 present_modes.data());
2702 }
2703 VkPresentModeKHR present_mode =
2704 choose_swap_present_mode(present_modes, config_.vsync, config_.adaptive);
2705
2706 int width, height;
2707#ifdef ANDROID
2708 width = ANativeWindow_getWidth(window_);
2709 height = ANativeWindow_getHeight(window_);
2710#else
2711 glfwGetFramebufferSize(window_, &width, &height);
2712#endif
2713
2714 VkExtent2D extent = {uint32_t(width), uint32_t(height)};
2715 extent.width =
2716 std::max(capabilities.minImageExtent.width,
2717 std::min(capabilities.maxImageExtent.width, extent.width));
2718 extent.height =
2719 std::max(capabilities.minImageExtent.height,
2720 std::min(capabilities.maxImageExtent.height, extent.height));
2721 {
2722 char msg_buf[512];
2723 RHI_DEBUG_SNPRINTF(msg_buf, sizeof(msg_buf), "Creating suface of %u x %u",
2724 extent.width, extent.height);
2725 RHI_LOG_DEBUG(msg_buf);
2726 }
2727 VkImageUsageFlags usage =
2728 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
2729
2730 this->width_ = extent.width;
2731 this->height_ = extent.height;
2732
2733 VkSwapchainCreateInfoKHR createInfo{};
2734 createInfo.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR;
2735 createInfo.pNext = nullptr;
2736 createInfo.flags = 0;
2737 createInfo.surface = surface_;
2738 createInfo.minImageCount = std::min<uint32_t>(capabilities.maxImageCount, 3);
2739 createInfo.imageFormat = surface_format.format;
2740 createInfo.imageColorSpace = surface_format.colorSpace;
2741 createInfo.imageExtent = extent;
2742 createInfo.imageArrayLayers = 1;
2743 createInfo.imageUsage = usage;
2744 createInfo.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
2745 createInfo.queueFamilyIndexCount = 0;
2746 createInfo.pQueueFamilyIndices = nullptr;
2747 createInfo.preTransform = capabilities.currentTransform;
2748 createInfo.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
2749 createInfo.presentMode = present_mode;
2750 createInfo.clipped = VK_TRUE;
2751 createInfo.oldSwapchain = VK_NULL_HANDLE;
2752
2753 if (vkCreateSwapchainKHR(device_->vk_device(), &createInfo,
2754 kNoVkAllocCallbacks, &swapchain_) != VK_SUCCESS) {
2755 RHI_LOG_ERROR("Failed to create swapchain");
2756 return;
2757 }
2758
2759 uint32_t num_images;
2760 vkGetSwapchainImagesKHR(device_->vk_device(), swapchain_, &num_images,
2761 nullptr);
2762 std::vector<VkImage> swapchain_images(num_images);
2763 vkGetSwapchainImagesKHR(device_->vk_device(), swapchain_, &num_images,
2764 swapchain_images.data());
2765
2766 auto [result, image_format] = buffer_format_vk_to_ti(surface_format.format);
2767 RHI_ASSERT(result == RhiResult::success);
2768 image_format_ = image_format;
2769
2770 for (VkImage img : swapchain_images) {
2771 vkapi::IVkImage image = vkapi::create_image(
2772 device_->vk_device(), img, surface_format.format, VK_IMAGE_TYPE_2D,
2773 VkExtent3D{uint32_t(width), uint32_t(height), 1}, 1u, 1u, usage);
2774
2775 VkImageViewCreateInfo create_info{};
2776 create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
2777 create_info.image = image->image;
2778 create_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
2779 create_info.format = image->format;
2780 create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
2781 create_info.subresourceRange.baseMipLevel = 0;
2782 create_info.subresourceRange.levelCount = 1;
2783 create_info.subresourceRange.baseArrayLayer = 0;
2784 create_info.subresourceRange.layerCount = 1;
2785
2786 vkapi::IVkImageView view =
2787 vkapi::create_image_view(device_->vk_device(), image, &create_info);
2788
2789 swapchain_images_.push_back(
2790 device_->import_vk_image(image, view, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR));
2791 }
2792}
2793
2794void VulkanSurface::destroy_swap_chain() {
2795 for (auto &alloc : swapchain_images_) {
2796 std::get<1>(device_->get_vk_image(alloc)) = nullptr;
2797 device_->destroy_image(alloc);
2798 }
2799 swapchain_images_.clear();
2800 vkDestroySwapchainKHR(device_->vk_device(), swapchain_, nullptr);
2801}
2802
2803int VulkanSurface::get_image_count() {
2804 return swapchain_images_.size();
2805}
2806
2807VulkanSurface::~VulkanSurface() {
2808 if (config_.window_handle) {
2809 destroy_swap_chain();
2810 image_available_ = nullptr;
2811 vkDestroySurfaceKHR(device_->vk_instance(), surface_, nullptr);
2812 } else {
2813 for (auto &img : swapchain_images_) {
2814 device_->destroy_image(img);
2815 }
2816 swapchain_images_.clear();
2817 }
2818}
2819
// Recreates the swapchain after a window resize. The `width`/`height`
// arguments are not read here: create_swap_chain() queries the new
// framebuffer size directly from the native window.
// NOTE(review): this assumes a windowed surface; in headless mode (no
// window_handle) there is no swapchain to destroy — confirm callers never
// resize a headless surface.
void VulkanSurface::resize(uint32_t width, uint32_t height) {
  destroy_swap_chain();
  create_swap_chain();
}
2824
2825std::pair<uint32_t, uint32_t> VulkanSurface::get_size() {
2826 return std::make_pair(width_, height_);
2827}
2828
// Advances to the next presentable image. Windowed mode returns a semaphore
// that is signaled when the image is actually ready; headless mode has
// nothing to wait on and returns nullptr.
StreamSemaphore VulkanSurface::acquire_next_image() {
  if (!config_.window_handle) {
    // Headless: just alternate between the two offscreen images.
    image_index_ = (image_index_ + 1) % uint32_t(swapchain_images_.size());
    return nullptr;
  } else {
    // NOTE(review): the VkResult is ignored here, so VK_ERROR_OUT_OF_DATE_KHR
    // / VK_SUBOPTIMAL_KHR are not handled — presumably swapchain recreation
    // is driven by resize() elsewhere; confirm.
    vkAcquireNextImageKHR(device_->vk_device(), swapchain_, UINT64_MAX,
                          image_available_->semaphore, VK_NULL_HANDLE,
                          &image_index_);
    return std::make_shared<VulkanStreamSemaphoreObject>(image_available_);
  }
}
2840
// Returns the image selected by the most recent acquire_next_image() call.
DeviceAllocation VulkanSurface::get_target_image() {
  return swapchain_images_[image_index_];
}
2844
// Pixel format of the swapchain images, set in create_swap_chain().
// NOTE(review): the headless constructor path does not visibly assign
// image_format_ even though it creates rgba8 images — confirm the member's
// default matches.
BufferFormat VulkanSurface::image_format() {
  return image_format_;
}
2848
2849void VulkanSurface::present_image(
2850 const std::vector<StreamSemaphore> &wait_semaphores) {
2851 std::vector<VkSemaphore> vk_wait_semaphores;
2852
2853 // Already transitioned to `present_src` at the end of the render pass.
2854 // device_->image_transition(get_target_image(),
2855 // ImageLayout::color_attachment,
2856 // ImageLayout::present_src);
2857
2858 for (const StreamSemaphore &sema_ : wait_semaphores) {
2859 auto sema = std::static_pointer_cast<VulkanStreamSemaphoreObject>(sema_);
2860 vk_wait_semaphores.push_back(sema->vkapi_ref->semaphore);
2861 }
2862
2863 VkPresentInfoKHR presentInfo{};
2864 presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
2865 presentInfo.waitSemaphoreCount = vk_wait_semaphores.size();
2866 presentInfo.pWaitSemaphores = vk_wait_semaphores.data();
2867 presentInfo.swapchainCount = 1;
2868 presentInfo.pSwapchains = &swapchain_;
2869 presentInfo.pImageIndices = &image_index_;
2870 presentInfo.pResults = nullptr;
2871
2872 vkQueuePresentKHR(device_->graphics_queue(), &presentInfo);
2873
2874 device_->wait_idle();
2875}
2876
// Copies the depth attachment `depth_alloc` into a lazily-allocated
// host-readable staging buffer and returns that buffer. Blocks until the
// copy finishes (submit_synced). The depth image is transitioned to
// transfer_src for the copy and back to depth_attachment afterwards.
DeviceAllocation VulkanSurface::get_depth_data(DeviceAllocation &depth_alloc) {
  auto *stream = device_->get_graphics_stream();

  auto [w, h] = get_size();
  // One 32-bit float per pixel.
  size_t size_bytes = size_t(w * h) * sizeof(float);

  if (!depth_buffer_) {
    // Staging buffer is created once and reused across calls.
    Device::AllocParams params{size_bytes, /*host_write*/ false,
                               /*host_read*/ true, /*export_sharing*/ false,
                               AllocUsage::Uniform};
    depth_buffer_ = device_->allocate_memory_unique(params);
  }

  BufferImageCopyParams copy_params;
  copy_params.image_extent.x = w;
  copy_params.image_extent.y = h;
  copy_params.image_aspect_flag = VK_IMAGE_ASPECT_DEPTH_BIT;
  auto [cmd_list, res] = stream->new_command_list_unique();
  assert(res == RhiResult::success && "Failed to allocate command list");
  cmd_list->image_transition(depth_alloc, ImageLayout::depth_attachment,
                             ImageLayout::transfer_src);
  cmd_list->image_to_buffer(depth_buffer_->get_ptr(), depth_alloc,
                            ImageLayout::transfer_src, copy_params);
  cmd_list->image_transition(depth_alloc, ImageLayout::transfer_src,
                             ImageLayout::depth_attachment);
  stream->submit_synced(cmd_list.get());

  return *depth_buffer_;
}
2906
// Copies the currently acquired swapchain image into a lazily-allocated
// host-readable staging buffer (e.g. for screenshots) and returns that
// buffer. Blocks until the copy finishes (submit_synced). The image is
// transitioned present_src -> transfer_src for the copy and back afterwards.
DeviceAllocation VulkanSurface::get_image_data() {
  auto *stream = device_->get_graphics_stream();
  DeviceAllocation img_alloc = swapchain_images_[image_index_];
  auto [w, h] = get_size();
  // Four bytes per pixel (rgba8).
  size_t size_bytes = size_t(w * h) * sizeof(uint8_t) * 4;

  /*
  if (screenshot_image_ == kDeviceNullAllocation) {
    ImageParams params = {ImageDimension::d2D,
                          BufferFormat::rgba8,
                          ImageLayout::transfer_dst,
                          w,
                          h,
                          1,
                          false};
    screenshot_image_ = device_->create_image(params);
  }
  */

  if (!screenshot_buffer_) {
    // Staging buffer is created once and reused across calls.
    Device::AllocParams params{size_bytes, /*host_write*/ false,
                               /*host_read*/ true, /*export_sharing*/ false,
                               AllocUsage::Uniform};
    screenshot_buffer_ = device_->allocate_memory_unique(params);
  }

  /*
  if (config_.window_handle) {
    // TODO: check if blit is supported, and use copy_image if not
    cmd_list = stream->new_command_list();
    cmd_list->blit_image(screenshot_image_, img_alloc,
                         ImageLayout::transfer_dst, ImageLayout::transfer_src,
                         {w, h, 1});
    cmd_list->image_transition(screenshot_image_, ImageLayout::transfer_dst,
                               ImageLayout::transfer_src);
    stream->submit_synced(cmd_list.get());
  }
  */

  BufferImageCopyParams copy_params;
  copy_params.image_extent.x = w;
  copy_params.image_extent.y = h;
  copy_params.image_aspect_flag = VK_IMAGE_ASPECT_COLOR_BIT;
  auto [cmd_list, res] = stream->new_command_list_unique();
  assert(res == RhiResult::success && "Failed to allocate command list");
  cmd_list->image_transition(img_alloc, ImageLayout::present_src,
                             ImageLayout::transfer_src);
  // TODO: directly map the image to cpu memory
  cmd_list->image_to_buffer(screenshot_buffer_->get_ptr(), img_alloc,
                            ImageLayout::transfer_src, copy_params);
  cmd_list->image_transition(img_alloc, ImageLayout::transfer_src,
                             ImageLayout::present_src);
  /*
  if (config_.window_handle) {
    cmd_list->image_transition(screenshot_image_, ImageLayout::transfer_src,
                               ImageLayout::transfer_dst);
  }
  */
  stream->submit_synced(cmd_list.get());

  return *screenshot_buffer_;
}
2969
// Wraps a Vulkan queue as a command stream. The command pool is created with
// RESET_COMMAND_BUFFER so individual command buffers can be reset and
// re-recorded.
VulkanStream::VulkanStream(VulkanDevice &device,
                           VkQueue queue,
                           uint32_t queue_family_index)
    : device_(device), queue_(queue), queue_family_index_(queue_family_index) {
  command_pool_ = vkapi::create_command_pool(
      device_.vk_device(), VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
      queue_family_index);
}
2978
VulkanStream::~VulkanStream() {
  // Intentionally empty: command_pool_ appears to be a vkapi-managed handle
  // released by its own destructor — confirm no explicit cleanup is needed.
}
2981
2982} // namespace vulkan
2983} // namespace taichi::lang
2984