1 | /* |
2 | * Copyright 2016-2021 The Brenwill Workshop Ltd. |
3 | * SPDX-License-Identifier: Apache-2.0 OR MIT |
4 | * |
5 | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | * you may not use this file except in compliance with the License. |
7 | * You may obtain a copy of the License at |
8 | * |
9 | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | * |
11 | * Unless required by applicable law or agreed to in writing, software |
12 | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | * See the License for the specific language governing permissions and |
15 | * limitations under the License. |
16 | */ |
17 | |
18 | /* |
19 | * At your option, you may choose to accept this material under either: |
20 | * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or |
21 | * 2. The MIT License, found at <http://opensource.org/licenses/MIT>. |
22 | */ |
23 | |
24 | #ifndef SPIRV_CROSS_MSL_HPP |
25 | #define SPIRV_CROSS_MSL_HPP |
26 | |
27 | #include "spirv_glsl.hpp" |
28 | #include <map> |
29 | #include <set> |
30 | #include <stddef.h> |
31 | #include <unordered_map> |
32 | #include <unordered_set> |
33 | |
34 | namespace SPIRV_CROSS_NAMESPACE |
35 | { |
36 | |
37 | // Indicates the format of a shader input. Currently limited to specifying |
38 | // if the input is an 8-bit unsigned integer, 16-bit unsigned integer, or |
39 | // some other format. |
40 | enum MSLShaderInputFormat |
41 | { |
42 | MSL_SHADER_INPUT_FORMAT_OTHER = 0, |
43 | MSL_SHADER_INPUT_FORMAT_UINT8 = 1, |
44 | MSL_SHADER_INPUT_FORMAT_UINT16 = 2, |
45 | MSL_SHADER_INPUT_FORMAT_ANY16 = 3, |
46 | MSL_SHADER_INPUT_FORMAT_ANY32 = 4, |
47 | |
48 | // Deprecated aliases. |
49 | MSL_VERTEX_FORMAT_OTHER = MSL_SHADER_INPUT_FORMAT_OTHER, |
50 | MSL_VERTEX_FORMAT_UINT8 = MSL_SHADER_INPUT_FORMAT_UINT8, |
51 | MSL_VERTEX_FORMAT_UINT16 = MSL_SHADER_INPUT_FORMAT_UINT16, |
52 | |
53 | MSL_SHADER_INPUT_FORMAT_INT_MAX = 0x7fffffff |
54 | }; |
55 | |
56 | // Defines MSL characteristics of an input variable at a particular location. |
57 | // After compilation, it is possible to query whether or not this location was used. |
58 | // If vecsize is nonzero, it must be greater than or equal to the vecsize declared in the shader, |
59 | // or behavior is undefined. |
60 | struct MSLShaderInput |
61 | { |
62 | uint32_t location = 0; |
63 | uint32_t component = 0; |
64 | MSLShaderInputFormat format = MSL_SHADER_INPUT_FORMAT_OTHER; |
65 | spv::BuiltIn builtin = spv::BuiltInMax; |
66 | uint32_t vecsize = 0; |
67 | }; |
68 | |
69 | // Matches the binding index of a MSL resource for a binding within a descriptor set. |
70 | // Taken together, the stage, desc_set and binding combine to form a reference to a resource |
71 | // descriptor used in a particular shading stage. The count field indicates the number of |
72 | // resources consumed by this binding, if the binding represents an array of resources. |
73 | // If the resource array is a run-time-sized array, which are legal in GLSL or SPIR-V, this value |
74 | // will be used to declare the array size in MSL, which does not support run-time-sized arrays. |
75 | // If pad_argument_buffer_resources is enabled, the base_type and count values are used to |
76 | // specify the base type and array size of the resource in the argument buffer, if that resource |
77 | // is not defined and used by the shader. With pad_argument_buffer_resources enabled, this |
78 | // information will be used to pad the argument buffer structure, in order to align that |
79 | // structure consistently for all uses, across all shaders, of the descriptor set represented |
80 | // by the arugment buffer. If pad_argument_buffer_resources is disabled, base_type does not |
81 | // need to be populated, and if the resource is also not a run-time sized array, the count |
82 | // field does not need to be populated. |
83 | // If using MSL 2.0 argument buffers, the descriptor set is not marked as a discrete descriptor set, |
84 | // and (for iOS only) the resource is not a storage image (sampled != 2), the binding reference we |
85 | // remap to will become an [[id(N)]] attribute within the "descriptor set" argument buffer structure. |
86 | // For resources which are bound in the "classic" MSL 1.0 way or discrete descriptors, the remap will |
87 | // become a [[buffer(N)]], [[texture(N)]] or [[sampler(N)]] depending on the resource types used. |
88 | struct MSLResourceBinding |
89 | { |
90 | spv::ExecutionModel stage = spv::ExecutionModelMax; |
91 | SPIRType::BaseType basetype = SPIRType::Unknown; |
92 | uint32_t desc_set = 0; |
93 | uint32_t binding = 0; |
94 | uint32_t count = 0; |
95 | uint32_t msl_buffer = 0; |
96 | uint32_t msl_texture = 0; |
97 | uint32_t msl_sampler = 0; |
98 | }; |
99 | |
100 | enum MSLSamplerCoord |
101 | { |
102 | MSL_SAMPLER_COORD_NORMALIZED = 0, |
103 | MSL_SAMPLER_COORD_PIXEL = 1, |
104 | MSL_SAMPLER_INT_MAX = 0x7fffffff |
105 | }; |
106 | |
107 | enum MSLSamplerFilter |
108 | { |
109 | MSL_SAMPLER_FILTER_NEAREST = 0, |
110 | MSL_SAMPLER_FILTER_LINEAR = 1, |
111 | MSL_SAMPLER_FILTER_INT_MAX = 0x7fffffff |
112 | }; |
113 | |
114 | enum MSLSamplerMipFilter |
115 | { |
116 | MSL_SAMPLER_MIP_FILTER_NONE = 0, |
117 | MSL_SAMPLER_MIP_FILTER_NEAREST = 1, |
118 | MSL_SAMPLER_MIP_FILTER_LINEAR = 2, |
119 | MSL_SAMPLER_MIP_FILTER_INT_MAX = 0x7fffffff |
120 | }; |
121 | |
122 | enum MSLSamplerAddress |
123 | { |
124 | MSL_SAMPLER_ADDRESS_CLAMP_TO_ZERO = 0, |
125 | MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE = 1, |
126 | MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER = 2, |
127 | MSL_SAMPLER_ADDRESS_REPEAT = 3, |
128 | MSL_SAMPLER_ADDRESS_MIRRORED_REPEAT = 4, |
129 | MSL_SAMPLER_ADDRESS_INT_MAX = 0x7fffffff |
130 | }; |
131 | |
132 | enum MSLSamplerCompareFunc |
133 | { |
134 | MSL_SAMPLER_COMPARE_FUNC_NEVER = 0, |
135 | MSL_SAMPLER_COMPARE_FUNC_LESS = 1, |
136 | MSL_SAMPLER_COMPARE_FUNC_LESS_EQUAL = 2, |
137 | MSL_SAMPLER_COMPARE_FUNC_GREATER = 3, |
138 | MSL_SAMPLER_COMPARE_FUNC_GREATER_EQUAL = 4, |
139 | MSL_SAMPLER_COMPARE_FUNC_EQUAL = 5, |
140 | MSL_SAMPLER_COMPARE_FUNC_NOT_EQUAL = 6, |
141 | MSL_SAMPLER_COMPARE_FUNC_ALWAYS = 7, |
142 | MSL_SAMPLER_COMPARE_FUNC_INT_MAX = 0x7fffffff |
143 | }; |
144 | |
145 | enum MSLSamplerBorderColor |
146 | { |
147 | MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK = 0, |
148 | MSL_SAMPLER_BORDER_COLOR_OPAQUE_BLACK = 1, |
149 | MSL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE = 2, |
150 | MSL_SAMPLER_BORDER_COLOR_INT_MAX = 0x7fffffff |
151 | }; |
152 | |
153 | enum MSLFormatResolution |
154 | { |
155 | MSL_FORMAT_RESOLUTION_444 = 0, |
156 | MSL_FORMAT_RESOLUTION_422, |
157 | MSL_FORMAT_RESOLUTION_420, |
158 | MSL_FORMAT_RESOLUTION_INT_MAX = 0x7fffffff |
159 | }; |
160 | |
161 | enum MSLChromaLocation |
162 | { |
163 | MSL_CHROMA_LOCATION_COSITED_EVEN = 0, |
164 | MSL_CHROMA_LOCATION_MIDPOINT, |
165 | MSL_CHROMA_LOCATION_INT_MAX = 0x7fffffff |
166 | }; |
167 | |
168 | enum MSLComponentSwizzle |
169 | { |
170 | MSL_COMPONENT_SWIZZLE_IDENTITY = 0, |
171 | MSL_COMPONENT_SWIZZLE_ZERO, |
172 | MSL_COMPONENT_SWIZZLE_ONE, |
173 | MSL_COMPONENT_SWIZZLE_R, |
174 | MSL_COMPONENT_SWIZZLE_G, |
175 | MSL_COMPONENT_SWIZZLE_B, |
176 | MSL_COMPONENT_SWIZZLE_A, |
177 | MSL_COMPONENT_SWIZZLE_INT_MAX = 0x7fffffff |
178 | }; |
179 | |
180 | enum MSLSamplerYCbCrModelConversion |
181 | { |
182 | MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY = 0, |
183 | MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY, |
184 | MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709, |
185 | MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601, |
186 | MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020, |
187 | MSL_SAMPLER_YCBCR_MODEL_CONVERSION_INT_MAX = 0x7fffffff |
188 | }; |
189 | |
190 | enum MSLSamplerYCbCrRange |
191 | { |
192 | MSL_SAMPLER_YCBCR_RANGE_ITU_FULL = 0, |
193 | MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW, |
194 | MSL_SAMPLER_YCBCR_RANGE_INT_MAX = 0x7fffffff |
195 | }; |
196 | |
197 | struct MSLConstexprSampler |
198 | { |
199 | MSLSamplerCoord coord = MSL_SAMPLER_COORD_NORMALIZED; |
200 | MSLSamplerFilter min_filter = MSL_SAMPLER_FILTER_NEAREST; |
201 | MSLSamplerFilter mag_filter = MSL_SAMPLER_FILTER_NEAREST; |
202 | MSLSamplerMipFilter mip_filter = MSL_SAMPLER_MIP_FILTER_NONE; |
203 | MSLSamplerAddress s_address = MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE; |
204 | MSLSamplerAddress t_address = MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE; |
205 | MSLSamplerAddress r_address = MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE; |
206 | MSLSamplerCompareFunc compare_func = MSL_SAMPLER_COMPARE_FUNC_NEVER; |
207 | MSLSamplerBorderColor border_color = MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK; |
208 | float lod_clamp_min = 0.0f; |
209 | float lod_clamp_max = 1000.0f; |
210 | int max_anisotropy = 1; |
211 | |
212 | // Sampler Y'CbCr conversion parameters |
213 | uint32_t planes = 0; |
214 | MSLFormatResolution resolution = MSL_FORMAT_RESOLUTION_444; |
215 | MSLSamplerFilter chroma_filter = MSL_SAMPLER_FILTER_NEAREST; |
216 | MSLChromaLocation x_chroma_offset = MSL_CHROMA_LOCATION_COSITED_EVEN; |
217 | MSLChromaLocation y_chroma_offset = MSL_CHROMA_LOCATION_COSITED_EVEN; |
218 | MSLComponentSwizzle swizzle[4]; // IDENTITY, IDENTITY, IDENTITY, IDENTITY |
219 | MSLSamplerYCbCrModelConversion ycbcr_model = MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY; |
220 | MSLSamplerYCbCrRange ycbcr_range = MSL_SAMPLER_YCBCR_RANGE_ITU_FULL; |
221 | uint32_t bpc = 8; |
222 | |
223 | bool compare_enable = false; |
224 | bool lod_clamp_enable = false; |
225 | bool anisotropy_enable = false; |
226 | bool ycbcr_conversion_enable = false; |
227 | |
228 | MSLConstexprSampler() |
229 | { |
230 | for (uint32_t i = 0; i < 4; i++) |
231 | swizzle[i] = MSL_COMPONENT_SWIZZLE_IDENTITY; |
232 | } |
233 | bool swizzle_is_identity() const |
234 | { |
235 | return (swizzle[0] == MSL_COMPONENT_SWIZZLE_IDENTITY && swizzle[1] == MSL_COMPONENT_SWIZZLE_IDENTITY && |
236 | swizzle[2] == MSL_COMPONENT_SWIZZLE_IDENTITY && swizzle[3] == MSL_COMPONENT_SWIZZLE_IDENTITY); |
237 | } |
238 | bool swizzle_has_one_or_zero() const |
239 | { |
240 | return (swizzle[0] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[0] == MSL_COMPONENT_SWIZZLE_ONE || |
241 | swizzle[1] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[1] == MSL_COMPONENT_SWIZZLE_ONE || |
242 | swizzle[2] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[2] == MSL_COMPONENT_SWIZZLE_ONE || |
243 | swizzle[3] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[3] == MSL_COMPONENT_SWIZZLE_ONE); |
244 | } |
245 | }; |
246 | |
247 | // Special constant used in a MSLResourceBinding desc_set |
248 | // element to indicate the bindings for the push constants. |
249 | // Kinda deprecated. Just use ResourceBindingPushConstant{DescriptorSet,Binding} directly. |
250 | static const uint32_t kPushConstDescSet = ResourceBindingPushConstantDescriptorSet; |
251 | |
252 | // Special constant used in a MSLResourceBinding binding |
253 | // element to indicate the bindings for the push constants. |
254 | // Kinda deprecated. Just use ResourceBindingPushConstant{DescriptorSet,Binding} directly. |
255 | static const uint32_t kPushConstBinding = ResourceBindingPushConstantBinding; |
256 | |
257 | // Special constant used in a MSLResourceBinding binding |
258 | // element to indicate the buffer binding for swizzle buffers. |
259 | static const uint32_t kSwizzleBufferBinding = ~(1u); |
260 | |
261 | // Special constant used in a MSLResourceBinding binding |
262 | // element to indicate the buffer binding for buffer size buffers to support OpArrayLength. |
263 | static const uint32_t kBufferSizeBufferBinding = ~(2u); |
264 | |
265 | // Special constant used in a MSLResourceBinding binding |
266 | // element to indicate the buffer binding used for the argument buffer itself. |
267 | // This buffer binding should be kept as small as possible as all automatic bindings for buffers |
268 | // will start at max(kArgumentBufferBinding) + 1. |
269 | static const uint32_t kArgumentBufferBinding = ~(3u); |
270 | |
271 | static const uint32_t kMaxArgumentBuffers = 8; |
272 | |
273 | // The arbitrary maximum for the nesting of array of array copies. |
274 | static const uint32_t kArrayCopyMultidimMax = 6; |
275 | |
276 | // Decompiles SPIR-V to Metal Shading Language |
277 | class CompilerMSL : public CompilerGLSL |
278 | { |
279 | public: |
280 | // Options for compiling to Metal Shading Language |
281 | struct Options |
282 | { |
283 | typedef enum |
284 | { |
285 | iOS = 0, |
286 | macOS = 1 |
287 | } Platform; |
288 | |
289 | Platform platform = macOS; |
290 | uint32_t msl_version = make_msl_version(1, 2); |
291 | uint32_t texel_buffer_texture_width = 4096; // Width of 2D Metal textures used as 1D texel buffers |
292 | uint32_t r32ui_linear_texture_alignment = 4; |
293 | uint32_t r32ui_alignment_constant_id = 65535; |
294 | uint32_t swizzle_buffer_index = 30; |
295 | uint32_t indirect_params_buffer_index = 29; |
296 | uint32_t shader_output_buffer_index = 28; |
297 | uint32_t shader_patch_output_buffer_index = 27; |
298 | uint32_t shader_tess_factor_buffer_index = 26; |
299 | uint32_t buffer_size_buffer_index = 25; |
300 | uint32_t view_mask_buffer_index = 24; |
301 | uint32_t dynamic_offsets_buffer_index = 23; |
302 | uint32_t shader_input_buffer_index = 22; |
303 | uint32_t shader_index_buffer_index = 21; |
304 | uint32_t shader_input_wg_index = 0; |
305 | uint32_t device_index = 0; |
306 | uint32_t enable_frag_output_mask = 0xffffffff; |
307 | // Metal doesn't allow setting a fixed sample mask directly in the pipeline. |
308 | // We can evade this restriction by ANDing the internal sample_mask output |
309 | // of the shader with the additional fixed sample mask. |
310 | uint32_t additional_fixed_sample_mask = 0xffffffff; |
311 | bool enable_point_size_builtin = true; |
312 | bool enable_frag_depth_builtin = true; |
313 | bool enable_frag_stencil_ref_builtin = true; |
314 | bool disable_rasterization = false; |
315 | bool capture_output_to_buffer = false; |
316 | bool swizzle_texture_samples = false; |
317 | bool tess_domain_origin_lower_left = false; |
318 | bool multiview = false; |
319 | bool multiview_layered_rendering = true; |
320 | bool view_index_from_device_index = false; |
321 | bool dispatch_base = false; |
322 | bool texture_1D_as_2D = false; |
323 | |
324 | // Enable use of MSL 2.0 indirect argument buffers. |
325 | // MSL 2.0 must also be enabled. |
326 | bool argument_buffers = false; |
327 | |
328 | // Ensures vertex and instance indices start at zero. This reflects the behavior of HLSL with SV_VertexID and SV_InstanceID. |
329 | bool enable_base_index_zero = false; |
330 | |
331 | // Fragment output in MSL must have at least as many components as the render pass. |
332 | // Add support to explicit pad out components. |
333 | bool pad_fragment_output_components = false; |
334 | |
335 | // Specifies whether the iOS target version supports the [[base_vertex]] and [[base_instance]] attributes. |
336 | bool ios_support_base_vertex_instance = false; |
337 | |
338 | // Use Metal's native frame-buffer fetch API for subpass inputs. |
339 | bool use_framebuffer_fetch_subpasses = false; |
340 | |
341 | // Enables use of "fma" intrinsic for invariant float math |
342 | bool invariant_float_math = false; |
343 | |
344 | // Emulate texturecube_array with texture2d_array for iOS where this type is not available |
345 | bool emulate_cube_array = false; |
346 | |
347 | // Allow user to enable decoration binding |
348 | bool enable_decoration_binding = false; |
349 | |
350 | // Requires MSL 2.1, use the native support for texel buffers. |
351 | bool texture_buffer_native = false; |
352 | |
353 | // Forces all resources which are part of an argument buffer to be considered active. |
354 | // This ensures ABI compatibility between shaders where some resources might be unused, |
355 | // and would otherwise declare a different IAB. |
356 | bool force_active_argument_buffer_resources = false; |
357 | |
358 | // Aligns each resource in an argument buffer to its assigned index value, id(N), |
359 | // by adding synthetic padding members in the argument buffer struct for any resources |
360 | // in the argument buffer that are not defined and used by the shader. This allows |
361 | // the shader to index into the correct argument in a descriptor set argument buffer |
362 | // that is shared across shaders, where not all resources in the argument buffer are |
363 | // defined in each shader. For this to work, an MSLResourceBinding must be provided for |
364 | // all descriptors in any descriptor set held in an argument buffer in the shader, and |
365 | // that MSLResourceBinding must have the basetype and count members populated correctly. |
366 | // The implementation here assumes any inline blocks in the argument buffer is provided |
367 | // in a Metal buffer, and doesn't take into consideration inline blocks that are |
368 | // optionally embedded directly into the argument buffer via add_inline_uniform_block(). |
369 | bool pad_argument_buffer_resources = false; |
370 | |
371 | // Forces the use of plain arrays, which works around certain driver bugs on certain versions |
372 | // of Intel Macbooks. See https://github.com/KhronosGroup/SPIRV-Cross/issues/1210. |
373 | // May reduce performance in scenarios where arrays are copied around as value-types. |
374 | bool force_native_arrays = false; |
375 | |
376 | // If a shader writes clip distance, also emit user varyings which |
377 | // can be read in subsequent stages. |
378 | bool enable_clip_distance_user_varying = true; |
379 | |
380 | // In a tessellation control shader, assume that more than one patch can be processed in a |
381 | // single workgroup. This requires changes to the way the InvocationId and PrimitiveId |
382 | // builtins are processed, but should result in more efficient usage of the GPU. |
383 | bool multi_patch_workgroup = false; |
384 | |
385 | // If set, a vertex shader will be compiled as part of a tessellation pipeline. |
386 | // It will be translated as a compute kernel, so it can use the global invocation ID |
387 | // to index the output buffer. |
388 | bool vertex_for_tessellation = false; |
389 | |
390 | // Assume that SubpassData images have multiple layers. Layered input attachments |
391 | // are addressed relative to the Layer output from the vertex pipeline. This option |
392 | // has no effect with multiview, since all input attachments are assumed to be layered |
393 | // and will be addressed using the current ViewIndex. |
394 | bool arrayed_subpass_input = false; |
395 | |
396 | // Whether to use SIMD-group or quadgroup functions to implement group nnon-uniform |
397 | // operations. Some GPUs on iOS do not support the SIMD-group functions, only the |
398 | // quadgroup functions. |
399 | bool ios_use_simdgroup_functions = false; |
400 | |
401 | // If set, the subgroup size will be assumed to be one, and subgroup-related |
402 | // builtins and operations will be emitted accordingly. This mode is intended to |
403 | // be used by MoltenVK on hardware/software configurations which do not provide |
404 | // sufficient support for subgroups. |
405 | bool emulate_subgroups = false; |
406 | |
407 | // If nonzero, a fixed subgroup size to assume. Metal, similarly to VK_EXT_subgroup_size_control, |
408 | // allows the SIMD-group size (aka thread execution width) to vary depending on |
409 | // register usage and requirements. In certain circumstances--for example, a pipeline |
410 | // in MoltenVK without VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT-- |
411 | // this is undesirable. This fixes the value of the SubgroupSize builtin, instead of |
412 | // mapping it to the Metal builtin [[thread_execution_width]]. If the thread |
413 | // execution width is reduced, the extra invocations will appear to be inactive. |
414 | // If zero, the SubgroupSize will be allowed to vary, and the builtin will be mapped |
415 | // to the Metal [[thread_execution_width]] builtin. |
416 | uint32_t fixed_subgroup_size = 0; |
417 | |
418 | enum class IndexType |
419 | { |
420 | None = 0, |
421 | UInt16 = 1, |
422 | UInt32 = 2 |
423 | }; |
424 | |
425 | // The type of index in the index buffer, if present. For a compute shader, Metal |
426 | // requires specifying the indexing at pipeline creation, rather than at draw time |
427 | // as with graphics pipelines. This means we must create three different pipelines, |
428 | // for no indexing, 16-bit indices, and 32-bit indices. Each requires different |
429 | // handling for the gl_VertexIndex builtin. We may as well, then, create three |
430 | // different shaders for these three scenarios. |
431 | IndexType vertex_index_type = IndexType::None; |
432 | |
433 | // If set, a dummy [[sample_id]] input is added to a fragment shader if none is present. |
434 | // This will force the shader to run at sample rate, assuming Metal does not optimize |
435 | // the extra threads away. |
436 | bool force_sample_rate_shading = false; |
437 | |
438 | bool is_ios() const |
439 | { |
440 | return platform == iOS; |
441 | } |
442 | |
443 | bool is_macos() const |
444 | { |
445 | return platform == macOS; |
446 | } |
447 | |
448 | void set_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) |
449 | { |
450 | msl_version = make_msl_version(major, minor, patch); |
451 | } |
452 | |
453 | bool supports_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) const |
454 | { |
455 | return msl_version >= make_msl_version(major, minor, patch); |
456 | } |
457 | |
458 | static uint32_t make_msl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) |
459 | { |
460 | return (major * 10000) + (minor * 100) + patch; |
461 | } |
462 | }; |
463 | |
464 | const Options &get_msl_options() const |
465 | { |
466 | return msl_options; |
467 | } |
468 | |
469 | void set_msl_options(const Options &opts) |
470 | { |
471 | msl_options = opts; |
472 | } |
473 | |
474 | // Provide feedback to calling API to allow runtime to disable pipeline |
475 | // rasterization if vertex shader requires rasterization to be disabled. |
476 | bool get_is_rasterization_disabled() const |
477 | { |
478 | return is_rasterization_disabled && (get_entry_point().model == spv::ExecutionModelVertex || |
479 | get_entry_point().model == spv::ExecutionModelTessellationControl || |
480 | get_entry_point().model == spv::ExecutionModelTessellationEvaluation); |
481 | } |
482 | |
483 | // Provide feedback to calling API to allow it to pass an auxiliary |
484 | // swizzle buffer if the shader needs it. |
485 | bool needs_swizzle_buffer() const |
486 | { |
487 | return used_swizzle_buffer; |
488 | } |
489 | |
490 | // Provide feedback to calling API to allow it to pass a buffer |
491 | // containing STORAGE_BUFFER buffer sizes to support OpArrayLength. |
492 | bool needs_buffer_size_buffer() const |
493 | { |
494 | return !buffers_requiring_array_length.empty(); |
495 | } |
496 | |
497 | // Provide feedback to calling API to allow it to pass a buffer |
498 | // containing the view mask for the current multiview subpass. |
499 | bool needs_view_mask_buffer() const |
500 | { |
501 | return msl_options.multiview && !msl_options.view_index_from_device_index; |
502 | } |
503 | |
504 | // Provide feedback to calling API to allow it to pass a buffer |
505 | // containing the dispatch base workgroup ID. |
506 | bool needs_dispatch_base_buffer() const |
507 | { |
508 | return msl_options.dispatch_base && !msl_options.supports_msl_version(1, 2); |
509 | } |
510 | |
511 | // Provide feedback to calling API to allow it to pass an output |
512 | // buffer if the shader needs it. |
513 | bool needs_output_buffer() const |
514 | { |
515 | return capture_output_to_buffer && stage_out_var_id != ID(0); |
516 | } |
517 | |
518 | // Provide feedback to calling API to allow it to pass a patch output |
519 | // buffer if the shader needs it. |
520 | bool needs_patch_output_buffer() const |
521 | { |
522 | return capture_output_to_buffer && patch_stage_out_var_id != ID(0); |
523 | } |
524 | |
525 | // Provide feedback to calling API to allow it to pass an input threadgroup |
526 | // buffer if the shader needs it. |
527 | bool needs_input_threadgroup_mem() const |
528 | { |
529 | return capture_output_to_buffer && stage_in_var_id != ID(0); |
530 | } |
531 | |
532 | explicit CompilerMSL(std::vector<uint32_t> spirv); |
533 | CompilerMSL(const uint32_t *ir, size_t word_count); |
534 | explicit CompilerMSL(const ParsedIR &ir); |
535 | explicit CompilerMSL(ParsedIR &&ir); |
536 | |
537 | // input is a shader input description used to fix up shader input variables. |
538 | // If shader inputs are provided, is_msl_shader_input_used() will return true after |
539 | // calling ::compile() if the location was used by the MSL code. |
540 | void add_msl_shader_input(const MSLShaderInput &input); |
541 | |
542 | // resource is a resource binding to indicate the MSL buffer, |
543 | // texture or sampler index to use for a particular SPIR-V description set |
544 | // and binding. If resource bindings are provided, |
545 | // is_msl_resource_binding_used() will return true after calling ::compile() if |
546 | // the set/binding combination was used by the MSL code. |
547 | void add_msl_resource_binding(const MSLResourceBinding &resource); |
548 | |
549 | // desc_set and binding are the SPIR-V descriptor set and binding of a buffer resource |
550 | // in this shader. index is the index within the dynamic offset buffer to use. This |
551 | // function marks that resource as using a dynamic offset (VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC |
552 | // or VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC). This function only has any effect if argument buffers |
553 | // are enabled. If so, the buffer will have its address adjusted at the beginning of the shader with |
554 | // an offset taken from the dynamic offset buffer. |
555 | void add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index); |
556 | |
557 | // desc_set and binding are the SPIR-V descriptor set and binding of a buffer resource |
558 | // in this shader. This function marks that resource as an inline uniform block |
559 | // (VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT). This function only has any effect if argument buffers |
560 | // are enabled. If so, the buffer block will be directly embedded into the argument |
561 | // buffer, instead of being referenced indirectly via pointer. |
562 | void add_inline_uniform_block(uint32_t desc_set, uint32_t binding); |
563 | |
564 | // When using MSL argument buffers, we can force "classic" MSL 1.0 binding schemes for certain descriptor sets. |
565 | // This corresponds to VK_KHR_push_descriptor in Vulkan. |
566 | void add_discrete_descriptor_set(uint32_t desc_set); |
567 | |
568 | // If an argument buffer is large enough, it may need to be in the device storage space rather than |
569 | // constant. Opt-in to this behavior here on a per set basis. |
570 | void set_argument_buffer_device_address_space(uint32_t desc_set, bool device_storage); |
571 | |
572 | // Query after compilation is done. This allows you to check if an input location was used by the shader. |
573 | bool is_msl_shader_input_used(uint32_t location); |
574 | |
575 | // If not using add_msl_shader_input, it's possible |
576 | // that certain builtin attributes need to be automatically assigned locations. |
577 | // This is typical for tessellation builtin inputs such as tess levels, gl_Position, etc. |
578 | // This returns k_unknown_location if the location was explicitly assigned with |
579 | // add_msl_shader_input or the builtin is not used, otherwise returns N in [[attribute(N)]]. |
580 | uint32_t get_automatic_builtin_input_location(spv::BuiltIn builtin) const; |
581 | |
582 | // NOTE: Only resources which are remapped using add_msl_resource_binding will be reported here. |
583 | // Constexpr samplers are always assumed to be emitted. |
584 | // No specific MSLResourceBinding remapping is required for constexpr samplers as long as they are remapped |
585 | // by remap_constexpr_sampler(_by_binding). |
586 | bool is_msl_resource_binding_used(spv::ExecutionModel model, uint32_t set, uint32_t binding) const; |
587 | |
588 | // This must only be called after a successful call to CompilerMSL::compile(). |
589 | // For a variable resource ID obtained through reflection API, report the automatically assigned resource index. |
590 | // If the descriptor set was part of an argument buffer, report the [[id(N)]], |
591 | // or [[buffer/texture/sampler]] binding for other resources. |
592 | // If the resource was a combined image sampler, report the image binding here, |
593 | // use the _secondary version of this call to query the sampler half of the resource. |
594 | // If no binding exists, uint32_t(-1) is returned. |
595 | uint32_t get_automatic_msl_resource_binding(uint32_t id) const; |
596 | |
597 | // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers, in which case the |
598 | // sampler's binding is returned instead. For any other resource type, -1 is returned. |
599 | // Secondary bindings are also used for the auxillary image atomic buffer. |
600 | uint32_t get_automatic_msl_resource_binding_secondary(uint32_t id) const; |
601 | |
602 | // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers for multiplanar images, |
603 | // in which case the second plane's binding is returned instead. For any other resource type, -1 is returned. |
604 | uint32_t get_automatic_msl_resource_binding_tertiary(uint32_t id) const; |
605 | |
606 | // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers for triplanar images, |
607 | // in which case the third plane's binding is returned instead. For any other resource type, -1 is returned. |
608 | uint32_t get_automatic_msl_resource_binding_quaternary(uint32_t id) const; |
609 | |
610 | // Compiles the SPIR-V code into Metal Shading Language. |
611 | std::string compile() override; |
612 | |
613 | // Remap a sampler with ID to a constexpr sampler. |
614 | // Older iOS targets must use constexpr samplers in certain cases (PCF), |
615 | // so a static sampler must be used. |
616 | // The sampler will not consume a binding, but be declared in the entry point as a constexpr sampler. |
617 | // This can be used on both combined image/samplers (sampler2D) or standalone samplers. |
618 | // The remapped sampler must not be an array of samplers. |
619 | // Prefer remap_constexpr_sampler_by_binding unless you're also doing reflection anyways. |
620 | void remap_constexpr_sampler(VariableID id, const MSLConstexprSampler &sampler); |
621 | |
622 | // Same as remap_constexpr_sampler, except you provide set/binding, rather than variable ID. |
623 | // Remaps based on ID take priority over set/binding remaps. |
624 | void remap_constexpr_sampler_by_binding(uint32_t desc_set, uint32_t binding, const MSLConstexprSampler &sampler); |
625 | |
626 | // If using CompilerMSL::Options::pad_fragment_output_components, override the number of components we expect |
627 | // to use for a particular location. The default is 4 if number of components is not overridden. |
628 | void set_fragment_output_components(uint32_t location, uint32_t components); |
629 | |
630 | void set_combined_sampler_suffix(const char *suffix); |
631 | const char *get_combined_sampler_suffix() const; |
632 | |
633 | protected: |
634 | // An enum of SPIR-V functions that are implemented in additional |
635 | // source code that is added to the shader if necessary. |
636 | enum SPVFuncImpl |
637 | { |
638 | SPVFuncImplNone, |
639 | SPVFuncImplMod, |
640 | SPVFuncImplRadians, |
641 | SPVFuncImplDegrees, |
642 | SPVFuncImplFindILsb, |
643 | SPVFuncImplFindSMsb, |
644 | SPVFuncImplFindUMsb, |
645 | SPVFuncImplSSign, |
646 | SPVFuncImplArrayCopyMultidimBase, |
647 | // Unfortunately, we cannot use recursive templates in the MSL compiler properly, |
648 | // so stamp out variants up to some arbitrary maximum. |
649 | SPVFuncImplArrayCopy = SPVFuncImplArrayCopyMultidimBase + 1, |
650 | SPVFuncImplArrayOfArrayCopy2Dim = SPVFuncImplArrayCopyMultidimBase + 2, |
651 | SPVFuncImplArrayOfArrayCopy3Dim = SPVFuncImplArrayCopyMultidimBase + 3, |
652 | SPVFuncImplArrayOfArrayCopy4Dim = SPVFuncImplArrayCopyMultidimBase + 4, |
653 | SPVFuncImplArrayOfArrayCopy5Dim = SPVFuncImplArrayCopyMultidimBase + 5, |
654 | SPVFuncImplArrayOfArrayCopy6Dim = SPVFuncImplArrayCopyMultidimBase + 6, |
655 | SPVFuncImplTexelBufferCoords, |
656 | SPVFuncImplImage2DAtomicCoords, // Emulate texture2D atomic operations |
657 | SPVFuncImplFMul, |
658 | SPVFuncImplFAdd, |
659 | SPVFuncImplFSub, |
660 | SPVFuncImplQuantizeToF16, |
661 | SPVFuncImplCubemapTo2DArrayFace, |
662 | SPVFuncImplUnsafeArray, // Allow Metal to use the array<T> template to make arrays a value type |
663 | SPVFuncImplInverse4x4, |
664 | SPVFuncImplInverse3x3, |
665 | SPVFuncImplInverse2x2, |
666 | // It is very important that this come before *Swizzle and ChromaReconstruct*, to ensure it's |
667 | // emitted before them. |
668 | SPVFuncImplForwardArgs, |
669 | // Likewise, this must come before *Swizzle. |
670 | SPVFuncImplGetSwizzle, |
671 | SPVFuncImplTextureSwizzle, |
672 | SPVFuncImplGatherSwizzle, |
673 | SPVFuncImplGatherCompareSwizzle, |
674 | SPVFuncImplSubgroupBroadcast, |
675 | SPVFuncImplSubgroupBroadcastFirst, |
676 | SPVFuncImplSubgroupBallot, |
677 | , |
678 | SPVFuncImplSubgroupBallotFindLSB, |
679 | SPVFuncImplSubgroupBallotFindMSB, |
680 | SPVFuncImplSubgroupBallotBitCount, |
681 | SPVFuncImplSubgroupAllEqual, |
682 | SPVFuncImplSubgroupShuffle, |
683 | SPVFuncImplSubgroupShuffleXor, |
684 | SPVFuncImplSubgroupShuffleUp, |
685 | SPVFuncImplSubgroupShuffleDown, |
686 | SPVFuncImplQuadBroadcast, |
687 | SPVFuncImplQuadSwap, |
688 | SPVFuncImplReflectScalar, |
689 | SPVFuncImplRefractScalar, |
690 | SPVFuncImplFaceForwardScalar, |
691 | SPVFuncImplChromaReconstructNearest2Plane, |
692 | SPVFuncImplChromaReconstructNearest3Plane, |
693 | SPVFuncImplChromaReconstructLinear422CositedEven2Plane, |
694 | SPVFuncImplChromaReconstructLinear422CositedEven3Plane, |
695 | SPVFuncImplChromaReconstructLinear422Midpoint2Plane, |
696 | SPVFuncImplChromaReconstructLinear422Midpoint3Plane, |
697 | SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane, |
698 | SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane, |
699 | SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane, |
700 | SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane, |
701 | SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane, |
702 | SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane, |
703 | SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane, |
704 | SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane, |
705 | SPVFuncImplExpandITUFullRange, |
706 | SPVFuncImplExpandITUNarrowRange, |
707 | SPVFuncImplConvertYCbCrBT709, |
708 | SPVFuncImplConvertYCbCrBT601, |
709 | SPVFuncImplConvertYCbCrBT2020, |
710 | SPVFuncImplDynamicImageSampler, |
711 | }; |
712 | |
713 | // If the underlying resource has been used for comparison then duplicate loads of that resource must be too |
714 | // Use Metal's native frame-buffer fetch API for subpass inputs. |
715 | void emit_texture_op(const Instruction &i, bool sparse) override; |
716 | void emit_binary_unord_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); |
717 | void emit_instruction(const Instruction &instr) override; |
718 | void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, |
719 | uint32_t count) override; |
720 | void emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t result_id, uint32_t op, |
721 | const uint32_t *args, uint32_t count) override; |
722 | void () override; |
723 | void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override; |
724 | void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override; |
725 | void emit_subgroup_op(const Instruction &i) override; |
726 | std::string to_texture_op(const Instruction &i, bool sparse, bool *forward, |
727 | SmallVector<uint32_t> &inherited_expressions) override; |
728 | void emit_fixup() override; |
729 | std::string to_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, |
730 | const std::string &qualifier = "" ); |
731 | void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, |
732 | const std::string &qualifier = "" , uint32_t base_offset = 0) override; |
733 | void emit_struct_padding_target(const SPIRType &type) override; |
734 | std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override; |
735 | void emit_block_hints(const SPIRBlock &block) override; |
736 | |
737 | // Allow Metal to use the array<T> template to make arrays a value type |
738 | std::string type_to_array_glsl(const SPIRType &type) override; |
739 | std::string constant_op_expression(const SPIRConstantOp &cop) override; |
740 | |
741 | // Threadgroup arrays can't have a wrapper type |
742 | std::string variable_decl(const SPIRVariable &variable) override; |
743 | |
744 | bool variable_decl_is_remapped_storage(const SPIRVariable &variable, spv::StorageClass storage) const override; |
745 | |
746 | // GCC workaround of lambdas calling protected functions (for older GCC versions) |
747 | std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0) override; |
748 | |
749 | std::string image_type_glsl(const SPIRType &type, uint32_t id = 0) override; |
750 | std::string sampler_type(const SPIRType &type, uint32_t id); |
751 | std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) override; |
752 | std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) override; |
753 | std::string to_name(uint32_t id, bool allow_alias = true) const override; |
754 | std::string to_function_name(const TextureFunctionNameArguments &args) override; |
755 | std::string to_function_args(const TextureFunctionArguments &args, bool *p_forward) override; |
756 | std::string to_initializer_expression(const SPIRVariable &var) override; |
757 | std::string to_zero_initialized_expression(uint32_t type_id) override; |
758 | |
759 | std::string unpack_expression_type(std::string expr_str, const SPIRType &type, uint32_t physical_type_id, |
760 | bool is_packed, bool row_major) override; |
761 | |
762 | // Returns true for BuiltInSampleMask because gl_SampleMask[] is an array in SPIR-V, but [[sample_mask]] is a scalar in Metal. |
763 | bool builtin_translates_to_nonarray(spv::BuiltIn builtin) const override; |
764 | |
765 | std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override; |
766 | bool emit_complex_bitcast(uint32_t result_id, uint32_t id, uint32_t op0) override; |
767 | bool skip_argument(uint32_t id) const override; |
768 | std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain) override; |
769 | std::string to_qualifiers_glsl(uint32_t id) override; |
770 | void replace_illegal_names() override; |
771 | void declare_undefined_values() override; |
772 | void declare_constant_arrays(); |
773 | |
774 | void replace_illegal_entry_point_names(); |
775 | void sync_entry_point_aliases_and_names(); |
776 | |
777 | static const std::unordered_set<std::string> &get_reserved_keyword_set(); |
778 | static const std::unordered_set<std::string> &get_illegal_func_names(); |
779 | |
780 | // Constant arrays of non-primitive types (i.e. matrices) won't link properly into Metal libraries |
781 | void declare_complex_constant_arrays(); |
782 | |
783 | bool is_patch_block(const SPIRType &type); |
784 | bool is_non_native_row_major_matrix(uint32_t id) override; |
785 | bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) override; |
786 | std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, uint32_t physical_type_id, |
787 | bool is_packed) override; |
788 | |
789 | void preprocess_op_codes(); |
790 | void localize_global_variables(); |
791 | void (); |
792 | void mark_packable_structs(); |
793 | void mark_as_packable(SPIRType &type); |
794 | |
795 | std::unordered_map<uint32_t, std::set<uint32_t>> function_global_vars; |
796 | void (uint32_t func_id, std::set<uint32_t> &added_arg_ids, |
797 | std::unordered_set<uint32_t> &global_var_ids, |
798 | std::unordered_set<uint32_t> &processed_func_ids); |
799 | uint32_t add_interface_block(spv::StorageClass storage, bool patch = false); |
800 | uint32_t add_interface_block_pointer(uint32_t ib_var_id, spv::StorageClass storage); |
801 | |
802 | struct InterfaceBlockMeta |
803 | { |
804 | struct LocationMeta |
805 | { |
806 | uint32_t base_type_id = 0; |
807 | uint32_t num_components = 0; |
808 | bool flat = false; |
809 | bool noperspective = false; |
810 | bool centroid = false; |
811 | bool sample = false; |
812 | }; |
813 | std::unordered_map<uint32_t, LocationMeta> location_meta; |
814 | bool strip_array = false; |
815 | bool allow_local_declaration = false; |
816 | }; |
817 | |
818 | std::string to_tesc_invocation_id(); |
819 | void emit_local_masked_variable(const SPIRVariable &masked_var, bool strip_array); |
820 | void add_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, SPIRType &ib_type, |
821 | SPIRVariable &var, InterfaceBlockMeta &meta); |
822 | void add_composite_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, |
823 | SPIRType &ib_type, SPIRVariable &var, InterfaceBlockMeta &meta); |
824 | void add_plain_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, |
825 | SPIRType &ib_type, SPIRVariable &var, InterfaceBlockMeta &meta); |
826 | bool add_component_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, |
827 | SPIRVariable &var, const SPIRType &type, |
828 | InterfaceBlockMeta &meta); |
829 | void add_plain_member_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, |
830 | SPIRType &ib_type, SPIRVariable &var, uint32_t index, |
831 | InterfaceBlockMeta &meta); |
832 | void add_composite_member_variable_to_interface_block(spv::StorageClass storage, const std::string &ib_var_ref, |
833 | SPIRType &ib_type, SPIRVariable &var, uint32_t index, |
834 | InterfaceBlockMeta &meta); |
835 | void add_tess_level_input_to_interface_block(const std::string &ib_var_ref, SPIRType &ib_type, SPIRVariable &var); |
836 | |
837 | void fix_up_interface_member_indices(spv::StorageClass storage, uint32_t ib_type_id); |
838 | |
839 | void mark_location_as_used_by_shader(uint32_t location, const SPIRType &type, |
840 | spv::StorageClass storage, bool fallback = false); |
841 | uint32_t ensure_correct_builtin_type(uint32_t type_id, spv::BuiltIn builtin); |
842 | uint32_t ensure_correct_input_type(uint32_t type_id, uint32_t location, uint32_t component, |
843 | uint32_t num_components, bool strip_array); |
844 | |
845 | void emit_custom_templates(); |
846 | void emit_custom_functions(); |
847 | void emit_resources(); |
848 | void emit_specialization_constants_and_structs(); |
849 | void emit_interface_block(uint32_t ib_var_id); |
850 | bool maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs); |
851 | uint32_t get_resource_array_size(uint32_t id) const; |
852 | |
853 | void fix_up_shader_inputs_outputs(); |
854 | |
855 | std::string func_type_decl(SPIRType &type); |
856 | std::string entry_point_args_classic(bool append_comma); |
857 | std::string entry_point_args_argument_buffer(bool append_comma); |
858 | std::string entry_point_arg_stage_in(); |
859 | void entry_point_args_builtin(std::string &args); |
860 | void entry_point_args_discrete_descriptors(std::string &args); |
861 | std::string to_qualified_member_name(const SPIRType &type, uint32_t index); |
862 | std::string ensure_valid_name(std::string name, std::string pfx); |
863 | std::string to_sampler_expression(uint32_t id); |
864 | std::string to_swizzle_expression(uint32_t id); |
865 | std::string to_buffer_size_expression(uint32_t id); |
866 | bool is_sample_rate() const; |
867 | bool is_intersection_query() const; |
868 | bool is_direct_input_builtin(spv::BuiltIn builtin); |
869 | std::string builtin_qualifier(spv::BuiltIn builtin); |
870 | std::string builtin_type_decl(spv::BuiltIn builtin, uint32_t id = 0); |
871 | std::string built_in_func_arg(spv::BuiltIn builtin, bool prefix_comma); |
872 | std::string member_attribute_qualifier(const SPIRType &type, uint32_t index); |
873 | std::string member_location_attribute_qualifier(const SPIRType &type, uint32_t index); |
874 | std::string argument_decl(const SPIRFunction::Parameter &arg); |
875 | const char *descriptor_address_space(uint32_t id, spv::StorageClass storage, const char *plain_address_space) const; |
876 | std::string round_fp_tex_coords(std::string tex_coords, bool coord_is_fp); |
877 | uint32_t get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype, uint32_t plane = 0); |
878 | uint32_t get_member_location(uint32_t type_id, uint32_t index, uint32_t *comp = nullptr) const; |
879 | uint32_t get_or_allocate_builtin_input_member_location(spv::BuiltIn builtin, |
880 | uint32_t type_id, uint32_t index, uint32_t *comp = nullptr); |
881 | |
882 | uint32_t get_physical_tess_level_array_size(spv::BuiltIn builtin) const; |
883 | |
884 | // MSL packing rules. These compute the effective packing rules as observed by the MSL compiler in the MSL output. |
885 | // These values can change depending on various extended decorations which control packing rules. |
886 | // We need to make these rules match up with SPIR-V declared rules. |
887 | uint32_t get_declared_type_size_msl(const SPIRType &type, bool packed, bool row_major) const; |
888 | uint32_t get_declared_type_array_stride_msl(const SPIRType &type, bool packed, bool row_major) const; |
889 | uint32_t get_declared_type_matrix_stride_msl(const SPIRType &type, bool packed, bool row_major) const; |
890 | uint32_t get_declared_type_alignment_msl(const SPIRType &type, bool packed, bool row_major) const; |
891 | |
892 | uint32_t get_declared_struct_member_size_msl(const SPIRType &struct_type, uint32_t index) const; |
893 | uint32_t get_declared_struct_member_array_stride_msl(const SPIRType &struct_type, uint32_t index) const; |
894 | uint32_t get_declared_struct_member_matrix_stride_msl(const SPIRType &struct_type, uint32_t index) const; |
895 | uint32_t get_declared_struct_member_alignment_msl(const SPIRType &struct_type, uint32_t index) const; |
896 | |
897 | uint32_t get_declared_input_size_msl(const SPIRType &struct_type, uint32_t index) const; |
898 | uint32_t get_declared_input_array_stride_msl(const SPIRType &struct_type, uint32_t index) const; |
899 | uint32_t get_declared_input_matrix_stride_msl(const SPIRType &struct_type, uint32_t index) const; |
900 | uint32_t get_declared_input_alignment_msl(const SPIRType &struct_type, uint32_t index) const; |
901 | |
902 | const SPIRType &get_physical_member_type(const SPIRType &struct_type, uint32_t index) const; |
903 | SPIRType get_presumed_input_type(const SPIRType &struct_type, uint32_t index) const; |
904 | |
905 | uint32_t get_declared_struct_size_msl(const SPIRType &struct_type, bool ignore_alignment = false, |
906 | bool ignore_padding = false) const; |
907 | |
908 | std::string to_component_argument(uint32_t id); |
909 | void align_struct(SPIRType &ib_type, std::unordered_set<uint32_t> &aligned_structs); |
910 | void mark_scalar_layout_structs(const SPIRType &ib_type); |
911 | void mark_struct_members_packed(const SPIRType &type); |
912 | void ensure_member_packing_rules_msl(SPIRType &ib_type, uint32_t index); |
913 | bool validate_member_packing_rules_msl(const SPIRType &type, uint32_t index) const; |
914 | std::string get_argument_address_space(const SPIRVariable &argument); |
915 | std::string get_type_address_space(const SPIRType &type, uint32_t id, bool argument = false); |
916 | const char *to_restrict(uint32_t id, bool space = true); |
917 | SPIRType &get_stage_in_struct_type(); |
918 | SPIRType &get_stage_out_struct_type(); |
919 | SPIRType &get_patch_stage_in_struct_type(); |
920 | SPIRType &get_patch_stage_out_struct_type(); |
921 | std::string get_tess_factor_struct_name(); |
922 | SPIRType &get_uint_type(); |
923 | uint32_t get_uint_type_id(); |
924 | void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, spv::Op opcode, |
925 | uint32_t mem_order_1, uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0, |
926 | bool op1_is_pointer = false, bool op1_is_literal = false, uint32_t op2 = 0); |
927 | const char *get_memory_order(uint32_t spv_mem_sem); |
928 | void add_pragma_line(const std::string &line); |
929 | void add_typedef_line(const std::string &line); |
930 | void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem); |
931 | void emit_array_copy(const std::string &lhs, uint32_t lhs_id, uint32_t rhs_id, |
932 | spv::StorageClass lhs_storage, spv::StorageClass rhs_storage) override; |
933 | void build_implicit_builtins(); |
934 | uint32_t build_constant_uint_array_pointer(); |
935 | void emit_entry_point_declarations() override; |
936 | bool uses_explicit_early_fragment_test(); |
937 | |
938 | uint32_t builtin_frag_coord_id = 0; |
939 | uint32_t builtin_sample_id_id = 0; |
940 | uint32_t builtin_sample_mask_id = 0; |
941 | uint32_t builtin_vertex_idx_id = 0; |
942 | uint32_t builtin_base_vertex_id = 0; |
943 | uint32_t builtin_instance_idx_id = 0; |
944 | uint32_t builtin_base_instance_id = 0; |
945 | uint32_t builtin_view_idx_id = 0; |
946 | uint32_t builtin_layer_id = 0; |
947 | uint32_t builtin_invocation_id_id = 0; |
948 | uint32_t builtin_primitive_id_id = 0; |
949 | uint32_t builtin_subgroup_invocation_id_id = 0; |
950 | uint32_t builtin_subgroup_size_id = 0; |
951 | uint32_t builtin_dispatch_base_id = 0; |
952 | uint32_t builtin_stage_input_size_id = 0; |
953 | uint32_t builtin_local_invocation_index_id = 0; |
954 | uint32_t builtin_workgroup_size_id = 0; |
955 | uint32_t swizzle_buffer_id = 0; |
956 | uint32_t buffer_size_buffer_id = 0; |
957 | uint32_t view_mask_buffer_id = 0; |
958 | uint32_t dynamic_offsets_buffer_id = 0; |
959 | uint32_t uint_type_id = 0; |
960 | uint32_t argument_buffer_padding_buffer_type_id = 0; |
961 | uint32_t argument_buffer_padding_image_type_id = 0; |
962 | uint32_t argument_buffer_padding_sampler_type_id = 0; |
963 | |
964 | bool does_shader_write_sample_mask = false; |
965 | |
966 | void cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override; |
967 | void cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override; |
968 | void emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) override; |
969 | |
970 | void analyze_sampled_image_usage(); |
971 | |
972 | bool access_chain_needs_stage_io_builtin_translation(uint32_t base) override; |
973 | void prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, spv::StorageClass storage, |
974 | bool &is_packed) override; |
975 | void fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t length); |
976 | bool emit_tessellation_access_chain(const uint32_t *ops, uint32_t length); |
977 | bool emit_tessellation_io_load(uint32_t result_type, uint32_t id, uint32_t ptr); |
978 | bool is_out_of_bounds_tessellation_level(uint32_t id_lhs); |
979 | |
980 | void ensure_builtin(spv::StorageClass storage, spv::BuiltIn builtin); |
981 | |
982 | void mark_implicit_builtin(spv::StorageClass storage, spv::BuiltIn builtin, uint32_t id); |
983 | |
984 | std::string convert_to_f32(const std::string &expr, uint32_t components); |
985 | |
986 | Options msl_options; |
987 | std::set<SPVFuncImpl> spv_function_implementations; |
988 | // Must be ordered to ensure declarations are in a specific order. |
989 | std::map<LocationComponentPair, MSLShaderInput> inputs_by_location; |
990 | std::unordered_map<uint32_t, MSLShaderInput> inputs_by_builtin; |
991 | std::unordered_set<uint32_t> location_inputs_in_use; |
992 | std::unordered_set<uint32_t> location_inputs_in_use_fallback; |
993 | std::unordered_map<uint32_t, uint32_t> fragment_output_components; |
994 | std::unordered_map<uint32_t, uint32_t> builtin_to_automatic_input_location; |
995 | std::set<std::string> pragma_lines; |
996 | std::set<std::string> typedef_lines; |
997 | SmallVector<uint32_t> vars_needing_early_declaration; |
998 | |
999 | std::unordered_map<StageSetBinding, std::pair<MSLResourceBinding, bool>, InternalHasher> resource_bindings; |
1000 | std::unordered_map<StageSetBinding, uint32_t, InternalHasher> resource_arg_buff_idx_to_binding_number; |
1001 | |
1002 | uint32_t next_metal_resource_index_buffer = 0; |
1003 | uint32_t next_metal_resource_index_texture = 0; |
1004 | uint32_t next_metal_resource_index_sampler = 0; |
1005 | // Intentionally uninitialized, works around MSVC 2013 bug. |
1006 | uint32_t next_metal_resource_ids[kMaxArgumentBuffers]; |
1007 | |
1008 | VariableID stage_in_var_id = 0; |
1009 | VariableID stage_out_var_id = 0; |
1010 | VariableID patch_stage_in_var_id = 0; |
1011 | VariableID patch_stage_out_var_id = 0; |
1012 | VariableID stage_in_ptr_var_id = 0; |
1013 | VariableID stage_out_ptr_var_id = 0; |
1014 | VariableID stage_out_masked_builtin_type_id = 0; |
1015 | |
1016 | // Handle HLSL-style 0-based vertex/instance index. |
1017 | enum class TriState |
1018 | { |
1019 | Neutral, |
1020 | No, |
1021 | Yes |
1022 | }; |
1023 | TriState needs_base_vertex_arg = TriState::Neutral; |
1024 | TriState needs_base_instance_arg = TriState::Neutral; |
1025 | |
1026 | bool has_sampled_images = false; |
1027 | bool builtin_declaration = false; // Handle HLSL-style 0-based vertex/instance index. |
1028 | |
1029 | bool is_using_builtin_array = false; // Force the use of C style array declaration. |
1030 | bool using_builtin_array() const; |
1031 | |
1032 | bool is_rasterization_disabled = false; |
1033 | bool capture_output_to_buffer = false; |
1034 | bool needs_swizzle_buffer_def = false; |
1035 | bool used_swizzle_buffer = false; |
1036 | bool added_builtin_tess_level = false; |
1037 | bool needs_subgroup_invocation_id = false; |
1038 | bool needs_subgroup_size = false; |
1039 | bool needs_sample_id = false; |
1040 | std::string qual_pos_var_name; |
1041 | std::string stage_in_var_name = "in" ; |
1042 | std::string stage_out_var_name = "out" ; |
1043 | std::string patch_stage_in_var_name = "patchIn" ; |
1044 | std::string patch_stage_out_var_name = "patchOut" ; |
1045 | std::string sampler_name_suffix = "Smplr" ; |
1046 | std::string swizzle_name_suffix = "Swzl" ; |
1047 | std::string buffer_size_name_suffix = "BufferSize" ; |
1048 | std::string plane_name_suffix = "Plane" ; |
1049 | std::string input_wg_var_name = "gl_in" ; |
1050 | std::string input_buffer_var_name = "spvIn" ; |
1051 | std::string output_buffer_var_name = "spvOut" ; |
1052 | std::string patch_output_buffer_var_name = "spvPatchOut" ; |
1053 | std::string tess_factor_buffer_var_name = "spvTessLevel" ; |
1054 | std::string index_buffer_var_name = "spvIndices" ; |
1055 | spv::Op previous_instruction_opcode = spv::OpNop; |
1056 | |
1057 | // Must be ordered since declaration is in a specific order. |
1058 | std::map<uint32_t, MSLConstexprSampler> constexpr_samplers_by_id; |
1059 | std::unordered_map<SetBindingPair, MSLConstexprSampler, InternalHasher> constexpr_samplers_by_binding; |
1060 | const MSLConstexprSampler *find_constexpr_sampler(uint32_t id) const; |
1061 | |
1062 | std::unordered_set<uint32_t> buffers_requiring_array_length; |
1063 | SmallVector<uint32_t> buffer_arrays; |
1064 | std::unordered_set<uint32_t> atomic_image_vars; // Emulate texture2D atomic operations |
1065 | std::unordered_set<uint32_t> pull_model_inputs; |
1066 | |
1067 | // Must be ordered since array is in a specific order. |
1068 | std::map<SetBindingPair, std::pair<uint32_t, uint32_t>> buffers_requiring_dynamic_offset; |
1069 | |
1070 | SmallVector<uint32_t> disabled_frag_outputs; |
1071 | |
1072 | std::unordered_set<SetBindingPair, InternalHasher> inline_uniform_blocks; |
1073 | |
1074 | uint32_t argument_buffer_ids[kMaxArgumentBuffers]; |
1075 | uint32_t argument_buffer_discrete_mask = 0; |
1076 | uint32_t argument_buffer_device_storage_mask = 0; |
1077 | |
1078 | void analyze_argument_buffers(); |
1079 | bool descriptor_set_is_argument_buffer(uint32_t desc_set) const; |
1080 | MSLResourceBinding &get_argument_buffer_resource(uint32_t desc_set, uint32_t arg_idx); |
1081 | void add_argument_buffer_padding_buffer_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind); |
1082 | void add_argument_buffer_padding_image_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind); |
1083 | void add_argument_buffer_padding_sampler_type(SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, MSLResourceBinding &rez_bind); |
1084 | void add_argument_buffer_padding_type(uint32_t mbr_type_id, SPIRType &struct_type, uint32_t &mbr_idx, uint32_t &arg_buff_index, uint32_t count); |
1085 | |
1086 | uint32_t get_target_components_for_fragment_location(uint32_t location) const; |
1087 | uint32_t build_extended_vector_type(uint32_t type_id, uint32_t components, |
1088 | SPIRType::BaseType basetype = SPIRType::Unknown); |
1089 | uint32_t build_msl_interpolant_type(uint32_t type_id, bool is_noperspective); |
1090 | |
1091 | bool suppress_missing_prototypes = false; |
1092 | |
1093 | void add_spv_func_and_recompile(SPVFuncImpl spv_func); |
1094 | |
1095 | void activate_argument_buffer_resources(); |
1096 | |
1097 | bool type_is_msl_framebuffer_fetch(const SPIRType &type) const; |
1098 | bool type_is_pointer(const SPIRType &type) const; |
1099 | bool type_is_pointer_to_pointer(const SPIRType &type) const; |
1100 | bool is_supported_argument_buffer_type(const SPIRType &type) const; |
1101 | |
1102 | bool variable_storage_requires_stage_io(spv::StorageClass storage) const; |
1103 | |
1104 | bool has_additional_fixed_sample_mask() const { return msl_options.additional_fixed_sample_mask != 0xffffffff; } |
1105 | std::string additional_fixed_sample_mask_str() const; |
1106 | |
1107 | // OpcodeHandler that handles several MSL preprocessing operations. |
1108 | struct OpCodePreprocessor : OpcodeHandler |
1109 | { |
1110 | OpCodePreprocessor(CompilerMSL &compiler_) |
1111 | : compiler(compiler_) |
1112 | { |
1113 | } |
1114 | |
1115 | bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; |
1116 | CompilerMSL::SPVFuncImpl get_spv_func_impl(spv::Op opcode, const uint32_t *args); |
1117 | void check_resource_write(uint32_t var_id); |
1118 | |
1119 | CompilerMSL &compiler; |
1120 | std::unordered_map<uint32_t, uint32_t> result_types; |
1121 | std::unordered_map<uint32_t, uint32_t> image_pointers; // Emulate texture2D atomic operations |
1122 | bool suppress_missing_prototypes = false; |
1123 | bool uses_atomics = false; |
1124 | bool uses_resource_write = false; |
1125 | bool needs_subgroup_invocation_id = false; |
1126 | bool needs_subgroup_size = false; |
1127 | bool needs_sample_id = false; |
1128 | }; |
1129 | |
1130 | // OpcodeHandler that scans for uses of sampled images |
1131 | struct SampledImageScanner : OpcodeHandler |
1132 | { |
1133 | SampledImageScanner(CompilerMSL &compiler_) |
1134 | : compiler(compiler_) |
1135 | { |
1136 | } |
1137 | |
1138 | bool handle(spv::Op opcode, const uint32_t *args, uint32_t) override; |
1139 | |
1140 | CompilerMSL &compiler; |
1141 | }; |
1142 | |
1143 | // Sorts the members of a SPIRType and associated Meta info based on a settable sorting |
1144 | // aspect, which defines which aspect of the struct members will be used to sort them. |
1145 | // Regardless of the sorting aspect, built-in members always appear at the end of the struct. |
1146 | struct MemberSorter |
1147 | { |
1148 | enum SortAspect |
1149 | { |
1150 | LocationThenBuiltInType, |
1151 | Offset |
1152 | }; |
1153 | |
1154 | void sort(); |
1155 | bool operator()(uint32_t mbr_idx1, uint32_t mbr_idx2); |
1156 | MemberSorter(SPIRType &t, Meta &m, SortAspect sa); |
1157 | |
1158 | SPIRType &type; |
1159 | Meta &meta; |
1160 | SortAspect sort_aspect; |
1161 | }; |
1162 | }; |
1163 | } // namespace SPIRV_CROSS_NAMESPACE |
1164 | |
1165 | #endif |
1166 | |