/*
 * Copyright 2015-2021 Arm Limited
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * At your option, you may choose to accept this material under either:
 *  1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
 *  2. The MIT License, found at <http://opensource.org/licenses/MIT>.
 */

#include "spirv_glsl.hpp"
#include "GLSL.std.450.h"
#include "spirv_common.hpp"
#include <algorithm>
#include <assert.h>
#include <cmath>
#include <limits>
#include <locale.h>
#include <utility>

#ifndef _WIN32
#include <langinfo.h>
#endif
#include <locale.h>

using namespace spv;
using namespace SPIRV_CROSS_NAMESPACE;
using namespace std;

enum ExtraSubExpressionType
{
    // Create masks above any legal ID range to allow multiple address spaces into the extra_sub_expressions map.
    EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET = 0x10000000,
    EXTRA_SUB_EXPRESSION_TYPE_AUX = 0x20000000
};

static bool is_unsigned_opcode(Op op)
{
    // Don't have to be exhaustive, only relevant for legacy target checking ...
    switch (op)
    {
    case OpShiftRightLogical:
    case OpUGreaterThan:
    case OpUGreaterThanEqual:
    case OpULessThan:
    case OpULessThanEqual:
    case OpUConvert:
    case OpUDiv:
    case OpUMod:
    case OpUMulExtended:
    case OpConvertUToF:
    case OpConvertFToU:
        return true;

    default:
        return false;
    }
}

static bool is_unsigned_glsl_opcode(GLSLstd450 op)
{
    // Don't have to be exhaustive, only relevant for legacy target checking ...
    switch (op)
    {
    case GLSLstd450UClamp:
    case GLSLstd450UMin:
    case GLSLstd450UMax:
    case GLSLstd450FindUMsb:
        return true;

    default:
        return false;
    }
}

static bool packing_is_vec4_padded(BufferPackingStandard packing)
{
    switch (packing)
    {
    case BufferPackingHLSLCbuffer:
    case BufferPackingHLSLCbufferPackOffset:
    case BufferPackingStd140:
    case BufferPackingStd140EnhancedLayout:
        return true;

    default:
        return false;
    }
}

static bool packing_is_hlsl(BufferPackingStandard packing)
{
    switch (packing)
    {
    case BufferPackingHLSLCbuffer:
    case BufferPackingHLSLCbufferPackOffset:
        return true;

    default:
        return false;
    }
}

static bool packing_has_flexible_offset(BufferPackingStandard packing)
{
    switch (packing)
    {
    case BufferPackingStd140:
    case BufferPackingStd430:
    case BufferPackingScalar:
    case BufferPackingHLSLCbuffer:
        return false;

    default:
        return true;
    }
}

static bool packing_is_scalar(BufferPackingStandard packing)
{
    switch (packing)
    {
    case BufferPackingScalar:
    case BufferPackingScalarEnhancedLayout:
        return true;

    default:
        return false;
    }
}

static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing)
{
    switch (packing)
    {
    case BufferPackingStd140EnhancedLayout:
        return BufferPackingStd140;
    case BufferPackingStd430EnhancedLayout:
        return BufferPackingStd430;
    case BufferPackingHLSLCbufferPackOffset:
        return BufferPackingHLSLCbuffer;
    case BufferPackingScalarEnhancedLayout:
        return BufferPackingScalar;
    default:
        return packing;
    }
}

void CompilerGLSL::init()
{
    if (ir.source.known)
    {
        options.es = ir.source.es;
        options.version = ir.source.version;
    }

    // Query the locale to see what the decimal point is.
    // We'll rely on fixing it up ourselves in the rare case we have a comma-as-decimal locale
    // rather than setting locales ourselves. Setting locales in a safe and isolated way is rather
    // tricky.
#ifdef _WIN32
    // On Windows, localeconv uses thread-local storage, so it should be fine.
    const struct lconv *conv = localeconv();
    if (conv && conv->decimal_point)
        current_locale_radix_character = *conv->decimal_point;
#elif defined(__ANDROID__) && __ANDROID_API__ < 26
    // nl_langinfo is not supported on this platform, fall back to the worse alternative.
    const struct lconv *conv = localeconv();
    if (conv && conv->decimal_point)
        current_locale_radix_character = *conv->decimal_point;
#else
    // localeconv, the portable function, is not MT safe ...
    const char *decimal_point = nl_langinfo(RADIXCHAR);
    if (decimal_point && *decimal_point != '\0')
        current_locale_radix_character = *decimal_point;
#endif
}

static const char *to_pls_layout(PlsFormat format)
{
    switch (format)
    {
    case PlsR11FG11FB10F:
        return "layout(r11f_g11f_b10f) ";
    case PlsR32F:
        return "layout(r32f) ";
    case PlsRG16F:
        return "layout(rg16f) ";
    case PlsRGB10A2:
        return "layout(rgb10_a2) ";
    case PlsRGBA8:
        return "layout(rgba8) ";
    case PlsRG16:
        return "layout(rg16) ";
    case PlsRGBA8I:
        return "layout(rgba8i)";
    case PlsRG16I:
        return "layout(rg16i) ";
    case PlsRGB10A2UI:
        return "layout(rgb10_a2ui) ";
    case PlsRGBA8UI:
        return "layout(rgba8ui) ";
    case PlsRG16UI:
        return "layout(rg16ui) ";
    case PlsR32UI:
        return "layout(r32ui) ";
    default:
        return "";
    }
}

static SPIRType::BaseType pls_format_to_basetype(PlsFormat format)
{
    switch (format)
    {
    default:
    case PlsR11FG11FB10F:
    case PlsR32F:
    case PlsRG16F:
    case PlsRGB10A2:
    case PlsRGBA8:
    case PlsRG16:
        return SPIRType::Float;

    case PlsRGBA8I:
    case PlsRG16I:
        return SPIRType::Int;

    case PlsRGB10A2UI:
    case PlsRGBA8UI:
    case PlsRG16UI:
    case PlsR32UI:
        return SPIRType::UInt;
    }
}

static uint32_t pls_format_to_components(PlsFormat format)
{
    switch (format)
    {
    default:
    case PlsR32F:
    case PlsR32UI:
        return 1;

    case PlsRG16F:
    case PlsRG16:
    case PlsRG16UI:
    case PlsRG16I:
        return 2;

    case PlsR11FG11FB10F:
        return 3;

    case PlsRGB10A2:
    case PlsRGBA8:
    case PlsRGBA8I:
    case PlsRGB10A2UI:
    case PlsRGBA8UI:
        return 4;
    }
}

const char *CompilerGLSL::vector_swizzle(int vecsize, int index)
{
    static const char *const swizzle[4][4] = {
        { ".x", ".y", ".z", ".w" },
        { ".xy", ".yz", ".zw", nullptr },
        { ".xyz", ".yzw", nullptr, nullptr },
#if defined(__GNUC__) && (__GNUC__ == 9)
        // This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947.
        // This array ends up being compiled as all nullptrs, tripping the assertions below.
        { "", nullptr, nullptr, "$" },
#else
        { "", nullptr, nullptr, nullptr },
#endif
    };

    assert(vecsize >= 1 && vecsize <= 4);
    assert(index >= 0 && index < 4);
    assert(swizzle[vecsize - 1][index]);

    return swizzle[vecsize - 1][index];
}

void CompilerGLSL::reset(uint32_t iteration_count)
{
    // Sanity check the iteration count to be robust against a certain class of bugs where
    // we keep forcing recompilations without making clear forward progress.
    // In buggy situations we will loop forever, or loop for an unbounded number of iterations.
    // Certain types of recompilations are considered to make forward progress,
    // but in almost all situations, we'll never see more than 3 iterations.
    // It is highly context-sensitive when we need to force recompilation,
    // and it is not practical with the current architecture
    // to resolve everything up front.
    if (iteration_count >= 3 && !is_force_recompile_forward_progress)
        SPIRV_CROSS_THROW("Over 3 compilation loops detected and no forward progress was made. Must be a bug!");

    // We do some speculative optimizations which should pretty much always work out,
    // but just in case the SPIR-V is rather weird, recompile until it's happy.
    // This typically only means one extra pass.
    clear_force_recompile();

    // Clear invalid expression tracking.
    invalid_expressions.clear();
    current_function = nullptr;

    // Clear temporary usage tracking.
    expression_usage_counts.clear();
    forwarded_temporaries.clear();
    suppressed_usage_tracking.clear();

    // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
    flushed_phi_variables.clear();

    reset_name_caches();

    ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) {
        func.active = false;
        func.flush_undeclared = true;
    });

    ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { var.dependees.clear(); });

    ir.reset_all_of_type<SPIRExpression>();
    ir.reset_all_of_type<SPIRAccessChain>();

    statement_count = 0;
    indent = 0;
    current_loop_level = 0;
}

void CompilerGLSL::remap_pls_variables()
{
    for (auto &input : pls_inputs)
    {
        auto &var = get<SPIRVariable>(input.id);

        bool input_is_target = false;
        if (var.storage == StorageClassUniformConstant)
        {
            auto &type = get<SPIRType>(var.basetype);
            input_is_target = type.image.dim == DimSubpassData;
        }

        if (var.storage != StorageClassInput && !input_is_target)
            SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs.");
        var.remapped_variable = true;
    }

    for (auto &output : pls_outputs)
    {
        auto &var = get<SPIRVariable>(output.id);
        if (var.storage != StorageClassOutput)
            SPIRV_CROSS_THROW("Can only use out variables for PLS outputs.");
        var.remapped_variable = true;
    }
}

void CompilerGLSL::remap_ext_framebuffer_fetch(uint32_t input_attachment_index, uint32_t color_location, bool coherent)
{
    subpass_to_framebuffer_fetch_attachment.push_back({ input_attachment_index, color_location });
    inout_color_attachments.push_back({ color_location, coherent });
}

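// inout_color_attachments holds (location, coherent) pairs registered via remap_ext_framebuffer_fetch().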
bool CompilerGLSL::location_is_framebuffer_fetch(uint32_t location) const
{
    return std::find_if(begin(inout_color_attachments), end(inout_color_attachments),
                        [&](const std::pair<uint32_t, bool> &elem) {
                            return elem.first == location;
                        }) != end(inout_color_attachments);
}

bool CompilerGLSL::location_is_non_coherent_framebuffer_fetch(uint32_t location) const
{
    return std::find_if(begin(inout_color_attachments), end(inout_color_attachments),
                        [&](const std::pair<uint32_t, bool> &elem) {
                            return elem.first == location && !elem.second;
                        }) != end(inout_color_attachments);
}

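// Scans declared types, capabilities and the execution model up front to figure out which
// GLSL extensions the emitted shader must require.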
void CompilerGLSL::find_static_extensions()
{
    ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
        if (type.basetype == SPIRType::Double)
        {
            if (options.es)
                SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
            if (!options.es && options.version < 400)
                require_extension_internal("GL_ARB_gpu_shader_fp64");
        }
        else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64)
        {
            if (options.es)
                SPIRV_CROSS_THROW("64-bit integers not supported in ES profile.");
            if (!options.es)
                require_extension_internal("GL_ARB_gpu_shader_int64");
        }
        else if (type.basetype == SPIRType::Half)
        {
            require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_float16");
            if (options.vulkan_semantics)
                require_extension_internal("GL_EXT_shader_16bit_storage");
        }
        else if (type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte)
        {
            require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int8");
            if (options.vulkan_semantics)
                require_extension_internal("GL_EXT_shader_8bit_storage");
        }
        else if (type.basetype == SPIRType::Short || type.basetype == SPIRType::UShort)
        {
            require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int16");
            if (options.vulkan_semantics)
                require_extension_internal("GL_EXT_shader_16bit_storage");
        }
    });

    auto &execution = get_entry_point();
    switch (execution.model)
    {
    case ExecutionModelGLCompute:
        if (!options.es && options.version < 430)
            require_extension_internal("GL_ARB_compute_shader");
        if (options.es && options.version < 310)
            SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders.");
        break;

    case ExecutionModelGeometry:
        if (options.es && options.version < 320)
            require_extension_internal("GL_EXT_geometry_shader");
        if (!options.es && options.version < 150)
            require_extension_internal("GL_ARB_geometry_shader4");

        if (execution.flags.get(ExecutionModeInvocations) && execution.invocations != 1)
        {
            // Instanced GS is part of 400 core or this extension.
            if (!options.es && options.version < 400)
                require_extension_internal("GL_ARB_gpu_shader5");
        }
        break;

    case ExecutionModelTessellationEvaluation:
    case ExecutionModelTessellationControl:
        if (options.es && options.version < 320)
            require_extension_internal("GL_EXT_tessellation_shader");
        if (!options.es && options.version < 400)
            require_extension_internal("GL_ARB_tessellation_shader");
        break;

    case ExecutionModelRayGenerationKHR:
    case ExecutionModelIntersectionKHR:
    case ExecutionModelAnyHitKHR:
    case ExecutionModelClosestHitKHR:
    case ExecutionModelMissKHR:
    case ExecutionModelCallableKHR:
        // NV enums are aliases.
        if (options.es || options.version < 460)
            SPIRV_CROSS_THROW("Ray tracing shaders require non-es profile with version 460 or above.");
        if (!options.vulkan_semantics)
            SPIRV_CROSS_THROW("Ray tracing requires Vulkan semantics.");

        // Need to figure out if we should target KHR or NV extension based on capabilities.
        for (auto &cap : ir.declared_capabilities)
        {
            if (cap == CapabilityRayTracingKHR || cap == CapabilityRayQueryKHR ||
                cap == CapabilityRayTraversalPrimitiveCullingKHR)
            {
                ray_tracing_is_khr = true;
                break;
            }
        }

        if (ray_tracing_is_khr)
        {
            // In KHR ray tracing we pass payloads by pointer instead of location,
            // so make sure we assign locations properly.
            ray_tracing_khr_fixup_locations();
            require_extension_internal("GL_EXT_ray_tracing");
        }
        else
            require_extension_internal("GL_NV_ray_tracing");
        break;

    default:
        break;
    }

    if (!pls_inputs.empty() || !pls_outputs.empty())
    {
        if (execution.model != ExecutionModelFragment)
            SPIRV_CROSS_THROW("Can only use GL_EXT_shader_pixel_local_storage in fragment shaders.");
        require_extension_internal("GL_EXT_shader_pixel_local_storage");
    }

    if (!inout_color_attachments.empty())
    {
        if (execution.model != ExecutionModelFragment)
            SPIRV_CROSS_THROW("Can only use GL_EXT_shader_framebuffer_fetch in fragment shaders.");
        if (options.vulkan_semantics)
            SPIRV_CROSS_THROW("Cannot use EXT_shader_framebuffer_fetch in Vulkan GLSL.");

        bool has_coherent = false;
        bool has_incoherent = false;

        for (auto &att : inout_color_attachments)
        {
            if (att.second)
                has_coherent = true;
            else
                has_incoherent = true;
        }

        if (has_coherent)
            require_extension_internal("GL_EXT_shader_framebuffer_fetch");
        if (has_incoherent)
            require_extension_internal("GL_EXT_shader_framebuffer_fetch_non_coherent");
    }

    if (options.separate_shader_objects && !options.es && options.version < 410)
        require_extension_internal("GL_ARB_separate_shader_objects");

    if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
    {
        if (!options.vulkan_semantics)
            SPIRV_CROSS_THROW("GL_EXT_buffer_reference is only supported in Vulkan GLSL.");
        if (options.es && options.version < 320)
            SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires ESSL 320.");
        else if (!options.es && options.version < 450)
            SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires GLSL 450.");
        require_extension_internal("GL_EXT_buffer_reference");
    }
    else if (ir.addressing_model != AddressingModelLogical)
    {
        SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64EXT addressing models are supported.");
    }

    // Check for nonuniform qualifier and passthrough.
    // Instead of looping over all decorations to find this, just look at capabilities.
    for (auto &cap : ir.declared_capabilities)
    {
        switch (cap)
        {
        case CapabilityShaderNonUniformEXT:
            if (!options.vulkan_semantics)
                require_extension_internal("GL_NV_gpu_shader5");
            else
                require_extension_internal("GL_EXT_nonuniform_qualifier");
            break;
        case CapabilityRuntimeDescriptorArrayEXT:
            if (!options.vulkan_semantics)
                SPIRV_CROSS_THROW("GL_EXT_nonuniform_qualifier is only supported in Vulkan GLSL.");
            require_extension_internal("GL_EXT_nonuniform_qualifier");
            break;

        case CapabilityGeometryShaderPassthroughNV:
            if (execution.model == ExecutionModelGeometry)
            {
                require_extension_internal("GL_NV_geometry_shader_passthrough");
                execution.geometry_passthrough = true;
            }
            break;

        case CapabilityVariablePointers:
        case CapabilityVariablePointersStorageBuffer:
            SPIRV_CROSS_THROW("VariablePointers capability is not supported in GLSL.");

        case CapabilityMultiView:
            if (options.vulkan_semantics)
                require_extension_internal("GL_EXT_multiview");
            else
            {
                require_extension_internal("GL_OVR_multiview2");
                if (options.ovr_multiview_view_count == 0)
                    SPIRV_CROSS_THROW("ovr_multiview_view_count must be non-zero when using GL_OVR_multiview2.");
                if (get_execution_model() != ExecutionModelVertex)
                    SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders.");
            }
            break;

        case CapabilityRayQueryKHR:
            if (options.es || options.version < 460 || !options.vulkan_semantics)
                SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460.");
            require_extension_internal("GL_EXT_ray_query");
            ray_tracing_is_khr = true;
            break;

        case CapabilityRayTraversalPrimitiveCullingKHR:
            if (options.es || options.version < 460 || !options.vulkan_semantics)
                SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460.");
            require_extension_internal("GL_EXT_ray_flags_primitive_culling");
            ray_tracing_is_khr = true;
            break;

        default:
            break;
        }
    }

    if (options.ovr_multiview_view_count)
    {
        if (options.vulkan_semantics)
            SPIRV_CROSS_THROW("OVR_multiview2 cannot be used with Vulkan semantics.");
        if (get_execution_model() != ExecutionModelVertex)
            SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders.");
        require_extension_internal("GL_OVR_multiview2");
    }
}

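// KHR ray tracing passes payloads by pointer rather than by location, so synthesize
// sequential Location decorations for all payload and callable-data variables.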
void CompilerGLSL::ray_tracing_khr_fixup_locations()
{
    uint32_t location = 0;
    ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
        // Incoming payload storage can also be used for tracing.
        if (var.storage != StorageClassRayPayloadKHR && var.storage != StorageClassCallableDataKHR &&
            var.storage != StorageClassIncomingRayPayloadKHR && var.storage != StorageClassIncomingCallableDataKHR)
            return;
        if (is_hidden_variable(var))
            return;
        set_decoration(var.self, DecorationLocation, location++);
    });
}

string CompilerGLSL::compile()
{
    ir.fixup_reserved_names();

    if (!options.vulkan_semantics)
    {
        // only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers
        backend.nonuniform_qualifier = "";
        backend.needs_row_major_load_workaround = true;
    }
    backend.allow_precision_qualifiers = options.vulkan_semantics || options.es;
    backend.force_gl_in_out_block = true;
    backend.supports_extensions = true;
    backend.use_array_constructor = true;
    backend.workgroup_size_is_hidden = true;

    backend.support_precise_qualifier = (!options.es && options.version >= 400) || (options.es && options.version >= 320);

    if (is_legacy_es())
        backend.support_case_fallthrough = false;

    // Scan the SPIR-V to find trivial uses of extensions.
    fixup_type_alias();
    reorder_type_alias();
    build_function_control_flow_graphs_and_analyze();
    find_static_extensions();
    fixup_image_load_store_access();
    update_active_builtins();
    analyze_image_and_sampler_usage();
    analyze_interlocked_resource_usage();
    if (!inout_color_attachments.empty())
        emit_inout_fragment_outputs_copy_to_subpass_inputs();

    // Shaders might cast unrelated data to pointers of non-block types.
    // Find all such instances and make sure we can cast the pointers to a synthesized block type.
    if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
        analyze_non_block_pointer_types();

    uint32_t pass_count = 0;
    do
    {
        reset(pass_count);

        buffer.reset();

        emit_header();
        emit_resources();
        emit_extension_workarounds(get_execution_model());

        emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());

        pass_count++;
    } while (is_forcing_recompilation());

    // Implement the interlocked wrapper function at the end.
    // The body was implemented in lieu of main().
    if (interlocked_is_complex)
    {
        statement("void main()");
        begin_scope();
        statement("// Interlocks were used in a way not compatible with GLSL, this is very slow.");
        statement("SPIRV_Cross_beginInvocationInterlock();");
        statement("spvMainInterlockedBody();");
        statement("SPIRV_Cross_endInvocationInterlock();");
        end_scope();
    }

    // Entry point in GLSL is always main().
    get_entry_point().name = "main";

    return buffer.str();
}

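// Returns whatever has been emitted to the conversion buffer so far, which is useful when
// diagnosing a failed compile().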
std::string CompilerGLSL::get_partial_source()
{
    return buffer.str();
}

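// Builds the layout(local_size_*) arguments for the compute entry point. Specialization constants
// become constant_id references in Vulkan GLSL or constant macro names in plain GLSL; otherwise
// the literal workgroup size (or LocalSizeId constants) is emitted.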
void CompilerGLSL::build_workgroup_size(SmallVector<string> &arguments, const SpecializationConstant &wg_x,
                                        const SpecializationConstant &wg_y, const SpecializationConstant &wg_z)
{
    auto &execution = get_entry_point();
    bool builtin_workgroup = execution.workgroup_size.constant != 0;
    bool use_local_size_id = !builtin_workgroup && execution.flags.get(ExecutionModeLocalSizeId);

    if (wg_x.id)
    {
        if (options.vulkan_semantics)
            arguments.push_back(join("local_size_x_id = ", wg_x.constant_id));
        else
            arguments.push_back(join("local_size_x = ", get<SPIRConstant>(wg_x.id).specialization_constant_macro_name));
    }
    else if (use_local_size_id && execution.workgroup_size.id_x)
        arguments.push_back(join("local_size_x = ", get<SPIRConstant>(execution.workgroup_size.id_x).scalar()));
    else
        arguments.push_back(join("local_size_x = ", execution.workgroup_size.x));

    if (wg_y.id)
    {
        if (options.vulkan_semantics)
            arguments.push_back(join("local_size_y_id = ", wg_y.constant_id));
        else
            arguments.push_back(join("local_size_y = ", get<SPIRConstant>(wg_y.id).specialization_constant_macro_name));
    }
    else if (use_local_size_id && execution.workgroup_size.id_y)
        arguments.push_back(join("local_size_y = ", get<SPIRConstant>(execution.workgroup_size.id_y).scalar()));
    else
        arguments.push_back(join("local_size_y = ", execution.workgroup_size.y));

    if (wg_z.id)
    {
        if (options.vulkan_semantics)
            arguments.push_back(join("local_size_z_id = ", wg_z.constant_id));
        else
            arguments.push_back(join("local_size_z = ", get<SPIRConstant>(wg_z.id).specialization_constant_macro_name));
    }
    else if (use_local_size_id && execution.workgroup_size.id_z)
        arguments.push_back(join("local_size_z = ", get<SPIRConstant>(execution.workgroup_size.id_z).scalar()));
    else
        arguments.push_back(join("local_size_z = ", execution.workgroup_size.z));
}

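// In Vulkan GLSL the matching KHR subgroup extension can be required directly. In plain GLSL the
// request is recorded and a recompile is forced so emit_header() can emit vendor extension fallbacks.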
void CompilerGLSL::request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature)
{
    if (options.vulkan_semantics)
    {
        auto khr_extension = ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(feature);
        require_extension_internal(ShaderSubgroupSupportHelper::get_extension_name(khr_extension));
    }
    else
    {
        if (!shader_subgroup_supporter.is_feature_requested(feature))
            force_recompile();
        shader_subgroup_supporter.request_feature(feature);
    }
}

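// Emits the #version directive, the #extension blocks (including conditional vendor fallbacks),
// and the layout(...) in/out declarations derived from the entry point's execution modes.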
void CompilerGLSL::emit_header()
{
    auto &execution = get_entry_point();
    statement("#version ", options.version, options.es && options.version > 100 ? " es" : "");

    if (!options.es && options.version < 420)
    {
        // Needed for binding = # on UBOs, etc.
        if (options.enable_420pack_extension)
        {
            statement("#ifdef GL_ARB_shading_language_420pack");
            statement("#extension GL_ARB_shading_language_420pack : require");
            statement("#endif");
        }
        // Needed for: layout(early_fragment_tests) in;
        if (execution.flags.get(ExecutionModeEarlyFragmentTests))
            require_extension_internal("GL_ARB_shader_image_load_store");
    }

    // Needed for: layout(post_depth_coverage) in;
    if (execution.flags.get(ExecutionModePostDepthCoverage))
        require_extension_internal("GL_ARB_post_depth_coverage");

    // Needed for: layout({pixel,sample}_interlock_[un]ordered) in;
    bool interlock_used = execution.flags.get(ExecutionModePixelInterlockOrderedEXT) ||
                          execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
                          execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
                          execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT);

    if (interlock_used)
    {
        if (options.es)
        {
            if (options.version < 310)
                SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock.");
            require_extension_internal("GL_NV_fragment_shader_interlock");
        }
        else
        {
            if (options.version < 420)
                require_extension_internal("GL_ARB_shader_image_load_store");
            require_extension_internal("GL_ARB_fragment_shader_interlock");
        }
    }

    for (auto &ext : forced_extensions)
    {
        if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16")
        {
            // Special case, this extension has a potential fallback to another vendor extension in normal GLSL.
            // GL_AMD_gpu_shader_half_float is a superset, so try that first.
            statement("#if defined(GL_AMD_gpu_shader_half_float)");
            statement("#extension GL_AMD_gpu_shader_half_float : require");
            if (!options.vulkan_semantics)
            {
                statement("#elif defined(GL_NV_gpu_shader5)");
                statement("#extension GL_NV_gpu_shader5 : require");
            }
            else
            {
                statement("#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)");
                statement("#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require");
            }
            statement("#else");
            statement("#error No extension available for FP16.");
            statement("#endif");
        }
        else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16")
        {
            if (options.vulkan_semantics)
                statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require");
            else
            {
                statement("#if defined(GL_AMD_gpu_shader_int16)");
                statement("#extension GL_AMD_gpu_shader_int16 : require");
                statement("#elif defined(GL_NV_gpu_shader5)");
                statement("#extension GL_NV_gpu_shader5 : require");
                statement("#else");
                statement("#error No extension available for Int16.");
                statement("#endif");
            }
        }
        else if (ext == "GL_ARB_post_depth_coverage")
        {
            if (options.es)
                statement("#extension GL_EXT_post_depth_coverage : require");
            else
            {
                statement("#if defined(GL_ARB_post_depth_coverage)");
                statement("#extension GL_ARB_post_depth_coverage : require");
                statement("#else");
                statement("#extension GL_EXT_post_depth_coverage : require");
                statement("#endif");
            }
        }
        else if (!options.vulkan_semantics && ext == "GL_ARB_shader_draw_parameters")
        {
            // Soft-enable this extension on plain GLSL.
            statement("#ifdef ", ext);
            statement("#extension ", ext, " : enable");
            statement("#endif");
        }
        else if (ext == "GL_EXT_control_flow_attributes")
        {
            // These are just hints so we can conditionally enable and fallback in the shader.
            statement("#if defined(GL_EXT_control_flow_attributes)");
            statement("#extension GL_EXT_control_flow_attributes : require");
            statement("#define SPIRV_CROSS_FLATTEN [[flatten]]");
            statement("#define SPIRV_CROSS_BRANCH [[dont_flatten]]");
            statement("#define SPIRV_CROSS_UNROLL [[unroll]]");
            statement("#define SPIRV_CROSS_LOOP [[dont_unroll]]");
            statement("#else");
            statement("#define SPIRV_CROSS_FLATTEN");
            statement("#define SPIRV_CROSS_BRANCH");
            statement("#define SPIRV_CROSS_UNROLL");
            statement("#define SPIRV_CROSS_LOOP");
            statement("#endif");
        }
        else if (ext == "GL_NV_fragment_shader_interlock")
        {
            statement("#extension GL_NV_fragment_shader_interlock : require");
            statement("#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockNV()");
            statement("#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockNV()");
        }
        else if (ext == "GL_ARB_fragment_shader_interlock")
        {
            statement("#ifdef GL_ARB_fragment_shader_interlock");
            statement("#extension GL_ARB_fragment_shader_interlock : enable");
            statement("#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB()");
            statement("#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB()");
            statement("#elif defined(GL_INTEL_fragment_shader_ordering)");
            statement("#extension GL_INTEL_fragment_shader_ordering : enable");
            statement("#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL()");
            statement("#define SPIRV_Cross_endInvocationInterlock()");
            statement("#endif");
        }
        else
            statement("#extension ", ext, " : require");
    }

    if (!options.vulkan_semantics)
    {
        using Supp = ShaderSubgroupSupportHelper;
        auto result = shader_subgroup_supporter.resolve();

        for (uint32_t feature_index = 0; feature_index < Supp::FeatureCount; feature_index++)
        {
            auto feature = static_cast<Supp::Feature>(feature_index);
            if (!shader_subgroup_supporter.is_feature_requested(feature))
                continue;

            auto exts = Supp::get_candidates_for_feature(feature, result);
            if (exts.empty())
                continue;

            statement("");

            for (auto &ext : exts)
            {
                const char *name = Supp::get_extension_name(ext);
                const char *extra_predicate = Supp::get_extra_required_extension_predicate(ext);
                auto extra_names = Supp::get_extra_required_extension_names(ext);
                statement(&ext != &exts.front() ? "#elif" : "#if", " defined(", name, ")",
                          (*extra_predicate != '\0' ? " && " : ""), extra_predicate);
                for (const auto &e : extra_names)
                    statement("#extension ", e, " : enable");
                statement("#extension ", name, " : require");
            }

            if (!Supp::can_feature_be_implemented_without_extensions(feature))
            {
                statement("#else");
                statement("#error No extensions available to emulate requested subgroup feature.");
            }

            statement("#endif");
        }
    }

    for (auto &header : header_lines)
        statement(header);

    SmallVector<string> inputs;
    SmallVector<string> outputs;

    switch (execution.model)
    {
    case ExecutionModelVertex:
        if (options.ovr_multiview_view_count)
            inputs.push_back(join("num_views = ", options.ovr_multiview_view_count));
        break;
    case ExecutionModelGeometry:
        if ((execution.flags.get(ExecutionModeInvocations)) && execution.invocations != 1)
            inputs.push_back(join("invocations = ", execution.invocations));
        if (execution.flags.get(ExecutionModeInputPoints))
            inputs.push_back("points");
        if (execution.flags.get(ExecutionModeInputLines))
            inputs.push_back("lines");
        if (execution.flags.get(ExecutionModeInputLinesAdjacency))
            inputs.push_back("lines_adjacency");
        if (execution.flags.get(ExecutionModeTriangles))
            inputs.push_back("triangles");
        if (execution.flags.get(ExecutionModeInputTrianglesAdjacency))
            inputs.push_back("triangles_adjacency");

        if (!execution.geometry_passthrough)
        {
            // For passthrough, these are implied and cannot be declared in the shader.
            outputs.push_back(join("max_vertices = ", execution.output_vertices));
            if (execution.flags.get(ExecutionModeOutputTriangleStrip))
                outputs.push_back("triangle_strip");
            if (execution.flags.get(ExecutionModeOutputPoints))
                outputs.push_back("points");
            if (execution.flags.get(ExecutionModeOutputLineStrip))
                outputs.push_back("line_strip");
        }
        break;

    case ExecutionModelTessellationControl:
        if (execution.flags.get(ExecutionModeOutputVertices))
            outputs.push_back(join("vertices = ", execution.output_vertices));
        break;

    case ExecutionModelTessellationEvaluation:
        if (execution.flags.get(ExecutionModeQuads))
            inputs.push_back("quads");
        if (execution.flags.get(ExecutionModeTriangles))
            inputs.push_back("triangles");
        if (execution.flags.get(ExecutionModeIsolines))
            inputs.push_back("isolines");
        if (execution.flags.get(ExecutionModePointMode))
            inputs.push_back("point_mode");

        if (!execution.flags.get(ExecutionModeIsolines))
        {
            if (execution.flags.get(ExecutionModeVertexOrderCw))
                inputs.push_back("cw");
            if (execution.flags.get(ExecutionModeVertexOrderCcw))
                inputs.push_back("ccw");
        }

        if (execution.flags.get(ExecutionModeSpacingFractionalEven))
            inputs.push_back("fractional_even_spacing");
        if (execution.flags.get(ExecutionModeSpacingFractionalOdd))
            inputs.push_back("fractional_odd_spacing");
        if (execution.flags.get(ExecutionModeSpacingEqual))
            inputs.push_back("equal_spacing");
        break;

    case ExecutionModelGLCompute:
    {
        if (execution.workgroup_size.constant != 0 || execution.flags.get(ExecutionModeLocalSizeId))
        {
            SpecializationConstant wg_x, wg_y, wg_z;
            get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);

            // If there are any spec constants on legacy GLSL, defer declaration, we need to set up macro
            // declarations before we can emit the work group size.
            if (options.vulkan_semantics ||
                ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0))))
                build_workgroup_size(inputs, wg_x, wg_y, wg_z);
        }
        else
        {
            inputs.push_back(join("local_size_x = ", execution.workgroup_size.x));
            inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
            inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
        }
        break;
    }

    case ExecutionModelFragment:
        if (options.es)
        {
            switch (options.fragment.default_float_precision)
            {
            case Options::Lowp:
                statement("precision lowp float;");
                break;

            case Options::Mediump:
                statement("precision mediump float;");
                break;

            case Options::Highp:
                statement("precision highp float;");
                break;

            default:
                break;
            }

            switch (options.fragment.default_int_precision)
            {
            case Options::Lowp:
                statement("precision lowp int;");
                break;

            case Options::Mediump:
                statement("precision mediump int;");
                break;

            case Options::Highp:
                statement("precision highp int;");
                break;

            default:
                break;
            }
        }

        if (execution.flags.get(ExecutionModeEarlyFragmentTests))
            inputs.push_back("early_fragment_tests");
        if (execution.flags.get(ExecutionModePostDepthCoverage))
            inputs.push_back("post_depth_coverage");

        if (interlock_used)
            statement("#if defined(GL_ARB_fragment_shader_interlock)");

        if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT))
            statement("layout(pixel_interlock_ordered) in;");
        else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT))
            statement("layout(pixel_interlock_unordered) in;");
        else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT))
            statement("layout(sample_interlock_ordered) in;");
        else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
            statement("layout(sample_interlock_unordered) in;");

        if (interlock_used)
        {
            statement("#elif !defined(GL_INTEL_fragment_shader_ordering)");
            statement("#error Fragment Shader Interlock/Ordering extension missing!");
            statement("#endif");
        }

        if (!options.es && execution.flags.get(ExecutionModeDepthGreater))
            statement("layout(depth_greater) out float gl_FragDepth;");
        else if (!options.es && execution.flags.get(ExecutionModeDepthLess))
            statement("layout(depth_less) out float gl_FragDepth;");

        break;

    default:
        break;
    }

    for (auto &cap : ir.declared_capabilities)
        if (cap == CapabilityRayTraversalPrimitiveCullingKHR)
            statement("layout(primitive_culling);");

    if (!inputs.empty())
        statement("layout(", merge(inputs), ") in;");
    if (!outputs.empty())
        statement("layout(", merge(outputs), ") out;");

    statement("");
}

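// GLSL does not allow empty struct declarations, so emit_struct() adds a dummy member for these.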
bool CompilerGLSL::type_is_empty(const SPIRType &type)
{
    return type.basetype == SPIRType::Struct && type.member_types.empty();
}

void CompilerGLSL::emit_struct(SPIRType &type)
{
    // Struct types can be stamped out multiple times
    // with just different offsets, matrix layouts, etc ...
    // Type-punning with these types is legal, which complicates things
    // when we are storing struct and array types in an SSBO for example.
    // If the type master is packed however, we can no longer assume that the struct declaration will be redundant.
    if (type.type_alias != TypeID(0) &&
        !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
        return;

    add_resource_name(type.self);
    auto name = type_to_glsl(type);

    statement(!backend.explicit_struct_type ? "struct " : "", name);
    begin_scope();

    type.member_name_cache.clear();

    uint32_t i = 0;
    bool emitted = false;
    for (auto &member : type.member_types)
    {
        add_member_name(type, i);
        emit_struct_member(type, member, i);
        i++;
        emitted = true;
    }

    // Don't declare empty structs in GLSL, this is not allowed.
    if (type_is_empty(type) && !backend.supports_empty_struct)
    {
        statement("int empty_struct_member;");
        emitted = true;
    }

    if (has_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget))
        emit_struct_padding_target(type);

    end_scope_decl();

    if (emitted)
        statement("");
}

string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
{
    string res;
    //if (flags & (1ull << DecorationSmooth))
    //    res += "smooth ";
    if (flags.get(DecorationFlat))
        res += "flat ";
    if (flags.get(DecorationNoPerspective))
        res += "noperspective ";
    if (flags.get(DecorationCentroid))
        res += "centroid ";
    if (flags.get(DecorationPatch))
        res += "patch ";
    if (flags.get(DecorationSample))
        res += "sample ";
    if (flags.get(DecorationInvariant))
        res += "invariant ";

    if (flags.get(DecorationExplicitInterpAMD))
    {
        require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");
        res += "__explicitInterpAMD ";
    }

    if (flags.get(DecorationPerVertexNV))
    {
        if (options.es && options.version < 320)
            SPIRV_CROSS_THROW("pervertexNV requires ESSL 320.");
        else if (!options.es && options.version < 450)
            SPIRV_CROSS_THROW("pervertexNV requires GLSL 450.");
        require_extension_internal("GL_NV_fragment_shader_barycentric");
        res += "pervertexNV ";
    }

    return res;
}

string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
{
    if (is_legacy())
        return "";

    bool is_block = has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
    if (!is_block)
        return "";

    auto &memb = ir.meta[type.self].members;
    if (index >= memb.size())
        return "";
    auto &dec = memb[index];

    SmallVector<string> attr;

    if (has_member_decoration(type.self, index, DecorationPassthroughNV))
        attr.push_back("passthrough");

    // We can only apply layouts on members in block interfaces.
    // This is a bit problematic because in SPIR-V decorations are applied on the struct types directly.
    // This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct
    // has a decoration, it was originally caused by a top-level layout() qualifier in GLSL.
    //
    // We would like to go from (SPIR-V style):
    //
    // struct Foo { layout(row_major) mat4 matrix; };
    // buffer UBO { Foo foo; };
    //
    // to
    //
    // struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations.
    // buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level.
    auto flags = combined_decoration_for_member(type, index);

    if (flags.get(DecorationRowMajor))
        attr.push_back("row_major");
    // We don't emit any global layouts, so column_major is default.
    //if (flags & (1ull << DecorationColMajor))
    //    attr.push_back("column_major");

    if (dec.decoration_flags.get(DecorationLocation) && can_use_io_location(type.storage, true))
        attr.push_back(join("location = ", dec.location));

    // Can only declare component if we can declare location.
    if (dec.decoration_flags.get(DecorationComponent) && can_use_io_location(type.storage, true))
    {
        if (!options.es)
        {
            if (options.version < 440 && options.version >= 140)
                require_extension_internal("GL_ARB_enhanced_layouts");
            else if (options.version < 140)
                SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40.");
            attr.push_back(join("component = ", dec.component));
        }
        else
            SPIRV_CROSS_THROW("Component decoration is not supported in ES targets.");
    }

    // SPIRVCrossDecorationPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers.
    // This is only done selectively in GLSL as needed.
    if (has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) &&
        dec.decoration_flags.get(DecorationOffset))
        attr.push_back(join("offset = ", dec.offset));
    else if (type.storage == StorageClassOutput && dec.decoration_flags.get(DecorationOffset))
        attr.push_back(join("xfb_offset = ", dec.offset));

    if (attr.empty())
        return "";

    string res = "layout(";
    res += merge(attr);
    res += ") ";
    return res;
}

const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format)
{
    if (options.es && is_desktop_only_format(format))
        SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile.");

    switch (format)
    {
    case ImageFormatRgba32f:
        return "rgba32f";
    case ImageFormatRgba16f:
        return "rgba16f";
    case ImageFormatR32f:
        return "r32f";
    case ImageFormatRgba8:
        return "rgba8";
    case ImageFormatRgba8Snorm:
        return "rgba8_snorm";
    case ImageFormatRg32f:
        return "rg32f";
    case ImageFormatRg16f:
        return "rg16f";
    case ImageFormatRgba32i:
        return "rgba32i";
    case ImageFormatRgba16i:
        return "rgba16i";
    case ImageFormatR32i:
        return "r32i";
    case ImageFormatRgba8i:
        return "rgba8i";
    case ImageFormatRg32i:
        return "rg32i";
    case ImageFormatRg16i:
        return "rg16i";
    case ImageFormatRgba32ui:
        return "rgba32ui";
    case ImageFormatRgba16ui:
        return "rgba16ui";
    case ImageFormatR32ui:
        return "r32ui";
    case ImageFormatRgba8ui:
        return "rgba8ui";
    case ImageFormatRg32ui:
        return "rg32ui";
    case ImageFormatRg16ui:
        return "rg16ui";
    case ImageFormatR11fG11fB10f:
        return "r11f_g11f_b10f";
    case ImageFormatR16f:
        return "r16f";
    case ImageFormatRgb10A2:
        return "rgb10_a2";
    case ImageFormatR8:
        return "r8";
    case ImageFormatRg8:
        return "rg8";
    case ImageFormatR16:
        return "r16";
    case ImageFormatRg16:
        return "rg16";
    case ImageFormatRgba16:
        return "rgba16";
    case ImageFormatR16Snorm:
        return "r16_snorm";
    case ImageFormatRg16Snorm:
        return "rg16_snorm";
    case ImageFormatRgba16Snorm:
        return "rgba16_snorm";
    case ImageFormatR8Snorm:
        return "r8_snorm";
    case ImageFormatRg8Snorm:
        return "rg8_snorm";
    case ImageFormatR8ui:
        return "r8ui";
    case ImageFormatRg8ui:
        return "rg8ui";
    case ImageFormatR16ui:
        return "r16ui";
    case ImageFormatRgb10a2ui:
        return "rgb10_a2ui";
    case ImageFormatR8i:
        return "r8i";
    case ImageFormatRg8i:
        return "rg8i";
    case ImageFormatR16i:
        return "r16i";
    default:
    case ImageFormatUnknown:
        return nullptr;
    }
}

uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard)
{
    switch (type.basetype)
    {
    case SPIRType::Double:
    case SPIRType::Int64:
    case SPIRType::UInt64:
        return 8;
    case SPIRType::Float:
    case SPIRType::Int:
    case SPIRType::UInt:
        return 4;
    case SPIRType::Half:
    case SPIRType::Short:
    case SPIRType::UShort:
        return 2;
    case SPIRType::SByte:
    case SPIRType::UByte:
        return 1;

    default:
        SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size.");
    }
}

uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags,
                                                BufferPackingStandard packing)
{
    // If using PhysicalStorageBufferEXT storage class, this is a pointer,
    // and is 64-bit.
    if (type.storage == StorageClassPhysicalStorageBufferEXT)
    {
        if (!type.pointer)
            SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");

        if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
        {
            if (packing_is_vec4_padded(packing) && type_is_array_of_pointers(type))
                return 16;
            else
                return 8;
        }
        else
            SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
    }

    if (!type.array.empty())
    {
        uint32_t minimum_alignment = 1;
        if (packing_is_vec4_padded(packing))
            minimum_alignment = 16;

        auto *tmp = &get<SPIRType>(type.parent_type);
        while (!tmp->array.empty())
            tmp = &get<SPIRType>(tmp->parent_type);

        // Get the alignment of the base type, then maybe round up.
        return max(minimum_alignment, type_to_packed_alignment(*tmp, flags, packing));
    }

    if (type.basetype == SPIRType::Struct)
    {
        // Rule 9. A struct's alignment is the maximum alignment of its members.
        uint32_t alignment = 1;
        for (uint32_t i = 0; i < type.member_types.size(); i++)
        {
            auto member_flags = ir.meta[type.self].members[i].decoration_flags;
            alignment =
                max(alignment, type_to_packed_alignment(get<SPIRType>(type.member_types[i]), member_flags, packing));
        }

        // In std140, struct alignment is rounded up to 16.
        if (packing_is_vec4_padded(packing))
            alignment = max(alignment, 16u);

        return alignment;
    }
    else
    {
        const uint32_t base_alignment = type_to_packed_base_size(type, packing);

        // Alignment requirement for scalar block layout is always the alignment for the most basic component.
        if (packing_is_scalar(packing))
            return base_alignment;

        // Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle
        // a vec4; this is handled outside since that part knows our current offset.
1473 if (type.columns == 1 && packing_is_hlsl(packing))
1474 return base_alignment;
1475
1476 // From 7.6.2.2 in GL 4.5 core spec.
1477 // Rule 1
1478 if (type.vecsize == 1 && type.columns == 1)
1479 return base_alignment;
1480
1481 // Rule 2
1482 if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1)
1483 return type.vecsize * base_alignment;
1484
1485 // Rule 3
1486 if (type.vecsize == 3 && type.columns == 1)
1487 return 4 * base_alignment;
1488
1489 // Rule 4 implied. Alignment does not change in std430.
1490
1491 // Rule 5. Column-major matrices are stored as arrays of
1492 // vectors.
1493 if (flags.get(DecorationColMajor) && type.columns > 1)
1494 {
1495 if (packing_is_vec4_padded(packing))
1496 return 4 * base_alignment;
1497 else if (type.vecsize == 3)
1498 return 4 * base_alignment;
1499 else
1500 return type.vecsize * base_alignment;
1501 }
1502
1503 // Rule 6 implied.
1504
1505 // Rule 7.
1506 if (flags.get(DecorationRowMajor) && type.vecsize > 1)
1507 {
1508 if (packing_is_vec4_padded(packing))
1509 return 4 * base_alignment;
1510 else if (type.columns == 3)
1511 return 4 * base_alignment;
1512 else
1513 return type.columns * base_alignment;
1514 }
1515
1516 // Rule 8 implied.
1517 }
1518
1519 SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?");
1520}
1521
1522uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const Bitset &flags,
1523 BufferPackingStandard packing)
1524{
1525 // Array stride is equal to aligned size of the underlying type.
1526 uint32_t parent = type.parent_type;
1527 assert(parent);
1528
1529 auto &tmp = get<SPIRType>(parent);
1530
1531 uint32_t size = type_to_packed_size(tmp, flags, packing);
1532 uint32_t alignment = type_to_packed_alignment(type, flags, packing);
1533 return (size + alignment - 1) & ~(alignment - 1);
1534}
1535
1536uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing)
1537{
1538 if (!type.array.empty())
1539 {
1540 uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing);
1541
1542 // For arrays of vectors and matrices in HLSL, the last element has a size which depends on its vector size,
1543 // so that it is possible to pack other vectors into the last element.
1544 if (packing_is_hlsl(packing) && type.basetype != SPIRType::Struct)
1545 packed_size -= (4 - type.vecsize) * (type.width / 8);
1546
1547 return packed_size;
1548 }
1549
1550 // If using PhysicalStorageBufferEXT storage class, this is a pointer,
1551 // and is 64-bit.
1552 if (type.storage == StorageClassPhysicalStorageBufferEXT)
1553 {
1554 if (!type.pointer)
1555 SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
1556
1557 if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
1558 return 8;
1559 else
1560 SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
1561 }
1562
1563 uint32_t size = 0;
1564
1565 if (type.basetype == SPIRType::Struct)
1566 {
1567 uint32_t pad_alignment = 1;
1568
1569 for (uint32_t i = 0; i < type.member_types.size(); i++)
1570 {
1571 auto member_flags = ir.meta[type.self].members[i].decoration_flags;
1572 auto &member_type = get<SPIRType>(type.member_types[i]);
1573
1574 uint32_t packed_alignment = type_to_packed_alignment(member_type, member_flags, packing);
1575 uint32_t alignment = max(packed_alignment, pad_alignment);
1576
1577 // The next member following a struct member is aligned to the base alignment of the struct that came before.
1578 // GL 4.5 spec, 7.6.2.2.
1579 if (member_type.basetype == SPIRType::Struct)
1580 pad_alignment = packed_alignment;
1581 else
1582 pad_alignment = 1;
1583
1584 size = (size + alignment - 1) & ~(alignment - 1);
1585 size += type_to_packed_size(member_type, member_flags, packing);
1586 }
1587 }
1588 else
1589 {
1590 const uint32_t base_alignment = type_to_packed_base_size(type, packing);
1591
1592 if (packing_is_scalar(packing))
1593 {
1594 size = type.vecsize * type.columns * base_alignment;
1595 }
1596 else
1597 {
1598 if (type.columns == 1)
1599 size = type.vecsize * base_alignment;
1600
1601 if (flags.get(DecorationColMajor) && type.columns > 1)
1602 {
1603 if (packing_is_vec4_padded(packing))
1604 size = type.columns * 4 * base_alignment;
1605 else if (type.vecsize == 3)
1606 size = type.columns * 4 * base_alignment;
1607 else
1608 size = type.columns * type.vecsize * base_alignment;
1609 }
1610
1611 if (flags.get(DecorationRowMajor) && type.vecsize > 1)
1612 {
1613 if (packing_is_vec4_padded(packing))
1614 size = type.vecsize * 4 * base_alignment;
1615 else if (type.columns == 3)
1616 size = type.vecsize * 4 * base_alignment;
1617 else
1618 size = type.vecsize * type.columns * base_alignment;
1619 }
1620
1621 // For matrices in HLSL, the last element has a size which depends on its vector size,
1622 // so that it is possible to pack other vectors into the last element.
1623 if (packing_is_hlsl(packing) && type.columns > 1)
1624 size -= (4 - type.vecsize) * (type.width / 8);
1625 }
1626 }
1627
1628 return size;
1629}
1630
1631bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing,
1632 uint32_t *failed_validation_index, uint32_t start_offset,
1633 uint32_t end_offset)
1634{
1635 // This is very tricky and error prone, but try to be exhaustive and correct here.
1636 // SPIR-V doesn't directly say if we're using std430 or std140.
1637 // SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters),
1638 // so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information.
1639 // We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyways, and custom offsets would do the same thing).
1640 //
1641 // It is almost certain that we're using std430, but it gets tricky with arrays in particular.
1642 // We will assume std430, but infer std140 if we can prove the struct is not compliant with std430.
1643 //
1644 // The only two differences between std140 and std430 are related to padding alignment/array stride
1645 // in arrays and structs. In std140 they take minimum vec4 alignment.
1646 // std430 only removes the vec4 requirement.
1647
1648 uint32_t offset = 0;
1649 uint32_t pad_alignment = 1;
1650
1651 bool is_top_level_block =
1652 has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
1653
1654 for (uint32_t i = 0; i < type.member_types.size(); i++)
1655 {
1656 auto &memb_type = get<SPIRType>(type.member_types[i]);
1657 auto member_flags = ir.meta[type.self].members[i].decoration_flags;
1658
1659 // Verify alignment rules.
1660 uint32_t packed_alignment = type_to_packed_alignment(memb_type, member_flags, packing);
1661
1662 // This is a rather dirty workaround to deal with some cases of OpSpecConstantOp used as array size, e.g:
1663 // layout(constant_id = 0) const int s = 10;
1664 // const int S = s + 5; // SpecConstantOp
1665 // buffer Foo { int data[S]; }; // <-- Very hard for us to deduce a fixed value here,
1666 // we would need full implementation of compile-time constant folding. :(
1667 // If we are the last member of a struct, there might be cases where the actual size of that member is irrelevant
1668 // for our analysis (e.g. unsized arrays).
1669 // This lets us simply ignore that there are spec constant op sized arrays in our buffers.
1670 // Querying size of this member will fail, so just don't call it unless we have to.
1671 //
1672 // This is likely "best effort" we can support without going into unacceptably complicated workarounds.
1673 bool member_can_be_unsized =
1674 is_top_level_block && size_t(i + 1) == type.member_types.size() && !memb_type.array.empty();
1675
1676 uint32_t packed_size = 0;
1677 if (!member_can_be_unsized || packing_is_hlsl(packing))
1678 packed_size = type_to_packed_size(memb_type, member_flags, packing);
1679
1680 // We only need to care about this if we have non-array types which can straddle the vec4 boundary.
1681 if (packing_is_hlsl(packing))
1682 {
1683 // If a member straddles across a vec4 boundary, alignment is actually vec4.
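			// e.g. a vec3 at offset 8 occupies bytes 8 through 19 and crosses the 16 byte boundary,
			// so it has to be treated as 16 byte aligned.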
1684 uint32_t begin_word = offset / 16;
1685 uint32_t end_word = (offset + packed_size - 1) / 16;
1686 if (begin_word != end_word)
1687 packed_alignment = max(packed_alignment, 16u);
1688 }
1689
1690 uint32_t actual_offset = type_struct_member_offset(type, i);
1691 // Field is not in the specified range anymore and we can ignore any further fields.
1692 if (actual_offset >= end_offset)
1693 break;
1694
1695 uint32_t alignment = max(packed_alignment, pad_alignment);
1696 offset = (offset + alignment - 1) & ~(alignment - 1);
1697
1698 // The next member following a struct member is aligned to the base alignment of the struct that came before.
1699 // GL 4.5 spec, 7.6.2.2.
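		// e.g. a lone float following a struct member whose base alignment is 16
		// must still be placed at the next 16 byte aligned offset.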
1700 if (memb_type.basetype == SPIRType::Struct && !memb_type.pointer)
1701 pad_alignment = packed_alignment;
1702 else
1703 pad_alignment = 1;
1704
1705 // Only care about packing if we are in the given range
1706 if (actual_offset >= start_offset)
1707 {
1708 // We only care about offsets in std140, std430, etc ...
1709 // For EnhancedLayout variants, we have the flexibility to choose our own offsets.
1710 if (!packing_has_flexible_offset(packing))
1711 {
1712 if (actual_offset != offset) // This cannot be the packing we're looking for.
1713 {
1714 if (failed_validation_index)
1715 *failed_validation_index = i;
1716 return false;
1717 }
1718 }
1719 else if ((actual_offset & (alignment - 1)) != 0)
1720 {
1721 // We still need to verify that alignment rules are observed, even if we have explicit offset.
1722 if (failed_validation_index)
1723 *failed_validation_index = i;
1724 return false;
1725 }
1726
1727 // Verify array stride rules.
1728 if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) !=
1729 type_struct_member_array_stride(type, i))
1730 {
1731 if (failed_validation_index)
1732 *failed_validation_index = i;
1733 return false;
1734 }
1735
1736 // Verify that sub-structs also follow packing rules.
1737 // We cannot use enhanced layouts on substructs, so they better be up to spec.
1738 auto substruct_packing = packing_to_substruct_packing(packing);
1739
1740 if (!memb_type.pointer && !memb_type.member_types.empty() &&
1741 !buffer_is_packing_standard(memb_type, substruct_packing))
1742 {
1743 if (failed_validation_index)
1744 *failed_validation_index = i;
1745 return false;
1746 }
1747 }
1748
1749 // Bump size.
1750 offset = actual_offset + packed_size;
1751 }
1752
1753 return true;
1754}
1755
1756bool CompilerGLSL::can_use_io_location(StorageClass storage, bool block)
1757{
	// Location specifiers are mandatory in SPIR-V, but they aren't really supported in earlier versions of GLSL.
	// Be very explicit here about how to solve the issue.
1760 if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) ||
1761 (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput))
1762 {
1763 uint32_t minimum_desktop_version = block ? 440 : 410;
1764 // ARB_enhanced_layouts vs ARB_separate_shader_objects ...
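		// e.g. a non-block vertex shader output with an explicit location needs GLSL 4.10 (ARB_separate_shader_objects),
		// while an output block needs GLSL 4.40 (ARB_enhanced_layouts), unless separate_shader_objects is enabled.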
1765
1766 if (!options.es && options.version < minimum_desktop_version && !options.separate_shader_objects)
1767 return false;
1768 else if (options.es && options.version < 310)
1769 return false;
1770 }
1771
1772 if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) ||
1773 (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput))
1774 {
1775 if (options.es && options.version < 300)
1776 return false;
1777 else if (!options.es && options.version < 330)
1778 return false;
1779 }
1780
1781 if (storage == StorageClassUniform || storage == StorageClassUniformConstant || storage == StorageClassPushConstant)
1782 {
1783 if (options.es && options.version < 310)
1784 return false;
1785 else if (!options.es && options.version < 430)
1786 return false;
1787 }
1788
1789 return true;
1790}
1791
1792string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
1793{
	// FIXME: Come up with a better solution for when to disable layouts.
	// Whether layouts can be used depends on extensions as well as which kinds of layouts are used.
	// For now, the simple solution is to just disable layouts for legacy versions.
1798 if (is_legacy())
1799 return "";
1800
1801 if (subpass_input_is_framebuffer_fetch(var.self))
1802 return "";
1803
1804 SmallVector<string> attr;
1805
1806 auto &type = get<SPIRType>(var.basetype);
1807 auto &flags = get_decoration_bitset(var.self);
1808 auto &typeflags = get_decoration_bitset(type.self);
1809
1810 if (flags.get(DecorationPassthroughNV))
1811 attr.push_back("passthrough");
1812
1813 if (options.vulkan_semantics && var.storage == StorageClassPushConstant)
1814 attr.push_back("push_constant");
1815 else if (var.storage == StorageClassShaderRecordBufferKHR)
1816 attr.push_back(ray_tracing_is_khr ? "shaderRecordEXT" : "shaderRecordNV");
1817
1818 if (flags.get(DecorationRowMajor))
1819 attr.push_back("row_major");
1820 if (flags.get(DecorationColMajor))
1821 attr.push_back("column_major");
1822
1823 if (options.vulkan_semantics)
1824 {
1825 if (flags.get(DecorationInputAttachmentIndex))
1826 attr.push_back(join("input_attachment_index = ", get_decoration(var.self, DecorationInputAttachmentIndex)));
1827 }
1828
1829 bool is_block = has_decoration(type.self, DecorationBlock);
1830 if (flags.get(DecorationLocation) && can_use_io_location(var.storage, is_block))
1831 {
1832 Bitset combined_decoration;
1833 for (uint32_t i = 0; i < ir.meta[type.self].members.size(); i++)
1834 combined_decoration.merge_or(combined_decoration_for_member(type, i));
1835
1836 // If our members have location decorations, we don't need to
1837 // emit location decorations at the top as well (looks weird).
1838 if (!combined_decoration.get(DecorationLocation))
1839 attr.push_back(join("location = ", get_decoration(var.self, DecorationLocation)));
1840 }
1841
1842 if (get_execution_model() == ExecutionModelFragment && var.storage == StorageClassOutput &&
1843 location_is_non_coherent_framebuffer_fetch(get_decoration(var.self, DecorationLocation)))
1844 {
1845 attr.push_back("noncoherent");
1846 }
1847
1848 // Transform feedback
1849 bool uses_enhanced_layouts = false;
1850 if (is_block && var.storage == StorageClassOutput)
1851 {
1852 // For blocks, there is a restriction where xfb_stride/xfb_buffer must only be declared on the block itself,
1853 // since all members must match the same xfb_buffer. The only thing we will declare for members of the block
1854 // is the xfb_offset.
1855 uint32_t member_count = uint32_t(type.member_types.size());
1856 bool have_xfb_buffer_stride = false;
1857 bool have_any_xfb_offset = false;
1858 bool have_geom_stream = false;
1859 uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
1860
1861 if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride))
1862 {
1863 have_xfb_buffer_stride = true;
1864 xfb_buffer = get_decoration(var.self, DecorationXfbBuffer);
1865 xfb_stride = get_decoration(var.self, DecorationXfbStride);
1866 }
1867
1868 if (flags.get(DecorationStream))
1869 {
1870 have_geom_stream = true;
1871 geom_stream = get_decoration(var.self, DecorationStream);
1872 }
1873
1874 // Verify that none of the members violate our assumption.
1875 for (uint32_t i = 0; i < member_count; i++)
1876 {
1877 if (has_member_decoration(type.self, i, DecorationStream))
1878 {
1879 uint32_t member_geom_stream = get_member_decoration(type.self, i, DecorationStream);
1880 if (have_geom_stream && member_geom_stream != geom_stream)
1881 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
1882 have_geom_stream = true;
1883 geom_stream = member_geom_stream;
1884 }
1885
1886 // Only members with an Offset decoration participate in XFB.
1887 if (!has_member_decoration(type.self, i, DecorationOffset))
1888 continue;
1889 have_any_xfb_offset = true;
1890
1891 if (has_member_decoration(type.self, i, DecorationXfbBuffer))
1892 {
1893 uint32_t buffer_index = get_member_decoration(type.self, i, DecorationXfbBuffer);
1894 if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
1895 SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
1896 have_xfb_buffer_stride = true;
1897 xfb_buffer = buffer_index;
1898 }
1899
1900 if (has_member_decoration(type.self, i, DecorationXfbStride))
1901 {
1902 uint32_t stride = get_member_decoration(type.self, i, DecorationXfbStride);
1903 if (have_xfb_buffer_stride && stride != xfb_stride)
1904 SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
1905 have_xfb_buffer_stride = true;
1906 xfb_stride = stride;
1907 }
1908 }
1909
1910 if (have_xfb_buffer_stride && have_any_xfb_offset)
1911 {
1912 attr.push_back(join("xfb_buffer = ", xfb_buffer));
1913 attr.push_back(join("xfb_stride = ", xfb_stride));
1914 uses_enhanced_layouts = true;
1915 }
1916
1917 if (have_geom_stream)
1918 {
1919 if (get_execution_model() != ExecutionModelGeometry)
1920 SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
1921 if (options.es)
1922 SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
1923 if (options.version < 400)
1924 require_extension_internal("GL_ARB_transform_feedback3");
1925 attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream)));
1926 }
1927 }
1928 else if (var.storage == StorageClassOutput)
1929 {
1930 if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride) && flags.get(DecorationOffset))
1931 {
1932 // XFB for standalone variables, we can emit all decorations.
1933 attr.push_back(join("xfb_buffer = ", get_decoration(var.self, DecorationXfbBuffer)));
1934 attr.push_back(join("xfb_stride = ", get_decoration(var.self, DecorationXfbStride)));
1935 attr.push_back(join("xfb_offset = ", get_decoration(var.self, DecorationOffset)));
1936 uses_enhanced_layouts = true;
1937 }
1938
1939 if (flags.get(DecorationStream))
1940 {
1941 if (get_execution_model() != ExecutionModelGeometry)
1942 SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
1943 if (options.es)
1944 SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
1945 if (options.version < 400)
1946 require_extension_internal("GL_ARB_transform_feedback3");
1947 attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream)));
1948 }
1949 }
1950
1951 // Can only declare Component if we can declare location.
1952 if (flags.get(DecorationComponent) && can_use_io_location(var.storage, is_block))
1953 {
1954 uses_enhanced_layouts = true;
1955 attr.push_back(join("component = ", get_decoration(var.self, DecorationComponent)));
1956 }
1957
	if (uses_enhanced_layouts)
	{
		if (!options.es)
		{
			if (options.version < 140)
				SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in targets below GLSL 1.40.");
			else if (options.version < 440)
				require_extension_internal("GL_ARB_enhanced_layouts");
		}
		else
			SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in ESSL.");
	}
1972
1973 if (flags.get(DecorationIndex))
1974 attr.push_back(join("index = ", get_decoration(var.self, DecorationIndex)));
1975
1976 // Do not emit set = decoration in regular GLSL output, but
1977 // we need to preserve it in Vulkan GLSL mode.
1978 if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferKHR)
1979 {
1980 if (flags.get(DecorationDescriptorSet) && options.vulkan_semantics)
1981 attr.push_back(join("set = ", get_decoration(var.self, DecorationDescriptorSet)));
1982 }
1983
1984 bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant;
1985 bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
1986 (var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock));
1987 bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer;
1988 bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock);
1989
1990 // GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ...
1991 bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140);
1992
	// Pretend we have no UBOs when the options say so.
1994 if (ubo_block && options.emit_uniform_buffer_as_plain_uniforms)
1995 can_use_buffer_blocks = false;
1996
1997 bool can_use_binding;
1998 if (options.es)
1999 can_use_binding = options.version >= 310;
2000 else
2001 can_use_binding = options.enable_420pack_extension || (options.version >= 420);
2002
2003 // Make sure we don't emit binding layout for a classic uniform on GLSL 1.30.
2004 if (!can_use_buffer_blocks && var.storage == StorageClassUniform)
2005 can_use_binding = false;
2006
2007 if (var.storage == StorageClassShaderRecordBufferKHR)
2008 can_use_binding = false;
2009
2010 if (can_use_binding && flags.get(DecorationBinding))
2011 attr.push_back(join("binding = ", get_decoration(var.self, DecorationBinding)));
2012
2013 if (var.storage != StorageClassOutput && flags.get(DecorationOffset))
2014 attr.push_back(join("offset = ", get_decoration(var.self, DecorationOffset)));
2015
2016 // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430.
2017 // If SPIR-V does not comply with either layout, we cannot really work around it.
2018 if (can_use_buffer_blocks && (ubo_block || emulated_ubo))
2019 {
2020 attr.push_back(buffer_to_packing_standard(type, false));
2021 }
2022 else if (can_use_buffer_blocks && (push_constant_block || ssbo_block))
2023 {
2024 attr.push_back(buffer_to_packing_standard(type, true));
2025 }
2026
	// For images, the type itself adds a layout qualifier.
2028 // Only emit the format for storage images.
2029 if (type.basetype == SPIRType::Image && type.image.sampled == 2)
2030 {
2031 const char *fmt = format_to_glsl(type.image.format);
2032 if (fmt)
2033 attr.push_back(fmt);
2034 }
2035
2036 if (attr.empty())
2037 return "";
2038
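	// e.g. for a Vulkan UBO this can end up as something like: layout(set = 0, binding = 1, std140)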
2039 string res = "layout(";
2040 res += merge(attr);
2041 res += ") ";
2042 return res;
2043}
2044
2045string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout)
2046{
2047 if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, BufferPackingStd430))
2048 return "std430";
2049 else if (buffer_is_packing_standard(type, BufferPackingStd140))
2050 return "std140";
2051 else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalar))
2052 {
2053 require_extension_internal("GL_EXT_scalar_block_layout");
2054 return "scalar";
2055 }
2056 else if (support_std430_without_scalar_layout &&
2057 buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
2058 {
2059 if (options.es && !options.vulkan_semantics)
			SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES-targets do "
			                  "not support GL_ARB_enhanced_layouts.");
2062 if (!options.es && !options.vulkan_semantics && options.version < 440)
2063 require_extension_internal("GL_ARB_enhanced_layouts");
2064
2065 set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
2066 return "std430";
2067 }
2068 else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout))
2069 {
		// Fallback time. We might be able to use GL_ARB_enhanced_layouts to deal with this difference;
		// however, we can only use layout(offset) on the block itself, not any substructs, so the substructs
		// had better use an appropriate layout on their own.
2072 // Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there.
2073 if (options.es && !options.vulkan_semantics)
			SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES-targets do "
			                  "not support GL_ARB_enhanced_layouts.");
2076 if (!options.es && !options.vulkan_semantics && options.version < 440)
2077 require_extension_internal("GL_ARB_enhanced_layouts");
2078
2079 set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
2080 return "std140";
2081 }
2082 else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout))
2083 {
2084 set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
2085 require_extension_internal("GL_EXT_scalar_block_layout");
2086 return "scalar";
2087 }
2088 else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
2089 buffer_is_packing_standard(type, BufferPackingStd430))
2090 {
2091 // UBOs can support std430 with GL_EXT_scalar_block_layout.
2092 require_extension_internal("GL_EXT_scalar_block_layout");
2093 return "std430";
2094 }
2095 else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
2096 buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
2097 {
2098 // UBOs can support std430 with GL_EXT_scalar_block_layout.
2099 set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
2100 require_extension_internal("GL_EXT_scalar_block_layout");
2101 return "std430";
2102 }
2103 else
2104 {
2105 SPIRV_CROSS_THROW("Buffer block cannot be expressed as any of std430, std140, scalar, even with enhanced "
2106 "layouts. You can try flattening this block to support a more flexible layout.");
2107 }
2108}
2109
2110void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var)
2111{
2112 if (flattened_buffer_blocks.count(var.self))
2113 emit_buffer_block_flattened(var);
2114 else if (options.vulkan_semantics)
2115 emit_push_constant_block_vulkan(var);
2116 else if (options.emit_push_constant_as_uniform_buffer)
2117 emit_buffer_block_native(var);
2118 else
2119 emit_push_constant_block_glsl(var);
2120}
2121
2122void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var)
2123{
2124 emit_buffer_block(var);
2125}
2126
2127void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
2128{
	// OpenGL has no concept of push constant blocks, so we implement this as a plain uniform struct.
2130 auto &type = get<SPIRType>(var.basetype);
2131
2132 auto &flags = ir.meta[var.self].decoration.decoration_flags;
2133 flags.clear(DecorationBinding);
2134 flags.clear(DecorationDescriptorSet);
2135
2136#if 0
2137 if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet)))
2138 SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. "
2139 "Remap to location with reflection API first or disable these decorations.");
2140#endif
2141
2142 // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
2143 // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
2144 auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
2145 bool block_flag = block_flags.get(DecorationBlock);
2146 block_flags.clear(DecorationBlock);
2147
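	// The result is a plain struct declaration followed by a uniform of that struct type, e.g.:
	//   struct PushConsts { vec4 v; };
	//   uniform PushConsts push;
	// (names here are purely illustrative)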
2148 emit_struct(type);
2149
2150 if (block_flag)
2151 block_flags.set(DecorationBlock);
2152
2153 emit_uniform(var);
2154 statement("");
2155}
2156
2157void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
2158{
2159 auto &type = get<SPIRType>(var.basetype);
2160 bool ubo_block = var.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock);
2161
2162 if (flattened_buffer_blocks.count(var.self))
2163 emit_buffer_block_flattened(var);
2164 else if (is_legacy() || (!options.es && options.version == 130) ||
2165 (ubo_block && options.emit_uniform_buffer_as_plain_uniforms))
2166 emit_buffer_block_legacy(var);
2167 else
2168 emit_buffer_block_native(var);
2169}
2170
2171void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var)
2172{
2173 auto &type = get<SPIRType>(var.basetype);
2174 bool ssbo = var.storage == StorageClassStorageBuffer ||
2175 ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
2176 if (ssbo)
2177 SPIRV_CROSS_THROW("SSBOs not supported in legacy targets.");
2178
	// We're emitting the buffer block as a regular struct, so disable the block qualifier temporarily.
2180 // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
2181 auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
2182 bool block_flag = block_flags.get(DecorationBlock);
2183 block_flags.clear(DecorationBlock);
2184 emit_struct(type);
2185 if (block_flag)
2186 block_flags.set(DecorationBlock);
2187 emit_uniform(var);
2188 statement("");
2189}
2190
2191void CompilerGLSL::emit_buffer_reference_block(uint32_t type_id, bool forward_declaration)
2192{
2193 auto &type = get<SPIRType>(type_id);
2194 string buffer_name;
2195
2196 if (forward_declaration)
2197 {
2198 // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
		// Allow an aliased name since we might be declaring the block twice: once as a buffer reference
		// (forward declared) and once as the proper declaration. The names must match up.
2201 buffer_name = to_name(type.self, false);
2202
2203 // Shaders never use the block by interface name, so we don't
2204 // have to track this other than updating name caches.
		// If we have a collision for any reason, just fall back immediately.
2206 if (ir.meta[type.self].decoration.alias.empty() ||
2207 block_ssbo_names.find(buffer_name) != end(block_ssbo_names) ||
2208 resource_names.find(buffer_name) != end(resource_names))
2209 {
2210 buffer_name = join("_", type.self);
2211 }
2212
2213 // Make sure we get something unique for both global name scope and block name scope.
2214 // See GLSL 4.5 spec: section 4.3.9 for details.
2215 add_variable(block_ssbo_names, resource_names, buffer_name);
2216
2217 // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
2218 // This cannot conflict with anything else, so we're safe now.
		// We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
2220 if (buffer_name.empty())
2221 buffer_name = join("_", type.self);
2222
2223 block_names.insert(buffer_name);
2224 block_ssbo_names.insert(buffer_name);
2225
2226 // Ensure we emit the correct name when emitting non-forward pointer type.
2227 ir.meta[type.self].decoration.alias = buffer_name;
2228 }
2229 else if (type.basetype != SPIRType::Struct)
2230 buffer_name = type_to_glsl(type);
2231 else
2232 buffer_name = to_name(type.self, false);
2233
2234 if (!forward_declaration)
2235 {
2236 auto itr = physical_storage_type_to_alignment.find(type_id);
2237 uint32_t alignment = 0;
2238 if (itr != physical_storage_type_to_alignment.end())
2239 alignment = itr->second.alignment;
2240
2241 if (type.basetype == SPIRType::Struct)
2242 {
2243 SmallVector<std::string> attributes;
2244 attributes.push_back("buffer_reference");
2245 if (alignment)
2246 attributes.push_back(join("buffer_reference_align = ", alignment));
2247 attributes.push_back(buffer_to_packing_standard(type, true));
2248
2249 auto flags = ir.get_buffer_block_type_flags(type);
2250 string decorations;
2251 if (flags.get(DecorationRestrict))
2252 decorations += " restrict";
2253 if (flags.get(DecorationCoherent))
2254 decorations += " coherent";
2255 if (flags.get(DecorationNonReadable))
2256 decorations += " writeonly";
2257 if (flags.get(DecorationNonWritable))
2258 decorations += " readonly";
2259
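			// Emits something like: layout(buffer_reference, buffer_reference_align = 16, std430) restrict buffer Foo
			// (alignment, packing and qualifiers depend on the type).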
2260 statement("layout(", merge(attributes), ")", decorations, " buffer ", buffer_name);
2261 }
2262 else if (alignment)
2263 statement("layout(buffer_reference, buffer_reference_align = ", alignment, ") buffer ", buffer_name);
2264 else
2265 statement("layout(buffer_reference) buffer ", buffer_name);
2266
2267 begin_scope();
2268
2269 if (type.basetype == SPIRType::Struct)
2270 {
2271 type.member_name_cache.clear();
2272
2273 uint32_t i = 0;
2274 for (auto &member : type.member_types)
2275 {
2276 add_member_name(type, i);
2277 emit_struct_member(type, member, i);
2278 i++;
2279 }
2280 }
2281 else
2282 {
2283 auto &pointee_type = get_pointee_type(type);
2284 statement(type_to_glsl(pointee_type), " value", type_to_array_glsl(pointee_type), ";");
2285 }
2286
2287 end_scope_decl();
2288 statement("");
2289 }
2290 else
2291 {
2292 statement("layout(buffer_reference) buffer ", buffer_name, ";");
2293 }
2294}
2295
2296void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
2297{
2298 auto &type = get<SPIRType>(var.basetype);
2299
2300 Bitset flags = ir.get_buffer_block_flags(var);
2301 bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
2302 ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
2303 bool is_restrict = ssbo && flags.get(DecorationRestrict);
2304 bool is_writeonly = ssbo && flags.get(DecorationNonReadable);
2305 bool is_readonly = ssbo && flags.get(DecorationNonWritable);
2306 bool is_coherent = ssbo && flags.get(DecorationCoherent);
2307
2308 // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
2309 auto buffer_name = to_name(type.self, false);
2310
2311 auto &block_namespace = ssbo ? block_ssbo_names : block_ubo_names;
2312
2313 // Shaders never use the block by interface name, so we don't
2314 // have to track this other than updating name caches.
	// If we have a collision for any reason, just fall back immediately.
2316 if (ir.meta[type.self].decoration.alias.empty() || block_namespace.find(buffer_name) != end(block_namespace) ||
2317 resource_names.find(buffer_name) != end(resource_names))
2318 {
2319 buffer_name = get_block_fallback_name(var.self);
2320 }
2321
2322 // Make sure we get something unique for both global name scope and block name scope.
2323 // See GLSL 4.5 spec: section 4.3.9 for details.
2324 add_variable(block_namespace, resource_names, buffer_name);
2325
2326 // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
2327 // This cannot conflict with anything else, so we're safe now.
	// We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
2329 if (buffer_name.empty())
2330 buffer_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
2331
2332 block_names.insert(buffer_name);
2333 block_namespace.insert(buffer_name);
2334
2335 // Save for post-reflection later.
2336 declared_block_names[var.self] = buffer_name;
2337
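	// In Vulkan GLSL this emits something like: layout(set = 0, binding = 2, std430) readonly buffer SSBO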
2338 statement(layout_for_variable(var), is_coherent ? "coherent " : "", is_restrict ? "restrict " : "",
2339 is_writeonly ? "writeonly " : "", is_readonly ? "readonly " : "", ssbo ? "buffer " : "uniform ",
2340 buffer_name);
2341
2342 begin_scope();
2343
2344 type.member_name_cache.clear();
2345
2346 uint32_t i = 0;
2347 for (auto &member : type.member_types)
2348 {
2349 add_member_name(type, i);
2350 emit_struct_member(type, member, i);
2351 i++;
2352 }
2353
2354 // var.self can be used as a backup name for the block name,
2355 // so we need to make sure we don't disturb the name here on a recompile.
2356 // It will need to be reset if we have to recompile.
2357 preserve_alias_on_reset(var.self);
2358 add_resource_name(var.self);
2359 end_scope_decl(to_name(var.self) + type_to_array_glsl(type));
2360 statement("");
2361}
2362
2363void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var)
2364{
2365 auto &type = get<SPIRType>(var.basetype);
2366
2367 // Block names should never alias.
2368 auto buffer_name = to_name(type.self, false);
2369 size_t buffer_size = (get_declared_struct_size(type) + 15) / 16;
2370
2371 SPIRType::BaseType basic_type;
2372 if (get_common_basic_type(type, basic_type))
2373 {
2374 SPIRType tmp;
2375 tmp.basetype = basic_type;
2376 tmp.vecsize = 4;
2377 if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt)
2378 SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint.");
2379
2380 auto flags = ir.get_buffer_block_flags(var);
2381 statement("uniform ", flags_to_qualifiers_glsl(tmp, flags), type_to_glsl(tmp), " ", buffer_name, "[",
2382 buffer_size, "];");
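		// e.g. a 192 byte block of floats ends up as something like: uniform vec4 UBO[12];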
2383 }
2384 else
2385 SPIRV_CROSS_THROW("All basic types in a flattened block must be the same.");
2386}
2387
2388const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
2389{
2390 auto &execution = get_entry_point();
2391
2392 if (subpass_input_is_framebuffer_fetch(var.self))
2393 return "";
2394
2395 if (var.storage == StorageClassInput || var.storage == StorageClassOutput)
2396 {
2397 if (is_legacy() && execution.model == ExecutionModelVertex)
2398 return var.storage == StorageClassInput ? "attribute " : "varying ";
2399 else if (is_legacy() && execution.model == ExecutionModelFragment)
2400 return "varying "; // Fragment outputs are renamed so they never hit this case.
2401 else if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput)
2402 {
2403 uint32_t loc = get_decoration(var.self, DecorationLocation);
2404 bool is_inout = location_is_framebuffer_fetch(loc);
2405 if (is_inout)
2406 return "inout ";
2407 else
2408 return "out ";
2409 }
2410 else
2411 return var.storage == StorageClassInput ? "in " : "out ";
2412 }
2413 else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform ||
2414 var.storage == StorageClassPushConstant)
2415 {
2416 return "uniform ";
2417 }
2418 else if (var.storage == StorageClassRayPayloadKHR)
2419 {
2420 return ray_tracing_is_khr ? "rayPayloadEXT " : "rayPayloadNV ";
2421 }
2422 else if (var.storage == StorageClassIncomingRayPayloadKHR)
2423 {
2424 return ray_tracing_is_khr ? "rayPayloadInEXT " : "rayPayloadInNV ";
2425 }
2426 else if (var.storage == StorageClassHitAttributeKHR)
2427 {
2428 return ray_tracing_is_khr ? "hitAttributeEXT " : "hitAttributeNV ";
2429 }
2430 else if (var.storage == StorageClassCallableDataKHR)
2431 {
2432 return ray_tracing_is_khr ? "callableDataEXT " : "callableDataNV ";
2433 }
2434 else if (var.storage == StorageClassIncomingCallableDataKHR)
2435 {
2436 return ray_tracing_is_khr ? "callableDataInEXT " : "callableDataInNV ";
2437 }
2438
2439 return "";
2440}
2441
2442void CompilerGLSL::emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual,
2443 const SmallVector<uint32_t> &indices)
2444{
2445 uint32_t member_type_id = type.self;
2446 const SPIRType *member_type = &type;
2447 const SPIRType *parent_type = nullptr;
2448 auto flattened_name = basename;
2449 for (auto &index : indices)
2450 {
2451 flattened_name += "_";
2452 flattened_name += to_member_name(*member_type, index);
2453 parent_type = member_type;
2454 member_type_id = member_type->member_types[index];
2455 member_type = &get<SPIRType>(member_type_id);
2456 }
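	// e.g. a member "color" of a block instance named "VSOut" ends up as a flattened varying "VSOut_color".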
2457
2458 assert(member_type->basetype != SPIRType::Struct);
2459
2460 // We're overriding struct member names, so ensure we do so on the primary type.
2461 if (parent_type->type_alias)
2462 parent_type = &get<SPIRType>(parent_type->type_alias);
2463
2464 // Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row,
2465 // which is not allowed.
2466 ParsedIR::sanitize_underscores(flattened_name);
2467
2468 uint32_t last_index = indices.back();
2469
2470 // Pass in the varying qualifier here so it will appear in the correct declaration order.
2471 // Replace member name while emitting it so it encodes both struct name and member name.
2472 auto backup_name = get_member_name(parent_type->self, last_index);
2473 auto member_name = to_member_name(*parent_type, last_index);
2474 set_member_name(parent_type->self, last_index, flattened_name);
2475 emit_struct_member(*parent_type, member_type_id, last_index, qual);
2476 // Restore member name.
2477 set_member_name(parent_type->self, last_index, member_name);
2478}
2479
2480void CompilerGLSL::emit_flattened_io_block_struct(const std::string &basename, const SPIRType &type, const char *qual,
2481 const SmallVector<uint32_t> &indices)
2482{
2483 auto sub_indices = indices;
2484 sub_indices.push_back(0);
2485
2486 const SPIRType *member_type = &type;
2487 for (auto &index : indices)
2488 member_type = &get<SPIRType>(member_type->member_types[index]);
2489
2490 assert(member_type->basetype == SPIRType::Struct);
2491
2492 if (!member_type->array.empty())
2493 SPIRV_CROSS_THROW("Cannot flatten array of structs in I/O blocks.");
2494
2495 for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
2496 {
2497 sub_indices.back() = i;
2498 if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct)
2499 emit_flattened_io_block_struct(basename, type, qual, sub_indices);
2500 else
2501 emit_flattened_io_block_member(basename, type, qual, sub_indices);
2502 }
2503}
2504
2505void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual)
2506{
2507 auto &var_type = get<SPIRType>(var.basetype);
2508 if (!var_type.array.empty())
2509 SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings.");
2510
2511 // Emit flattened types based on the type alias. Normally, we are never supposed to emit
2512 // struct declarations for aliased types.
2513 auto &type = var_type.type_alias ? get<SPIRType>(var_type.type_alias) : var_type;
2514
2515 auto old_flags = ir.meta[type.self].decoration.decoration_flags;
2516 // Emit the members as if they are part of a block to get all qualifiers.
2517 ir.meta[type.self].decoration.decoration_flags.set(DecorationBlock);
2518
2519 type.member_name_cache.clear();
2520
2521 SmallVector<uint32_t> member_indices;
2522 member_indices.push_back(0);
2523 auto basename = to_name(var.self);
2524
2525 uint32_t i = 0;
2526 for (auto &member : type.member_types)
2527 {
2528 add_member_name(type, i);
2529 auto &membertype = get<SPIRType>(member);
2530
2531 member_indices.back() = i;
2532 if (membertype.basetype == SPIRType::Struct)
2533 emit_flattened_io_block_struct(basename, type, qual, member_indices);
2534 else
2535 emit_flattened_io_block_member(basename, type, qual, member_indices);
2536 i++;
2537 }
2538
2539 ir.meta[type.self].decoration.decoration_flags = old_flags;
2540
2541 // Treat this variable as fully flattened from now on.
2542 flattened_structs[var.self] = true;
2543}
2544
2545void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
2546{
2547 auto &type = get<SPIRType>(var.basetype);
2548
2549 if (var.storage == StorageClassInput && type.basetype == SPIRType::Double &&
2550 !options.es && options.version < 410)
2551 {
2552 require_extension_internal("GL_ARB_vertex_attrib_64bit");
2553 }
2554
	// Either make it a plain in/out variable or an in/out block depending on what the shader is doing ...
2556 bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock);
2557 const char *qual = to_storage_qualifiers_glsl(var);
2558
2559 if (block)
2560 {
2561 // ESSL earlier than 310 and GLSL earlier than 150 did not support
2562 // I/O variables which are struct types.
2563 // To support this, flatten the struct into separate varyings instead.
2564 if (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
2565 (!options.es && options.version < 150))
2566 {
2567 // I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320.
2568 // On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150).
2569 emit_flattened_io_block(var, qual);
2570 }
2571 else
2572 {
2573 if (options.es && options.version < 320)
2574 {
2575 // Geometry and tessellation extensions imply this extension.
2576 if (!has_extension("GL_EXT_geometry_shader") && !has_extension("GL_EXT_tessellation_shader"))
2577 require_extension_internal("GL_EXT_shader_io_blocks");
2578 }
2579
2580 // Workaround to make sure we can emit "patch in/out" correctly.
2581 fixup_io_block_patch_qualifiers(var);
2582
2583 // Block names should never alias.
2584 auto block_name = to_name(type.self, false);
2585
2586 // The namespace for I/O blocks is separate from other variables in GLSL.
2587 auto &block_namespace = type.storage == StorageClassInput ? block_input_names : block_output_names;
2588
2589 // Shaders never use the block by interface name, so we don't
2590 // have to track this other than updating name caches.
2591 if (block_name.empty() || block_namespace.find(block_name) != end(block_namespace))
2592 block_name = get_fallback_name(type.self);
2593 else
2594 block_namespace.insert(block_name);
2595
			// If for some reason block_name is an illegal name, make a final fallback to a workaround name.
2597 // This cannot conflict with anything else, so we're safe now.
2598 if (block_name.empty())
2599 block_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
2600
2601 // Instance names cannot alias block names.
2602 resource_names.insert(block_name);
2603
2604 bool is_patch = has_decoration(var.self, DecorationPatch);
2605 statement(layout_for_variable(var), (is_patch ? "patch " : ""), qual, block_name);
2606 begin_scope();
2607
2608 type.member_name_cache.clear();
2609
2610 uint32_t i = 0;
2611 for (auto &member : type.member_types)
2612 {
2613 add_member_name(type, i);
2614 emit_struct_member(type, member, i);
2615 i++;
2616 }
2617
2618 add_resource_name(var.self);
2619 end_scope_decl(join(to_name(var.self), type_to_array_glsl(type)));
2620 statement("");
2621 }
2622 }
2623 else
2624 {
2625 // ESSL earlier than 310 and GLSL earlier than 150 did not support
2626 // I/O variables which are struct types.
2627 // To support this, flatten the struct into separate varyings instead.
2628 if (type.basetype == SPIRType::Struct &&
2629 (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
2630 (!options.es && options.version < 150)))
2631 {
2632 emit_flattened_io_block(var, qual);
2633 }
2634 else
2635 {
2636 add_resource_name(var.self);
2637
2638 // Tessellation control and evaluation shaders must have either gl_MaxPatchVertices or unsized arrays for input arrays.
2639 // Opt for unsized as it's the more "correct" variant to use.
2640 bool control_point_input_array = type.storage == StorageClassInput && !type.array.empty() &&
2641 !has_decoration(var.self, DecorationPatch) &&
2642 (get_entry_point().model == ExecutionModelTessellationControl ||
2643 get_entry_point().model == ExecutionModelTessellationEvaluation);
2644
2645 uint32_t old_array_size = 0;
2646 bool old_array_size_literal = true;
2647
2648 if (control_point_input_array)
2649 {
2650 swap(type.array.back(), old_array_size);
2651 swap(type.array_size_literal.back(), old_array_size_literal);
2652 }
2653
2654 statement(layout_for_variable(var), to_qualifiers_glsl(var.self),
2655 variable_decl(type, to_name(var.self), var.self), ";");
2656
2657 if (control_point_input_array)
2658 {
2659 swap(type.array.back(), old_array_size);
2660 swap(type.array_size_literal.back(), old_array_size_literal);
2661 }
2662 }
2663 }
2664}
2665
2666void CompilerGLSL::emit_uniform(const SPIRVariable &var)
2667{
2668 auto &type = get<SPIRType>(var.basetype);
2669 if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData)
2670 {
2671 if (!options.es && options.version < 420)
2672 require_extension_internal("GL_ARB_shader_image_load_store");
2673 else if (options.es && options.version < 310)
2674 SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store.");
2675 }
2676
2677 add_resource_name(var.self);
2678 statement(layout_for_variable(var), variable_decl(var), ";");
2679}
2680
2681string CompilerGLSL::constant_value_macro_name(uint32_t id)
2682{
2683 return join("SPIRV_CROSS_CONSTANT_ID_", id);
2684}
2685
2686void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant)
2687{
2688 auto &type = get<SPIRType>(constant.basetype);
2689 add_resource_name(constant.self);
2690 auto name = to_name(constant.self);
2691 statement("const ", variable_decl(type, name), " = ", constant_op_expression(constant), ";");
2692}
2693
2694int CompilerGLSL::get_constant_mapping_to_workgroup_component(const SPIRConstant &c) const
2695{
2696 auto &entry_point = get_entry_point();
2697 int index = -1;
2698
	// Need to redirect specialization constants which are used as WorkGroupSize to the builtin,
	// since the spec constants themselves are never explicitly declared.
2701 if (entry_point.workgroup_size.constant == 0 && entry_point.flags.get(ExecutionModeLocalSizeId))
2702 {
2703 if (c.self == entry_point.workgroup_size.id_x)
2704 index = 0;
2705 else if (c.self == entry_point.workgroup_size.id_y)
2706 index = 1;
2707 else if (c.self == entry_point.workgroup_size.id_z)
2708 index = 2;
2709 }
2710
2711 return index;
2712}
2713
2714void CompilerGLSL::emit_constant(const SPIRConstant &constant)
2715{
2716 auto &type = get<SPIRType>(constant.constant_type);
2717
2718 SpecializationConstant wg_x, wg_y, wg_z;
2719 ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
2720
2721 // This specialization constant is implicitly declared by emitting layout() in;
2722 if (constant.self == workgroup_size_id)
2723 return;
2724
2725 // These specialization constants are implicitly declared by emitting layout() in;
2726 // In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration
2727 // later can use macro overrides for work group size.
2728 bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id ||
2729 ConstantID(constant.self) == wg_z.id;
2730
2731 if (options.vulkan_semantics && is_workgroup_size_constant)
2732 {
2733 // Vulkan GLSL does not need to declare workgroup spec constants explicitly, it is handled in layout().
2734 return;
2735 }
2736 else if (!options.vulkan_semantics && is_workgroup_size_constant &&
2737 !has_decoration(constant.self, DecorationSpecId))
2738 {
2739 // Only bother declaring a workgroup size if it is actually a specialization constant, because we need macros.
2740 return;
2741 }
2742
2743 add_resource_name(constant.self);
2744 auto name = to_name(constant.self);
2745
2746 // Only scalars have constant IDs.
2747 if (has_decoration(constant.self, DecorationSpecId))
2748 {
2749 if (options.vulkan_semantics)
2750 {
2751 statement("layout(constant_id = ", get_decoration(constant.self, DecorationSpecId), ") const ",
2752 variable_decl(type, name), " = ", constant_expression(constant), ";");
2753 }
2754 else
2755 {
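			// Emit an overridable macro for GL targets, e.g.:
			//   #ifndef SPIRV_CROSS_CONSTANT_ID_0
			//   #define SPIRV_CROSS_CONSTANT_ID_0 1
			//   #endif
			//   const int foo = SPIRV_CROSS_CONSTANT_ID_0;
			// (constant ID, type and default value are illustrative)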
2756 const string &macro_name = constant.specialization_constant_macro_name;
2757 statement("#ifndef ", macro_name);
2758 statement("#define ", macro_name, " ", constant_expression(constant));
2759 statement("#endif");
2760
2761 // For workgroup size constants, only emit the macros.
2762 if (!is_workgroup_size_constant)
2763 statement("const ", variable_decl(type, name), " = ", macro_name, ";");
2764 }
2765 }
2766 else
2767 {
2768 statement("const ", variable_decl(type, name), " = ", constant_expression(constant), ";");
2769 }
2770}
2771
2772void CompilerGLSL::emit_entry_point_declarations()
2773{
2774}
2775
2776void CompilerGLSL::replace_illegal_names(const unordered_set<string> &keywords)
2777{
2778 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
2779 if (is_hidden_variable(var))
2780 return;
2781
2782 auto *meta = ir.find_meta(var.self);
2783 if (!meta)
2784 return;
2785
2786 auto &m = meta->decoration;
2787 if (keywords.find(m.alias) != end(keywords))
2788 m.alias = join("_", m.alias);
2789 });
2790
2791 ir.for_each_typed_id<SPIRFunction>([&](uint32_t, const SPIRFunction &func) {
2792 auto *meta = ir.find_meta(func.self);
2793 if (!meta)
2794 return;
2795
2796 auto &m = meta->decoration;
2797 if (keywords.find(m.alias) != end(keywords))
2798 m.alias = join("_", m.alias);
2799 });
2800
2801 ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
2802 auto *meta = ir.find_meta(type.self);
2803 if (!meta)
2804 return;
2805
2806 auto &m = meta->decoration;
2807 if (keywords.find(m.alias) != end(keywords))
2808 m.alias = join("_", m.alias);
2809
2810 for (auto &memb : meta->members)
2811 if (keywords.find(memb.alias) != end(keywords))
2812 memb.alias = join("_", memb.alias);
2813 });
2814}
2815
2816void CompilerGLSL::replace_illegal_names()
2817{
2818 // clang-format off
2819 static const unordered_set<string> keywords = {
2820 "abs", "acos", "acosh", "all", "any", "asin", "asinh", "atan", "atanh",
2821 "atomicAdd", "atomicCompSwap", "atomicCounter", "atomicCounterDecrement", "atomicCounterIncrement",
2822 "atomicExchange", "atomicMax", "atomicMin", "atomicOr", "atomicXor",
2823 "bitCount", "bitfieldExtract", "bitfieldInsert", "bitfieldReverse",
2824 "ceil", "cos", "cosh", "cross", "degrees",
2825 "dFdx", "dFdxCoarse", "dFdxFine",
2826 "dFdy", "dFdyCoarse", "dFdyFine",
2827 "distance", "dot", "EmitStreamVertex", "EmitVertex", "EndPrimitive", "EndStreamPrimitive", "equal", "exp", "exp2",
2828 "faceforward", "findLSB", "findMSB", "float16BitsToInt16", "float16BitsToUint16", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract",
2829 "frexp", "fwidth", "fwidthCoarse", "fwidthFine",
2830 "greaterThan", "greaterThanEqual", "groupMemoryBarrier",
2831 "imageAtomicAdd", "imageAtomicAnd", "imageAtomicCompSwap", "imageAtomicExchange", "imageAtomicMax", "imageAtomicMin", "imageAtomicOr", "imageAtomicXor",
2832 "imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "int16BitsToFloat16", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample",
2833 "inverse", "inversesqrt", "isinf", "isnan", "ldexp", "length", "lessThan", "lessThanEqual", "log", "log2",
2834 "matrixCompMult", "max", "memoryBarrier", "memoryBarrierAtomicCounter", "memoryBarrierBuffer", "memoryBarrierImage", "memoryBarrierShared",
2835 "min", "mix", "mod", "modf", "noise", "noise1", "noise2", "noise3", "noise4", "normalize", "not", "notEqual",
2836 "outerProduct", "packDouble2x32", "packHalf2x16", "packInt2x16", "packInt4x16", "packSnorm2x16", "packSnorm4x8",
2837 "packUint2x16", "packUint4x16", "packUnorm2x16", "packUnorm4x8", "pow",
2838 "radians", "reflect", "refract", "round", "roundEven", "sign", "sin", "sinh", "smoothstep", "sqrt", "step",
2839 "tan", "tanh", "texelFetch", "texelFetchOffset", "texture", "textureGather", "textureGatherOffset", "textureGatherOffsets",
2840 "textureGrad", "textureGradOffset", "textureLod", "textureLodOffset", "textureOffset", "textureProj", "textureProjGrad",
2841 "textureProjGradOffset", "textureProjLod", "textureProjLodOffset", "textureProjOffset", "textureQueryLevels", "textureQueryLod", "textureSamples", "textureSize",
2842 "transpose", "trunc", "uaddCarry", "uint16BitsToFloat16", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackInt2x16", "unpackInt4x16",
2843 "unpackSnorm2x16", "unpackSnorm4x8", "unpackUint2x16", "unpackUint4x16", "unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow",
2844
2845 "active", "asm", "atomic_uint", "attribute", "bool", "break", "buffer",
2846 "bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard",
2847 "dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4",
2848 "do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float",
2849 "for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray",
2850 "iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube",
2851 "iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect",
2852 "image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant",
2853 "isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect",
2854 "isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "long", "lowp",
2855 "mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump",
2856 "namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precise", "precision", "public", "readonly",
2857 "resource", "restrict", "return", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow",
2858 "sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray",
2859 "sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer",
2860 "samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "shared", "short", "sizeof", "smooth", "static",
2861 "struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D",
2862 "uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube",
2863 "uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray",
2864 "usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube",
2865 "usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile",
2866 "while", "writeonly",
2867 };
2868 // clang-format on
2869
2870 replace_illegal_names(keywords);
2871}
2872
2873void CompilerGLSL::replace_fragment_output(SPIRVariable &var)
2874{
2875 auto &m = ir.meta[var.self].decoration;
2876 uint32_t location = 0;
2877 if (m.decoration_flags.get(DecorationLocation))
2878 location = m.location;
2879
	// If our variable is arrayed, we must not emit the array part here;
	// the SPIR-V will do the access chain part of this for us.
2882 auto &type = get<SPIRType>(var.basetype);
2883
2884 if (type.array.empty())
2885 {
2886 // Redirect the write to a specific render target in legacy GLSL.
2887 m.alias = join("gl_FragData[", location, "]");
2888
2889 if (is_legacy_es() && location != 0)
2890 require_extension_internal("GL_EXT_draw_buffers");
2891 }
2892 else if (type.array.size() == 1)
2893 {
2894 // If location is non-zero, we probably have to add an offset.
2895 // This gets really tricky since we'd have to inject an offset in the access chain.
2896 // FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now.
2897 m.alias = "gl_FragData";
2898 if (location != 0)
2899 SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. "
2900 "This is unimplemented in SPIRV-Cross.");
2901
2902 if (is_legacy_es())
2903 require_extension_internal("GL_EXT_draw_buffers");
2904 }
2905 else
2906 SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL.");
2907
2908 var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is.
2909}
2910
2911void CompilerGLSL::replace_fragment_outputs()
2912{
2913 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
2914 auto &type = this->get<SPIRType>(var.basetype);
2915
2916 if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput)
2917 replace_fragment_output(var);
2918 });
2919}
2920
2921string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr)
2922{
2923 if (out_type.vecsize == input_components)
2924 return expr;
2925 else if (input_components == 1 && !backend.can_swizzle_scalar)
2926 return join(type_to_glsl(out_type), "(", expr, ")");
2927 else
2928 {
2929 // FIXME: This will not work with packed expressions.
2930 auto e = enclose_expression(expr) + ".";
2931 // Just clamp the swizzle index if we have more outputs than inputs.
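		// e.g. widening a 2-component expression to a vec4 output yields something like expr.xyyy.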
2932 for (uint32_t c = 0; c < out_type.vecsize; c++)
2933 e += index_to_swizzle(min(c, input_components - 1));
2934 if (backend.swizzle_is_function && out_type.vecsize > 1)
2935 e += "()";
2936
2937 remove_duplicate_swizzle(e);
2938 return e;
2939 }
2940}
2941
2942void CompilerGLSL::emit_pls()
2943{
2944 auto &execution = get_entry_point();
2945 if (execution.model != ExecutionModelFragment)
2946 SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders.");
2947
2948 if (!options.es)
2949 SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES.");
2950
2951 if (options.version < 300)
2952 SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above.");
2953
2954 if (!pls_inputs.empty())
2955 {
2956 statement("__pixel_local_inEXT _PLSIn");
2957 begin_scope();
2958 for (auto &input : pls_inputs)
2959 statement(pls_decl(input), ";");
2960 end_scope_decl();
2961 statement("");
2962 }
2963
2964 if (!pls_outputs.empty())
2965 {
2966 statement("__pixel_local_outEXT _PLSOut");
2967 begin_scope();
2968 for (auto &output : pls_outputs)
2969 statement(pls_decl(output), ";");
2970 end_scope_decl();
2971 statement("");
2972 }
2973}
2974
2975void CompilerGLSL::fixup_image_load_store_access()
2976{
2977 if (!options.enable_storage_image_qualifier_deduction)
2978 return;
2979
2980 ir.for_each_typed_id<SPIRVariable>([&](uint32_t var, const SPIRVariable &) {
2981 auto &vartype = expression_type(var);
2982 if (vartype.basetype == SPIRType::Image && vartype.image.sampled == 2)
2983 {
2984 // Very old glslangValidator and HLSL compilers do not emit required qualifiers here.
2985 // Solve this by making the image access as restricted as possible and loosen up if we need to.
2986 // If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing.
2987
2988 auto &flags = ir.meta[var].decoration.decoration_flags;
2989 if (!flags.get(DecorationNonWritable) && !flags.get(DecorationNonReadable))
2990 {
2991 flags.set(DecorationNonWritable);
2992 flags.set(DecorationNonReadable);
2993 }
2994 }
2995 });
2996}
2997
2998static bool is_block_builtin(BuiltIn builtin)
2999{
3000 return builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance ||
3001 builtin == BuiltInCullDistance;
3002}
3003
3004bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage)
3005{
3006 // If the builtin block uses XFB, we need to force explicit redeclaration of the builtin block.
3007
3008 if (storage != StorageClassOutput)
3009 return false;
3010 bool should_force = false;
3011
3012 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3013 if (should_force)
3014 return;
3015
3016 auto &type = this->get<SPIRType>(var.basetype);
3017 bool block = has_decoration(type.self, DecorationBlock);
3018 if (var.storage == storage && block && is_builtin_variable(var))
3019 {
3020 uint32_t member_count = uint32_t(type.member_types.size());
3021 for (uint32_t i = 0; i < member_count; i++)
3022 {
3023 if (has_member_decoration(type.self, i, DecorationBuiltIn) &&
3024 is_block_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn))) &&
3025 has_member_decoration(type.self, i, DecorationOffset))
3026 {
3027 should_force = true;
3028 }
3029 }
3030 }
3031 else if (var.storage == storage && !block && is_builtin_variable(var))
3032 {
3033 if (is_block_builtin(BuiltIn(get_decoration(type.self, DecorationBuiltIn))) &&
3034 has_decoration(var.self, DecorationOffset))
3035 {
3036 should_force = true;
3037 }
3038 }
3039 });
3040
3041 // If we're declaring clip/cull planes with control points we need to force block declaration.
3042 if (get_execution_model() == ExecutionModelTessellationControl &&
3043 (clip_distance_count || cull_distance_count))
3044 {
3045 should_force = true;
3046 }
3047
3048 return should_force;
3049}
3050
3051void CompilerGLSL::fixup_implicit_builtin_block_names()
3052{
3053 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3054 auto &type = this->get<SPIRType>(var.basetype);
3055 bool block = has_decoration(type.self, DecorationBlock);
3056 if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block &&
3057 is_builtin_variable(var))
3058 {
3059 // Make sure the array has a supported name in the code.
3060 if (var.storage == StorageClassOutput)
3061 set_name(var.self, "gl_out");
3062 else if (var.storage == StorageClassInput)
3063 set_name(var.self, "gl_in");
3064 }
3065 });
3066}
3067
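// Emits an explicit redeclaration of the gl_PerVertex builtin block for the given storage class.
// For a vertex shader which captures gl_Position with transform feedback, the emitted block
// looks roughly like this (illustrative only):
//
//   layout(xfb_buffer = 0, xfb_stride = 16) out gl_PerVertex
//   {
//       layout(xfb_offset = 0) vec4 gl_Position;
//   };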
3068void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model)
3069{
3070 Bitset emitted_builtins;
3071 Bitset global_builtins;
3072 const SPIRVariable *block_var = nullptr;
3073 bool emitted_block = false;
3074 bool builtin_array = false;
3075
	// Need to use the declared size from the type.
	// These variables might have been declared, but not statically used, so we haven't deduced their sizes yet.
3078 uint32_t cull_distance_size = 0;
3079 uint32_t clip_distance_size = 0;
3080
3081 bool have_xfb_buffer_stride = false;
3082 bool have_geom_stream = false;
3083 bool have_any_xfb_offset = false;
3084 uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
3085 std::unordered_map<uint32_t, uint32_t> builtin_xfb_offsets;
3086
3087 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3088 auto &type = this->get<SPIRType>(var.basetype);
3089 bool block = has_decoration(type.self, DecorationBlock);
3090 Bitset builtins;
3091
3092 if (var.storage == storage && block && is_builtin_variable(var))
3093 {
3094 uint32_t index = 0;
3095 for (auto &m : ir.meta[type.self].members)
3096 {
3097 if (m.builtin)
3098 {
3099 builtins.set(m.builtin_type);
3100 if (m.builtin_type == BuiltInCullDistance)
3101 cull_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index]));
3102 else if (m.builtin_type == BuiltInClipDistance)
3103 clip_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index]));
3104
3105 if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationOffset))
3106 {
3107 have_any_xfb_offset = true;
3108 builtin_xfb_offsets[m.builtin_type] = m.offset;
3109 }
3110
3111 if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream))
3112 {
3113 uint32_t stream = m.stream;
3114 if (have_geom_stream && geom_stream != stream)
3115 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
3116 have_geom_stream = true;
3117 geom_stream = stream;
3118 }
3119 }
3120 index++;
3121 }
3122
3123 if (storage == StorageClassOutput && has_decoration(var.self, DecorationXfbBuffer) &&
3124 has_decoration(var.self, DecorationXfbStride))
3125 {
3126 uint32_t buffer_index = get_decoration(var.self, DecorationXfbBuffer);
3127 uint32_t stride = get_decoration(var.self, DecorationXfbStride);
3128 if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
3129 SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
3130 if (have_xfb_buffer_stride && stride != xfb_stride)
					SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
3132 have_xfb_buffer_stride = true;
3133 xfb_buffer = buffer_index;
3134 xfb_stride = stride;
3135 }
3136
3137 if (storage == StorageClassOutput && has_decoration(var.self, DecorationStream))
3138 {
3139 uint32_t stream = get_decoration(var.self, DecorationStream);
3140 if (have_geom_stream && geom_stream != stream)
3141 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
3142 have_geom_stream = true;
3143 geom_stream = stream;
3144 }
3145 }
3146 else if (var.storage == storage && !block && is_builtin_variable(var))
3147 {
3148 // While we're at it, collect all declared global builtins (HLSL mostly ...).
3149 auto &m = ir.meta[var.self].decoration;
3150 if (m.builtin)
3151 {
3152 global_builtins.set(m.builtin_type);
3153 if (m.builtin_type == BuiltInCullDistance)
3154 cull_distance_size = to_array_size_literal(type);
3155 else if (m.builtin_type == BuiltInClipDistance)
3156 clip_distance_size = to_array_size_literal(type);
3157
3158 if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationXfbStride) &&
3159 m.decoration_flags.get(DecorationXfbBuffer) && m.decoration_flags.get(DecorationOffset))
3160 {
3161 have_any_xfb_offset = true;
3162 builtin_xfb_offsets[m.builtin_type] = m.offset;
3163 uint32_t buffer_index = m.xfb_buffer;
3164 uint32_t stride = m.xfb_stride;
3165 if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
3166 SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
3167 if (have_xfb_buffer_stride && stride != xfb_stride)
						SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
3169 have_xfb_buffer_stride = true;
3170 xfb_buffer = buffer_index;
3171 xfb_stride = stride;
3172 }
3173
3174 if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream))
3175 {
3176 uint32_t stream = get_decoration(var.self, DecorationStream);
3177 if (have_geom_stream && geom_stream != stream)
3178 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
3179 have_geom_stream = true;
3180 geom_stream = stream;
3181 }
3182 }
3183 }
3184
3185 if (builtins.empty())
3186 return;
3187
3188 if (emitted_block)
3189 SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block.");
3190
3191 emitted_builtins = builtins;
3192 emitted_block = true;
3193 builtin_array = !type.array.empty();
3194 block_var = &var;
3195 });
3196
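	// Only Position, PointSize, ClipDistance and CullDistance can be members of gl_PerVertex,
	// so mask away any other global builtins collected above.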
3197 global_builtins =
3198 Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << BuiltInPointSize) |
3199 (1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance)));
3200
3201 // Try to collect all other declared builtins.
3202 if (!emitted_block)
3203 emitted_builtins = global_builtins;
3204
3205 // Can't declare an empty interface block.
3206 if (emitted_builtins.empty())
3207 return;
3208
3209 if (storage == StorageClassOutput)
3210 {
3211 SmallVector<string> attr;
3212 if (have_xfb_buffer_stride && have_any_xfb_offset)
3213 {
3214 if (!options.es)
3215 {
				if (options.version < 140)
					SPIRV_CROSS_THROW("Need GLSL 1.40 and GL_ARB_enhanced_layouts for xfb_buffer or xfb_stride.");
				else if (options.version < 440)
					require_extension_internal("GL_ARB_enhanced_layouts");
			}
			else
3224 SPIRV_CROSS_THROW("Need GL_ARB_enhanced_layouts for xfb_stride or xfb_buffer.");
3225 attr.push_back(join("xfb_buffer = ", xfb_buffer, ", xfb_stride = ", xfb_stride));
3226 }
3227
3228 if (have_geom_stream)
3229 {
3230 if (get_execution_model() != ExecutionModelGeometry)
3231 SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
3232 if (options.es)
3233 SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
3234 if (options.version < 400)
3235 require_extension_internal("GL_ARB_transform_feedback3");
3236 attr.push_back(join("stream = ", geom_stream));
3237 }
3238
3239 if (!attr.empty())
3240 statement("layout(", merge(attr), ") out gl_PerVertex");
3241 else
3242 statement("out gl_PerVertex");
3243 }
3244 else
3245 {
		// If the shader uses geometry passthrough, gl_PerVertex must be declared as passthrough as well.
3247 if (get_entry_point().geometry_passthrough)
3248 statement("layout(passthrough) in gl_PerVertex");
3249 else
3250 statement("in gl_PerVertex");
3251 }
3252
3253 begin_scope();
3254 if (emitted_builtins.get(BuiltInPosition))
3255 {
3256 auto itr = builtin_xfb_offsets.find(BuiltInPosition);
3257 if (itr != end(builtin_xfb_offsets))
3258 statement("layout(xfb_offset = ", itr->second, ") vec4 gl_Position;");
3259 else
3260 statement("vec4 gl_Position;");
3261 }
3262
3263 if (emitted_builtins.get(BuiltInPointSize))
3264 {
3265 auto itr = builtin_xfb_offsets.find(BuiltInPointSize);
3266 if (itr != end(builtin_xfb_offsets))
3267 statement("layout(xfb_offset = ", itr->second, ") float gl_PointSize;");
3268 else
3269 statement("float gl_PointSize;");
3270 }
3271
3272 if (emitted_builtins.get(BuiltInClipDistance))
3273 {
3274 auto itr = builtin_xfb_offsets.find(BuiltInClipDistance);
3275 if (itr != end(builtin_xfb_offsets))
3276 statement("layout(xfb_offset = ", itr->second, ") float gl_ClipDistance[", clip_distance_size, "];");
3277 else
3278 statement("float gl_ClipDistance[", clip_distance_size, "];");
3279 }
3280
3281 if (emitted_builtins.get(BuiltInCullDistance))
3282 {
3283 auto itr = builtin_xfb_offsets.find(BuiltInCullDistance);
3284 if (itr != end(builtin_xfb_offsets))
3285 statement("layout(xfb_offset = ", itr->second, ") float gl_CullDistance[", cull_distance_size, "];");
3286 else
3287 statement("float gl_CullDistance[", cull_distance_size, "];");
3288 }
3289
3290 if (builtin_array)
3291 {
3292 if (model == ExecutionModelTessellationControl && storage == StorageClassOutput)
3293 end_scope_decl(join(to_name(block_var->self), "[", get_entry_point().output_vertices, "]"));
3294 else
3295 end_scope_decl(join(to_name(block_var->self), "[]"));
3296 }
3297 else
3298 end_scope_decl();
3299 statement("");
3300}
3301
3302void CompilerGLSL::declare_undefined_values()
3303{
3304 bool emitted = false;
3305 ir.for_each_typed_id<SPIRUndef>([&](uint32_t, const SPIRUndef &undef) {
3306 auto &type = this->get<SPIRType>(undef.basetype);
3307 // OpUndef can be void for some reason ...
3308 if (type.basetype == SPIRType::Void)
3309 return;
3310
3311 string initializer;
3312 if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
3313 initializer = join(" = ", to_zero_initialized_expression(undef.basetype));
3314
3315 statement(variable_decl(type, to_name(undef.self), undef.self), initializer, ";");
3316 emitted = true;
3317 });
3318
3319 if (emitted)
3320 statement("");
3321}
3322
3323bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const
3324{
3325 bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable;
3326
3327 if (statically_assigned)
3328 {
3329 auto *constant = maybe_get<SPIRConstant>(var.static_expression);
3330 if (constant && constant->is_used_as_lut)
3331 return true;
3332 }
3333
3334 return false;
3335}
3336
3337void CompilerGLSL::emit_resources()
3338{
3339 auto &execution = get_entry_point();
3340
3341 replace_illegal_names();
3342
3343 // Legacy GL uses gl_FragData[], redeclare all fragment outputs
3344 // with builtins.
3345 if (execution.model == ExecutionModelFragment && is_legacy())
3346 replace_fragment_outputs();
3347
3348 // Emit PLS blocks if we have such variables.
3349 if (!pls_inputs.empty() || !pls_outputs.empty())
3350 emit_pls();
3351
3352 switch (execution.model)
3353 {
3354 case ExecutionModelGeometry:
3355 case ExecutionModelTessellationControl:
3356 case ExecutionModelTessellationEvaluation:
3357 fixup_implicit_builtin_block_names();
3358 break;
3359
3360 default:
3361 break;
3362 }
3363
3364 // Emit custom gl_PerVertex for SSO compatibility.
3365 if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment)
3366 {
3367 switch (execution.model)
3368 {
3369 case ExecutionModelGeometry:
3370 case ExecutionModelTessellationControl:
3371 case ExecutionModelTessellationEvaluation:
3372 emit_declared_builtin_block(StorageClassInput, execution.model);
3373 emit_declared_builtin_block(StorageClassOutput, execution.model);
3374 break;
3375
3376 case ExecutionModelVertex:
3377 emit_declared_builtin_block(StorageClassOutput, execution.model);
3378 break;
3379
3380 default:
3381 break;
3382 }
3383 }
3384 else if (should_force_emit_builtin_block(StorageClassOutput))
3385 {
3386 emit_declared_builtin_block(StorageClassOutput, execution.model);
3387 }
3388 else if (execution.geometry_passthrough)
3389 {
3390 // Need to declare gl_in with Passthrough.
3391 // If we're doing passthrough, we cannot emit an output block, so the output block test above will never pass.
3392 emit_declared_builtin_block(StorageClassInput, execution.model);
3393 }
3394 else
3395 {
3396 // Need to redeclare clip/cull distance with explicit size to use them.
3397 // SPIR-V mandates these builtins have a size declared.
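		// E.g. a shader using two user clip planes gets "out float gl_ClipDistance[2];".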
3398 const char *storage = execution.model == ExecutionModelFragment ? "in" : "out";
3399 if (clip_distance_count != 0)
3400 statement(storage, " float gl_ClipDistance[", clip_distance_count, "];");
3401 if (cull_distance_count != 0)
3402 statement(storage, " float gl_CullDistance[", cull_distance_count, "];");
3403 if (clip_distance_count != 0 || cull_distance_count != 0)
3404 statement("");
3405 }
3406
3407 if (position_invariant)
3408 {
3409 statement("invariant gl_Position;");
3410 statement("");
3411 }
3412
3413 bool emitted = false;
3414
	// When emitting Vulkan GLSL, emit specialization constants as true specialization constants;
	// spec op expressions will redirect to the constant name.
	// For plain GLSL, they become constants backed by a macro which can be overridden at compile time.
	//
3419 {
3420 auto loop_lock = ir.create_loop_hard_lock();
3421 for (auto &id_ : ir.ids_for_constant_or_type)
3422 {
3423 auto &id = ir.ids[id_];
3424
3425 if (id.get_type() == TypeConstant)
3426 {
3427 auto &c = id.get<SPIRConstant>();
3428
3429 bool needs_declaration = c.specialization || c.is_used_as_lut;
3430
3431 if (needs_declaration)
3432 {
3433 if (!options.vulkan_semantics && c.specialization)
3434 {
3435 c.specialization_constant_macro_name =
3436 constant_value_macro_name(get_decoration(c.self, DecorationSpecId));
3437 }
3438 emit_constant(c);
3439 emitted = true;
3440 }
3441 }
3442 else if (id.get_type() == TypeConstantOp)
3443 {
3444 emit_specialization_constant_op(id.get<SPIRConstantOp>());
3445 emitted = true;
3446 }
3447 else if (id.get_type() == TypeType)
3448 {
3449 auto *type = &id.get<SPIRType>();
3450
3451 bool is_natural_struct = type->basetype == SPIRType::Struct && type->array.empty() && !type->pointer &&
3452 (!has_decoration(type->self, DecorationBlock) &&
3453 !has_decoration(type->self, DecorationBufferBlock));
3454
3455 // Special case, ray payload and hit attribute blocks are not really blocks, just regular structs.
3456 if (type->basetype == SPIRType::Struct && type->pointer &&
3457 has_decoration(type->self, DecorationBlock) &&
3458 (type->storage == StorageClassRayPayloadKHR || type->storage == StorageClassIncomingRayPayloadKHR ||
3459 type->storage == StorageClassHitAttributeKHR))
3460 {
3461 type = &get<SPIRType>(type->parent_type);
3462 is_natural_struct = true;
3463 }
3464
3465 if (is_natural_struct)
3466 {
3467 if (emitted)
3468 statement("");
3469 emitted = false;
3470
3471 emit_struct(*type);
3472 }
3473 }
3474 }
3475 }
3476
3477 if (emitted)
3478 statement("");
3479
3480 // If we needed to declare work group size late, check here.
3481 // If the work group size depends on a specialization constant, we need to declare the layout() block
3482 // after constants (and their macros) have been declared.
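	// For plain GLSL this typically ends up looking something like:
	//   layout(local_size_x = SPIRV_CROSS_CONSTANT_ID_0, local_size_y = 1, local_size_z = 1) in;
	// where the macro carries the specialization constant's default value and can be overridden at compile time.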
3483 if (execution.model == ExecutionModelGLCompute && !options.vulkan_semantics &&
3484 (execution.workgroup_size.constant != 0 || execution.flags.get(ExecutionModeLocalSizeId)))
3485 {
3486 SpecializationConstant wg_x, wg_y, wg_z;
3487 get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
3488
3489 if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0)))
3490 {
3491 SmallVector<string> inputs;
3492 build_workgroup_size(inputs, wg_x, wg_y, wg_z);
3493 statement("layout(", merge(inputs), ") in;");
3494 statement("");
3495 }
3496 }
3497
3498 emitted = false;
3499
3500 if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
3501 {
3502 for (auto type : physical_storage_non_block_pointer_types)
3503 {
3504 emit_buffer_reference_block(type, false);
3505 }
3506
3507 // Output buffer reference blocks.
3508 // Do this in two stages, one with forward declaration,
3509 // and one without. Buffer reference blocks can reference themselves
3510 // to support things like linked lists.
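		// The two passes produce declarations roughly along these lines (illustrative only):
		//   layout(buffer_reference) buffer Node;
		//   layout(buffer_reference, std430) buffer Node { int value; Node next; };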
3511 ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
3512 if (type.basetype == SPIRType::Struct && type.pointer &&
3513 type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
3514 type.storage == StorageClassPhysicalStorageBufferEXT)
3515 {
3516 emit_buffer_reference_block(self, true);
3517 }
3518 });
3519
3520 ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
3521 if (type.basetype == SPIRType::Struct &&
3522 type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
3523 type.storage == StorageClassPhysicalStorageBufferEXT)
3524 {
3525 emit_buffer_reference_block(self, false);
3526 }
3527 });
3528 }
3529
3530 // Output UBOs and SSBOs
3531 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3532 auto &type = this->get<SPIRType>(var.basetype);
3533
3534 bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform ||
3535 type.storage == StorageClassShaderRecordBufferKHR;
3536 bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
3537 ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
3538
3539 if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
3540 has_block_flags)
3541 {
3542 emit_buffer_block(var);
3543 }
3544 });
3545
3546 // Output push constant blocks
3547 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3548 auto &type = this->get<SPIRType>(var.basetype);
3549 if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant &&
3550 !is_hidden_variable(var))
3551 {
3552 emit_push_constant_block(var);
3553 }
3554 });
3555
3556 bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics;
3557
3558 // Output Uniform Constants (values, samplers, images, etc).
3559 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3560 auto &type = this->get<SPIRType>(var.basetype);
3561
3562 // If we're remapping separate samplers and images, only emit the combined samplers.
3563 if (skip_separate_image_sampler)
3564 {
3565 // Sampler buffers are always used without a sampler, and they will also work in regular GL.
3566 bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer;
3567 bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
3568 bool separate_sampler = type.basetype == SPIRType::Sampler;
3569 if (!sampler_buffer && (separate_image || separate_sampler))
3570 return;
3571 }
3572
3573 if (var.storage != StorageClassFunction && type.pointer &&
3574 (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter ||
3575 type.storage == StorageClassRayPayloadKHR || type.storage == StorageClassIncomingRayPayloadKHR ||
3576 type.storage == StorageClassCallableDataKHR || type.storage == StorageClassIncomingCallableDataKHR ||
3577 type.storage == StorageClassHitAttributeKHR) &&
3578 !is_hidden_variable(var))
3579 {
3580 emit_uniform(var);
3581 emitted = true;
3582 }
3583 });
3584
3585 if (emitted)
3586 statement("");
3587 emitted = false;
3588
3589 bool emitted_base_instance = false;
3590
3591 // Output in/out interfaces.
3592 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3593 auto &type = this->get<SPIRType>(var.basetype);
3594
3595 bool is_hidden = is_hidden_variable(var);
3596
3597 // Unused output I/O variables might still be required to implement framebuffer fetch.
3598 if (var.storage == StorageClassOutput && !is_legacy() &&
3599 location_is_framebuffer_fetch(get_decoration(var.self, DecorationLocation)) != 0)
3600 {
3601 is_hidden = false;
3602 }
3603
3604 if (var.storage != StorageClassFunction && type.pointer &&
3605 (var.storage == StorageClassInput || var.storage == StorageClassOutput) &&
3606 interface_variable_exists_in_entry_point(var.self) && !is_hidden)
3607 {
3608 if (options.es && get_execution_model() == ExecutionModelVertex && var.storage == StorageClassInput &&
3609 type.array.size() == 1)
3610 {
3611 SPIRV_CROSS_THROW("OpenGL ES doesn't support array input variables in vertex shader.");
3612 }
3613 emit_interface_block(var);
3614 emitted = true;
3615 }
3616 else if (is_builtin_variable(var))
3617 {
3618 auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
3619 // For gl_InstanceIndex emulation on GLES, the API user needs to
3620 // supply this uniform.
3621
3622 // The draw parameter extension is soft-enabled on GL with some fallbacks.
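			// If the extension is unavailable at compile time, the application is expected to upload
			// the draw call's base instance / base vertex to these uniforms itself.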
3623 if (!options.vulkan_semantics)
3624 {
3625 if (!emitted_base_instance &&
3626 ((options.vertex.support_nonzero_base_instance && builtin == BuiltInInstanceIndex) ||
3627 (builtin == BuiltInBaseInstance)))
3628 {
3629 statement("#ifdef GL_ARB_shader_draw_parameters");
3630 statement("#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB");
3631 statement("#else");
					// A crude but simple workaround which should be good enough for non-indirect draws.
3633 statement("uniform int SPIRV_Cross_BaseInstance;");
3634 statement("#endif");
3635 emitted = true;
3636 emitted_base_instance = true;
3637 }
3638 else if (builtin == BuiltInBaseVertex)
3639 {
3640 statement("#ifdef GL_ARB_shader_draw_parameters");
3641 statement("#define SPIRV_Cross_BaseVertex gl_BaseVertexARB");
3642 statement("#else");
					// A crude but simple workaround which should be good enough for non-indirect draws.
3644 statement("uniform int SPIRV_Cross_BaseVertex;");
3645 statement("#endif");
3646 }
3647 else if (builtin == BuiltInDrawIndex)
3648 {
3649 statement("#ifndef GL_ARB_shader_draw_parameters");
3650 // Cannot really be worked around.
3651 statement("#error GL_ARB_shader_draw_parameters is not supported.");
3652 statement("#endif");
3653 }
3654 }
3655 }
3656 });
3657
3658 // Global variables.
3659 for (auto global : global_variables)
3660 {
3661 auto &var = get<SPIRVariable>(global);
3662 if (is_hidden_variable(var, true))
3663 continue;
3664
3665 if (var.storage != StorageClassOutput)
3666 {
3667 if (!variable_is_lut(var))
3668 {
3669 add_resource_name(var.self);
3670
3671 string initializer;
3672 if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate &&
3673 !var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var)))
3674 {
3675 initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var)));
3676 }
3677
3678 statement(variable_decl(var), initializer, ";");
3679 emitted = true;
3680 }
3681 }
3682 else if (var.initializer && maybe_get<SPIRConstant>(var.initializer) != nullptr)
3683 {
3684 emit_output_variable_initializer(var);
3685 }
3686 }
3687
3688 if (emitted)
3689 statement("");
3690
3691 declare_undefined_values();
3692}
3693
3694void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var)
3695{
3696 // If a StorageClassOutput variable has an initializer, we need to initialize it in main().
3697 auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point);
3698 auto &type = get<SPIRType>(var.basetype);
3699 bool is_patch = has_decoration(var.self, DecorationPatch);
3700 bool is_block = has_decoration(type.self, DecorationBlock);
3701 bool is_control_point = get_execution_model() == ExecutionModelTessellationControl && !is_patch;
3702
3703 if (is_block)
3704 {
3705 uint32_t member_count = uint32_t(type.member_types.size());
3706 bool type_is_array = type.array.size() == 1;
3707 uint32_t array_size = 1;
3708 if (type_is_array)
3709 array_size = to_array_size_literal(type);
3710 uint32_t iteration_count = is_control_point ? 1 : array_size;
3711
3712 // If the initializer is a block, we must initialize each block member one at a time.
3713 for (uint32_t i = 0; i < member_count; i++)
3714 {
3715 // These outputs might not have been properly declared, so don't initialize them in that case.
3716 if (has_member_decoration(type.self, i, DecorationBuiltIn))
3717 {
3718 if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInCullDistance &&
3719 !cull_distance_count)
3720 continue;
3721
3722 if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInClipDistance &&
3723 !clip_distance_count)
3724 continue;
3725 }
3726
3727 // We need to build a per-member array first, essentially transposing from AoS to SoA.
3728 // This code path hits when we have an array of blocks.
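			// E.g. for an array of two blocks, member i is gathered across both array elements into a
			// temporary like "_<var>_<i>_init = T[](c[0].member_i, c[1].member_i)" and copied back element by element below.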
3729 string lut_name;
3730 if (type_is_array)
3731 {
3732 lut_name = join("_", var.self, "_", i, "_init");
3733 uint32_t member_type_id = get<SPIRType>(var.basetype).member_types[i];
3734 auto &member_type = get<SPIRType>(member_type_id);
3735 auto array_type = member_type;
3736 array_type.parent_type = member_type_id;
3737 array_type.array.push_back(array_size);
3738 array_type.array_size_literal.push_back(true);
3739
3740 SmallVector<string> exprs;
3741 exprs.reserve(array_size);
3742 auto &c = get<SPIRConstant>(var.initializer);
3743 for (uint32_t j = 0; j < array_size; j++)
3744 exprs.push_back(to_expression(get<SPIRConstant>(c.subconstants[j]).subconstants[i]));
3745 statement("const ", type_to_glsl(array_type), " ", lut_name, type_to_array_glsl(array_type), " = ",
3746 type_to_glsl_constructor(array_type), "(", merge(exprs, ", "), ");");
3747 }
3748
3749 for (uint32_t j = 0; j < iteration_count; j++)
3750 {
3751 entry_func.fixup_hooks_in.push_back([=, &var]() {
3752 AccessChainMeta meta;
3753 auto &c = this->get<SPIRConstant>(var.initializer);
3754
3755 uint32_t invocation_id = 0;
3756 uint32_t member_index_id = 0;
3757 if (is_control_point)
3758 {
3759 uint32_t ids = ir.increase_bound_by(3);
3760 SPIRType uint_type;
3761 uint_type.basetype = SPIRType::UInt;
3762 uint_type.width = 32;
3763 set<SPIRType>(ids, uint_type);
3764 set<SPIRExpression>(ids + 1, builtin_to_glsl(BuiltInInvocationId, StorageClassInput), ids, true);
3765 set<SPIRConstant>(ids + 2, ids, i, false);
3766 invocation_id = ids + 1;
3767 member_index_id = ids + 2;
3768 }
3769
3770 if (is_patch)
3771 {
3772 statement("if (gl_InvocationID == 0)");
3773 begin_scope();
3774 }
3775
3776 if (type_is_array && !is_control_point)
3777 {
3778 uint32_t indices[2] = { j, i };
3779 auto chain = access_chain_internal(var.self, indices, 2, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
3780 statement(chain, " = ", lut_name, "[", j, "];");
3781 }
3782 else if (is_control_point)
3783 {
3784 uint32_t indices[2] = { invocation_id, member_index_id };
3785 auto chain = access_chain_internal(var.self, indices, 2, 0, &meta);
3786 statement(chain, " = ", lut_name, "[", builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "];");
3787 }
3788 else
3789 {
3790 auto chain =
3791 access_chain_internal(var.self, &i, 1, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
3792 statement(chain, " = ", to_expression(c.subconstants[i]), ";");
3793 }
3794
3795 if (is_patch)
3796 end_scope();
3797 });
3798 }
3799 }
3800 }
3801 else if (is_control_point)
3802 {
3803 auto lut_name = join("_", var.self, "_init");
3804 statement("const ", type_to_glsl(type), " ", lut_name, type_to_array_glsl(type),
3805 " = ", to_expression(var.initializer), ";");
3806 entry_func.fixup_hooks_in.push_back([&, lut_name]() {
3807 statement(to_expression(var.self), "[gl_InvocationID] = ", lut_name, "[gl_InvocationID];");
3808 });
3809 }
3810 else if (has_decoration(var.self, DecorationBuiltIn) &&
3811 BuiltIn(get_decoration(var.self, DecorationBuiltIn)) == BuiltInSampleMask)
3812 {
3813 // We cannot copy the array since gl_SampleMask is unsized in GLSL. Unroll time! <_<
3814 entry_func.fixup_hooks_in.push_back([&] {
3815 auto &c = this->get<SPIRConstant>(var.initializer);
3816 uint32_t num_constants = uint32_t(c.subconstants.size());
3817 for (uint32_t i = 0; i < num_constants; i++)
3818 {
3819 // Don't use to_expression on constant since it might be uint, just fish out the raw int.
3820 statement(to_expression(var.self), "[", i, "] = ",
3821 convert_to_string(this->get<SPIRConstant>(c.subconstants[i]).scalar_i32()), ";");
3822 }
3823 });
3824 }
3825 else
3826 {
3827 auto lut_name = join("_", var.self, "_init");
3828 statement("const ", type_to_glsl(type), " ", lut_name,
3829 type_to_array_glsl(type), " = ", to_expression(var.initializer), ";");
3830 entry_func.fixup_hooks_in.push_back([&, lut_name, is_patch]() {
3831 if (is_patch)
3832 {
3833 statement("if (gl_InvocationID == 0)");
3834 begin_scope();
3835 }
3836 statement(to_expression(var.self), " = ", lut_name, ";");
3837 if (is_patch)
3838 end_scope();
3839 });
3840 }
3841}
3842
3843void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
3844{
3845 static const char *workaround_types[] = { "int", "ivec2", "ivec3", "ivec4", "uint", "uvec2", "uvec3", "uvec4",
3846 "float", "vec2", "vec3", "vec4", "double", "dvec2", "dvec3", "dvec4" };
3847
3848 if (!options.vulkan_semantics)
3849 {
3850 using Supp = ShaderSubgroupSupportHelper;
3851 auto result = shader_subgroup_supporter.resolve();
3852
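		// Each requested subgroup feature below is emitted as an #if/#elif chain over the candidate
		// extensions, so the generated shader picks the first available fallback at compile time, e.g.:
		//   #if defined(GL_NV_shader_thread_group)
		//   #define gl_SubgroupSize gl_WarpSizeNV
		//   #elif defined(GL_ARB_shader_ballot)
		//   #define gl_SubgroupSize gl_SubGroupSizeARB
		//   #endif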
3853 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMask))
3854 {
3855 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupMask, result);
3856
3857 for (auto &e : exts)
3858 {
3859 const char *name = Supp::get_extension_name(e);
3860 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3861
3862 switch (e)
3863 {
3864 case Supp::NV_shader_thread_group:
3865 statement("#define gl_SubgroupEqMask uvec4(gl_ThreadEqMaskNV, 0u, 0u, 0u)");
3866 statement("#define gl_SubgroupGeMask uvec4(gl_ThreadGeMaskNV, 0u, 0u, 0u)");
3867 statement("#define gl_SubgroupGtMask uvec4(gl_ThreadGtMaskNV, 0u, 0u, 0u)");
3868 statement("#define gl_SubgroupLeMask uvec4(gl_ThreadLeMaskNV, 0u, 0u, 0u)");
3869 statement("#define gl_SubgroupLtMask uvec4(gl_ThreadLtMaskNV, 0u, 0u, 0u)");
3870 break;
3871 case Supp::ARB_shader_ballot:
3872 statement("#define gl_SubgroupEqMask uvec4(unpackUint2x32(gl_SubGroupEqMaskARB), 0u, 0u)");
3873 statement("#define gl_SubgroupGeMask uvec4(unpackUint2x32(gl_SubGroupGeMaskARB), 0u, 0u)");
3874 statement("#define gl_SubgroupGtMask uvec4(unpackUint2x32(gl_SubGroupGtMaskARB), 0u, 0u)");
3875 statement("#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u)");
3876 statement("#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u)");
3877 break;
3878 default:
3879 break;
3880 }
3881 }
3882 statement("#endif");
3883 statement("");
3884 }
3885
3886 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupSize))
3887 {
3888 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupSize, result);
3889
3890 for (auto &e : exts)
3891 {
3892 const char *name = Supp::get_extension_name(e);
3893 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3894
3895 switch (e)
3896 {
3897 case Supp::NV_shader_thread_group:
3898 statement("#define gl_SubgroupSize gl_WarpSizeNV");
3899 break;
3900 case Supp::ARB_shader_ballot:
3901 statement("#define gl_SubgroupSize gl_SubGroupSizeARB");
3902 break;
3903 case Supp::AMD_gcn_shader:
3904 statement("#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)");
3905 break;
3906 default:
3907 break;
3908 }
3909 }
3910 statement("#endif");
3911 statement("");
3912 }
3913
3914 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInvocationID))
3915 {
3916 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupInvocationID, result);
3917
3918 for (auto &e : exts)
3919 {
3920 const char *name = Supp::get_extension_name(e);
3921 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3922
3923 switch (e)
3924 {
3925 case Supp::NV_shader_thread_group:
3926 statement("#define gl_SubgroupInvocationID gl_ThreadInWarpNV");
3927 break;
3928 case Supp::ARB_shader_ballot:
3929 statement("#define gl_SubgroupInvocationID gl_SubGroupInvocationARB");
3930 break;
3931 default:
3932 break;
3933 }
3934 }
3935 statement("#endif");
3936 statement("");
3937 }
3938
3939 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupID))
3940 {
3941 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupID, result);
3942
3943 for (auto &e : exts)
3944 {
3945 const char *name = Supp::get_extension_name(e);
3946 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3947
3948 switch (e)
3949 {
3950 case Supp::NV_shader_thread_group:
3951 statement("#define gl_SubgroupID gl_WarpIDNV");
3952 break;
3953 default:
3954 break;
3955 }
3956 }
3957 statement("#endif");
3958 statement("");
3959 }
3960
3961 if (shader_subgroup_supporter.is_feature_requested(Supp::NumSubgroups))
3962 {
3963 auto exts = Supp::get_candidates_for_feature(Supp::NumSubgroups, result);
3964
3965 for (auto &e : exts)
3966 {
3967 const char *name = Supp::get_extension_name(e);
3968 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3969
3970 switch (e)
3971 {
3972 case Supp::NV_shader_thread_group:
3973 statement("#define gl_NumSubgroups gl_WarpsPerSMNV");
3974 break;
3975 default:
3976 break;
3977 }
3978 }
3979 statement("#endif");
3980 statement("");
3981 }
3982
3983 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBroadcast_First))
3984 {
3985 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBroadcast_First, result);
3986
3987 for (auto &e : exts)
3988 {
3989 const char *name = Supp::get_extension_name(e);
3990 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3991
3992 switch (e)
3993 {
3994 case Supp::NV_shader_thread_shuffle:
3995 for (const char *t : workaround_types)
3996 {
3997 statement(t, " subgroupBroadcastFirst(", t,
3998 " value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }");
3999 }
4000 for (const char *t : workaround_types)
4001 {
4002 statement(t, " subgroupBroadcast(", t,
4003 " value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }");
4004 }
4005 break;
4006 case Supp::ARB_shader_ballot:
4007 for (const char *t : workaround_types)
4008 {
4009 statement(t, " subgroupBroadcastFirst(", t,
4010 " value) { return readFirstInvocationARB(value); }");
4011 }
4012 for (const char *t : workaround_types)
4013 {
4014 statement(t, " subgroupBroadcast(", t,
4015 " value, uint id) { return readInvocationARB(value, id); }");
4016 }
4017 break;
4018 default:
4019 break;
4020 }
4021 }
4022 statement("#endif");
4023 statement("");
4024 }
4025
4026 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotFindLSB_MSB))
4027 {
4028 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallotFindLSB_MSB, result);
4029
4030 for (auto &e : exts)
4031 {
4032 const char *name = Supp::get_extension_name(e);
4033 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
4034
4035 switch (e)
4036 {
4037 case Supp::NV_shader_thread_group:
4038 statement("uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); }");
4039 statement("uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); }");
4040 break;
4041 default:
4042 break;
4043 }
4044 }
4045 statement("#else");
4046 statement("uint subgroupBallotFindLSB(uvec4 value)");
4047 begin_scope();
4048 statement("int firstLive = findLSB(value.x);");
4049 statement("return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32));");
4050 end_scope();
4051 statement("uint subgroupBallotFindMSB(uvec4 value)");
4052 begin_scope();
4053 statement("int firstLive = findMSB(value.y);");
4054 statement("return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));");
4055 end_scope();
4056 statement("#endif");
4057 statement("");
4058 }
4059
4060 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAll_Any_AllEqualBool))
4061 {
4062 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupAll_Any_AllEqualBool, result);
4063
4064 for (auto &e : exts)
4065 {
4066 const char *name = Supp::get_extension_name(e);
4067 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
4068
4069 switch (e)
4070 {
4071 case Supp::NV_gpu_shader_5:
4072 statement("bool subgroupAll(bool value) { return allThreadsNV(value); }");
4073 statement("bool subgroupAny(bool value) { return anyThreadNV(value); }");
4074 statement("bool subgroupAllEqual(bool value) { return allThreadsEqualNV(value); }");
4075 break;
4076 case Supp::ARB_shader_group_vote:
4077 statement("bool subgroupAll(bool v) { return allInvocationsARB(v); }");
4078 statement("bool subgroupAny(bool v) { return anyInvocationARB(v); }");
4079 statement("bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }");
4080 break;
4081 case Supp::AMD_gcn_shader:
4082 statement("bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); }");
4083 statement("bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; }");
4084 statement("bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || "
4085 "b == ballotAMD(true); }");
4086 break;
4087 default:
4088 break;
4089 }
4090 }
4091 statement("#endif");
4092 statement("");
4093 }
4094
4095 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAllEqualT))
4096 {
4097 statement("#ifndef GL_KHR_shader_subgroup_vote");
4098 statement(
4099 "#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return "
4100 "subgroupAllEqual(subgroupBroadcastFirst(value) == value); }");
4101 for (const char *t : workaround_types)
4102 statement("_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(", t, ")");
4103 statement("#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND");
4104 statement("#endif");
4105 statement("");
4106 }
4107
4108 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallot))
4109 {
4110 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallot, result);
4111
4112 for (auto &e : exts)
4113 {
4114 const char *name = Supp::get_extension_name(e);
4115 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
4116
4117 switch (e)
4118 {
4119 case Supp::NV_shader_thread_group:
4120 statement("uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); }");
4121 break;
4122 case Supp::ARB_shader_ballot:
4123 statement("uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }");
4124 break;
4125 default:
4126 break;
4127 }
4128 }
4129 statement("#endif");
4130 statement("");
4131 }
4132
4133 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupElect))
4134 {
4135 statement("#ifndef GL_KHR_shader_subgroup_basic");
4136 statement("bool subgroupElect()");
4137 begin_scope();
4138 statement("uvec4 activeMask = subgroupBallot(true);");
4139 statement("uint firstLive = subgroupBallotFindLSB(activeMask);");
4140 statement("return gl_SubgroupInvocationID == firstLive;");
4141 end_scope();
4142 statement("#endif");
4143 statement("");
4144 }
4145
4146 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBarrier))
4147 {
			// The extensions we're using in place of GL_KHR_shader_subgroup_basic state
			// that subgroups execute in lockstep, so this barrier is implicit.
			// However, the GL 4.6 spec also states that `barrier` implies a shared memory barrier,
			// and a specific test of optimizing scans by leveraging lock-step invocation execution
			// has shown that a `memoryBarrierShared` is needed in place of a `subgroupBarrier`.
4153 // https://github.com/buildaworldnet/IrrlichtBAW/commit/d8536857991b89a30a6b65d29441e51b64c2c7ad#diff-9f898d27be1ea6fc79b03d9b361e299334c1a347b6e4dc344ee66110c6aa596aR19
4154 statement("#ifndef GL_KHR_shader_subgroup_basic");
4155 statement("void subgroupBarrier() { memoryBarrierShared(); }");
4156 statement("#endif");
4157 statement("");
4158 }
4159
4160 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMemBarrier))
4161 {
4162 if (model == spv::ExecutionModelGLCompute)
4163 {
4164 statement("#ifndef GL_KHR_shader_subgroup_basic");
4165 statement("void subgroupMemoryBarrier() { groupMemoryBarrier(); }");
4166 statement("void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); }");
4167 statement("void subgroupMemoryBarrierShared() { memoryBarrierShared(); }");
4168 statement("void subgroupMemoryBarrierImage() { groupMemoryBarrier(); }");
4169 statement("#endif");
4170 }
4171 else
4172 {
4173 statement("#ifndef GL_KHR_shader_subgroup_basic");
4174 statement("void subgroupMemoryBarrier() { memoryBarrier(); }");
4175 statement("void subgroupMemoryBarrierBuffer() { memoryBarrierBuffer(); }");
4176 statement("void subgroupMemoryBarrierImage() { memoryBarrierImage(); }");
4177 statement("#endif");
4178 }
4179 statement("");
4180 }
4181
4182 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInverseBallot_InclBitCount_ExclBitCout))
4183 {
4184 statement("#ifndef GL_KHR_shader_subgroup_ballot");
4185 statement("bool subgroupInverseBallot(uvec4 value)");
4186 begin_scope();
4187 statement("return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));");
4188 end_scope();
4189
4190 statement("uint subgroupBallotInclusiveBitCount(uvec4 value)");
4191 begin_scope();
4192 statement("uvec2 v = value.xy & gl_SubgroupLeMask.xy;");
4193 statement("ivec2 c = bitCount(v);");
4194 statement_no_indent("#ifdef GL_NV_shader_thread_group");
4195 statement("return uint(c.x);");
4196 statement_no_indent("#else");
4197 statement("return uint(c.x + c.y);");
4198 statement_no_indent("#endif");
4199 end_scope();
4200
4201 statement("uint subgroupBallotExclusiveBitCount(uvec4 value)");
4202 begin_scope();
4203 statement("uvec2 v = value.xy & gl_SubgroupLtMask.xy;");
4204 statement("ivec2 c = bitCount(v);");
4205 statement_no_indent("#ifdef GL_NV_shader_thread_group");
4206 statement("return uint(c.x);");
4207 statement_no_indent("#else");
4208 statement("return uint(c.x + c.y);");
4209 statement_no_indent("#endif");
4210 end_scope();
4211 statement("#endif");
4212 statement("");
4213 }
4214
4215 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitCount))
4216 {
4217 statement("#ifndef GL_KHR_shader_subgroup_ballot");
4218 statement("uint subgroupBallotBitCount(uvec4 value)");
4219 begin_scope();
4220 statement("ivec2 c = bitCount(value.xy);");
4221 statement_no_indent("#ifdef GL_NV_shader_thread_group");
4222 statement("return uint(c.x);");
4223 statement_no_indent("#else");
4224 statement("return uint(c.x + c.y);");
4225 statement_no_indent("#endif");
4226 end_scope();
4227 statement("#endif");
4228 statement("");
4229 }
4230
4231 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitExtract))
4232 {
4233 statement("#ifndef GL_KHR_shader_subgroup_ballot");
4234 statement("bool subgroupBallotBitExtract(uvec4 value, uint index)");
4235 begin_scope();
4236 statement_no_indent("#ifdef GL_NV_shader_thread_group");
4237 statement("uint shifted = value.x >> index;");
4238 statement_no_indent("#else");
4239 statement("uint shifted = value[index >> 5u] >> (index & 0x1fu);");
4240 statement_no_indent("#endif");
4241 statement("return (shifted & 1u) != 0u;");
4242 end_scope();
4243 statement("#endif");
4244 statement("");
4245 }
4246 }
4247
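	// Certain drivers are known to miscompile direct loads of row-major matrices from UBOs.
	// Routing such loads through a no-op wrapper function works around the broken optimization.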
4248 if (!workaround_ubo_load_overload_types.empty())
4249 {
4250 for (auto &type_id : workaround_ubo_load_overload_types)
4251 {
4252 auto &type = get<SPIRType>(type_id);
4253 statement(type_to_glsl(type), " spvWorkaroundRowMajor(", type_to_glsl(type),
4254 " wrap) { return wrap; }");
4255 }
4256 statement("");
4257 }
4258
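	// Tiny transpose helpers, emitted only for the matrix dimensions that actually needed them,
	// for targets where the built-in transpose() cannot be used.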
4259 if (requires_transpose_2x2)
4260 {
4261 statement("mat2 spvTranspose(mat2 m)");
4262 begin_scope();
4263 statement("return mat2(m[0][0], m[1][0], m[0][1], m[1][1]);");
4264 end_scope();
4265 statement("");
4266 }
4267
4268 if (requires_transpose_3x3)
4269 {
4270 statement("mat3 spvTranspose(mat3 m)");
4271 begin_scope();
4272 statement("return mat3(m[0][0], m[1][0], m[2][0], m[0][1], m[1][1], m[2][1], m[0][2], m[1][2], m[2][2]);");
4273 end_scope();
4274 statement("");
4275 }
4276
4277 if (requires_transpose_4x4)
4278 {
4279 statement("mat4 spvTranspose(mat4 m)");
4280 begin_scope();
4281 statement("return mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], "
4282 "m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]);");
4283 end_scope();
4284 statement("");
4285 }
4286}
4287
4288// Returns a string representation of the ID, usable as a function arg.
// Default is to simply return the expression representation of the arg ID.
4290// Subclasses may override to modify the return value.
4291string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id)
4292{
4293 // Make sure that we use the name of the original variable, and not the parameter alias.
4294 uint32_t name_id = id;
4295 auto *var = maybe_get<SPIRVariable>(id);
4296 if (var && var->basevariable)
4297 name_id = var->basevariable;
4298 return to_expression(name_id);
4299}
4300
4301void CompilerGLSL::handle_invalid_expression(uint32_t id)
4302{
4303 // We tried to read an invalidated expression.
4304 // This means we need another pass at compilation, but next time, force temporary variables so that they cannot be invalidated.
4305 auto res = forced_temporaries.insert(id);
4306
4307 // Forcing new temporaries guarantees forward progress.
4308 if (res.second)
4309 force_recompile_guarantee_forward_progress();
4310 else
4311 force_recompile();
4312}
4313
// Converts the format of the current expression from packed to unpacked,
// by wrapping the expression in a constructor of the appropriate type.
// GLSL does not support packed formats, so this simply returns the expression as-is.
// Subclasses that do support packed formats will override this.
4318string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool)
4319{
4320 return expr_str;
4321}
4322
// Sometimes we proactively enclose an expression, only to find out later that we did not need the enclosure after all.
4324void CompilerGLSL::strip_enclosed_expression(string &expr)
4325{
4326 if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')')
4327 return;
4328
4329 // Have to make sure that our first and last parens actually enclose everything inside it.
4330 uint32_t paren_count = 0;
4331 for (auto &c : expr)
4332 {
4333 if (c == '(')
4334 paren_count++;
4335 else if (c == ')')
4336 {
4337 paren_count--;
4338
4339 // If we hit 0 and this is not the final char, our first and final parens actually don't
4340 // enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d).
4341 if (paren_count == 0 && &c != &expr.back())
4342 return;
4343 }
4344 }
4345 expr.erase(expr.size() - 1, 1);
4346 expr.erase(begin(expr));
4347}
4348
4349string CompilerGLSL::enclose_expression(const string &expr)
4350{
4351 bool need_parens = false;
4352
4353 // If the expression starts with a unary we need to enclose to deal with cases where we have back-to-back
4354 // unary expressions.
4355 if (!expr.empty())
4356 {
4357 auto c = expr.front();
4358 if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*')
4359 need_parens = true;
4360 }
4361
4362 if (!need_parens)
4363 {
4364 uint32_t paren_count = 0;
4365 for (auto c : expr)
4366 {
4367 if (c == '(' || c == '[')
4368 paren_count++;
4369 else if (c == ')' || c == ']')
4370 {
4371 assert(paren_count);
4372 paren_count--;
4373 }
4374 else if (c == ' ' && paren_count == 0)
4375 {
4376 need_parens = true;
4377 break;
4378 }
4379 }
4380 assert(paren_count == 0);
4381 }
4382
4383 // If this expression contains any spaces which are not enclosed by parentheses,
4384 // we need to enclose it so we can treat the whole string as an expression.
4385 // This happens when two expressions have been part of a binary op earlier.
4386 if (need_parens)
4387 return join('(', expr, ')');
4388 else
4389 return expr;
4390}
4391
4392string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr)
4393{
4394 // If this expression starts with an address-of operator ('&'), then
4395 // just return the part after the operator.
4396 // TODO: Strip parens if unnecessary?
4397 if (expr.front() == '&')
4398 return expr.substr(1);
4399 else if (backend.native_pointers)
4400 return join('*', expr);
4401 else if (expr_type.storage == StorageClassPhysicalStorageBufferEXT && expr_type.basetype != SPIRType::Struct &&
4402 expr_type.pointer_depth == 1)
4403 {
4404 return join(enclose_expression(expr), ".value");
4405 }
4406 else
4407 return expr;
4408}
4409
4410string CompilerGLSL::address_of_expression(const std::string &expr)
4411{
4412 if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')')
4413 {
4414 // If we have an expression which looks like (*foo), taking the address of it is the same as stripping
4415 // the first two and last characters. We might have to enclose the expression.
4416 // This doesn't work for cases like (*foo + 10),
4417 // but this is an r-value expression which we cannot take the address of anyways.
4418 return enclose_expression(expr.substr(2, expr.size() - 3));
4419 }
4420 else if (expr.front() == '*')
4421 {
4422 // If this expression starts with a dereference operator ('*'), then
4423 // just return the part after the operator.
4424 return expr.substr(1);
4425 }
4426 else
4427 return join('&', enclose_expression(expr));
4428}
4429
4430// Just like to_expression except that we enclose the expression inside parentheses if needed.
4431string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read)
4432{
4433 return enclose_expression(to_expression(id, register_expression_read));
4434}
4435
4436// Used explicitly when we want to read a row-major expression, but without any transpose shenanigans.
4437// need_transpose must be forced to false.
4438string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id)
4439{
4440 return unpack_expression_type(to_expression(id), expression_type(id),
4441 get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
4442 has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), true);
4443}
4444
4445string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read)
4446{
4447 // If we need to transpose, it will also take care of unpacking rules.
4448 auto *e = maybe_get<SPIRExpression>(id);
4449 bool need_transpose = e && e->need_transpose;
4450 bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
4451 bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
4452
4453 if (!need_transpose && (is_remapped || is_packed))
4454 {
4455 return unpack_expression_type(to_expression(id, register_expression_read),
4456 get_pointee_type(expression_type_id(id)),
4457 get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
4458 has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false);
4459 }
4460 else
4461 return to_expression(id, register_expression_read);
4462}
4463
4464string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read)
4465{
4466 return enclose_expression(to_unpacked_expression(id, register_expression_read));
4467}
4468
4469string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read)
4470{
4471 auto &type = expression_type(id);
4472 if (type.pointer && should_dereference(id))
4473 return dereference_expression(type, to_enclosed_expression(id, register_expression_read));
4474 else
4475 return to_expression(id, register_expression_read);
4476}
4477
4478string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read)
4479{
4480 auto &type = expression_type(id);
4481 if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
4482 return address_of_expression(to_enclosed_expression(id, register_expression_read));
4483 else
4484 return to_unpacked_expression(id, register_expression_read);
4485}
4486
4487string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read)
4488{
4489 auto &type = expression_type(id);
4490 if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
4491 return address_of_expression(to_enclosed_expression(id, register_expression_read));
4492 else
4493 return to_enclosed_unpacked_expression(id, register_expression_read);
4494}
4495
4496string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index)
4497{
4498 auto expr = to_enclosed_expression(id);
4499 if (has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked))
4500 return join(expr, "[", index, "]");
4501 else
4502 return join(expr, ".", index_to_swizzle(index));
4503}
4504
4505string CompilerGLSL::to_extract_constant_composite_expression(uint32_t result_type, const SPIRConstant &c,
4506 const uint32_t *chain, uint32_t length)
4507{
	// It is somewhat silly if an application actually enters this path, since it knows the constant up front.
	// Still, it is useful to be able to extract the plain constant directly here.
4510 SPIRConstant tmp;
4511 tmp.constant_type = result_type;
4512 auto &composite_type = get<SPIRType>(c.constant_type);
4513 assert(composite_type.basetype != SPIRType::Struct && composite_type.array.empty());
4514 assert(!c.specialization);
4515
4516 if (is_matrix(composite_type))
4517 {
4518 if (length == 2)
4519 {
4520 tmp.m.c[0].vecsize = 1;
4521 tmp.m.columns = 1;
4522 tmp.m.c[0].r[0] = c.m.c[chain[0]].r[chain[1]];
4523 }
4524 else
4525 {
4526 assert(length == 1);
4527 tmp.m.c[0].vecsize = composite_type.vecsize;
4528 tmp.m.columns = 1;
4529 tmp.m.c[0] = c.m.c[chain[0]];
4530 }
4531 }
4532 else
4533 {
4534 assert(length == 1);
4535 tmp.m.c[0].vecsize = 1;
4536 tmp.m.columns = 1;
4537 tmp.m.c[0].r[0] = c.m.c[0].r[chain[0]];
4538 }
4539
4540 return constant_expression(tmp);
4541}
4542
4543string CompilerGLSL::to_rerolled_array_expression(const string &base_expr, const SPIRType &type)
4544{
4545 uint32_t size = to_array_size_literal(type);
4546 auto &parent = get<SPIRType>(type.parent_type);
4547 string expr = "{ ";
4548
4549 for (uint32_t i = 0; i < size; i++)
4550 {
4551 auto subexpr = join(base_expr, "[", convert_to_string(i), "]");
4552 if (parent.array.empty())
4553 expr += subexpr;
4554 else
4555 expr += to_rerolled_array_expression(subexpr, parent);
4556
4557 if (i + 1 < size)
4558 expr += ", ";
4559 }
4560
4561 expr += " }";
4562 return expr;
4563}
4564
4565string CompilerGLSL::to_composite_constructor_expression(uint32_t id, bool block_like_type)
4566{
4567 auto &type = expression_type(id);
4568
4569 bool reroll_array = !type.array.empty() &&
4570 (!backend.array_is_value_type ||
4571 (block_like_type && !backend.array_is_value_type_in_buffer_blocks));
4572
4573 if (reroll_array)
4574 {
4575 // For this case, we need to "re-roll" an array initializer from a temporary.
4576 // We cannot simply pass the array directly, since it decays to a pointer and it cannot
4577 // participate in a struct initializer. E.g.
4578 // float arr[2] = { 1.0, 2.0 };
4579 // Foo foo = { arr }; must be transformed to
4580 // Foo foo = { { arr[0], arr[1] } };
4581 // The array sizes cannot be deduced from specialization constants since we cannot use any loops.
4582
4583 // We're only triggering one read of the array expression, but this is fine since arrays have to be declared
4584 // as temporaries anyways.
4585 return to_rerolled_array_expression(to_enclosed_expression(id), type);
4586 }
4587 else
4588 return to_unpacked_expression(id);
4589}
4590
4591string CompilerGLSL::to_non_uniform_aware_expression(uint32_t id)
4592{
4593 string expr = to_expression(id);
4594
4595 if (has_decoration(id, DecorationNonUniform))
4596 convert_non_uniform_expression(expr, id);
4597
4598 return expr;
4599}
4600
4601string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
4602{
4603 auto itr = invalid_expressions.find(id);
4604 if (itr != end(invalid_expressions))
4605 handle_invalid_expression(id);
4606
4607 if (ir.ids[id].get_type() == TypeExpression)
4608 {
4609 // We might have a more complex chain of dependencies.
4610 // A possible scenario is that we
4611 //
4612 // %1 = OpLoad
		// %2 = OpDoSomething %1 %1. Here %2 will have a dependency on %1.
		// %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1, since we don't propagate dependencies like that.
		// OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know, since it's part of invalid_expressions.
		// %4 = OpDoSomethingAnotherTime %3 %3 // If we forwarded all expressions, we would see the %1 expression after the store, not before.
4617 //
4618 // However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store,
4619 // and see that we should not forward reads of the original variable.
4620 auto &expr = get<SPIRExpression>(id);
4621 for (uint32_t dep : expr.expression_dependencies)
4622 if (invalid_expressions.find(dep) != end(invalid_expressions))
4623 handle_invalid_expression(dep);
4624 }
4625
4626 if (register_expression_read)
4627 track_expression_read(id);
4628
4629 switch (ir.ids[id].get_type())
4630 {
4631 case TypeExpression:
4632 {
4633 auto &e = get<SPIRExpression>(id);
4634 if (e.base_expression)
4635 return to_enclosed_expression(e.base_expression) + e.expression;
4636 else if (e.need_transpose)
4637 {
4638 // This should not be reached for access chains, since we always deal explicitly with transpose state
4639 // when consuming an access chain expression.
4640 uint32_t physical_type_id = get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
4641 bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
4642 return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), physical_type_id,
4643 is_packed);
4644 }
4645 else if (flattened_structs.count(id))
4646 {
4647 return load_flattened_struct(e.expression, get<SPIRType>(e.expression_type));
4648 }
4649 else
4650 {
4651 if (is_forcing_recompilation())
4652 {
				// During the first compilation phase, certain expression patterns can trigger exponential memory growth.
4654 // Avoid this by returning dummy expressions during this phase.
4655 // Do not use empty expressions here, because those are sentinels for other cases.
4656 return "_";
4657 }
4658 else
4659 return e.expression;
4660 }
4661 }
4662
4663 case TypeConstant:
4664 {
4665 auto &c = get<SPIRConstant>(id);
4666 auto &type = get<SPIRType>(c.constant_type);
4667
4668 // WorkGroupSize may be a constant.
4669 if (has_decoration(c.self, DecorationBuiltIn))
4670 return builtin_to_glsl(BuiltIn(get_decoration(c.self, DecorationBuiltIn)), StorageClassGeneric);
4671 else if (c.specialization)
4672 {
4673 if (backend.workgroup_size_is_hidden)
4674 {
4675 int wg_index = get_constant_mapping_to_workgroup_component(c);
4676 if (wg_index >= 0)
4677 {
4678 auto wg_size = join(builtin_to_glsl(BuiltInWorkgroupSize, StorageClassInput), vector_swizzle(1, wg_index));
4679 if (type.basetype != SPIRType::UInt)
4680 wg_size = bitcast_expression(type, SPIRType::UInt, wg_size);
4681 return wg_size;
4682 }
4683 }
4684
4685 return to_name(id);
4686 }
4687 else if (c.is_used_as_lut)
4688 return to_name(id);
4689 else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
4690 return to_name(id);
4691 else if (!type.array.empty() && !backend.can_declare_arrays_inline)
4692 return to_name(id);
4693 else
4694 return constant_expression(c);
4695 }
4696
4697 case TypeConstantOp:
4698 return to_name(id);
4699
4700 case TypeVariable:
4701 {
4702 auto &var = get<SPIRVariable>(id);
		// If we try to use a loop variable before the loop header, we have to redirect it to the static expression;
4704 // the variable has not been declared yet.
4705 if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable))
4706 return to_expression(var.static_expression);
4707 else if (var.deferred_declaration)
4708 {
4709 var.deferred_declaration = false;
4710 return variable_decl(var);
4711 }
4712 else if (flattened_structs.count(id))
4713 {
4714 return load_flattened_struct(to_name(id), get<SPIRType>(var.basetype));
4715 }
4716 else
4717 {
4718 auto &dec = ir.meta[var.self].decoration;
4719 if (dec.builtin)
4720 return builtin_to_glsl(dec.builtin_type, var.storage);
4721 else
4722 return to_name(id);
4723 }
4724 }
4725
4726 case TypeCombinedImageSampler:
		// We should never take the expression of this type directly.
4728 // The intention is that texture sampling functions will extract the image and samplers
4729 // separately and take their expressions as needed.
4730 // GLSL does not use this type because OpSampledImage immediately creates a combined image sampler
		// expression, a la sampler2D(texture, sampler).
4732 SPIRV_CROSS_THROW("Combined image samplers have no default expression representation.");
4733
4734 case TypeAccessChain:
4735 // We cannot express this type. They only have meaning in other OpAccessChains, OpStore or OpLoad.
4736 SPIRV_CROSS_THROW("Access chains have no default expression representation.");
4737
4738 default:
4739 return to_name(id);
4740 }
4741}
4742
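// Emits an OpSpecConstantOp as a target-language expression,
// e.g. an IAdd becomes "(a + b)" and SConvert/UConvert/FConvert become constructor casts.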
4743string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop)
4744{
4745 auto &type = get<SPIRType>(cop.basetype);
4746 bool binary = false;
4747 bool unary = false;
4748 string op;
4749
4750 if (is_legacy() && is_unsigned_opcode(cop.opcode))
4751 SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
4752
4753 // TODO: Find a clean way to reuse emit_instruction.
4754 switch (cop.opcode)
4755 {
4756 case OpSConvert:
4757 case OpUConvert:
4758 case OpFConvert:
4759 op = type_to_glsl_constructor(type);
4760 break;
4761
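	// These macros map simple spec constant opcodes straight onto a unary or binary operator token;
	// the full expression (including any required bitcasts) is assembled after the switch.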
4762#define GLSL_BOP(opname, x) \
4763 case Op##opname: \
4764 binary = true; \
4765 op = x; \
4766 break
4767
4768#define GLSL_UOP(opname, x) \
4769 case Op##opname: \
4770 unary = true; \
4771 op = x; \
4772 break
4773
4774 GLSL_UOP(SNegate, "-");
4775 GLSL_UOP(Not, "~");
4776 GLSL_BOP(IAdd, "+");
4777 GLSL_BOP(ISub, "-");
4778 GLSL_BOP(IMul, "*");
4779 GLSL_BOP(SDiv, "/");
4780 GLSL_BOP(UDiv, "/");
4781 GLSL_BOP(UMod, "%");
4782 GLSL_BOP(SMod, "%");
4783 GLSL_BOP(ShiftRightLogical, ">>");
4784 GLSL_BOP(ShiftRightArithmetic, ">>");
4785 GLSL_BOP(ShiftLeftLogical, "<<");
4786 GLSL_BOP(BitwiseOr, "|");
4787 GLSL_BOP(BitwiseXor, "^");
4788 GLSL_BOP(BitwiseAnd, "&");
4789 GLSL_BOP(LogicalOr, "||");
4790 GLSL_BOP(LogicalAnd, "&&");
4791 GLSL_UOP(LogicalNot, "!");
4792 GLSL_BOP(LogicalEqual, "==");
4793 GLSL_BOP(LogicalNotEqual, "!=");
4794 GLSL_BOP(IEqual, "==");
4795 GLSL_BOP(INotEqual, "!=");
4796 GLSL_BOP(ULessThan, "<");
4797 GLSL_BOP(SLessThan, "<");
4798 GLSL_BOP(ULessThanEqual, "<=");
4799 GLSL_BOP(SLessThanEqual, "<=");
4800 GLSL_BOP(UGreaterThan, ">");
4801 GLSL_BOP(SGreaterThan, ">");
4802 GLSL_BOP(UGreaterThanEqual, ">=");
4803 GLSL_BOP(SGreaterThanEqual, ">=");
4804
4805 case OpSRem:
4806 {
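		// OpSRem has no direct operator mapping we can use for spec constants, so expand it manually
		// as a - b * (a / b); with truncating division this keeps the sign of the first operand.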
4807 uint32_t op0 = cop.arguments[0];
4808 uint32_t op1 = cop.arguments[1];
4809 return join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(",
4810 to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
4811 }
4812
4813 case OpSelect:
4814 {
4815 if (cop.arguments.size() < 3)
4816 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4817
		// This one is pretty annoying. It's triggered by
		// uint(bool) and int(bool) casts of spec constants.
4820 // In order to preserve its compile-time constness in Vulkan GLSL,
4821 // we need to reduce the OpSelect expression back to this simplified model.
4822 // If we cannot, fail.
4823 if (to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0]))
4824 {
4825 // Implement as a simple cast down below.
4826 }
4827 else
4828 {
4829 // Implement a ternary and pray the compiler understands it :)
4830 return to_ternary_expression(type, cop.arguments[0], cop.arguments[1], cop.arguments[2]);
4831 }
4832 break;
4833 }
4834
4835 case OpVectorShuffle:
4836 {
4837 string expr = type_to_glsl_constructor(type);
4838 expr += "(";
4839
4840 uint32_t left_components = expression_type(cop.arguments[0]).vecsize;
4841 string left_arg = to_enclosed_expression(cop.arguments[0]);
4842 string right_arg = to_enclosed_expression(cop.arguments[1]);
4843
4844 for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++)
4845 {
4846 uint32_t index = cop.arguments[i];
4847 if (index >= left_components)
4848 expr += right_arg + "." + "xyzw"[index - left_components];
4849 else
4850 expr += left_arg + "." + "xyzw"[index];
4851
4852 if (i + 1 < uint32_t(cop.arguments.size()))
4853 expr += ", ";
4854 }
4855
4856 expr += ")";
4857 return expr;
4858 }
4859
4860 case OpCompositeExtract:
4861 {
4862 auto expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1),
4863 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
4864 return expr;
4865 }
4866
4867 case OpCompositeInsert:
4868 SPIRV_CROSS_THROW("OpCompositeInsert spec constant op is not supported.");
4869
4870 default:
4871 // Some opcodes are unimplemented here, these are currently not possible to test from glslang.
4872 SPIRV_CROSS_THROW("Unimplemented spec constant op.");
4873 }
4874
4875 uint32_t bit_width = 0;
4876 if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert)
4877 bit_width = expression_type(cop.arguments[0]).width;
4878
4879 SPIRType::BaseType input_type;
4880 bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode);
4881
4882 switch (cop.opcode)
4883 {
4884 case OpIEqual:
4885 case OpINotEqual:
4886 input_type = to_signed_basetype(bit_width);
4887 break;
4888
4889 case OpSLessThan:
4890 case OpSLessThanEqual:
4891 case OpSGreaterThan:
4892 case OpSGreaterThanEqual:
4893 case OpSMod:
4894 case OpSDiv:
4895 case OpShiftRightArithmetic:
4896 case OpSConvert:
4897 case OpSNegate:
4898 input_type = to_signed_basetype(bit_width);
4899 break;
4900
4901 case OpULessThan:
4902 case OpULessThanEqual:
4903 case OpUGreaterThan:
4904 case OpUGreaterThanEqual:
4905 case OpUMod:
4906 case OpUDiv:
4907 case OpShiftRightLogical:
4908 case OpUConvert:
4909 input_type = to_unsigned_basetype(bit_width);
4910 break;
4911
4912 default:
4913 input_type = type.basetype;
4914 break;
4915 }
4916
4917#undef GLSL_BOP
4918#undef GLSL_UOP
4919 if (binary)
4920 {
4921 if (cop.arguments.size() < 2)
4922 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4923
4924 string cast_op0;
4925 string cast_op1;
4926 auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0],
4927 cop.arguments[1], skip_cast_if_equal_type);
4928
4929 if (type.basetype != input_type && type.basetype != SPIRType::Boolean)
4930 {
4931 expected_type.basetype = input_type;
4932 auto expr = bitcast_glsl_op(type, expected_type);
4933 expr += '(';
4934 expr += join(cast_op0, " ", op, " ", cast_op1);
4935 expr += ')';
4936 return expr;
4937 }
4938 else
4939 return join("(", cast_op0, " ", op, " ", cast_op1, ")");
4940 }
4941 else if (unary)
4942 {
4943 if (cop.arguments.size() < 1)
4944 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4945
4946 // Auto-bitcast to result type as needed.
4947 // Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants.
4948 return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")");
4949 }
4950 else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert)
4951 {
4952 if (cop.arguments.size() < 1)
4953 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4954
4955 auto &arg_type = expression_type(cop.arguments[0]);
4956 if (arg_type.width < type.width && input_type != arg_type.basetype)
4957 {
4958 auto expected = arg_type;
4959 expected.basetype = input_type;
4960 return join(op, "(", bitcast_glsl(expected, cop.arguments[0]), ")");
4961 }
4962 else
4963 return join(op, "(", to_expression(cop.arguments[0]), ")");
4964 }
4965 else
4966 {
4967 if (cop.arguments.size() < 1)
4968 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4969 return join(op, "(", to_expression(cop.arguments[0]), ")");
4970 }
4971}
4972
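// Emits a SPIRConstant as a literal expression: null pointers, struct/array initializers
// (recursing into subconstants), empty structs, single vector columns, or full matrix constructors.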
4973string CompilerGLSL::constant_expression(const SPIRConstant &c, bool inside_block_like_struct_scope)
4974{
4975 auto &type = get<SPIRType>(c.constant_type);
4976
4977 if (type.pointer)
4978 {
4979 return backend.null_pointer_literal;
4980 }
4981 else if (!c.subconstants.empty())
4982 {
4983 // Handles Arrays and structures.
4984 string res;
4985
4986 // Only consider the decay if we are inside a struct scope.
4987 // Outside a struct declaration, we can always bind to a constant array with templated type.
4988 bool array_type_decays = inside_block_like_struct_scope &&
4989 !type.array.empty() && !backend.array_is_value_type_in_buffer_blocks &&
4990 has_decoration(c.constant_type, DecorationArrayStride);
4991
4992 if (type.array.empty() && type.basetype == SPIRType::Struct && type_is_block_like(type))
4993 inside_block_like_struct_scope = true;
4994
		// Allow Metal to use the array<T> template to make arrays a value type.
		bool needs_trailing_bracket = false;
4997 if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct &&
4998 type.array.empty())
4999 {
5000 res = type_to_glsl_constructor(type) + "{ ";
5001 }
5002 else if (backend.use_initializer_list && backend.use_typed_initializer_list && backend.array_is_value_type &&
5003 !type.array.empty() && !array_type_decays)
5004 {
5005 res = type_to_glsl_constructor(type) + "({ ";
			needs_trailing_bracket = true;
5007 }
5008 else if (backend.use_initializer_list)
5009 {
5010 res = "{ ";
5011 }
5012 else
5013 {
5014 res = type_to_glsl_constructor(type) + "(";
5015 }
5016
5017 for (auto &elem : c.subconstants)
5018 {
5019 auto &subc = get<SPIRConstant>(elem);
5020 if (subc.specialization)
5021 res += to_name(elem);
5022 else
5023 res += constant_expression(subc, inside_block_like_struct_scope);
5024
5025 if (&elem != &c.subconstants.back())
5026 res += ", ";
5027 }
5028
5029 res += backend.use_initializer_list ? " }" : ")";
		if (needs_trailing_bracket)
5031 res += ")";
5032
5033 return res;
5034 }
5035 else if (type.basetype == SPIRType::Struct && type.member_types.size() == 0)
5036 {
5037 // Metal tessellation likes empty structs which are then constant expressions.
5038 if (backend.supports_empty_struct)
5039 return "{ }";
5040 else if (backend.use_typed_initializer_list)
5041 return join(type_to_glsl(get<SPIRType>(c.constant_type)), "{ 0 }");
5042 else if (backend.use_initializer_list)
5043 return "{ 0 }";
5044 else
5045 return join(type_to_glsl(get<SPIRType>(c.constant_type)), "(0)");
5046 }
5047 else if (c.columns() == 1)
5048 {
5049 return constant_expression_vector(c, 0);
5050 }
5051 else
5052 {
5053 string res = type_to_glsl(get<SPIRType>(c.constant_type)) + "(";
5054 for (uint32_t col = 0; col < c.columns(); col++)
5055 {
5056 if (c.specialization_constant_id(col) != 0)
5057 res += to_name(c.specialization_constant_id(col));
5058 else
5059 res += constant_expression_vector(c, col);
5060
5061 if (col + 1 < c.columns())
5062 res += ", ";
5063 }
5064 res += ")";
5065 return res;
5066 }
5067}
5068
5069#ifdef _MSC_VER
5070// sprintf warning.
5071// We cannot rely on snprintf existing because, ..., MSVC.
5072#pragma warning(push)
5073#pragma warning(disable : 4996)
5074#endif
5075
5076string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
5077{
5078 string res;
5079 float float_value = c.scalar_f16(col, row);
5080
5081 // There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots
5082 // of complicated workarounds, just value-cast to the half type always.
5083 if (std::isnan(float_value) || std::isinf(float_value))
5084 {
5085 SPIRType type;
5086 type.basetype = SPIRType::Half;
5087 type.vecsize = 1;
5088 type.columns = 1;
5089
5090 if (float_value == numeric_limits<float>::infinity())
5091 res = join(type_to_glsl(type), "(1.0 / 0.0)");
5092 else if (float_value == -numeric_limits<float>::infinity())
5093 res = join(type_to_glsl(type), "(-1.0 / 0.0)");
5094 else if (std::isnan(float_value))
5095 res = join(type_to_glsl(type), "(0.0 / 0.0)");
5096 else
5097 SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
5098 }
5099 else
5100 {
5101 SPIRType type;
5102 type.basetype = SPIRType::Half;
5103 type.vecsize = 1;
5104 type.columns = 1;
5105 res = join(type_to_glsl(type), "(", convert_to_string(float_value, current_locale_radix_character), ")");
5106 }
5107
5108 return res;
5109}
5110
5111string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
5112{
5113 string res;
5114 float float_value = c.scalar_f32(col, row);
5115
5116 if (std::isnan(float_value) || std::isinf(float_value))
5117 {
5118 // Use special representation.
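		// For plain GLSL this ends up looking something like uintBitsToFloat(0x7f800000u /* inf */);
		// backends override bitcast_glsl_op to get their own spelling.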
5119 if (!is_legacy())
5120 {
5121 SPIRType out_type;
5122 SPIRType in_type;
5123 out_type.basetype = SPIRType::Float;
5124 in_type.basetype = SPIRType::UInt;
5125 out_type.vecsize = 1;
5126 in_type.vecsize = 1;
5127 out_type.width = 32;
5128 in_type.width = 32;
5129
5130 char print_buffer[32];
5131 sprintf(print_buffer, "0x%xu", c.scalar(col, row));
5132
5133 const char *comment = "inf";
5134 if (float_value == -numeric_limits<float>::infinity())
5135 comment = "-inf";
5136 else if (std::isnan(float_value))
5137 comment = "nan";
5138 res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, " /* ", comment, " */)");
5139 }
5140 else
5141 {
5142 if (float_value == numeric_limits<float>::infinity())
5143 {
5144 if (backend.float_literal_suffix)
5145 res = "(1.0f / 0.0f)";
5146 else
5147 res = "(1.0 / 0.0)";
5148 }
5149 else if (float_value == -numeric_limits<float>::infinity())
5150 {
5151 if (backend.float_literal_suffix)
5152 res = "(-1.0f / 0.0f)";
5153 else
5154 res = "(-1.0 / 0.0)";
5155 }
5156 else if (std::isnan(float_value))
5157 {
5158 if (backend.float_literal_suffix)
5159 res = "(0.0f / 0.0f)";
5160 else
5161 res = "(0.0 / 0.0)";
5162 }
5163 else
5164 SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
5165 }
5166 }
5167 else
5168 {
5169 res = convert_to_string(float_value, current_locale_radix_character);
5170 if (backend.float_literal_suffix)
5171 res += "f";
5172 }
5173
5174 return res;
5175}
5176
5177std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
5178{
5179 string res;
5180 double double_value = c.scalar_f64(col, row);
5181
5182 if (std::isnan(double_value) || std::isinf(double_value))
5183 {
5184 // Use special representation.
5185 if (!is_legacy())
5186 {
5187 SPIRType out_type;
5188 SPIRType in_type;
5189 out_type.basetype = SPIRType::Double;
5190 in_type.basetype = SPIRType::UInt64;
5191 out_type.vecsize = 1;
5192 in_type.vecsize = 1;
5193 out_type.width = 64;
5194 in_type.width = 64;
5195
5196 uint64_t u64_value = c.scalar_u64(col, row);
5197
5198 if (options.es)
5199 SPIRV_CROSS_THROW("64-bit integers/float not supported in ES profile.");
5200 require_extension_internal("GL_ARB_gpu_shader_int64");
5201
5202 char print_buffer[64];
5203 sprintf(print_buffer, "0x%llx%s", static_cast<unsigned long long>(u64_value),
5204 backend.long_long_literal_suffix ? "ull" : "ul");
5205
5206 const char *comment = "inf";
5207 if (double_value == -numeric_limits<double>::infinity())
5208 comment = "-inf";
5209 else if (std::isnan(double_value))
5210 comment = "nan";
5211 res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, " /* ", comment, " */)");
5212 }
5213 else
5214 {
5215 if (options.es)
5216 SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
5217 if (options.version < 400)
5218 require_extension_internal("GL_ARB_gpu_shader_fp64");
5219
5220 if (double_value == numeric_limits<double>::infinity())
5221 {
5222 if (backend.double_literal_suffix)
5223 res = "(1.0lf / 0.0lf)";
5224 else
5225 res = "(1.0 / 0.0)";
5226 }
5227 else if (double_value == -numeric_limits<double>::infinity())
5228 {
5229 if (backend.double_literal_suffix)
5230 res = "(-1.0lf / 0.0lf)";
5231 else
5232 res = "(-1.0 / 0.0)";
5233 }
5234 else if (std::isnan(double_value))
5235 {
5236 if (backend.double_literal_suffix)
5237 res = "(0.0lf / 0.0lf)";
5238 else
5239 res = "(0.0 / 0.0)";
5240 }
5241 else
5242 SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
5243 }
5244 }
5245 else
5246 {
5247 res = convert_to_string(double_value, current_locale_radix_character);
5248 if (backend.double_literal_suffix)
5249 res += "lf";
5250 }
5251
5252 return res;
5253}
5254
5255#ifdef _MSC_VER
5256#pragma warning(pop)
5257#endif
5258
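// Emits a single column of a constant as a (possibly splatted) vector expression,
// e.g. vec3(1.0) rather than vec3(1.0, 1.0, 1.0) when every component is identical.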
5259string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector)
5260{
5261 auto type = get<SPIRType>(c.constant_type);
5262 type.columns = 1;
5263
5264 auto scalar_type = type;
5265 scalar_type.vecsize = 1;
5266
5267 string res;
5268 bool splat = backend.use_constructor_splatting && c.vector_size() > 1;
5269 bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1;
5270
5271 if (!type_is_floating_point(type))
5272 {
5273 // Cannot swizzle literal integers as a special case.
5274 swizzle_splat = false;
5275 }
5276
5277 if (splat || swizzle_splat)
5278 {
5279 // Cannot use constant splatting if we have specialization constants somewhere in the vector.
5280 for (uint32_t i = 0; i < c.vector_size(); i++)
5281 {
5282 if (c.specialization_constant_id(vector, i) != 0)
5283 {
5284 splat = false;
5285 swizzle_splat = false;
5286 break;
5287 }
5288 }
5289 }
5290
5291 if (splat || swizzle_splat)
5292 {
5293 if (type.width == 64)
5294 {
5295 uint64_t ident = c.scalar_u64(vector, 0);
5296 for (uint32_t i = 1; i < c.vector_size(); i++)
5297 {
5298 if (ident != c.scalar_u64(vector, i))
5299 {
5300 splat = false;
5301 swizzle_splat = false;
5302 break;
5303 }
5304 }
5305 }
5306 else
5307 {
5308 uint32_t ident = c.scalar(vector, 0);
5309 for (uint32_t i = 1; i < c.vector_size(); i++)
5310 {
5311 if (ident != c.scalar(vector, i))
5312 {
5313 splat = false;
5314 swizzle_splat = false;
5315 }
5316 }
5317 }
5318 }
5319
5320 if (c.vector_size() > 1 && !swizzle_splat)
5321 res += type_to_glsl(type) + "(";
5322
5323 switch (type.basetype)
5324 {
5325 case SPIRType::Half:
5326 if (splat || swizzle_splat)
5327 {
5328 res += convert_half_to_string(c, vector, 0);
5329 if (swizzle_splat)
5330 res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
5331 }
5332 else
5333 {
5334 for (uint32_t i = 0; i < c.vector_size(); i++)
5335 {
5336 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5337 res += to_expression(c.specialization_constant_id(vector, i));
5338 else
5339 res += convert_half_to_string(c, vector, i);
5340
5341 if (i + 1 < c.vector_size())
5342 res += ", ";
5343 }
5344 }
5345 break;
5346
5347 case SPIRType::Float:
5348 if (splat || swizzle_splat)
5349 {
5350 res += convert_float_to_string(c, vector, 0);
5351 if (swizzle_splat)
5352 res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
5353 }
5354 else
5355 {
5356 for (uint32_t i = 0; i < c.vector_size(); i++)
5357 {
5358 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5359 res += to_expression(c.specialization_constant_id(vector, i));
5360 else
5361 res += convert_float_to_string(c, vector, i);
5362
5363 if (i + 1 < c.vector_size())
5364 res += ", ";
5365 }
5366 }
5367 break;
5368
5369 case SPIRType::Double:
5370 if (splat || swizzle_splat)
5371 {
5372 res += convert_double_to_string(c, vector, 0);
5373 if (swizzle_splat)
5374 res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
5375 }
5376 else
5377 {
5378 for (uint32_t i = 0; i < c.vector_size(); i++)
5379 {
5380 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5381 res += to_expression(c.specialization_constant_id(vector, i));
5382 else
5383 res += convert_double_to_string(c, vector, i);
5384
5385 if (i + 1 < c.vector_size())
5386 res += ", ";
5387 }
5388 }
5389 break;
5390
5391 case SPIRType::Int64:
5392 {
5393 auto tmp = type;
5394 tmp.vecsize = 1;
5395 tmp.columns = 1;
5396 auto int64_type = type_to_glsl(tmp);
5397
5398 if (splat)
5399 {
5400 res += convert_to_string(c.scalar_i64(vector, 0), int64_type, backend.long_long_literal_suffix);
5401 }
5402 else
5403 {
5404 for (uint32_t i = 0; i < c.vector_size(); i++)
5405 {
5406 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5407 res += to_expression(c.specialization_constant_id(vector, i));
5408 else
5409 res += convert_to_string(c.scalar_i64(vector, i), int64_type, backend.long_long_literal_suffix);
5410
5411 if (i + 1 < c.vector_size())
5412 res += ", ";
5413 }
5414 }
5415 break;
5416 }
5417
5418 case SPIRType::UInt64:
5419 if (splat)
5420 {
5421 res += convert_to_string(c.scalar_u64(vector, 0));
5422 if (backend.long_long_literal_suffix)
5423 res += "ull";
5424 else
5425 res += "ul";
5426 }
5427 else
5428 {
5429 for (uint32_t i = 0; i < c.vector_size(); i++)
5430 {
5431 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5432 res += to_expression(c.specialization_constant_id(vector, i));
5433 else
5434 {
5435 res += convert_to_string(c.scalar_u64(vector, i));
5436 if (backend.long_long_literal_suffix)
5437 res += "ull";
5438 else
5439 res += "ul";
5440 }
5441
5442 if (i + 1 < c.vector_size())
5443 res += ", ";
5444 }
5445 }
5446 break;
5447
5448 case SPIRType::UInt:
5449 if (splat)
5450 {
5451 res += convert_to_string(c.scalar(vector, 0));
5452 if (is_legacy())
5453 {
5454 // Fake unsigned constant literals with signed ones if possible.
5455 // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
5456 if (c.scalar_i32(vector, 0) < 0)
5457 SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative.");
5458 }
5459 else if (backend.uint32_t_literal_suffix)
5460 res += "u";
5461 }
5462 else
5463 {
5464 for (uint32_t i = 0; i < c.vector_size(); i++)
5465 {
5466 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5467 res += to_expression(c.specialization_constant_id(vector, i));
5468 else
5469 {
5470 res += convert_to_string(c.scalar(vector, i));
5471 if (is_legacy())
5472 {
5473 // Fake unsigned constant literals with signed ones if possible.
5474 // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
5475 if (c.scalar_i32(vector, i) < 0)
5476 SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made "
5477 "the literal negative.");
5478 }
5479 else if (backend.uint32_t_literal_suffix)
5480 res += "u";
5481 }
5482
5483 if (i + 1 < c.vector_size())
5484 res += ", ";
5485 }
5486 }
5487 break;
5488
5489 case SPIRType::Int:
5490 if (splat)
5491 res += convert_to_string(c.scalar_i32(vector, 0));
5492 else
5493 {
5494 for (uint32_t i = 0; i < c.vector_size(); i++)
5495 {
5496 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5497 res += to_expression(c.specialization_constant_id(vector, i));
5498 else
5499 res += convert_to_string(c.scalar_i32(vector, i));
5500 if (i + 1 < c.vector_size())
5501 res += ", ";
5502 }
5503 }
5504 break;
5505
5506 case SPIRType::UShort:
5507 if (splat)
5508 {
5509 res += convert_to_string(c.scalar(vector, 0));
5510 }
5511 else
5512 {
5513 for (uint32_t i = 0; i < c.vector_size(); i++)
5514 {
5515 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5516 res += to_expression(c.specialization_constant_id(vector, i));
5517 else
5518 {
5519 if (*backend.uint16_t_literal_suffix)
5520 {
5521 res += convert_to_string(c.scalar_u16(vector, i));
5522 res += backend.uint16_t_literal_suffix;
5523 }
5524 else
5525 {
5526 // If backend doesn't have a literal suffix, we need to value cast.
5527 res += type_to_glsl(scalar_type);
5528 res += "(";
5529 res += convert_to_string(c.scalar_u16(vector, i));
5530 res += ")";
5531 }
5532 }
5533
5534 if (i + 1 < c.vector_size())
5535 res += ", ";
5536 }
5537 }
5538 break;
5539
5540 case SPIRType::Short:
5541 if (splat)
5542 {
5543 res += convert_to_string(c.scalar_i16(vector, 0));
5544 }
5545 else
5546 {
5547 for (uint32_t i = 0; i < c.vector_size(); i++)
5548 {
5549 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5550 res += to_expression(c.specialization_constant_id(vector, i));
5551 else
5552 {
5553 if (*backend.int16_t_literal_suffix)
5554 {
5555 res += convert_to_string(c.scalar_i16(vector, i));
5556 res += backend.int16_t_literal_suffix;
5557 }
5558 else
5559 {
5560 // If backend doesn't have a literal suffix, we need to value cast.
5561 res += type_to_glsl(scalar_type);
5562 res += "(";
5563 res += convert_to_string(c.scalar_i16(vector, i));
5564 res += ")";
5565 }
5566 }
5567
5568 if (i + 1 < c.vector_size())
5569 res += ", ";
5570 }
5571 }
5572 break;
5573
5574 case SPIRType::UByte:
5575 if (splat)
5576 {
5577 res += convert_to_string(c.scalar_u8(vector, 0));
5578 }
5579 else
5580 {
5581 for (uint32_t i = 0; i < c.vector_size(); i++)
5582 {
5583 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5584 res += to_expression(c.specialization_constant_id(vector, i));
5585 else
5586 {
5587 res += type_to_glsl(scalar_type);
5588 res += "(";
5589 res += convert_to_string(c.scalar_u8(vector, i));
5590 res += ")";
5591 }
5592
5593 if (i + 1 < c.vector_size())
5594 res += ", ";
5595 }
5596 }
5597 break;
5598
5599 case SPIRType::SByte:
5600 if (splat)
5601 {
5602 res += convert_to_string(c.scalar_i8(vector, 0));
5603 }
5604 else
5605 {
5606 for (uint32_t i = 0; i < c.vector_size(); i++)
5607 {
5608 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5609 res += to_expression(c.specialization_constant_id(vector, i));
5610 else
5611 {
5612 res += type_to_glsl(scalar_type);
5613 res += "(";
5614 res += convert_to_string(c.scalar_i8(vector, i));
5615 res += ")";
5616 }
5617
5618 if (i + 1 < c.vector_size())
5619 res += ", ";
5620 }
5621 }
5622 break;
5623
5624 case SPIRType::Boolean:
5625 if (splat)
5626 res += c.scalar(vector, 0) ? "true" : "false";
5627 else
5628 {
5629 for (uint32_t i = 0; i < c.vector_size(); i++)
5630 {
5631 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5632 res += to_expression(c.specialization_constant_id(vector, i));
5633 else
5634 res += c.scalar(vector, i) ? "true" : "false";
5635
5636 if (i + 1 < c.vector_size())
5637 res += ", ";
5638 }
5639 }
5640 break;
5641
5642 default:
5643 SPIRV_CROSS_THROW("Invalid constant expression basetype.");
5644 }
5645
5646 if (c.vector_size() > 1 && !swizzle_splat)
5647 res += ")";
5648
5649 return res;
5650}
5651
5652SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id)
5653{
5654 forced_temporaries.insert(id);
5655 emit_uninitialized_temporary(type, id);
5656 return set<SPIRExpression>(id, to_name(id), type, true);
5657}
5658
5659void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id)
5660{
5661 // If we're declaring temporaries inside continue blocks,
5662 // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
5663 if (current_continue_block && !hoisted_temporaries.count(result_id))
5664 {
5665 auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
5666 if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
5667 [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
5668 return tmp.first == result_type && tmp.second == result_id;
5669 }) == end(header.declare_temporary))
5670 {
5671 header.declare_temporary.emplace_back(result_type, result_id);
5672 hoisted_temporaries.insert(result_id);
5673 force_recompile();
5674 }
5675 }
5676 else if (hoisted_temporaries.count(result_id) == 0)
5677 {
5678 auto &type = get<SPIRType>(result_type);
5679 auto &flags = ir.meta[result_id].decoration.decoration_flags;
5680
5681 // The result_id has not been made into an expression yet, so use flags interface.
5682 add_local_variable_name(result_id);
5683
5684 string initializer;
5685 if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
5686 initializer = join(" = ", to_zero_initialized_expression(result_type));
5687
5688 statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), initializer, ";");
5689 }
5690}
5691
5692string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
5693{
5694 auto &type = get<SPIRType>(result_type);
5695 auto &flags = ir.meta[result_id].decoration.decoration_flags;
5696
5697 // If we're declaring temporaries inside continue blocks,
5698 // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
5699 if (current_continue_block && !hoisted_temporaries.count(result_id))
5700 {
5701 auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
5702 if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
5703 [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
5704 return tmp.first == result_type && tmp.second == result_id;
5705 }) == end(header.declare_temporary))
5706 {
5707 header.declare_temporary.emplace_back(result_type, result_id);
5708 hoisted_temporaries.insert(result_id);
5709 force_recompile();
5710 }
5711
5712 return join(to_name(result_id), " = ");
5713 }
5714 else if (hoisted_temporaries.count(result_id))
5715 {
5716 // The temporary has already been declared earlier, so just "declare" the temporary by writing to it.
5717 return join(to_name(result_id), " = ");
5718 }
5719 else
5720 {
5721 // The result_id has not been made into an expression yet, so use flags interface.
5722 add_local_variable_name(result_id);
5723 return join(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = ");
5724 }
5725}
5726
5727bool CompilerGLSL::expression_is_forwarded(uint32_t id) const
5728{
5729 return forwarded_temporaries.count(id) != 0;
5730}
5731
5732bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const
5733{
5734 return suppressed_usage_tracking.count(id) != 0;
5735}
5736
5737bool CompilerGLSL::expression_read_implies_multiple_reads(uint32_t id) const
5738{
5739 auto *expr = maybe_get<SPIRExpression>(id);
5740 if (!expr)
5741 return false;
5742
5743 // If we're emitting code at a deeper loop level than when we emitted the expression,
5744 // we're probably reading the same expression over and over.
5745 return current_loop_level > expr->emitted_loop_level;
5746}
5747
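// Core expression emitter: either forwards the RHS as an inline expression (emitting no statement),
// or declares a temporary and assigns the RHS to it when forwarding is disabled or the ID is a forced temporary.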
5748SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding,
5749 bool suppress_usage_tracking)
5750{
5751 if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries)))
5752 {
5753 // Just forward it without temporary.
5754 // If the forward is trivial, we do not force flushing to temporary for this expression.
5755 forwarded_temporaries.insert(result_id);
5756 if (suppress_usage_tracking)
5757 suppressed_usage_tracking.insert(result_id);
5758
5759 return set<SPIRExpression>(result_id, rhs, result_type, true);
5760 }
5761 else
5762 {
5763 // If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are).
5764 statement(declare_temporary(result_type, result_id), rhs, ";");
5765 return set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
5766 }
5767}
5768
5769void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
5770{
5771 bool forward = should_forward(op0);
5772 emit_op(result_type, result_id, join(op, to_enclosed_unpacked_expression(op0)), forward);
5773 inherit_expression_dependencies(result_id, op0);
5774}
5775
5776void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
5777{
5778 // Various FP arithmetic opcodes such as add, sub, mul will hit this.
5779 bool force_temporary_precise = backend.support_precise_qualifier &&
5780 has_decoration(result_id, DecorationNoContraction) &&
5781 type_is_floating_point(get<SPIRType>(result_type));
5782 bool forward = should_forward(op0) && should_forward(op1) && !force_temporary_precise;
5783
5784 emit_op(result_type, result_id,
5785 join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward);
5786
5787 inherit_expression_dependencies(result_id, op0);
5788 inherit_expression_dependencies(result_id, op1);
5789}
5790
5791void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op)
5792{
5793 auto &type = get<SPIRType>(result_type);
5794 auto expr = type_to_glsl_constructor(type);
5795 expr += '(';
5796 for (uint32_t i = 0; i < type.vecsize; i++)
5797 {
5798 // Make sure to call to_expression multiple times to ensure
5799 // that these expressions are properly flushed to temporaries if needed.
5800 expr += op;
5801 expr += to_extract_component_expression(operand, i);
5802
5803 if (i + 1 < type.vecsize)
5804 expr += ", ";
5805 }
5806 expr += ')';
5807 emit_op(result_type, result_id, expr, should_forward(operand));
5808
5809 inherit_expression_dependencies(result_id, operand);
5810}
5811
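// Emits a component-wise expansion for operators which cannot be applied to vectors directly
// in the target language, e.g. bvec2(a.x && b.x, a.y && b.y).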
5812void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5813 const char *op, bool negate, SPIRType::BaseType expected_type)
5814{
5815 auto &type0 = expression_type(op0);
5816 auto &type1 = expression_type(op1);
5817
5818 SPIRType target_type0 = type0;
5819 SPIRType target_type1 = type1;
5820 target_type0.basetype = expected_type;
5821 target_type1.basetype = expected_type;
5822 target_type0.vecsize = 1;
5823 target_type1.vecsize = 1;
5824
5825 auto &type = get<SPIRType>(result_type);
5826 auto expr = type_to_glsl_constructor(type);
5827 expr += '(';
5828 for (uint32_t i = 0; i < type.vecsize; i++)
5829 {
5830 // Make sure to call to_expression multiple times to ensure
5831 // that these expressions are properly flushed to temporaries if needed.
5832 if (negate)
5833 expr += "!(";
5834
5835 if (expected_type != SPIRType::Unknown && type0.basetype != expected_type)
5836 expr += bitcast_expression(target_type0, type0.basetype, to_extract_component_expression(op0, i));
5837 else
5838 expr += to_extract_component_expression(op0, i);
5839
5840 expr += ' ';
5841 expr += op;
5842 expr += ' ';
5843
5844 if (expected_type != SPIRType::Unknown && type1.basetype != expected_type)
5845 expr += bitcast_expression(target_type1, type1.basetype, to_extract_component_expression(op1, i));
5846 else
5847 expr += to_extract_component_expression(op1, i);
5848
5849 if (negate)
5850 expr += ")";
5851
5852 if (i + 1 < type.vecsize)
5853 expr += ", ";
5854 }
5855 expr += ')';
5856 emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
5857
5858 inherit_expression_dependencies(result_id, op0);
5859 inherit_expression_dependencies(result_id, op1);
5860}
5861
5862SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type,
5863 uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type)
5864{
5865 auto &type0 = expression_type(op0);
5866 auto &type1 = expression_type(op1);
5867
5868 // We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs.
5869 // For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected
	// since the equality test is exactly the same.
5871 bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type);
5872
5873 // Create a fake type so we can bitcast to it.
5874 // We only deal with regular arithmetic types here like int, uints and so on.
5875 SPIRType expected_type;
5876 expected_type.basetype = input_type;
5877 expected_type.vecsize = type0.vecsize;
5878 expected_type.columns = type0.columns;
5879 expected_type.width = type0.width;
5880
5881 if (cast)
5882 {
5883 cast_op0 = bitcast_glsl(expected_type, op0);
5884 cast_op1 = bitcast_glsl(expected_type, op1);
5885 }
5886 else
5887 {
5888 // If we don't cast, our actual input type is that of the first (or second) argument.
5889 cast_op0 = to_enclosed_unpacked_expression(op0);
5890 cast_op1 = to_enclosed_unpacked_expression(op1);
5891 input_type = type0.basetype;
5892 }
5893
5894 return expected_type;
5895}
5896
5897bool CompilerGLSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0)
5898{
5899 // Some bitcasts may require complex casting sequences, and are implemented here.
	// Otherwise a simple unary function via bitcast_glsl_op will do.
5901
5902 auto &output_type = get<SPIRType>(result_type);
5903 auto &input_type = expression_type(op0);
5904 string expr;
5905
5906 if (output_type.basetype == SPIRType::Half && input_type.basetype == SPIRType::Float && input_type.vecsize == 1)
5907 expr = join("unpackFloat2x16(floatBitsToUint(", to_unpacked_expression(op0), "))");
5908 else if (output_type.basetype == SPIRType::Float && input_type.basetype == SPIRType::Half &&
5909 input_type.vecsize == 2)
5910 expr = join("uintBitsToFloat(packFloat2x16(", to_unpacked_expression(op0), "))");
5911 else
5912 return false;
5913
5914 emit_op(result_type, id, expr, should_forward(op0));
5915 return true;
5916}
5917
5918void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5919 const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
5920{
5921 string cast_op0, cast_op1;
5922 auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
5923 auto &out_type = get<SPIRType>(result_type);
5924
	// We might have cast away from the result type, so bitcast again.
5926 // For example, arithmetic right shift with uint inputs.
5927 // Special case boolean outputs since relational opcodes output booleans instead of int/uint.
5928 string expr;
5929 if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
5930 {
5931 expected_type.basetype = input_type;
5932 expr = bitcast_glsl_op(out_type, expected_type);
5933 expr += '(';
5934 expr += join(cast_op0, " ", op, " ", cast_op1);
5935 expr += ')';
5936 }
5937 else
5938 expr += join(cast_op0, " ", op, " ", cast_op1);
5939
5940 emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
5941 inherit_expression_dependencies(result_id, op0);
5942 inherit_expression_dependencies(result_id, op1);
5943}
5944
5945void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
5946{
5947 bool forward = should_forward(op0);
5948 emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ")"), forward);
5949 inherit_expression_dependencies(result_id, op0);
5950}
5951
5952void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5953 const char *op)
5954{
5955 bool forward = should_forward(op0) && should_forward(op1);
5956 emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ")"),
5957 forward);
5958 inherit_expression_dependencies(result_id, op0);
5959 inherit_expression_dependencies(result_id, op1);
5960}
5961
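// Atomic operations have side effects, so the result is always forced into a temporary rather than
// forwarded, and all atomic-capable variables are flushed afterwards.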
5962void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5963 const char *op)
5964{
5965 forced_temporaries.insert(result_id);
5966 emit_op(result_type, result_id,
5967 join(op, "(", to_non_uniform_aware_expression(op0), ", ",
5968 to_unpacked_expression(op1), ")"), false);
5969 flush_all_atomic_capable_variables();
5970}
5971
5972void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
5973 uint32_t op0, uint32_t op1, uint32_t op2,
5974 const char *op)
5975{
5976 forced_temporaries.insert(result_id);
5977 emit_op(result_type, result_id,
5978 join(op, "(", to_non_uniform_aware_expression(op0), ", ",
5979 to_unpacked_expression(op1), ", ", to_unpacked_expression(op2), ")"), false);
5980 flush_all_atomic_capable_variables();
5981}
5982
5983void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op,
5984 SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type)
5985{
5986 auto &out_type = get<SPIRType>(result_type);
5987 auto &expr_type = expression_type(op0);
5988 auto expected_type = out_type;
5989
5990 // Bit-widths might be different in unary cases because we use it for SConvert/UConvert and friends.
5991 expected_type.basetype = input_type;
5992 expected_type.width = expr_type.width;
5993
5994 string cast_op;
5995 if (expr_type.basetype != input_type)
5996 {
5997 if (expr_type.basetype == SPIRType::Boolean)
5998 cast_op = join(type_to_glsl(expected_type), "(", to_unpacked_expression(op0), ")");
5999 else
6000 cast_op = bitcast_glsl(expected_type, op0);
6001 }
6002 else
6003 cast_op = to_unpacked_expression(op0);
6004
6005 string expr;
6006 if (out_type.basetype != expected_result_type)
6007 {
6008 expected_type.basetype = expected_result_type;
6009 expected_type.width = out_type.width;
6010 if (out_type.basetype == SPIRType::Boolean)
6011 expr = type_to_glsl(out_type);
6012 else
6013 expr = bitcast_glsl_op(out_type, expected_type);
6014 expr += '(';
6015 expr += join(op, "(", cast_op, ")");
6016 expr += ')';
6017 }
6018 else
6019 {
6020 expr += join(op, "(", cast_op, ")");
6021 }
6022
6023 emit_op(result_type, result_id, expr, should_forward(op0));
6024 inherit_expression_dependencies(result_id, op0);
6025}
6026
6027// Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs
6028// and different vector sizes all at once. Need a special purpose method here.
6029void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6030 uint32_t op2, const char *op,
6031 SPIRType::BaseType expected_result_type,
6032 SPIRType::BaseType input_type0, SPIRType::BaseType input_type1,
6033 SPIRType::BaseType input_type2)
6034{
6035 auto &out_type = get<SPIRType>(result_type);
6036 auto expected_type = out_type;
6037 expected_type.basetype = input_type0;
6038
6039 string cast_op0 =
6040 expression_type(op0).basetype != input_type0 ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
6041
6042 auto op1_expr = to_unpacked_expression(op1);
6043 auto op2_expr = to_unpacked_expression(op2);
6044
6045 // Use value casts here instead. Input must be exactly int or uint, but SPIR-V might be 16-bit.
6046 expected_type.basetype = input_type1;
6047 expected_type.vecsize = 1;
6048 string cast_op1 = expression_type(op1).basetype != input_type1 ?
6049 join(type_to_glsl_constructor(expected_type), "(", op1_expr, ")") :
6050 op1_expr;
6051
6052 expected_type.basetype = input_type2;
6053 expected_type.vecsize = 1;
6054 string cast_op2 = expression_type(op2).basetype != input_type2 ?
6055 join(type_to_glsl_constructor(expected_type), "(", op2_expr, ")") :
6056 op2_expr;
6057
6058 string expr;
6059 if (out_type.basetype != expected_result_type)
6060 {
6061 expected_type.vecsize = out_type.vecsize;
6062 expected_type.basetype = expected_result_type;
6063 expr = bitcast_glsl_op(out_type, expected_type);
6064 expr += '(';
6065 expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
6066 expr += ')';
6067 }
6068 else
6069 {
6070 expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
6071 }
6072
6073 emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
6074 inherit_expression_dependencies(result_id, op0);
6075 inherit_expression_dependencies(result_id, op1);
6076 inherit_expression_dependencies(result_id, op2);
6077}
6078
6079void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6080 uint32_t op2, const char *op, SPIRType::BaseType input_type)
6081{
6082 auto &out_type = get<SPIRType>(result_type);
6083 auto expected_type = out_type;
6084 expected_type.basetype = input_type;
6085 string cast_op0 =
6086 expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
6087 string cast_op1 =
6088 expression_type(op1).basetype != input_type ? bitcast_glsl(expected_type, op1) : to_unpacked_expression(op1);
6089 string cast_op2 =
6090 expression_type(op2).basetype != input_type ? bitcast_glsl(expected_type, op2) : to_unpacked_expression(op2);
6091
6092 string expr;
6093 if (out_type.basetype != input_type)
6094 {
6095 expr = bitcast_glsl_op(out_type, expected_type);
6096 expr += '(';
6097 expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
6098 expr += ')';
6099 }
6100 else
6101 {
6102 expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
6103 }
6104
6105 emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
6106 inherit_expression_dependencies(result_id, op0);
6107 inherit_expression_dependencies(result_id, op1);
6108 inherit_expression_dependencies(result_id, op2);
6109}
6110
6111void CompilerGLSL::emit_binary_func_op_cast_clustered(uint32_t result_type, uint32_t result_id, uint32_t op0,
6112 uint32_t op1, const char *op, SPIRType::BaseType input_type)
6113{
6114 // Special purpose method for implementing clustered subgroup opcodes.
	// The main difference is that op1 does not participate in any casting; it must be a literal.
6116 auto &out_type = get<SPIRType>(result_type);
6117 auto expected_type = out_type;
6118 expected_type.basetype = input_type;
6119 string cast_op0 =
6120 expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
6121
6122 string expr;
6123 if (out_type.basetype != input_type)
6124 {
6125 expr = bitcast_glsl_op(out_type, expected_type);
6126 expr += '(';
6127 expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")");
6128 expr += ')';
6129 }
6130 else
6131 {
6132 expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")");
6133 }
6134
6135 emit_op(result_type, result_id, expr, should_forward(op0));
6136 inherit_expression_dependencies(result_id, op0);
6137}
6138
6139void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6140 const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
6141{
6142 string cast_op0, cast_op1;
6143 auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
6144 auto &out_type = get<SPIRType>(result_type);
6145
6146 // Special case boolean outputs since relational opcodes output booleans instead of int/uint.
6147 string expr;
6148 if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
6149 {
6150 expected_type.basetype = input_type;
6151 expr = bitcast_glsl_op(out_type, expected_type);
6152 expr += '(';
6153 expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
6154 expr += ')';
6155 }
6156 else
6157 {
6158 expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
6159 }
6160
6161 emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
6162 inherit_expression_dependencies(result_id, op0);
6163 inherit_expression_dependencies(result_id, op1);
6164}
6165
6166void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6167 uint32_t op2, const char *op)
6168{
6169 bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2);
6170 emit_op(result_type, result_id,
6171 join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
6172 to_unpacked_expression(op2), ")"),
6173 forward);
6174
6175 inherit_expression_dependencies(result_id, op0);
6176 inherit_expression_dependencies(result_id, op1);
6177 inherit_expression_dependencies(result_id, op2);
6178}
6179
6180void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6181 uint32_t op2, uint32_t op3, const char *op)
6182{
6183 bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
6184 emit_op(result_type, result_id,
6185 join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
6186 to_unpacked_expression(op2), ", ", to_unpacked_expression(op3), ")"),
6187 forward);
6188
6189 inherit_expression_dependencies(result_id, op0);
6190 inherit_expression_dependencies(result_id, op1);
6191 inherit_expression_dependencies(result_id, op2);
6192 inherit_expression_dependencies(result_id, op3);
6193}
6194
6195void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6196 uint32_t op2, uint32_t op3, const char *op,
6197 SPIRType::BaseType offset_count_type)
6198{
6199 // Only need to cast offset/count arguments. Types of base/insert must be same as result type,
6200 // and bitfieldInsert is sign invariant.
6201 bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
6202
6203 auto op0_expr = to_unpacked_expression(op0);
6204 auto op1_expr = to_unpacked_expression(op1);
6205 auto op2_expr = to_unpacked_expression(op2);
6206 auto op3_expr = to_unpacked_expression(op3);
6207
6208 SPIRType target_type;
6209 target_type.vecsize = 1;
6210 target_type.basetype = offset_count_type;
6211
6212 if (expression_type(op2).basetype != offset_count_type)
6213 {
6214 // Value-cast here. Input might be 16-bit. GLSL requires int.
6215 op2_expr = join(type_to_glsl_constructor(target_type), "(", op2_expr, ")");
6216 }
6217
6218 if (expression_type(op3).basetype != offset_count_type)
6219 {
6220 // Value-cast here. Input might be 16-bit. GLSL requires int.
6221 op3_expr = join(type_to_glsl_constructor(target_type), "(", op3_expr, ")");
6222 }
6223
6224 emit_op(result_type, result_id, join(op, "(", op0_expr, ", ", op1_expr, ", ", op2_expr, ", ", op3_expr, ")"),
6225 forward);
6226
6227 inherit_expression_dependencies(result_id, op0);
6228 inherit_expression_dependencies(result_id, op1);
6229 inherit_expression_dependencies(result_id, op2);
6230 inherit_expression_dependencies(result_id, op3);
6231}
6232
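// Resolves the legacy name for a texture builtin, e.g. "texture" -> texture2D and
// "textureLod" -> texture2DLodEXT on legacy ES, pulling in any extensions that are required.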
6233string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t tex)
6234{
6235 const char *type;
6236 switch (imgtype.image.dim)
6237 {
6238 case spv::Dim1D:
6239 type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D";
6240 break;
6241 case spv::Dim2D:
6242 type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
6243 break;
6244 case spv::Dim3D:
6245 type = "3D";
6246 break;
6247 case spv::DimCube:
6248 type = "Cube";
6249 break;
6250 case spv::DimRect:
6251 type = "2DRect";
6252 break;
6253 case spv::DimBuffer:
6254 type = "Buffer";
6255 break;
6256 case spv::DimSubpassData:
6257 type = "2D";
6258 break;
6259 default:
6260 type = "";
6261 break;
6262 }
6263
6264 // In legacy GLSL, an extension is required for textureLod in the fragment
6265 // shader or textureGrad anywhere.
6266 bool legacy_lod_ext = false;
6267 auto &execution = get_entry_point();
6268 if (op == "textureGrad" || op == "textureProjGrad" ||
6269 ((op == "textureLod" || op == "textureProjLod") && execution.model != ExecutionModelVertex))
6270 {
6271 if (is_legacy_es())
6272 {
6273 legacy_lod_ext = true;
6274 require_extension_internal("GL_EXT_shader_texture_lod");
6275 }
6276 else if (is_legacy_desktop())
6277 require_extension_internal("GL_ARB_shader_texture_lod");
6278 }
6279
6280 if (op == "textureLodOffset" || op == "textureProjLodOffset")
6281 {
6282 if (is_legacy_es())
6283 SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES"));
6284
6285 require_extension_internal("GL_EXT_gpu_shader4");
6286 }
6287
6288 // GLES has very limited support for shadow samplers.
	// Basically, shadow2D and shadow2DProj work through EXT_shadow_samplers;
	// everything else just throws.
6291 bool is_comparison = is_depth_image(imgtype, tex);
6292 if (is_comparison && is_legacy_es())
6293 {
6294 if (op == "texture" || op == "textureProj")
6295 require_extension_internal("GL_EXT_shadow_samplers");
6296 else
6297 SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES"));
6298 }
6299
6300 if (op == "textureSize")
6301 {
6302 if (is_legacy_es())
6303 SPIRV_CROSS_THROW("textureSize not supported in legacy ES");
6304 if (is_comparison)
6305 SPIRV_CROSS_THROW("textureSize not supported on shadow sampler in legacy GLSL");
6306 require_extension_internal("GL_EXT_gpu_shader4");
6307 }
6308
6309 if (op == "texelFetch" && is_legacy_es())
6310 SPIRV_CROSS_THROW("texelFetch not supported in legacy ES");
6311
6312 bool is_es_and_depth = is_legacy_es() && is_comparison;
6313 std::string type_prefix = is_comparison ? "shadow" : "texture";
6314
6315 if (op == "texture")
6316 return is_es_and_depth ? join(type_prefix, type, "EXT") : join(type_prefix, type);
6317 else if (op == "textureLod")
6318 return join(type_prefix, type, legacy_lod_ext ? "LodEXT" : "Lod");
6319 else if (op == "textureProj")
6320 return join(type_prefix, type, is_es_and_depth ? "ProjEXT" : "Proj");
6321 else if (op == "textureGrad")
6322 return join(type_prefix, type, is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad");
6323 else if (op == "textureProjLod")
6324 return join(type_prefix, type, legacy_lod_ext ? "ProjLodEXT" : "ProjLod");
6325 else if (op == "textureLodOffset")
6326 return join(type_prefix, type, "LodOffset");
6327 else if (op == "textureProjGrad")
6328 return join(type_prefix, type,
6329 is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad");
6330 else if (op == "textureProjLodOffset")
6331 return join(type_prefix, type, "ProjLodOffset");
6332 else if (op == "textureSize")
6333 return join("textureSize", type);
6334 else if (op == "texelFetch")
6335 return join("texelFetch", type);
6336 else
6337 {
6338 SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op));
6339 }
6340}
6341
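// Checks whether an OpSelect between the constants 0 (false result) and 1 (true result) can be emitted
// as a plain constructor cast of the boolean, e.g. uint(cond). If so, 'op' is set to that constructor.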
6342bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp)
6343{
6344 auto *cleft = maybe_get<SPIRConstant>(left);
6345 auto *cright = maybe_get<SPIRConstant>(right);
6346 auto &lerptype = expression_type(lerp);
6347
6348 // If our targets aren't constants, we cannot use construction.
6349 if (!cleft || !cright)
6350 return false;
6351
6352 // If our targets are spec constants, we cannot use construction.
6353 if (cleft->specialization || cright->specialization)
6354 return false;
6355
6356 auto &value_type = get<SPIRType>(cleft->constant_type);
6357
6358 if (lerptype.basetype != SPIRType::Boolean)
6359 return false;
6360 if (value_type.basetype == SPIRType::Struct || is_array(value_type))
6361 return false;
6362 if (!backend.use_constructor_splatting && value_type.vecsize != lerptype.vecsize)
6363 return false;
6364
6365 // The only valid way to use matrices with OpSelect in SPIR-V 1.4 is a scalar select.
6366 // A matrix(scalar) constructor only fills in the diagonal, so this gets messy very quickly.
6367 // Just avoid this case.
6368 if (value_type.columns > 1)
6369 return false;
6370
6371 // If our bool selects between 0 and 1, we can cast from bool instead, turning the select into a trivial constructor cast.
6372 bool ret = true;
6373 for (uint32_t row = 0; ret && row < value_type.vecsize; row++)
6374 {
6375 switch (type.basetype)
6376 {
6377 case SPIRType::Short:
6378 case SPIRType::UShort:
6379 ret = cleft->scalar_u16(0, row) == 0 && cright->scalar_u16(0, row) == 1;
6380 break;
6381
6382 case SPIRType::Int:
6383 case SPIRType::UInt:
6384 ret = cleft->scalar(0, row) == 0 && cright->scalar(0, row) == 1;
6385 break;
6386
6387 case SPIRType::Half:
6388 ret = cleft->scalar_f16(0, row) == 0.0f && cright->scalar_f16(0, row) == 1.0f;
6389 break;
6390
6391 case SPIRType::Float:
6392 ret = cleft->scalar_f32(0, row) == 0.0f && cright->scalar_f32(0, row) == 1.0f;
6393 break;
6394
6395 case SPIRType::Double:
6396 ret = cleft->scalar_f64(0, row) == 0.0 && cright->scalar_f64(0, row) == 1.0;
6397 break;
6398
6399 case SPIRType::Int64:
6400 case SPIRType::UInt64:
6401 ret = cleft->scalar_u64(0, row) == 0 && cright->scalar_u64(0, row) == 1;
6402 break;
6403
6404 default:
6405 ret = false;
6406 break;
6407 }
6408 }
6409
6410 if (ret)
6411 op = type_to_glsl_constructor(type);
6412 return ret;
6413}
6414
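// Builds a plain component-wise ternary expression as a fallback for OpSelect.
// For vector selects this expands roughly as (illustrative only):
//   vec2(s.x ? a.x : b.x, s.y ? a.y : b.y)
// which is used when a boolean mix() overload is not available.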
6415string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value,
6416 uint32_t false_value)
6417{
6418 string expr;
6419 auto &lerptype = expression_type(select);
6420
6421 if (lerptype.vecsize == 1)
6422 expr = join(to_enclosed_expression(select), " ? ", to_enclosed_pointer_expression(true_value), " : ",
6423 to_enclosed_pointer_expression(false_value));
6424 else
6425 {
6426 auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(expression, i); };
6427
6428 expr = type_to_glsl_constructor(restype);
6429 expr += "(";
6430 for (uint32_t i = 0; i < restype.vecsize; i++)
6431 {
6432 expr += swiz(select, i);
6433 expr += " ? ";
6434 expr += swiz(true_value, i);
6435 expr += " : ";
6436 expr += swiz(false_value, i);
6437 if (i + 1 < restype.vecsize)
6438 expr += ", ";
6439 }
6440 expr += ")";
6441 }
6442
6443 return expr;
6444}
6445
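// Emits OpSelect / GLSLstd450 FMix-style mixes. Depending on target capabilities the result is either
// a trivial constructor cast (to_trivial_mix_op), a component-wise ternary (to_ternary_expression),
// a boolean mix() overload, or a plain mix() call.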
6446void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp)
6447{
6448 auto &lerptype = expression_type(lerp);
6449 auto &restype = get<SPIRType>(result_type);
6450
6451 // If this results in a variable pointer, assume it may be written through.
6452 if (restype.pointer)
6453 {
6454 register_write(left);
6455 register_write(right);
6456 }
6457
6458 string mix_op;
6459 bool has_boolean_mix = *backend.boolean_mix_function &&
6460 ((options.es && options.version >= 310) || (!options.es && options.version >= 450));
6461 bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp);
6462
6463 // Cannot use boolean mix when the lerp argument is just a single boolean;
6464 // fall back to regular ternary expressions.
6465 if (lerptype.vecsize == 1)
6466 has_boolean_mix = false;
6467
6468 // If we can reduce the mix to a simple cast, do so.
6469 // This helps for cases like int(bool) and uint(bool), which are implemented with
6470 // OpSelect between constants 1 and 0.
6471 if (trivial_mix)
6472 {
6473 emit_unary_func_op(result_type, id, lerp, mix_op.c_str());
6474 }
6475 else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean)
6476 {
6477 // Boolean mix not supported on desktop without extension.
6478 // Was added in OpenGL 4.5 with ES 3.1 compat.
6479 //
6480 // Could use GL_EXT_shader_integer_mix on desktop at least,
6481 // but Apple doesn't support it. :(
6482 // Just implement it as ternary expressions.
6483 auto expr = to_ternary_expression(get<SPIRType>(result_type), lerp, right, left);
6484 emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp));
6485 inherit_expression_dependencies(id, left);
6486 inherit_expression_dependencies(id, right);
6487 inherit_expression_dependencies(id, lerp);
6488 }
6489 else if (lerptype.basetype == SPIRType::Boolean)
6490 emit_trinary_func_op(result_type, id, left, right, lerp, backend.boolean_mix_function);
6491 else
6492 emit_trinary_func_op(result_type, id, left, right, lerp, "mix");
6493}
6494
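// Resolves the expression for a (texture, sampler) pair that has been remapped to a combined sampler.
// This only works if the API user ran build_combined_image_samplers() before compile(); the name of the
// emitted combined uniform is whatever was assigned to the remapped ID (e.g. through set_name()).
// Hypothetical example of the remap being resolved here: texture2D uTex + sampler uSamp -> sampler2D uCombined.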
6495string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id)
6496{
6497 // Keep track of the array indices we have used to load the image.
6498 // We'll need to use the same array index into the combined image sampler array.
6499 auto image_expr = to_non_uniform_aware_expression(image_id);
6500 string array_expr;
6501 auto array_index = image_expr.find_first_of('[');
6502 if (array_index != string::npos)
6503 array_expr = image_expr.substr(array_index, string::npos);
6504
6505 auto &args = current_function->arguments;
6506
6507 // For GLSL and ESSL targets, we must enumerate all possible combinations of sampler2D(texture2D, sampler)
6508 // and redirect each combination to a new combined sampler2D uniform.
6509 auto *image = maybe_get_backing_variable(image_id);
6510 auto *samp = maybe_get_backing_variable(samp_id);
6511 if (image)
6512 image_id = image->self;
6513 if (samp)
6514 samp_id = samp->self;
6515
6516 auto image_itr = find_if(begin(args), end(args),
6517 [image_id](const SPIRFunction::Parameter &param) { return image_id == param.id; });
6518
6519 auto sampler_itr = find_if(begin(args), end(args),
6520 [samp_id](const SPIRFunction::Parameter &param) { return samp_id == param.id; });
6521
6522 if (image_itr != end(args) || sampler_itr != end(args))
6523 {
6524 // If the image or sampler originates from a function parameter, we will find it in our argument list.
6525 bool global_image = image_itr == end(args);
6526 bool global_sampler = sampler_itr == end(args);
6527 VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(args)));
6528 VariableID sid = global_sampler ? samp_id : VariableID(uint32_t(sampler_itr - begin(args)));
6529
6530 auto &combined = current_function->combined_parameters;
6531 auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) {
6532 return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid &&
6533 p.sampler_id == sid;
6534 });
6535
6536 if (itr != end(combined))
6537 return to_expression(itr->id) + array_expr;
6538 else
6539 {
6540 SPIRV_CROSS_THROW("Cannot find mapping for combined sampler parameter, was "
6541 "build_combined_image_samplers() used "
6542 "before compile() was called?");
6543 }
6544 }
6545 else
6546 {
6547 // For global sampler2D, look directly at the global remapping table.
6548 auto &mapping = combined_image_samplers;
6549 auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) {
6550 return combined.image_id == image_id && combined.sampler_id == samp_id;
6551 });
6552
6553 if (itr != end(combined_image_samplers))
6554 return to_expression(itr->combined_id) + array_expr;
6555 else
6556 {
6557 SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used "
6558 "before compile() was called?");
6559 }
6560 }
6561}
6562
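// Subset of subgroup opcodes that can also be emitted for plain OpenGL targets, where they are
// lowered through ShaderSubgroupSupportHelper and the relevant GL extensions instead of assuming
// full Vulkan-style GL_KHR_shader_subgroup support.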
6563bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op)
6564{
6565 switch (op)
6566 {
6567 case OpGroupNonUniformElect:
6568 case OpGroupNonUniformBallot:
6569 case OpGroupNonUniformBallotFindLSB:
6570 case OpGroupNonUniformBallotFindMSB:
6571 case OpGroupNonUniformBroadcast:
6572 case OpGroupNonUniformBroadcastFirst:
6573 case OpGroupNonUniformAll:
6574 case OpGroupNonUniformAny:
6575 case OpGroupNonUniformAllEqual:
6576 case OpControlBarrier:
6577 case OpMemoryBarrier:
6578 case OpGroupNonUniformBallotBitCount:
6579 case OpGroupNonUniformBallotBitExtract:
6580 case OpGroupNonUniformInverseBallot:
6581 return true;
6582 default:
6583 return false;
6584 }
6585}
6586
6587void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
6588{
6589 if (options.vulkan_semantics && combined_image_samplers.empty())
6590 {
6591 emit_binary_func_op(result_type, result_id, image_id, samp_id,
6592 type_to_glsl(get<SPIRType>(result_type), result_id).c_str());
6593 }
6594 else
6595 {
6596 // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
6597 emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true);
6598 }
6599
6600 // Make sure to suppress usage tracking and any expression invalidation.
6601 // It is illegal to create temporaries of opaque types.
6602 forwarded_temporaries.erase(result_id);
6603}
6604
6605static inline bool image_opcode_is_sample_no_dref(Op op)
6606{
6607 switch (op)
6608 {
6609 case OpImageSampleExplicitLod:
6610 case OpImageSampleImplicitLod:
6611 case OpImageSampleProjExplicitLod:
6612 case OpImageSampleProjImplicitLod:
6613 case OpImageFetch:
6614 case OpImageRead:
6615 case OpImageSparseSampleExplicitLod:
6616 case OpImageSparseSampleImplicitLod:
6617 case OpImageSparseSampleProjExplicitLod:
6618 case OpImageSparseSampleProjImplicitLod:
6619 case OpImageSparseFetch:
6620 case OpImageSparseRead:
6621 return true;
6622
6623 default:
6624 return false;
6625 }
6626}
6627
6628void CompilerGLSL::emit_sparse_feedback_temporaries(uint32_t result_type_id, uint32_t id, uint32_t &feedback_id,
6629 uint32_t &texel_id)
6630{
6631 // Need to allocate two temporaries.
6632 if (options.es)
6633 SPIRV_CROSS_THROW("Sparse texture feedback is not supported on ESSL.");
6634 require_extension_internal("GL_ARB_sparse_texture2");
6635
6636 auto &temps = extra_sub_expressions[id];
6637 if (temps == 0)
6638 temps = ir.increase_bound_by(2);
6639
6640 feedback_id = temps + 0;
6641 texel_id = temps + 1;
6642
6643 auto &return_type = get<SPIRType>(result_type_id);
6644 if (return_type.basetype != SPIRType::Struct || return_type.member_types.size() != 2)
6645 SPIRV_CROSS_THROW("Invalid return type for sparse feedback.");
6646 emit_uninitialized_temporary(return_type.member_types[0], feedback_id);
6647 emit_uninitialized_temporary(return_type.member_types[1], texel_id);
6648}
6649
6650uint32_t CompilerGLSL::get_sparse_feedback_texel_id(uint32_t id) const
6651{
6652 auto itr = extra_sub_expressions.find(id);
6653 if (itr == extra_sub_expressions.end())
6654 return 0;
6655 else
6656 return itr->second + 1;
6657}
6658
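// Common entry point for the texture sampling and fetch opcodes.
// Sparse variants are lowered through two temporaries, conceptually (identifier names are hypothetical):
//   int _code = sparseTextureARB(uTex, uv, _texel);
//   _result = ResultStruct(_code, _texel);
// so the SPIR-V (residency code, texel) struct result can be reconstructed.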
6659void CompilerGLSL::emit_texture_op(const Instruction &i, bool sparse)
6660{
6661 auto *ops = stream(i);
6662 auto op = static_cast<Op>(i.op);
6663
6664 SmallVector<uint32_t> inherited_expressions;
6665
6666 uint32_t result_type_id = ops[0];
6667 uint32_t id = ops[1];
6668 auto &return_type = get<SPIRType>(result_type_id);
6669
6670 uint32_t sparse_code_id = 0;
6671 uint32_t sparse_texel_id = 0;
6672 if (sparse)
6673 emit_sparse_feedback_temporaries(result_type_id, id, sparse_code_id, sparse_texel_id);
6674
6675 bool forward = false;
6676 string expr = to_texture_op(i, sparse, &forward, inherited_expressions);
6677
6678 if (sparse)
6679 {
6680 statement(to_expression(sparse_code_id), " = ", expr, ";");
6681 expr = join(type_to_glsl(return_type), "(", to_expression(sparse_code_id), ", ", to_expression(sparse_texel_id),
6682 ")");
6683 forward = true;
6684 inherited_expressions.clear();
6685 }
6686
6687 emit_op(result_type_id, id, expr, forward);
6688 for (auto &inherit : inherited_expressions)
6689 inherit_expression_dependencies(id, inherit);
6690
6691 // Do not register sparse ops as control dependent as they are always lowered to a temporary.
6692 switch (op)
6693 {
6694 case OpImageSampleDrefImplicitLod:
6695 case OpImageSampleImplicitLod:
6696 case OpImageSampleProjImplicitLod:
6697 case OpImageSampleProjDrefImplicitLod:
6698 register_control_dependent_expression(id);
6699 break;
6700
6701 default:
6702 break;
6703 }
6704}
6705
6706std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool *forward,
6707 SmallVector<uint32_t> &inherited_expressions)
6708{
6709 auto *ops = stream(i);
6710 auto op = static_cast<Op>(i.op);
6711 uint32_t length = i.length;
6712
6713 uint32_t result_type_id = ops[0];
6714 VariableID img = ops[2];
6715 uint32_t coord = ops[3];
6716 uint32_t dref = 0;
6717 uint32_t comp = 0;
6718 bool gather = false;
6719 bool proj = false;
6720 bool fetch = false;
6721 bool nonuniform_expression = false;
6722 const uint32_t *opt = nullptr;
6723
6724 auto &result_type = get<SPIRType>(result_type_id);
6725
6726 inherited_expressions.push_back(coord);
6727 if (has_decoration(img, DecorationNonUniform) && !maybe_get_backing_variable(img))
6728 nonuniform_expression = true;
6729
6730 switch (op)
6731 {
6732 case OpImageSampleDrefImplicitLod:
6733 case OpImageSampleDrefExplicitLod:
6734 case OpImageSparseSampleDrefImplicitLod:
6735 case OpImageSparseSampleDrefExplicitLod:
6736 dref = ops[4];
6737 opt = &ops[5];
6738 length -= 5;
6739 break;
6740
6741 case OpImageSampleProjDrefImplicitLod:
6742 case OpImageSampleProjDrefExplicitLod:
6743 case OpImageSparseSampleProjDrefImplicitLod:
6744 case OpImageSparseSampleProjDrefExplicitLod:
6745 dref = ops[4];
6746 opt = &ops[5];
6747 length -= 5;
6748 proj = true;
6749 break;
6750
6751 case OpImageDrefGather:
6752 case OpImageSparseDrefGather:
6753 dref = ops[4];
6754 opt = &ops[5];
6755 length -= 5;
6756 gather = true;
6757 if (options.es && options.version < 310)
6758 SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
6759 else if (!options.es && options.version < 400)
6760 SPIRV_CROSS_THROW("textureGather with depth compare requires GLSL 400.");
6761 break;
6762
6763 case OpImageGather:
6764 case OpImageSparseGather:
6765 comp = ops[4];
6766 opt = &ops[5];
6767 length -= 5;
6768 gather = true;
6769 if (options.es && options.version < 310)
6770 SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
6771 else if (!options.es && options.version < 400)
6772 {
6773 if (!expression_is_constant_null(comp))
6774 SPIRV_CROSS_THROW("textureGather with component requires GLSL 400.");
6775 require_extension_internal("GL_ARB_texture_gather");
6776 }
6777 break;
6778
6779 case OpImageFetch:
6780 case OpImageSparseFetch:
6781 case OpImageRead: // Reads == fetches in Metal (other langs will not get here)
6782 opt = &ops[4];
6783 length -= 4;
6784 fetch = true;
6785 break;
6786
6787 case OpImageSampleProjImplicitLod:
6788 case OpImageSampleProjExplicitLod:
6789 case OpImageSparseSampleProjImplicitLod:
6790 case OpImageSparseSampleProjExplicitLod:
6791 opt = &ops[4];
6792 length -= 4;
6793 proj = true;
6794 break;
6795
6796 default:
6797 opt = &ops[4];
6798 length -= 4;
6799 break;
6800 }
6801
6802 // Bypass pointers because we need the real image struct
6803 auto &type = expression_type(img);
6804 auto &imgtype = get<SPIRType>(type.self);
6805
6806 uint32_t coord_components = 0;
6807 switch (imgtype.image.dim)
6808 {
6809 case spv::Dim1D:
6810 coord_components = 1;
6811 break;
6812 case spv::Dim2D:
6813 coord_components = 2;
6814 break;
6815 case spv::Dim3D:
6816 coord_components = 3;
6817 break;
6818 case spv::DimCube:
6819 coord_components = 3;
6820 break;
6821 case spv::DimBuffer:
6822 coord_components = 1;
6823 break;
6824 default:
6825 coord_components = 2;
6826 break;
6827 }
6828
6829 if (dref)
6830 inherited_expressions.push_back(dref);
6831
6832 if (proj)
6833 coord_components++;
6834 if (imgtype.image.arrayed)
6835 coord_components++;
6836
6837 uint32_t bias = 0;
6838 uint32_t lod = 0;
6839 uint32_t grad_x = 0;
6840 uint32_t grad_y = 0;
6841 uint32_t coffset = 0;
6842 uint32_t offset = 0;
6843 uint32_t coffsets = 0;
6844 uint32_t sample = 0;
6845 uint32_t minlod = 0;
6846 uint32_t flags = 0;
6847
6848 if (length)
6849 {
6850 flags = *opt++;
6851 length--;
6852 }
6853
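	// Optional image operands are consumed in the bit order of the ImageOperands mask,
	// i.e. Bias, Lod, Grad, ConstOffset, Offset, ConstOffsets, Sample, MinLod,
	// matching the order the SPIR-V specification requires them to appear in.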
6854 auto test = [&](uint32_t &v, uint32_t flag) {
6855 if (length && (flags & flag))
6856 {
6857 v = *opt++;
6858 inherited_expressions.push_back(v);
6859 length--;
6860 }
6861 };
6862
6863 test(bias, ImageOperandsBiasMask);
6864 test(lod, ImageOperandsLodMask);
6865 test(grad_x, ImageOperandsGradMask);
6866 test(grad_y, ImageOperandsGradMask);
6867 test(coffset, ImageOperandsConstOffsetMask);
6868 test(offset, ImageOperandsOffsetMask);
6869 test(coffsets, ImageOperandsConstOffsetsMask);
6870 test(sample, ImageOperandsSampleMask);
6871 test(minlod, ImageOperandsMinLodMask);
6872
6873 TextureFunctionBaseArguments base_args = {};
6874 base_args.img = img;
6875 base_args.imgtype = &imgtype;
6876 base_args.is_fetch = fetch != 0;
6877 base_args.is_gather = gather != 0;
6878 base_args.is_proj = proj != 0;
6879
6880 string expr;
6881 TextureFunctionNameArguments name_args = {};
6882
6883 name_args.base = base_args;
6884 name_args.has_array_offsets = coffsets != 0;
6885 name_args.has_offset = coffset != 0 || offset != 0;
6886 name_args.has_grad = grad_x != 0 || grad_y != 0;
6887 name_args.has_dref = dref != 0;
6888 name_args.is_sparse_feedback = sparse;
6889 name_args.has_min_lod = minlod != 0;
6890 name_args.lod = lod;
6891 expr += to_function_name(name_args);
6892 expr += "(";
6893
6894 uint32_t sparse_texel_id = 0;
6895 if (sparse)
6896 sparse_texel_id = get_sparse_feedback_texel_id(ops[1]);
6897
6898 TextureFunctionArguments args = {};
6899 args.base = base_args;
6900 args.coord = coord;
6901 args.coord_components = coord_components;
6902 args.dref = dref;
6903 args.grad_x = grad_x;
6904 args.grad_y = grad_y;
6905 args.lod = lod;
6906 args.coffset = coffset;
6907 args.offset = offset;
6908 args.bias = bias;
6909 args.component = comp;
6910 args.sample = sample;
6911 args.sparse_texel = sparse_texel_id;
6912 args.min_lod = minlod;
6913 args.nonuniform_expression = nonuniform_expression;
6914 expr += to_function_args(args, forward);
6915 expr += ")";
6916
6917 // texture(samplerXShadow) returns float. shadowX() returns vec4. Swizzle here.
6918 if (is_legacy() && is_depth_image(imgtype, img))
6919 expr += ".r";
6920
6921 // Sampling from a texture which was deduced to be a depth image might actually return 1 component here.
6922 // Remap back to 4 components as the sampling opcodes expect.
6923 if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op))
6924 {
6925 bool image_is_depth = false;
6926 const auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
6927 VariableID image_id = combined ? combined->image : img;
6928
6929 if (combined && is_depth_image(imgtype, combined->image))
6930 image_is_depth = true;
6931 else if (is_depth_image(imgtype, img))
6932 image_is_depth = true;
6933
6934 // We must also check the backing variable for the image.
6935 // We might have loaded an OpImage, and used that handle for two different purposes.
6936 // Once with comparison, once without.
6937 auto *image_variable = maybe_get_backing_variable(image_id);
6938 if (image_variable && is_depth_image(get<SPIRType>(image_variable->basetype), image_variable->self))
6939 image_is_depth = true;
6940
6941 if (image_is_depth)
6942 expr = remap_swizzle(result_type, 1, expr);
6943 }
6944
6945 if (!sparse && !backend.support_small_type_sampling_result && result_type.width < 32)
6946 {
6947 // Just value cast (narrowing) to the expected type since we cannot rely on narrowing to work automatically.
6948 // Hopefully the compiler picks this up and converts the texturing instruction to the appropriate precision.
6949 expr = join(type_to_glsl_constructor(result_type), "(", expr, ")");
6950 }
6951
6952 // Deals with reads from MSL. We might need to downconvert to fewer components.
6953 if (op == OpImageRead)
6954 expr = remap_swizzle(result_type, 4, expr);
6955
6956 return expr;
6957}
6958
6959bool CompilerGLSL::expression_is_constant_null(uint32_t id) const
6960{
6961 auto *c = maybe_get<SPIRConstant>(id);
6962 if (!c)
6963 return false;
6964 return c->constant_is_null();
6965}
6966
6967bool CompilerGLSL::expression_is_non_value_type_array(uint32_t ptr)
6968{
6969 auto &type = expression_type(ptr);
6970 if (type.array.empty())
6971 return false;
6972
6973 if (!backend.array_is_value_type)
6974 return true;
6975
6976 auto *var = maybe_get_backing_variable(ptr);
6977 if (!var)
6978 return false;
6979
6980 auto &backed_type = get<SPIRType>(var->basetype);
6981 return !backend.array_is_value_type_in_buffer_blocks && backed_type.basetype == SPIRType::Struct &&
6982 has_member_decoration(backed_type.self, 0, DecorationOffset);
6983}
6984
6985// Returns the function name for a texture sampling function for the specified image and sampling characteristics.
6986// For some subclasses, the function is a method on the specified image.
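// As a rough illustration of how the pieces concatenate:
//   sparse + Texture + Grad + Offset + Clamp + ARB -> "sparseTextureGradOffsetClampARB"
// with the result remapped through legacy_tex_op() on legacy targets.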
6987string CompilerGLSL::to_function_name(const TextureFunctionNameArguments &args)
6988{
6989 if (args.has_min_lod)
6990 {
6991 if (options.es)
6992 SPIRV_CROSS_THROW("Sparse residency is not supported in ESSL.");
6993 require_extension_internal("GL_ARB_sparse_texture_clamp");
6994 }
6995
6996 string fname;
6997 auto &imgtype = *args.base.imgtype;
6998 VariableID tex = args.base.img;
6999
7000 // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
7001 // To emulate this, we will have to use textureGrad with a constant gradient of 0.
7002 // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
7003 // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
7004 bool workaround_lod_array_shadow_as_grad = false;
7005 if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
7006 is_depth_image(imgtype, tex) && args.lod)
7007 {
7008 if (!expression_is_constant_null(args.lod))
7009 {
7010 SPIRV_CROSS_THROW("textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be "
7011 "expressed in GLSL.");
7012 }
7013 workaround_lod_array_shadow_as_grad = true;
7014 }
7015
7016 if (args.is_sparse_feedback)
7017 fname += "sparse";
7018
7019 if (args.base.is_fetch)
7020 fname += args.is_sparse_feedback ? "TexelFetch" : "texelFetch";
7021 else
7022 {
7023 fname += args.is_sparse_feedback ? "Texture" : "texture";
7024
7025 if (args.base.is_gather)
7026 fname += "Gather";
7027 if (args.has_array_offsets)
7028 fname += "Offsets";
7029 if (args.base.is_proj)
7030 fname += "Proj";
7031 if (args.has_grad || workaround_lod_array_shadow_as_grad)
7032 fname += "Grad";
7033 if (args.lod != 0 && !workaround_lod_array_shadow_as_grad)
7034 fname += "Lod";
7035 }
7036
7037 if (args.has_offset)
7038 fname += "Offset";
7039
7040 if (args.has_min_lod)
7041 fname += "Clamp";
7042
7043 if (args.is_sparse_feedback || args.has_min_lod)
7044 fname += "ARB";
7045
7046 return (is_legacy() && !args.base.is_gather) ? legacy_tex_op(fname, imgtype, tex) : fname;
7047}
7048
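// Builds the expression used when fetching from a separate (non-combined) image.
// With Vulkan semantics this either constructs a sampled image with the internal dummy sampler, roughly
//   texture2D uTex; sampler _dummy;  ->  sampler2D(uTex, _dummy)   (names illustrative),
// or relies on GL_EXT_samplerless_texture_functions. For plain GLSL targets the dummy sampler must have
// been created through build_dummy_sampler_for_combined_images() so the pair can be remapped.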
7049std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id)
7050{
7051 auto *var = maybe_get_backing_variable(id);
7052
7053 // If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL.
7054 // In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions.
7055 if (var)
7056 {
7057 auto &type = get<SPIRType>(var->basetype);
7058 if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
7059 {
7060 if (options.vulkan_semantics)
7061 {
7062 if (dummy_sampler_id)
7063 {
7064 // Don't need to consider Shadow state since the dummy sampler is always non-shadow.
7065 auto sampled_type = type;
7066 sampled_type.basetype = SPIRType::SampledImage;
7067 return join(type_to_glsl(sampled_type), "(", to_non_uniform_aware_expression(id), ", ",
7068 to_expression(dummy_sampler_id), ")");
7069 }
7070 else
7071 {
7072 // Newer glslang supports this extension to deal with texture2D as argument to texture functions.
7073 require_extension_internal("GL_EXT_samplerless_texture_functions");
7074 }
7075 }
7076 else
7077 {
7078 if (!dummy_sampler_id)
7079 SPIRV_CROSS_THROW("Cannot find dummy sampler ID. Was "
7080 "build_dummy_sampler_for_combined_images() called?");
7081
7082 return to_combined_image_sampler(id, dummy_sampler_id);
7083 }
7084 }
7085 }
7086
7087 return to_non_uniform_aware_expression(id);
7088}
7089
7090// Returns the function args for a texture sampling function for the specified image and sampling characteristics.
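// Arguments are assembled in a fixed order: image/sampler expression, coordinate (with dref folded into
// a merged vector where GLSL expects it), gradients, LOD, offset, sample index, min LOD, sparse texel
// out-parameter, then bias/component. Sketch of a typical shadow-compare call (names illustrative):
//   texture(uShadowSampler, vec3(uv, dref))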
7091string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward)
7092{
7093 VariableID img = args.base.img;
7094 auto &imgtype = *args.base.imgtype;
7095
7096 string farg_str;
7097 if (args.base.is_fetch)
7098 farg_str = convert_separate_image_to_expression(img);
7099 else
7100 farg_str = to_non_uniform_aware_expression(img);
7101
7102 if (args.nonuniform_expression && farg_str.find_first_of('[') != string::npos)
7103 {
7104 // Only emit nonuniformEXT() wrapper if the underlying expression is arrayed in some way.
7105 farg_str = join(backend.nonuniform_qualifier, "(", farg_str, ")");
7106 }
7107
7108 bool swizz_func = backend.swizzle_is_function;
7109 auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * {
7110 if (comps == in_comps)
7111 return "";
7112
7113 switch (comps)
7114 {
7115 case 1:
7116 return ".x";
7117 case 2:
7118 return swizz_func ? ".xy()" : ".xy";
7119 case 3:
7120 return swizz_func ? ".xyz()" : ".xyz";
7121 default:
7122 return "";
7123 }
7124 };
7125
7126 bool forward = should_forward(args.coord);
7127
7128 // The IR can give us more components than we need, so chop them off as needed.
7129 auto swizzle_expr = swizzle(args.coord_components, expression_type(args.coord).vecsize);
7130 // Only enclose the UV expression if needed.
7131 auto coord_expr =
7132 (*swizzle_expr == '\0') ? to_expression(args.coord) : (to_enclosed_expression(args.coord) + swizzle_expr);
7133
7134 // texelFetch only takes int, not uint.
7135 auto &coord_type = expression_type(args.coord);
7136 if (coord_type.basetype == SPIRType::UInt)
7137 {
7138 auto expected_type = coord_type;
7139 expected_type.vecsize = args.coord_components;
7140 expected_type.basetype = SPIRType::Int;
7141 coord_expr = bitcast_expression(expected_type, coord_type.basetype, coord_expr);
7142 }
7143
7144 // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
7145 // To emulate this, we will have to use textureGrad with a constant gradient of 0.
7146 // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
7147 // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
7148 bool workaround_lod_array_shadow_as_grad =
7149 ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
7150 is_depth_image(imgtype, img) && args.lod != 0;
7151
7152 if (args.dref)
7153 {
7154 forward = forward && should_forward(args.dref);
7155
7156 // SPIR-V splits dref and coordinate.
7157 if (args.base.is_gather ||
7158 args.coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather.
7159 {
7160 farg_str += ", ";
7161 farg_str += to_expression(args.coord);
7162 farg_str += ", ";
7163 farg_str += to_expression(args.dref);
7164 }
7165 else if (args.base.is_proj)
7166 {
7167 // Have to reshuffle so we get vec4(coord, dref, proj); this is a special case.
7168 // Other shading languages split up the coordinate and compare value arguments like SPIR-V does.
7169 // The coordinate type for textureProj shadow is always vec4, even for sampler1DShadow.
7170 farg_str += ", vec4(";
7171
7172 if (imgtype.image.dim == Dim1D)
7173 {
7174 // Could reuse coord_expr, but that would mess up the temporary usage checking.
7175 farg_str += to_enclosed_expression(args.coord) + ".x";
7176 farg_str += ", ";
7177 farg_str += "0.0, ";
7178 farg_str += to_expression(args.dref);
7179 farg_str += ", ";
7180 farg_str += to_enclosed_expression(args.coord) + ".y)";
7181 }
7182 else if (imgtype.image.dim == Dim2D)
7183 {
7184 // Could reuse coord_expr, but that would mess up the temporary usage checking.
7185 farg_str += to_enclosed_expression(args.coord) + (swizz_func ? ".xy()" : ".xy");
7186 farg_str += ", ";
7187 farg_str += to_expression(args.dref);
7188 farg_str += ", ";
7189 farg_str += to_enclosed_expression(args.coord) + ".z)";
7190 }
7191 else
7192 SPIRV_CROSS_THROW("Invalid type for textureProj with shadow.");
7193 }
7194 else
7195 {
7196 // Create a composite which merges coord/dref into a single vector.
7197 auto type = expression_type(args.coord);
7198 type.vecsize = args.coord_components + 1;
7199 farg_str += ", ";
7200 farg_str += type_to_glsl_constructor(type);
7201 farg_str += "(";
7202 farg_str += coord_expr;
7203 farg_str += ", ";
7204 farg_str += to_expression(args.dref);
7205 farg_str += ")";
7206 }
7207 }
7208 else
7209 {
7210 farg_str += ", ";
7211 farg_str += coord_expr;
7212 }
7213
7214 if (args.grad_x || args.grad_y)
7215 {
7216 forward = forward && should_forward(args.grad_x);
7217 forward = forward && should_forward(args.grad_y);
7218 farg_str += ", ";
7219 farg_str += to_expression(args.grad_x);
7220 farg_str += ", ";
7221 farg_str += to_expression(args.grad_y);
7222 }
7223
7224 if (args.lod)
7225 {
7226 if (workaround_lod_array_shadow_as_grad)
7227 {
7228 // Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0.
7229 // Implementing this as plain texture() is not safe on some implementations.
7230 if (imgtype.image.dim == Dim2D)
7231 farg_str += ", vec2(0.0), vec2(0.0)";
7232 else if (imgtype.image.dim == DimCube)
7233 farg_str += ", vec3(0.0), vec3(0.0)";
7234 }
7235 else
7236 {
7237 forward = forward && should_forward(args.lod);
7238 farg_str += ", ";
7239
7240 auto &lod_expr_type = expression_type(args.lod);
7241
7242 // Lod expression for TexelFetch in GLSL must be int, and only int.
7243 if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms &&
7244 lod_expr_type.basetype != SPIRType::Int)
7245 {
7246 farg_str += join("int(", to_expression(args.lod), ")");
7247 }
7248 else
7249 {
7250 farg_str += to_expression(args.lod);
7251 }
7252 }
7253 }
7254 else if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
7255 {
7256 // The Lod argument is optional in OpImageFetch, but texelFetch() requires an LOD value, so pick 0 as the default.
7257 farg_str += ", 0";
7258 }
7259
7260 if (args.coffset)
7261 {
7262 forward = forward && should_forward(args.coffset);
7263 farg_str += ", ";
7264 farg_str += to_expression(args.coffset);
7265 }
7266 else if (args.offset)
7267 {
7268 forward = forward && should_forward(args.offset);
7269 farg_str += ", ";
7270 farg_str += to_expression(args.offset);
7271 }
7272
7273 if (args.sample)
7274 {
7275 farg_str += ", ";
7276 farg_str += to_expression(args.sample);
7277 }
7278
7279 if (args.min_lod)
7280 {
7281 farg_str += ", ";
7282 farg_str += to_expression(args.min_lod);
7283 }
7284
7285 if (args.sparse_texel)
7286 {
7287 // The sparse texel output parameter comes after everything else, except that it goes before the optional component/bias arguments.
7288 farg_str += ", ";
7289 farg_str += to_expression(args.sparse_texel);
7290 }
7291
7292 if (args.bias)
7293 {
7294 forward = forward && should_forward(args.bias);
7295 farg_str += ", ";
7296 farg_str += to_expression(args.bias);
7297 }
7298
7299 if (args.component && !expression_is_constant_null(args.component))
7300 {
7301 forward = forward && should_forward(args.component);
7302 farg_str += ", ";
7303 auto &component_type = expression_type(args.component);
7304 if (component_type.basetype == SPIRType::Int)
7305 farg_str += to_expression(args.component);
7306 else
7307 farg_str += join("int(", to_expression(args.component), ")");
7308 }
7309
7310 *p_forward = forward;
7311
7312 return farg_str;
7313}
7314
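// Dispatches GLSL.std.450 extended instructions. Most map 1:1 to a GLSL builtin; the interesting cases
// are legacy fallbacks and implicit sign casts, e.g. (illustrative):
//   GLSLstd450Round on legacy targets -> floor(x + vec4(0.5))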
7315void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length)
7316{
7317 auto op = static_cast<GLSLstd450>(eop);
7318
7319 if (is_legacy() && is_unsigned_glsl_opcode(op))
7320 SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets.");
7321
7322 // If we need to do implicit bitcasts, make sure we do it with the correct type.
7323 uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, length);
7324 auto int_type = to_signed_basetype(integer_width);
7325 auto uint_type = to_unsigned_basetype(integer_width);
7326
7327 switch (op)
7328 {
7329 // FP fiddling
7330 case GLSLstd450Round:
7331 if (!is_legacy())
7332 emit_unary_func_op(result_type, id, args[0], "round");
7333 else
7334 {
7335 auto op0 = to_enclosed_expression(args[0]);
7336 auto &op0_type = expression_type(args[0]);
7337 auto expr = join("floor(", op0, " + ", type_to_glsl_constructor(op0_type), "(0.5))");
7338 bool forward = should_forward(args[0]);
7339 emit_op(result_type, id, expr, forward);
7340 inherit_expression_dependencies(id, args[0]);
7341 }
7342 break;
7343
7344 case GLSLstd450RoundEven:
7345 if (!is_legacy())
7346 emit_unary_func_op(result_type, id, args[0], "roundEven");
7347 else if (!options.es)
7348 {
7349 // This extension provides round() with round-to-even semantics.
7350 require_extension_internal("GL_EXT_gpu_shader4");
7351 emit_unary_func_op(result_type, id, args[0], "round");
7352 }
7353 else
7354 SPIRV_CROSS_THROW("roundEven supported only in ESSL 300.");
7355 break;
7356
7357 case GLSLstd450Trunc:
7358 emit_unary_func_op(result_type, id, args[0], "trunc");
7359 break;
7360 case GLSLstd450SAbs:
7361 emit_unary_func_op_cast(result_type, id, args[0], "abs", int_type, int_type);
7362 break;
7363 case GLSLstd450FAbs:
7364 emit_unary_func_op(result_type, id, args[0], "abs");
7365 break;
7366 case GLSLstd450SSign:
7367 emit_unary_func_op_cast(result_type, id, args[0], "sign", int_type, int_type);
7368 break;
7369 case GLSLstd450FSign:
7370 emit_unary_func_op(result_type, id, args[0], "sign");
7371 break;
7372 case GLSLstd450Floor:
7373 emit_unary_func_op(result_type, id, args[0], "floor");
7374 break;
7375 case GLSLstd450Ceil:
7376 emit_unary_func_op(result_type, id, args[0], "ceil");
7377 break;
7378 case GLSLstd450Fract:
7379 emit_unary_func_op(result_type, id, args[0], "fract");
7380 break;
7381 case GLSLstd450Radians:
7382 emit_unary_func_op(result_type, id, args[0], "radians");
7383 break;
7384 case GLSLstd450Degrees:
7385 emit_unary_func_op(result_type, id, args[0], "degrees");
7386 break;
7387 case GLSLstd450Fma:
7388 if ((!options.es && options.version < 400) || (options.es && options.version < 320))
7389 {
7390 auto expr = join(to_enclosed_expression(args[0]), " * ", to_enclosed_expression(args[1]), " + ",
7391 to_enclosed_expression(args[2]));
7392
7393 emit_op(result_type, id, expr,
7394 should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2]));
7395 for (uint32_t i = 0; i < 3; i++)
7396 inherit_expression_dependencies(id, args[i]);
7397 }
7398 else
7399 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma");
7400 break;
7401 case GLSLstd450Modf:
7402 register_call_out_argument(args[1]);
7403 forced_temporaries.insert(id);
7404 emit_binary_func_op(result_type, id, args[0], args[1], "modf");
7405 break;
7406
7407 case GLSLstd450ModfStruct:
7408 {
7409 auto &type = get<SPIRType>(result_type);
7410 emit_uninitialized_temporary_expression(result_type, id);
7411 statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ",
7412 to_expression(id), ".", to_member_name(type, 1), ");");
7413 break;
7414 }
7415
7416 // Minmax
7417 case GLSLstd450UMin:
7418 emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", uint_type, false);
7419 break;
7420
7421 case GLSLstd450SMin:
7422 emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", int_type, false);
7423 break;
7424
7425 case GLSLstd450FMin:
7426 emit_binary_func_op(result_type, id, args[0], args[1], "min");
7427 break;
7428
7429 case GLSLstd450FMax:
7430 emit_binary_func_op(result_type, id, args[0], args[1], "max");
7431 break;
7432
7433 case GLSLstd450UMax:
7434 emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", uint_type, false);
7435 break;
7436
7437 case GLSLstd450SMax:
7438 emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", int_type, false);
7439 break;
7440
7441 case GLSLstd450FClamp:
7442 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
7443 break;
7444
7445 case GLSLstd450UClamp:
7446 emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", uint_type);
7447 break;
7448
7449 case GLSLstd450SClamp:
7450 emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", int_type);
7451 break;
7452
7453 // Trig
7454 case GLSLstd450Sin:
7455 emit_unary_func_op(result_type, id, args[0], "sin");
7456 break;
7457 case GLSLstd450Cos:
7458 emit_unary_func_op(result_type, id, args[0], "cos");
7459 break;
7460 case GLSLstd450Tan:
7461 emit_unary_func_op(result_type, id, args[0], "tan");
7462 break;
7463 case GLSLstd450Asin:
7464 emit_unary_func_op(result_type, id, args[0], "asin");
7465 break;
7466 case GLSLstd450Acos:
7467 emit_unary_func_op(result_type, id, args[0], "acos");
7468 break;
7469 case GLSLstd450Atan:
7470 emit_unary_func_op(result_type, id, args[0], "atan");
7471 break;
7472 case GLSLstd450Sinh:
7473 emit_unary_func_op(result_type, id, args[0], "sinh");
7474 break;
7475 case GLSLstd450Cosh:
7476 emit_unary_func_op(result_type, id, args[0], "cosh");
7477 break;
7478 case GLSLstd450Tanh:
7479 emit_unary_func_op(result_type, id, args[0], "tanh");
7480 break;
7481 case GLSLstd450Asinh:
7482 emit_unary_func_op(result_type, id, args[0], "asinh");
7483 break;
7484 case GLSLstd450Acosh:
7485 emit_unary_func_op(result_type, id, args[0], "acosh");
7486 break;
7487 case GLSLstd450Atanh:
7488 emit_unary_func_op(result_type, id, args[0], "atanh");
7489 break;
7490 case GLSLstd450Atan2:
7491 emit_binary_func_op(result_type, id, args[0], args[1], "atan");
7492 break;
7493
7494 // Exponentials
7495 case GLSLstd450Pow:
7496 emit_binary_func_op(result_type, id, args[0], args[1], "pow");
7497 break;
7498 case GLSLstd450Exp:
7499 emit_unary_func_op(result_type, id, args[0], "exp");
7500 break;
7501 case GLSLstd450Log:
7502 emit_unary_func_op(result_type, id, args[0], "log");
7503 break;
7504 case GLSLstd450Exp2:
7505 emit_unary_func_op(result_type, id, args[0], "exp2");
7506 break;
7507 case GLSLstd450Log2:
7508 emit_unary_func_op(result_type, id, args[0], "log2");
7509 break;
7510 case GLSLstd450Sqrt:
7511 emit_unary_func_op(result_type, id, args[0], "sqrt");
7512 break;
7513 case GLSLstd450InverseSqrt:
7514 emit_unary_func_op(result_type, id, args[0], "inversesqrt");
7515 break;
7516
7517 // Matrix math
7518 case GLSLstd450Determinant:
7519 emit_unary_func_op(result_type, id, args[0], "determinant");
7520 break;
7521 case GLSLstd450MatrixInverse:
7522 emit_unary_func_op(result_type, id, args[0], "inverse");
7523 break;
7524
7525 // Lerping
7526 case GLSLstd450FMix:
7527 case GLSLstd450IMix:
7528 {
7529 emit_mix_op(result_type, id, args[0], args[1], args[2]);
7530 break;
7531 }
7532 case GLSLstd450Step:
7533 emit_binary_func_op(result_type, id, args[0], args[1], "step");
7534 break;
7535 case GLSLstd450SmoothStep:
7536 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep");
7537 break;
7538
7539 // Packing
7540 case GLSLstd450Frexp:
7541 register_call_out_argument(args[1]);
7542 forced_temporaries.insert(id);
7543 emit_binary_func_op(result_type, id, args[0], args[1], "frexp");
7544 break;
7545
7546 case GLSLstd450FrexpStruct:
7547 {
7548 auto &type = get<SPIRType>(result_type);
7549 emit_uninitialized_temporary_expression(result_type, id);
7550 statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ",
7551 to_expression(id), ".", to_member_name(type, 1), ");");
7552 break;
7553 }
7554
7555 case GLSLstd450Ldexp:
7556 {
7557 bool forward = should_forward(args[0]) && should_forward(args[1]);
7558
7559 auto op0 = to_unpacked_expression(args[0]);
7560 auto op1 = to_unpacked_expression(args[1]);
7561 auto &op1_type = expression_type(args[1]);
7562 if (op1_type.basetype != SPIRType::Int)
7563 {
7564 // Need a value cast here.
7565 auto target_type = op1_type;
7566 target_type.basetype = SPIRType::Int;
7567 op1 = join(type_to_glsl_constructor(target_type), "(", op1, ")");
7568 }
7569
7570 auto expr = join("ldexp(", op0, ", ", op1, ")");
7571
7572 emit_op(result_type, id, expr, forward);
7573 inherit_expression_dependencies(id, args[0]);
7574 inherit_expression_dependencies(id, args[1]);
7575 break;
7576 }
7577
7578 case GLSLstd450PackSnorm4x8:
7579 emit_unary_func_op(result_type, id, args[0], "packSnorm4x8");
7580 break;
7581 case GLSLstd450PackUnorm4x8:
7582 emit_unary_func_op(result_type, id, args[0], "packUnorm4x8");
7583 break;
7584 case GLSLstd450PackSnorm2x16:
7585 emit_unary_func_op(result_type, id, args[0], "packSnorm2x16");
7586 break;
7587 case GLSLstd450PackUnorm2x16:
7588 emit_unary_func_op(result_type, id, args[0], "packUnorm2x16");
7589 break;
7590 case GLSLstd450PackHalf2x16:
7591 emit_unary_func_op(result_type, id, args[0], "packHalf2x16");
7592 break;
7593 case GLSLstd450UnpackSnorm4x8:
7594 emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8");
7595 break;
7596 case GLSLstd450UnpackUnorm4x8:
7597 emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8");
7598 break;
7599 case GLSLstd450UnpackSnorm2x16:
7600 emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16");
7601 break;
7602 case GLSLstd450UnpackUnorm2x16:
7603 emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16");
7604 break;
7605 case GLSLstd450UnpackHalf2x16:
7606 emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16");
7607 break;
7608
7609 case GLSLstd450PackDouble2x32:
7610 emit_unary_func_op(result_type, id, args[0], "packDouble2x32");
7611 break;
7612 case GLSLstd450UnpackDouble2x32:
7613 emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32");
7614 break;
7615
7616 // Vector math
7617 case GLSLstd450Length:
7618 emit_unary_func_op(result_type, id, args[0], "length");
7619 break;
7620 case GLSLstd450Distance:
7621 emit_binary_func_op(result_type, id, args[0], args[1], "distance");
7622 break;
7623 case GLSLstd450Cross:
7624 emit_binary_func_op(result_type, id, args[0], args[1], "cross");
7625 break;
7626 case GLSLstd450Normalize:
7627 emit_unary_func_op(result_type, id, args[0], "normalize");
7628 break;
7629 case GLSLstd450FaceForward:
7630 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward");
7631 break;
7632 case GLSLstd450Reflect:
7633 emit_binary_func_op(result_type, id, args[0], args[1], "reflect");
7634 break;
7635 case GLSLstd450Refract:
7636 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract");
7637 break;
7638
7639 // Bit-fiddling
7640 case GLSLstd450FindILsb:
7641 // findLSB always returns int.
7642 emit_unary_func_op_cast(result_type, id, args[0], "findLSB", expression_type(args[0]).basetype, int_type);
7643 break;
7644
7645 case GLSLstd450FindSMsb:
7646 emit_unary_func_op_cast(result_type, id, args[0], "findMSB", int_type, int_type);
7647 break;
7648
7649 case GLSLstd450FindUMsb:
7650 emit_unary_func_op_cast(result_type, id, args[0], "findMSB", uint_type,
7651 int_type); // findMSB always returns int.
7652 break;
7653
7654 // Multisampled varying
7655 case GLSLstd450InterpolateAtCentroid:
7656 emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid");
7657 break;
7658 case GLSLstd450InterpolateAtSample:
7659 emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample");
7660 break;
7661 case GLSLstd450InterpolateAtOffset:
7662 emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset");
7663 break;
7664
7665 case GLSLstd450NMin:
7666 case GLSLstd450NMax:
7667 {
7668 emit_nminmax_op(result_type, id, args[0], args[1], op);
7669 break;
7670 }
7671
7672 case GLSLstd450NClamp:
7673 {
7674 // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op.
7675 // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags.
7676 uint32_t &max_id = extra_sub_expressions[id | EXTRA_SUB_EXPRESSION_TYPE_AUX];
7677 if (!max_id)
7678 max_id = ir.increase_bound_by(1);
7679
7680 // Inherit precision qualifiers.
7681 ir.meta[max_id] = ir.meta[id];
7682
7683 emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax);
7684 emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin);
7685 break;
7686 }
7687
7688 default:
7689 statement("// unimplemented GLSL op ", eop);
7690 break;
7691 }
7692}
7693
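// Emulates NMin/NMax, which must return the non-NaN operand when the other operand is NaN,
// something plain GLSL min()/max() leave undefined. Conceptually (illustrative temporaries):
//   tmp    = min(a, b);             // or max() for NMax
//   tmp2   = isnan(a) ? b : tmp;
//   result = isnan(b) ? a : tmp2;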
7694void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op)
7695{
7696 // Need to emulate this call.
7697 uint32_t &ids = extra_sub_expressions[id];
7698 if (!ids)
7699 {
7700 ids = ir.increase_bound_by(5);
7701 auto btype = get<SPIRType>(result_type);
7702 btype.basetype = SPIRType::Boolean;
7703 set<SPIRType>(ids, btype);
7704 }
7705
7706 uint32_t btype_id = ids + 0;
7707 uint32_t left_nan_id = ids + 1;
7708 uint32_t right_nan_id = ids + 2;
7709 uint32_t tmp_id = ids + 3;
7710 uint32_t mixed_first_id = ids + 4;
7711
7712 // Inherit precision qualifiers.
7713 ir.meta[tmp_id] = ir.meta[id];
7714 ir.meta[mixed_first_id] = ir.meta[id];
7715
7716 emit_unary_func_op(btype_id, left_nan_id, op0, "isnan");
7717 emit_unary_func_op(btype_id, right_nan_id, op1, "isnan");
7718 emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max");
7719 emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id);
7720 emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id);
7721}
7722
7723void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
7724 uint32_t)
7725{
7726 require_extension_internal("GL_AMD_shader_ballot");
7727
7728 enum AMDShaderBallot
7729 {
7730 SwizzleInvocationsAMD = 1,
7731 SwizzleInvocationsMaskedAMD = 2,
7732 WriteInvocationAMD = 3,
7733 MbcntAMD = 4
7734 };
7735
7736 auto op = static_cast<AMDShaderBallot>(eop);
7737
7738 switch (op)
7739 {
7740 case SwizzleInvocationsAMD:
7741 emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD");
7742 register_control_dependent_expression(id);
7743 break;
7744
7745 case SwizzleInvocationsMaskedAMD:
7746 emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD");
7747 register_control_dependent_expression(id);
7748 break;
7749
7750 case WriteInvocationAMD:
7751 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD");
7752 register_control_dependent_expression(id);
7753 break;
7754
7755 case MbcntAMD:
7756 emit_unary_func_op(result_type, id, args[0], "mbcntAMD");
7757 register_control_dependent_expression(id);
7758 break;
7759
7760 default:
7761 statement("// unimplemented SPV AMD shader ballot op ", eop);
7762 break;
7763 }
7764}
7765
7766void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop,
7767 const uint32_t *args, uint32_t)
7768{
7769 require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");
7770
7771 enum AMDShaderExplicitVertexParameter
7772 {
7773 InterpolateAtVertexAMD = 1
7774 };
7775
7776 auto op = static_cast<AMDShaderExplicitVertexParameter>(eop);
7777
7778 switch (op)
7779 {
7780 case InterpolateAtVertexAMD:
7781 emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD");
7782 break;
7783
7784 default:
7785 statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop);
7786 break;
7787 }
7788}
7789
7790void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop,
7791 const uint32_t *args, uint32_t)
7792{
7793 require_extension_internal("GL_AMD_shader_trinary_minmax");
7794
7795 enum AMDShaderTrinaryMinMax
7796 {
7797 FMin3AMD = 1,
7798 UMin3AMD = 2,
7799 SMin3AMD = 3,
7800 FMax3AMD = 4,
7801 UMax3AMD = 5,
7802 SMax3AMD = 6,
7803 FMid3AMD = 7,
7804 UMid3AMD = 8,
7805 SMid3AMD = 9
7806 };
7807
7808 auto op = static_cast<AMDShaderTrinaryMinMax>(eop);
7809
7810 switch (op)
7811 {
7812 case FMin3AMD:
7813 case UMin3AMD:
7814 case SMin3AMD:
7815 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3");
7816 break;
7817
7818 case FMax3AMD:
7819 case UMax3AMD:
7820 case SMax3AMD:
7821 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3");
7822 break;
7823
7824 case FMid3AMD:
7825 case UMid3AMD:
7826 case SMid3AMD:
7827 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3");
7828 break;
7829
7830 default:
7831 statement("// unimplemented SPV AMD shader trinary minmax op ", eop);
7832 break;
7833 }
7834}
7835
7836void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
7837 uint32_t)
7838{
7839 require_extension_internal("GL_AMD_gcn_shader");
7840
7841 enum AMDGCNShader
7842 {
7843 CubeFaceIndexAMD = 1,
7844 CubeFaceCoordAMD = 2,
7845 TimeAMD = 3
7846 };
7847
7848 auto op = static_cast<AMDGCNShader>(eop);
7849
7850 switch (op)
7851 {
7852 case CubeFaceIndexAMD:
7853 emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD");
7854 break;
7855 case CubeFaceCoordAMD:
7856 emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD");
7857 break;
7858 case TimeAMD:
7859 {
7860 string expr = "timeAMD()";
7861 emit_op(result_type, id, expr, true);
7862 register_control_dependent_expression(id);
7863 break;
7864 }
7865
7866 default:
7867 statement("// unimplemented SPV AMD gcn shader op ", eop);
7868 break;
7869 }
7870}
7871
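// Emits OpGroupNonUniform* instructions. With Vulkan semantics these map to GL_KHR_shader_subgroup_*
// builtins such as subgroupBroadcastFirst(value); for plain OpenGL only the subset accepted by
// is_supported_subgroup_op_in_opengl() is allowed, and support is requested via request_subgroup_feature().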
7872void CompilerGLSL::emit_subgroup_op(const Instruction &i)
7873{
7874 const uint32_t *ops = stream(i);
7875 auto op = static_cast<Op>(i.op);
7876
7877 if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op))
7878 SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics.");
7879
7880 // If we need to do implicit bitcasts, make sure we do it with the correct type.
7881 uint32_t integer_width = get_integer_width_for_instruction(i);
7882 auto int_type = to_signed_basetype(integer_width);
7883 auto uint_type = to_unsigned_basetype(integer_width);
7884
7885 switch (op)
7886 {
7887 case OpGroupNonUniformElect:
7888 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupElect);
7889 break;
7890
7891 case OpGroupNonUniformBallotBitCount:
7892 {
7893 const GroupOperation operation = static_cast<GroupOperation>(ops[3]);
7894 if (operation == GroupOperationReduce)
7895 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitCount);
7896 else if (operation == GroupOperationInclusiveScan || operation == GroupOperationExclusiveScan)
7897 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
7898 }
7899 break;
7900
7901 case OpGroupNonUniformBallotBitExtract:
7902 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitExtract);
7903 break;
7904
7905 case OpGroupNonUniformInverseBallot:
7906 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
7907 break;
7908
7909 case OpGroupNonUniformBallot:
7910 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallot);
7911 break;
7912
7913 case OpGroupNonUniformBallotFindLSB:
7914 case OpGroupNonUniformBallotFindMSB:
7915 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotFindLSB_MSB);
7916 break;
7917
7918 case OpGroupNonUniformBroadcast:
7919 case OpGroupNonUniformBroadcastFirst:
7920 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBroadcast_First);
7921 break;
7922
7923 case OpGroupNonUniformShuffle:
7924 case OpGroupNonUniformShuffleXor:
7925 require_extension_internal("GL_KHR_shader_subgroup_shuffle");
7926 break;
7927
7928 case OpGroupNonUniformShuffleUp:
7929 case OpGroupNonUniformShuffleDown:
7930 require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative");
7931 break;
7932
7933 case OpGroupNonUniformAll:
7934 case OpGroupNonUniformAny:
7935 case OpGroupNonUniformAllEqual:
7936 {
7937 const SPIRType &type = expression_type(ops[3]);
7938 if (type.basetype == SPIRType::BaseType::Boolean && type.vecsize == 1u)
7939 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAll_Any_AllEqualBool);
7940 else
7941 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAllEqualT);
7942 }
7943 break;
7944
7945 case OpGroupNonUniformFAdd:
7946 case OpGroupNonUniformFMul:
7947 case OpGroupNonUniformFMin:
7948 case OpGroupNonUniformFMax:
7949 case OpGroupNonUniformIAdd:
7950 case OpGroupNonUniformIMul:
7951 case OpGroupNonUniformSMin:
7952 case OpGroupNonUniformSMax:
7953 case OpGroupNonUniformUMin:
7954 case OpGroupNonUniformUMax:
7955 case OpGroupNonUniformBitwiseAnd:
7956 case OpGroupNonUniformBitwiseOr:
7957 case OpGroupNonUniformBitwiseXor:
7958 case OpGroupNonUniformLogicalAnd:
7959 case OpGroupNonUniformLogicalOr:
7960 case OpGroupNonUniformLogicalXor:
7961 {
7962 auto operation = static_cast<GroupOperation>(ops[3]);
7963 if (operation == GroupOperationClusteredReduce)
7964 {
7965 require_extension_internal("GL_KHR_shader_subgroup_clustered");
7966 }
7967 else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan ||
7968 operation == GroupOperationReduce)
7969 {
7970 require_extension_internal("GL_KHR_shader_subgroup_arithmetic");
7971 }
7972 else
7973 SPIRV_CROSS_THROW("Invalid group operation.");
7974 break;
7975 }
7976
7977 case OpGroupNonUniformQuadSwap:
7978 case OpGroupNonUniformQuadBroadcast:
7979 require_extension_internal("GL_KHR_shader_subgroup_quad");
7980 break;
7981
7982 default:
7983 SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
7984 }
7985
7986 uint32_t result_type = ops[0];
7987 uint32_t id = ops[1];
7988
7989 auto scope = static_cast<Scope>(evaluate_constant_u32(ops[2]));
7990 if (scope != ScopeSubgroup)
7991 SPIRV_CROSS_THROW("Only subgroup scope is supported.");
7992
7993 switch (op)
7994 {
7995 case OpGroupNonUniformElect:
7996 emit_op(result_type, id, "subgroupElect()", true);
7997 break;
7998
7999 case OpGroupNonUniformBroadcast:
8000 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast");
8001 break;
8002
8003 case OpGroupNonUniformBroadcastFirst:
8004 emit_unary_func_op(result_type, id, ops[3], "subgroupBroadcastFirst");
8005 break;
8006
8007 case OpGroupNonUniformBallot:
8008 emit_unary_func_op(result_type, id, ops[3], "subgroupBallot");
8009 break;
8010
8011 case OpGroupNonUniformInverseBallot:
8012 emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot");
8013 break;
8014
8015 case OpGroupNonUniformBallotBitExtract:
8016 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract");
8017 break;
8018
8019 case OpGroupNonUniformBallotFindLSB:
8020 emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB");
8021 break;
8022
8023 case OpGroupNonUniformBallotFindMSB:
8024 emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB");
8025 break;
8026
8027 case OpGroupNonUniformBallotBitCount:
8028 {
8029 auto operation = static_cast<GroupOperation>(ops[3]);
8030 if (operation == GroupOperationReduce)
8031 emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount");
8032 else if (operation == GroupOperationInclusiveScan)
8033 emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount");
8034 else if (operation == GroupOperationExclusiveScan)
8035 emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount");
8036 else
8037 SPIRV_CROSS_THROW("Invalid BitCount operation.");
8038 break;
8039 }
8040
8041 case OpGroupNonUniformShuffle:
8042 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle");
8043 break;
8044
8045 case OpGroupNonUniformShuffleXor:
8046 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor");
8047 break;
8048
8049 case OpGroupNonUniformShuffleUp:
8050 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp");
8051 break;
8052
8053 case OpGroupNonUniformShuffleDown:
8054 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown");
8055 break;
8056
8057 case OpGroupNonUniformAll:
8058 emit_unary_func_op(result_type, id, ops[3], "subgroupAll");
8059 break;
8060
8061 case OpGroupNonUniformAny:
8062 emit_unary_func_op(result_type, id, ops[3], "subgroupAny");
8063 break;
8064
8065 case OpGroupNonUniformAllEqual:
8066 emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual");
8067 break;
8068
8069 // clang-format off
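// The GLSL_GROUP_OP macros below expand each OpGroupNonUniform* arithmetic/bitwise/logical opcode
// into the matching subgroup*, subgroupInclusive*, subgroupExclusive* or subgroupClustered* call,
// selected by the GroupOperation operand (e.g. OpGroupNonUniformFAdd with InclusiveScan becomes
// subgroupInclusiveAdd). The _CAST variant additionally forces operand signedness for S/U min/max.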
8070#define GLSL_GROUP_OP(op, glsl_op) \
8071case OpGroupNonUniform##op: \
8072 { \
8073 auto operation = static_cast<GroupOperation>(ops[3]); \
8074 if (operation == GroupOperationReduce) \
8075 emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \
8076 else if (operation == GroupOperationInclusiveScan) \
8077 emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \
8078 else if (operation == GroupOperationExclusiveScan) \
8079 emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \
8080 else if (operation == GroupOperationClusteredReduce) \
8081 emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \
8082 else \
8083 SPIRV_CROSS_THROW("Invalid group operation."); \
8084 break; \
8085 }
8086
8087#define GLSL_GROUP_OP_CAST(op, glsl_op, type) \
8088case OpGroupNonUniform##op: \
8089 { \
8090 auto operation = static_cast<GroupOperation>(ops[3]); \
8091 if (operation == GroupOperationReduce) \
8092 emit_unary_func_op_cast(result_type, id, ops[4], "subgroup" #glsl_op, type, type); \
8093 else if (operation == GroupOperationInclusiveScan) \
8094 emit_unary_func_op_cast(result_type, id, ops[4], "subgroupInclusive" #glsl_op, type, type); \
8095 else if (operation == GroupOperationExclusiveScan) \
8096 emit_unary_func_op_cast(result_type, id, ops[4], "subgroupExclusive" #glsl_op, type, type); \
8097 else if (operation == GroupOperationClusteredReduce) \
8098 emit_binary_func_op_cast_clustered(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op, type); \
8099 else \
8100 SPIRV_CROSS_THROW("Invalid group operation."); \
8101 break; \
8102 }
8103
8104 GLSL_GROUP_OP(FAdd, Add)
8105 GLSL_GROUP_OP(FMul, Mul)
8106 GLSL_GROUP_OP(FMin, Min)
8107 GLSL_GROUP_OP(FMax, Max)
8108 GLSL_GROUP_OP(IAdd, Add)
8109 GLSL_GROUP_OP(IMul, Mul)
8110 GLSL_GROUP_OP_CAST(SMin, Min, int_type)
8111 GLSL_GROUP_OP_CAST(SMax, Max, int_type)
8112 GLSL_GROUP_OP_CAST(UMin, Min, uint_type)
8113 GLSL_GROUP_OP_CAST(UMax, Max, uint_type)
8114 GLSL_GROUP_OP(BitwiseAnd, And)
8115 GLSL_GROUP_OP(BitwiseOr, Or)
8116 GLSL_GROUP_OP(BitwiseXor, Xor)
8117 GLSL_GROUP_OP(LogicalAnd, And)
8118 GLSL_GROUP_OP(LogicalOr, Or)
8119 GLSL_GROUP_OP(LogicalXor, Xor)
8120#undef GLSL_GROUP_OP
8121#undef GLSL_GROUP_OP_CAST
8122 // clang-format on
8123
8124 case OpGroupNonUniformQuadSwap:
8125 {
8126 uint32_t direction = evaluate_constant_u32(ops[4]);
8127 if (direction == 0)
8128 emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapHorizontal");
8129 else if (direction == 1)
8130 emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapVertical");
8131 else if (direction == 2)
8132 emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapDiagonal");
8133 else
8134 SPIRV_CROSS_THROW("Invalid quad swap direction.");
8135 break;
8136 }
8137
8138 case OpGroupNonUniformQuadBroadcast:
8139 {
8140 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupQuadBroadcast");
8141 break;
8142 }
8143
8144 default:
8145 SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
8146 }
8147
8148 register_control_dependent_expression(id);
8149}
8150
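// Returns the GLSL helper used to reinterpret the bits of in_type as out_type
// (e.g. floatBitsToUint, packUint2x32), a plain type cast for pointer types,
// or an empty string when no conversion is required.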
8151string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
8152{
8153 // OpBitcast can deal with pointers.
8154 if (out_type.pointer || in_type.pointer)
8155 {
8156 if (out_type.vecsize == 2 || in_type.vecsize == 2)
8157 require_extension_internal("GL_EXT_buffer_reference_uvec2");
8158 return type_to_glsl(out_type);
8159 }
8160
8161 if (out_type.basetype == in_type.basetype)
8162 return "";
8163
8164 assert(out_type.basetype != SPIRType::Boolean);
8165 assert(in_type.basetype != SPIRType::Boolean);
8166
8167 bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type);
8168 bool same_size_cast = out_type.width == in_type.width;
8169
8170 // Trivial bitcast case, casts between integers.
8171 if (integral_cast && same_size_cast)
8172 return type_to_glsl(out_type);
8173
8174 // Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types).
8175 if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1)
8176 return "unpack8";
8177 else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1)
8178 return "pack16";
8179 else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1)
8180 return "pack32";
8181
8182 // Floating <-> Integer special casts. Just have to enumerate all cases. :(
8183 // 16-bit, 32-bit and 64-bit floats.
8184 if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
8185 {
8186 if (is_legacy_es())
8187 SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL.");
8188 else if (!options.es && options.version < 330)
8189 require_extension_internal("GL_ARB_shader_bit_encoding");
8190 return "floatBitsToUint";
8191 }
8192 else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
8193 {
8194 if (is_legacy_es())
8195 SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL.");
8196 else if (!options.es && options.version < 330)
8197 require_extension_internal("GL_ARB_shader_bit_encoding");
8198 return "floatBitsToInt";
8199 }
8200 else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
8201 {
8202 if (is_legacy_es())
8203 SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL.");
8204 else if (!options.es && options.version < 330)
8205 require_extension_internal("GL_ARB_shader_bit_encoding");
8206 return "uintBitsToFloat";
8207 }
8208 else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
8209 {
8210 if (is_legacy_es())
8211 SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL.");
8212 else if (!options.es && options.version < 330)
8213 require_extension_internal("GL_ARB_shader_bit_encoding");
8214 return "intBitsToFloat";
8215 }
8216
8217 else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double)
8218 return "doubleBitsToInt64";
8219 else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double)
8220 return "doubleBitsToUint64";
8221 else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64)
8222 return "int64BitsToDouble";
8223 else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
8224 return "uint64BitsToDouble";
8225 else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half)
8226 return "float16BitsToInt16";
8227 else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half)
8228 return "float16BitsToUint16";
8229 else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short)
8230 return "int16BitsToFloat16";
8231 else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort)
8232 return "uint16BitsToFloat16";
8233
8234 // And finally, some even more special purpose casts.
8235 if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2)
8236 return "packUint2x32";
8237 else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UInt64 && out_type.vecsize == 2)
8238 return "unpackUint2x32";
8239 else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
8240 return "unpackFloat2x16";
8241 else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
8242 return "packFloat2x16";
8243 else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2)
8244 return "packInt2x16";
8245 else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1)
8246 return "unpackInt2x16";
8247 else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2)
8248 return "packUint2x16";
8249 else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
8250 return "unpackUint2x16";
8251 else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4)
8252 return "packInt4x16";
8253 else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1)
8254 return "unpackInt4x16";
8255 else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4)
8256 return "packUint4x16";
8257 else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1)
8258 return "unpackUint4x16";
8259
8260 return "";
8261}
8262
8263string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument)
8264{
8265 auto op = bitcast_glsl_op(result_type, expression_type(argument));
8266 if (op.empty())
8267 return to_enclosed_unpacked_expression(argument);
8268 else
8269 return join(op, "(", to_unpacked_expression(argument), ")");
8270}
8271
8272std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg)
8273{
8274 auto expr = to_expression(arg);
8275 auto &src_type = expression_type(arg);
8276 if (src_type.basetype != target_type)
8277 {
8278 auto target = src_type;
8279 target.basetype = target_type;
8280 expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")");
8281 }
8282
8283 return expr;
8284}
8285
8286std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type,
8287 const std::string &expr)
8288{
8289 if (target_type.basetype == expr_type)
8290 return expr;
8291
8292 auto src_type = target_type;
8293 src_type.basetype = expr_type;
8294 return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")");
8295}
8296
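// Translates a SPIR-V BuiltIn (together with its storage class) into the corresponding GLSL
// builtin name, e.g. BuiltInFragCoord -> gl_FragCoord, enabling extensions or throwing when
// the current target profile cannot express the builtin.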
8297string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
8298{
8299 switch (builtin)
8300 {
8301 case BuiltInPosition:
8302 return "gl_Position";
8303 case BuiltInPointSize:
8304 return "gl_PointSize";
8305 case BuiltInClipDistance:
8306 return "gl_ClipDistance";
8307 case BuiltInCullDistance:
8308 return "gl_CullDistance";
8309 case BuiltInVertexId:
8310 if (options.vulkan_semantics)
8311 SPIRV_CROSS_THROW("Cannot implement gl_VertexID in Vulkan GLSL. This shader was created "
8312 "with GL semantics.");
8313 return "gl_VertexID";
8314 case BuiltInInstanceId:
8315 if (options.vulkan_semantics)
8316 {
8317 auto model = get_entry_point().model;
8318 switch (model)
8319 {
8320 case spv::ExecutionModelIntersectionKHR:
8321 case spv::ExecutionModelAnyHitKHR:
8322 case spv::ExecutionModelClosestHitKHR:
8323 // gl_InstanceID is allowed in these shaders.
8324 break;
8325
8326 default:
8327 SPIRV_CROSS_THROW("Cannot implement gl_InstanceID in Vulkan GLSL. This shader was "
8328 "created with GL semantics.");
8329 }
8330 }
8331 if (!options.es && options.version < 140)
8332 {
8333 require_extension_internal("GL_ARB_draw_instanced");
8334 }
8335 return "gl_InstanceID";
8336 case BuiltInVertexIndex:
8337 if (options.vulkan_semantics)
8338 return "gl_VertexIndex";
8339 else
8340 return "gl_VertexID"; // gl_VertexID already has the base offset applied.
8341 case BuiltInInstanceIndex:
8342 if (options.vulkan_semantics)
8343 return "gl_InstanceIndex";
8344
8345 if (!options.es && options.version < 140)
8346 {
8347 require_extension_internal("GL_ARB_draw_instanced");
8348 }
8349
8350 if (options.vertex.support_nonzero_base_instance)
8351 {
8352 if (!options.vulkan_semantics)
8353 {
8354 // This is a soft-enable. We will opt-in to using gl_BaseInstanceARB if supported.
8355 require_extension_internal("GL_ARB_shader_draw_parameters");
8356 }
8357 return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID.
8358 }
8359 else
8360 return "gl_InstanceID";
8361 case BuiltInPrimitiveId:
8362 if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry)
8363 return "gl_PrimitiveIDIn";
8364 else
8365 return "gl_PrimitiveID";
8366 case BuiltInInvocationId:
8367 return "gl_InvocationID";
8368 case BuiltInLayer:
8369 return "gl_Layer";
8370 case BuiltInViewportIndex:
8371 return "gl_ViewportIndex";
8372 case BuiltInTessLevelOuter:
8373 return "gl_TessLevelOuter";
8374 case BuiltInTessLevelInner:
8375 return "gl_TessLevelInner";
8376 case BuiltInTessCoord:
8377 return "gl_TessCoord";
8378 case BuiltInFragCoord:
8379 return "gl_FragCoord";
8380 case BuiltInPointCoord:
8381 return "gl_PointCoord";
8382 case BuiltInFrontFacing:
8383 return "gl_FrontFacing";
8384 case BuiltInFragDepth:
8385 return "gl_FragDepth";
8386 case BuiltInNumWorkgroups:
8387 return "gl_NumWorkGroups";
8388 case BuiltInWorkgroupSize:
8389 return "gl_WorkGroupSize";
8390 case BuiltInWorkgroupId:
8391 return "gl_WorkGroupID";
8392 case BuiltInLocalInvocationId:
8393 return "gl_LocalInvocationID";
8394 case BuiltInGlobalInvocationId:
8395 return "gl_GlobalInvocationID";
8396 case BuiltInLocalInvocationIndex:
8397 return "gl_LocalInvocationIndex";
8398 case BuiltInHelperInvocation:
8399 return "gl_HelperInvocation";
8400
8401 case BuiltInBaseVertex:
8402 if (options.es)
8403 SPIRV_CROSS_THROW("BaseVertex not supported in ES profile.");
8404
8405 if (options.vulkan_semantics)
8406 {
8407 if (options.version < 460)
8408 {
8409 require_extension_internal("GL_ARB_shader_draw_parameters");
8410 return "gl_BaseVertexARB";
8411 }
8412 return "gl_BaseVertex";
8413 }
8414 // On regular GL, this is soft-enabled and we emit ifdefs in code.
8415 require_extension_internal("GL_ARB_shader_draw_parameters");
8416 return "SPIRV_Cross_BaseVertex";
8417
8418 case BuiltInBaseInstance:
8419 if (options.es)
8420 SPIRV_CROSS_THROW("BaseInstance not supported in ES profile.");
8421
8422 if (options.vulkan_semantics)
8423 {
8424 if (options.version < 460)
8425 {
8426 require_extension_internal("GL_ARB_shader_draw_parameters");
8427 return "gl_BaseInstanceARB";
8428 }
8429 return "gl_BaseInstance";
8430 }
8431 // On regular GL, this is soft-enabled and we emit ifdefs in code.
8432 require_extension_internal("GL_ARB_shader_draw_parameters");
8433 return "SPIRV_Cross_BaseInstance";
8434
8435 case BuiltInDrawIndex:
8436 if (options.es)
8437 SPIRV_CROSS_THROW("DrawIndex not supported in ES profile.");
8438
8439 if (options.vulkan_semantics)
8440 {
8441 if (options.version < 460)
8442 {
8443 require_extension_internal("GL_ARB_shader_draw_parameters");
8444 return "gl_DrawIDARB";
8445 }
8446 return "gl_DrawID";
8447 }
8448 // On regular GL, this is soft-enabled and we emit ifdefs in code.
8449 require_extension_internal("GL_ARB_shader_draw_parameters");
8450 return "gl_DrawIDARB";
8451
8452 case BuiltInSampleId:
8453 if (options.es && options.version < 320)
8454 require_extension_internal("GL_OES_sample_variables");
8455 if (!options.es && options.version < 400)
8456 SPIRV_CROSS_THROW("gl_SampleID not supported before GLSL 400.");
8457 return "gl_SampleID";
8458
8459 case BuiltInSampleMask:
8460 if (options.es && options.version < 320)
8461 require_extension_internal("GL_OES_sample_variables");
8462 if (!options.es && options.version < 400)
8463 SPIRV_CROSS_THROW("gl_SampleMask/gl_SampleMaskIn not supported before GLSL 400.");
8464
8465 if (storage == StorageClassInput)
8466 return "gl_SampleMaskIn";
8467 else
8468 return "gl_SampleMask";
8469
8470 case BuiltInSamplePosition:
8471 if (options.es && options.version < 320)
8472 require_extension_internal("GL_OES_sample_variables");
8473 if (!options.es && options.version < 400)
8474 SPIRV_CROSS_THROW("gl_SamplePosition not supported before GLSL 400.");
8475 return "gl_SamplePosition";
8476
8477 case BuiltInViewIndex:
8478 if (options.vulkan_semantics)
8479 return "gl_ViewIndex";
8480 else
8481 return "gl_ViewID_OVR";
8482
8483 case BuiltInNumSubgroups:
8484 request_subgroup_feature(ShaderSubgroupSupportHelper::NumSubgroups);
8485 return "gl_NumSubgroups";
8486
8487 case BuiltInSubgroupId:
8488 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupID);
8489 return "gl_SubgroupID";
8490
8491 case BuiltInSubgroupSize:
8492 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupSize);
8493 return "gl_SubgroupSize";
8494
8495 case BuiltInSubgroupLocalInvocationId:
8496 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInvocationID);
8497 return "gl_SubgroupInvocationID";
8498
8499 case BuiltInSubgroupEqMask:
8500 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8501 return "gl_SubgroupEqMask";
8502
8503 case BuiltInSubgroupGeMask:
8504 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8505 return "gl_SubgroupGeMask";
8506
8507 case BuiltInSubgroupGtMask:
8508 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8509 return "gl_SubgroupGtMask";
8510
8511 case BuiltInSubgroupLeMask:
8512 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8513 return "gl_SubgroupLeMask";
8514
8515 case BuiltInSubgroupLtMask:
8516 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8517 return "gl_SubgroupLtMask";
8518
8519 case BuiltInLaunchIdKHR:
8520 return ray_tracing_is_khr ? "gl_LaunchIDEXT" : "gl_LaunchIDNV";
8521 case BuiltInLaunchSizeKHR:
8522 return ray_tracing_is_khr ? "gl_LaunchSizeEXT" : "gl_LaunchSizeNV";
8523 case BuiltInWorldRayOriginKHR:
8524 return ray_tracing_is_khr ? "gl_WorldRayOriginEXT" : "gl_WorldRayOriginNV";
8525 case BuiltInWorldRayDirectionKHR:
8526 return ray_tracing_is_khr ? "gl_WorldRayDirectionEXT" : "gl_WorldRayDirectionNV";
8527 case BuiltInObjectRayOriginKHR:
8528 return ray_tracing_is_khr ? "gl_ObjectRayOriginEXT" : "gl_ObjectRayOriginNV";
8529 case BuiltInObjectRayDirectionKHR:
8530 return ray_tracing_is_khr ? "gl_ObjectRayDirectionEXT" : "gl_ObjectRayDirectionNV";
8531 case BuiltInRayTminKHR:
8532 return ray_tracing_is_khr ? "gl_RayTminEXT" : "gl_RayTminNV";
8533 case BuiltInRayTmaxKHR:
8534 return ray_tracing_is_khr ? "gl_RayTmaxEXT" : "gl_RayTmaxNV";
8535 case BuiltInInstanceCustomIndexKHR:
8536 return ray_tracing_is_khr ? "gl_InstanceCustomIndexEXT" : "gl_InstanceCustomIndexNV";
8537 case BuiltInObjectToWorldKHR:
8538 return ray_tracing_is_khr ? "gl_ObjectToWorldEXT" : "gl_ObjectToWorldNV";
8539 case BuiltInWorldToObjectKHR:
8540 return ray_tracing_is_khr ? "gl_WorldToObjectEXT" : "gl_WorldToObjectNV";
8541 case BuiltInHitTNV:
8542 // gl_HitTEXT is an alias of RayTMax in KHR.
8543 return "gl_HitTNV";
8544 case BuiltInHitKindKHR:
8545 return ray_tracing_is_khr ? "gl_HitKindEXT" : "gl_HitKindNV";
8546 case BuiltInIncomingRayFlagsKHR:
8547 return ray_tracing_is_khr ? "gl_IncomingRayFlagsEXT" : "gl_IncomingRayFlagsNV";
8548
8549 case BuiltInBaryCoordNV:
8550 {
8551 if (options.es && options.version < 320)
8552 SPIRV_CROSS_THROW("gl_BaryCoordNV requires ESSL 320.");
8553 else if (!options.es && options.version < 450)
8554 SPIRV_CROSS_THROW("gl_BaryCoordNV requires GLSL 450.");
8555 require_extension_internal("GL_NV_fragment_shader_barycentric");
8556 return "gl_BaryCoordNV";
8557 }
8558
8559 case BuiltInBaryCoordNoPerspNV:
8560 {
8561 if (options.es && options.version < 320)
8562 SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires ESSL 320.");
8563 else if (!options.es && options.version < 450)
8564 SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires GLSL 450.");
8565 require_extension_internal("GL_NV_fragment_shader_barycentric");
8566 return "gl_BaryCoordNoPerspNV";
8567 }
8568
8569 case BuiltInFragStencilRefEXT:
8570 {
8571 if (!options.es)
8572 {
8573 require_extension_internal("GL_ARB_shader_stencil_export");
8574 return "gl_FragStencilRefARB";
8575 }
8576 else
8577 SPIRV_CROSS_THROW("Stencil export not supported in GLES.");
8578 }
8579
8580 case BuiltInPrimitiveShadingRateKHR:
8581 {
8582 if (!options.vulkan_semantics)
8583 SPIRV_CROSS_THROW("Can only use PrimitiveShadingRateKHR in Vulkan GLSL.");
8584 require_extension_internal("GL_EXT_fragment_shading_rate");
8585 return "gl_PrimitiveShadingRateEXT";
8586 }
8587
8588 case BuiltInShadingRateKHR:
8589 {
8590 if (!options.vulkan_semantics)
8591 SPIRV_CROSS_THROW("Can only use ShadingRateKHR in Vulkan GLSL.");
8592 require_extension_internal("GL_EXT_fragment_shading_rate");
8593 return "gl_ShadingRateEXT";
8594 }
8595
8596 case BuiltInDeviceIndex:
8597 if (!options.vulkan_semantics)
8598 SPIRV_CROSS_THROW("Need Vulkan semantics for device group support.");
8599 require_extension_internal("GL_EXT_device_group");
8600 return "gl_DeviceIndex";
8601
8602 case BuiltInFullyCoveredEXT:
8603 if (!options.es)
8604 require_extension_internal("GL_NV_conservative_raster_underestimation");
8605 else
8606 SPIRV_CROSS_THROW("Need desktop GL to use GL_NV_conservative_raster_underestimation.");
8607 return "gl_FragFullyCoveredNV";
8608
8609 default:
8610 return join("gl_BuiltIn_", convert_to_string(builtin));
8611 }
8612}
8613
8614const char *CompilerGLSL::index_to_swizzle(uint32_t index)
8615{
8616 switch (index)
8617 {
8618 case 0:
8619 return "x";
8620 case 1:
8621 return "y";
8622 case 2:
8623 return "z";
8624 case 3:
8625 return "w";
8626 default:
8627		return "x"; // Don't crash, but engage the "undefined behavior" described for out-of-bounds logical addressing in the spec.
8628 }
8629}
8630
8631void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType * /*type*/,
8632 AccessChainFlags flags, bool & /*access_chain_is_arrayed*/,
8633 uint32_t index)
8634{
8635 bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
8636 bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
8637
8638 expr += "[";
8639
8640 if (index_is_literal)
8641 expr += convert_to_string(index);
8642 else
8643 expr += to_unpacked_expression(index, register_expression_read);
8644
8645 expr += "]";
8646}
8647
8648bool CompilerGLSL::access_chain_needs_stage_io_builtin_translation(uint32_t)
8649{
8650 return true;
8651}
8652
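// Builds the textual access chain for 'base' by walking 'indices' through the type hierarchy,
// handling pointer chains, arrays, struct members, matrix columns and vector components in turn.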
8653string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
8654 AccessChainFlags flags, AccessChainMeta *meta)
8655{
8656 string expr;
8657
8658 bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
8659 bool msb_is_id = (flags & ACCESS_CHAIN_LITERAL_MSB_FORCE_ID) != 0;
8660 bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0;
8661 bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
8662 bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
8663 bool flatten_member_reference = (flags & ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT) != 0;
8664
8665 if (!chain_only)
8666 {
8667 // We handle transpose explicitly, so don't resolve that here.
8668 auto *e = maybe_get<SPIRExpression>(base);
8669 bool old_transpose = e && e->need_transpose;
8670 if (e)
8671 e->need_transpose = false;
8672 expr = to_enclosed_expression(base, register_expression_read);
8673 if (e)
8674 e->need_transpose = old_transpose;
8675 }
8676
8677 // Start traversing type hierarchy at the proper non-pointer types,
8678 // but keep type_id referencing the original pointer for use below.
8679 uint32_t type_id = expression_type_id(base);
8680
8681 if (!backend.native_pointers)
8682 {
8683 if (ptr_chain)
8684 SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain.");
8685
8686 // Wrapped buffer reference pointer types will need to poke into the internal "value" member before
8687 // continuing the access chain.
8688 if (should_dereference(base))
8689 {
8690 auto &type = get<SPIRType>(type_id);
8691 expr = dereference_expression(type, expr);
8692 }
8693 }
8694
8695 const auto *type = &get_pointee_type(type_id);
8696
8697 bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos;
8698 bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
8699 bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPhysicalTypePacked);
8700 uint32_t physical_type = get_extended_decoration(base, SPIRVCrossDecorationPhysicalTypeID);
8701 bool is_invariant = has_decoration(base, DecorationInvariant);
8702 bool pending_array_enclose = false;
8703 bool dimension_flatten = false;
8704
8705 const auto append_index = [&](uint32_t index, bool is_literal) {
8706 AccessChainFlags mod_flags = flags;
8707 if (!is_literal)
8708 mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT;
8709 access_chain_internal_append_index(expr, base, type, mod_flags, access_chain_is_arrayed, index);
8710 };
8711
8712 for (uint32_t i = 0; i < count; i++)
8713 {
8714 uint32_t index = indices[i];
8715
8716 bool is_literal = index_is_literal;
8717 if (is_literal && msb_is_id && (index >> 31u) != 0u)
8718 {
8719 is_literal = false;
8720 index &= 0x7fffffffu;
8721 }
8722
8723 // Pointer chains
8724 if (ptr_chain && i == 0)
8725 {
8726 // If we are flattening multidimensional arrays, only create opening bracket on first
8727 // array index.
8728 if (options.flatten_multidimensional_arrays)
8729 {
8730 dimension_flatten = type->array.size() >= 1;
8731 pending_array_enclose = dimension_flatten;
8732 if (pending_array_enclose)
8733 expr += "[";
8734 }
8735
8736 if (options.flatten_multidimensional_arrays && dimension_flatten)
8737 {
8738 // If we are flattening multidimensional arrays, do manual stride computation.
8739 if (is_literal)
8740 expr += convert_to_string(index);
8741 else
8742 expr += to_enclosed_expression(index, register_expression_read);
8743
8744 for (auto j = uint32_t(type->array.size()); j; j--)
8745 {
8746 expr += " * ";
8747 expr += enclose_expression(to_array_size(*type, j - 1));
8748 }
8749
8750 if (type->array.empty())
8751 pending_array_enclose = false;
8752 else
8753 expr += " + ";
8754
8755 if (!pending_array_enclose)
8756 expr += "]";
8757 }
8758 else
8759 {
8760 append_index(index, is_literal);
8761 }
8762
8763 if (type->basetype == SPIRType::ControlPointArray)
8764 {
8765 type_id = type->parent_type;
8766 type = &get<SPIRType>(type_id);
8767 }
8768
8769 access_chain_is_arrayed = true;
8770 }
8771 // Arrays
8772 else if (!type->array.empty())
8773 {
8774 // If we are flattening multidimensional arrays, only create opening bracket on first
8775 // array index.
8776 if (options.flatten_multidimensional_arrays && !pending_array_enclose)
8777 {
8778 dimension_flatten = type->array.size() > 1;
8779 pending_array_enclose = dimension_flatten;
8780 if (pending_array_enclose)
8781 expr += "[";
8782 }
8783
8784 assert(type->parent_type);
8785
8786 auto *var = maybe_get<SPIRVariable>(base);
8787 if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) &&
8788 !has_decoration(type->self, DecorationBlock))
8789 {
8790 // This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared.
8791 // Normally, these variables live in blocks when compiled from GLSL,
8792 // but HLSL seems to just emit straight arrays here.
8793 // We must pretend this access goes through gl_in/gl_out arrays
8794 // to be able to access certain builtins as arrays.
8795 auto builtin = ir.meta[base].decoration.builtin_type;
8796 switch (builtin)
8797 {
8798 // case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom.
8799 // case BuiltInClipDistance:
8800 case BuiltInPosition:
8801 case BuiltInPointSize:
8802 if (var->storage == StorageClassInput)
8803 expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr);
8804 else if (var->storage == StorageClassOutput)
8805 expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr);
8806 else
8807 append_index(index, is_literal);
8808 break;
8809
8810 default:
8811 append_index(index, is_literal);
8812 break;
8813 }
8814 }
8815 else if (options.flatten_multidimensional_arrays && dimension_flatten)
8816 {
8817 // If we are flattening multidimensional arrays, do manual stride computation.
8818 auto &parent_type = get<SPIRType>(type->parent_type);
8819
8820 if (is_literal)
8821 expr += convert_to_string(index);
8822 else
8823 expr += to_enclosed_expression(index, register_expression_read);
8824
8825 for (auto j = uint32_t(parent_type.array.size()); j; j--)
8826 {
8827 expr += " * ";
8828 expr += enclose_expression(to_array_size(parent_type, j - 1));
8829 }
8830
8831 if (parent_type.array.empty())
8832 pending_array_enclose = false;
8833 else
8834 expr += " + ";
8835
8836 if (!pending_array_enclose)
8837 expr += "]";
8838 }
8839 // Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal.
8840 // By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask.
8841 else if (!builtin_translates_to_nonarray(BuiltIn(get_decoration(base, DecorationBuiltIn))))
8842 {
8843 append_index(index, is_literal);
8844 }
8845
8846 type_id = type->parent_type;
8847 type = &get<SPIRType>(type_id);
8848
8849 access_chain_is_arrayed = true;
8850 }
8851 // For structs, the index refers to a constant, which indexes into the members, possibly through a redirection mapping.
8852 // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
8853 else if (type->basetype == SPIRType::Struct)
8854 {
8855 if (!is_literal)
8856 index = evaluate_constant_u32(index);
8857
8858 if (index < uint32_t(type->member_type_index_redirection.size()))
8859 index = type->member_type_index_redirection[index];
8860
8861 if (index >= type->member_types.size())
8862 SPIRV_CROSS_THROW("Member index is out of bounds!");
8863
8864 BuiltIn builtin;
8865 if (is_member_builtin(*type, index, &builtin) && access_chain_needs_stage_io_builtin_translation(base))
8866 {
8867 if (access_chain_is_arrayed)
8868 {
8869 expr += ".";
8870 expr += builtin_to_glsl(builtin, type->storage);
8871 }
8872 else
8873 expr = builtin_to_glsl(builtin, type->storage);
8874 }
8875 else
8876 {
8877 // If the member has a qualified name, use it as the entire chain
8878 string qual_mbr_name = get_member_qualified_name(type_id, index);
8879 if (!qual_mbr_name.empty())
8880 expr = qual_mbr_name;
8881 else if (flatten_member_reference)
8882 expr += join("_", to_member_name(*type, index));
8883 else
8884 expr += to_member_reference(base, *type, index, ptr_chain);
8885 }
8886
8887 if (has_member_decoration(type->self, index, DecorationInvariant))
8888 is_invariant = true;
8889
8890 is_packed = member_is_packed_physical_type(*type, index);
8891 if (member_is_remapped_physical_type(*type, index))
8892 physical_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPhysicalTypeID);
8893 else
8894 physical_type = 0;
8895
8896 row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index);
8897 type = &get<SPIRType>(type->member_types[index]);
8898 }
8899 // Matrix -> Vector
8900 else if (type->columns > 1)
8901 {
8902 // If we have a row-major matrix here, we need to defer any transpose in case this access chain
8903 // is used to store a column. We can resolve it right here and now if we access a scalar directly,
8904 // by flipping indexing order of the matrix.
8905
8906 expr += "[";
8907 if (is_literal)
8908 expr += convert_to_string(index);
8909 else
8910 expr += to_unpacked_expression(index, register_expression_read);
8911 expr += "]";
8912
8913 type_id = type->parent_type;
8914 type = &get<SPIRType>(type_id);
8915 }
8916 // Vector -> Scalar
8917 else if (type->vecsize > 1)
8918 {
8919 string deferred_index;
8920 if (row_major_matrix_needs_conversion)
8921 {
8922 // Flip indexing order.
8923 auto column_index = expr.find_last_of('[');
8924 if (column_index != string::npos)
8925 {
8926 deferred_index = expr.substr(column_index);
8927 expr.resize(column_index);
8928 }
8929 }
8930
8931 // Internally, access chain implementation can also be used on composites,
8932 // ignore scalar access workarounds in this case.
8933 StorageClass effective_storage = StorageClassGeneric;
8934 bool ignore_potential_sliced_writes = false;
8935 if ((flags & ACCESS_CHAIN_FORCE_COMPOSITE_BIT) == 0)
8936 {
8937 if (expression_type(base).pointer)
8938 effective_storage = get_expression_effective_storage_class(base);
8939
8940 // Special consideration for control points.
8941 // Control points can only be written by InvocationID, so there is no need
8942 // to consider scalar access chains here.
8943 // Cleans up some cases where it's very painful to determine the accurate storage class
8944 // since blocks can be partially masked ...
8945 auto *var = maybe_get_backing_variable(base);
8946 if (var && var->storage == StorageClassOutput &&
8947 get_execution_model() == ExecutionModelTessellationControl &&
8948 !has_decoration(var->self, DecorationPatch))
8949 {
8950 ignore_potential_sliced_writes = true;
8951 }
8952 }
8953 else
8954 ignore_potential_sliced_writes = true;
8955
8956 if (!row_major_matrix_needs_conversion && !ignore_potential_sliced_writes)
8957 {
8958 // On some backends, we might not be able to safely access individual scalars in a vector.
8959 // To work around this, we might have to cast the access chain reference to something which can,
8960 // like a pointer to scalar, which we can then index into.
8961 prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
8962 is_packed);
8963 }
8964
8965 if (is_literal)
8966 {
8967 bool out_of_bounds = (index >= type->vecsize);
8968
8969 if (!is_packed && !row_major_matrix_needs_conversion)
8970 {
8971 expr += ".";
8972 expr += index_to_swizzle(out_of_bounds ? 0 : index);
8973 }
8974 else
8975 {
8976 // For packed vectors, we can only access them as an array, not by swizzle.
8977 expr += join("[", out_of_bounds ? 0 : index, "]");
8978 }
8979 }
8980 else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion)
8981 {
8982 auto &c = get<SPIRConstant>(index);
8983 bool out_of_bounds = (c.scalar() >= type->vecsize);
8984
8985 if (c.specialization)
8986 {
8987 // If the index is a spec constant, we cannot turn extract into a swizzle.
8988 expr += join("[", out_of_bounds ? "0" : to_expression(index), "]");
8989 }
8990 else
8991 {
8992 expr += ".";
8993 expr += index_to_swizzle(out_of_bounds ? 0 : c.scalar());
8994 }
8995 }
8996 else
8997 {
8998 expr += "[";
8999 expr += to_unpacked_expression(index, register_expression_read);
9000 expr += "]";
9001 }
9002
9003 if (row_major_matrix_needs_conversion && !ignore_potential_sliced_writes)
9004 {
9005 prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
9006 is_packed);
9007 }
9008
9009 expr += deferred_index;
9010 row_major_matrix_needs_conversion = false;
9011
9012 is_packed = false;
9013 physical_type = 0;
9014 type_id = type->parent_type;
9015 type = &get<SPIRType>(type_id);
9016 }
9017 else if (!backend.allow_truncated_access_chain)
9018 SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
9019 }
9020
9021 if (pending_array_enclose)
9022 {
9023		SPIRV_CROSS_THROW("Flattening of multidimensional arrays was enabled, "
9024 "but the access chain was terminated in the middle of a multidimensional array. "
9025 "This is not supported.");
9026 }
9027
9028 if (meta)
9029 {
9030 meta->need_transpose = row_major_matrix_needs_conversion;
9031 meta->storage_is_packed = is_packed;
9032 meta->storage_is_invariant = is_invariant;
9033 meta->storage_physical_type = physical_type;
9034 }
9035
9036 return expr;
9037}
9038
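// No-op by default; backends which cannot address individual scalars inside a vector directly
// (see the comment in access_chain_internal) override this hook to rewrite the expression.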
9039void CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &)
9040{
9041}
9042
9043string CompilerGLSL::to_flattened_struct_member(const string &basename, const SPIRType &type, uint32_t index)
9044{
9045 auto ret = join(basename, "_", to_member_name(type, index));
9046 ParsedIR::sanitize_underscores(ret);
9047 return ret;
9048}
9049
9050string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
9051 AccessChainMeta *meta, bool ptr_chain)
9052{
9053 if (flattened_buffer_blocks.count(base))
9054 {
9055 uint32_t matrix_stride = 0;
9056 uint32_t array_stride = 0;
9057 bool need_transpose = false;
9058 flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride,
9059 &array_stride, ptr_chain);
9060
9061 if (meta)
9062 {
9063 meta->need_transpose = target_type.columns > 1 && need_transpose;
9064 meta->storage_is_packed = false;
9065 }
9066
9067 return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, array_stride,
9068 need_transpose);
9069 }
9070 else if (flattened_structs.count(base) && count > 0)
9071 {
9072 AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
9073 if (ptr_chain)
9074 flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
9075
9076 if (flattened_structs[base])
9077 {
9078 flags |= ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT;
9079 if (meta)
9080 meta->flattened_struct = target_type.basetype == SPIRType::Struct;
9081 }
9082
9083 auto chain = access_chain_internal(base, indices, count, flags, nullptr).substr(1);
9084 if (meta)
9085 {
9086 meta->need_transpose = false;
9087 meta->storage_is_packed = false;
9088 }
9089
9090 auto basename = to_flattened_access_chain_expression(base);
9091 auto ret = join(basename, "_", chain);
9092 ParsedIR::sanitize_underscores(ret);
9093 return ret;
9094 }
9095 else
9096 {
9097 AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
9098 if (ptr_chain)
9099 flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
9100 return access_chain_internal(base, indices, count, flags, meta);
9101 }
9102}
9103
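// Reconstructs a struct value from its flattened members by emitting a constructor expression,
// recursing into nested struct members.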
9104string CompilerGLSL::load_flattened_struct(const string &basename, const SPIRType &type)
9105{
9106 auto expr = type_to_glsl_constructor(type);
9107 expr += '(';
9108
9109 for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
9110 {
9111 if (i)
9112 expr += ", ";
9113
9114 auto &member_type = get<SPIRType>(type.member_types[i]);
9115 if (member_type.basetype == SPIRType::Struct)
9116 expr += load_flattened_struct(to_flattened_struct_member(basename, type, i), member_type);
9117 else
9118 expr += to_flattened_struct_member(basename, type, i);
9119 }
9120 expr += ')';
9121 return expr;
9122}
9123
9124std::string CompilerGLSL::to_flattened_access_chain_expression(uint32_t id)
9125{
9126 // Do not use to_expression as that will unflatten access chains.
9127 string basename;
9128 if (const auto *var = maybe_get<SPIRVariable>(id))
9129 basename = to_name(var->self);
9130 else if (const auto *expr = maybe_get<SPIRExpression>(id))
9131 basename = expr->expression;
9132 else
9133 basename = to_expression(id);
9134
9135 return basename;
9136}
9137
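// Emits one assignment per non-struct member, writing the corresponding member of rhs_id into
// the flattened "<basename>_<member>" variable; nested structs are handled recursively.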
9138void CompilerGLSL::store_flattened_struct(const string &basename, uint32_t rhs_id, const SPIRType &type,
9139 const SmallVector<uint32_t> &indices)
9140{
9141 SmallVector<uint32_t> sub_indices = indices;
9142 sub_indices.push_back(0);
9143
9144 auto *member_type = &type;
9145 for (auto &index : indices)
9146 member_type = &get<SPIRType>(member_type->member_types[index]);
9147
9148 for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
9149 {
9150 sub_indices.back() = i;
9151 auto lhs = join(basename, "_", to_member_name(*member_type, i));
9152 ParsedIR::sanitize_underscores(lhs);
9153
9154 if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct)
9155 {
9156 store_flattened_struct(lhs, rhs_id, type, sub_indices);
9157 }
9158 else
9159 {
9160 auto rhs = to_expression(rhs_id) + to_multi_member_reference(type, sub_indices);
9161 statement(lhs, " = ", rhs, ";");
9162 }
9163 }
9164}
9165
9166void CompilerGLSL::store_flattened_struct(uint32_t lhs_id, uint32_t value)
9167{
9168 auto &type = expression_type(lhs_id);
9169 auto basename = to_flattened_access_chain_expression(lhs_id);
9170 store_flattened_struct(basename, value, type, {});
9171}
9172
9173std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
9174 const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
9175 uint32_t /* array_stride */, bool need_transpose)
9176{
9177 if (!target_type.array.empty())
9178 SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened");
9179 else if (target_type.basetype == SPIRType::Struct)
9180 return flattened_access_chain_struct(base, indices, count, target_type, offset);
9181 else if (target_type.columns > 1)
9182 return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose);
9183 else
9184 return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose);
9185}
9186
9187std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
9188 const SPIRType &target_type, uint32_t offset)
9189{
9190 std::string expr;
9191
9192 expr += type_to_glsl_constructor(target_type);
9193 expr += "(";
9194
9195 for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i)
9196 {
9197 if (i != 0)
9198 expr += ", ";
9199
9200 const SPIRType &member_type = get<SPIRType>(target_type.member_types[i]);
9201 uint32_t member_offset = type_struct_member_offset(target_type, i);
9202
9203 // The access chain terminates at the struct, so we need to find matrix strides and row-major information
9204 // ahead of time.
9205 bool need_transpose = false;
9206 uint32_t matrix_stride = 0;
9207 if (member_type.columns > 1)
9208 {
9209 need_transpose = combined_decoration_for_member(target_type, i).get(DecorationRowMajor);
9210 matrix_stride = type_struct_member_matrix_stride(target_type, i);
9211 }
9212
9213 auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride,
9214 0 /* array_stride */, need_transpose);
9215
9216 // Cannot forward transpositions, so resolve them here.
9217 if (need_transpose)
9218 expr += convert_row_major_matrix(tmp, member_type, 0, false);
9219 else
9220 expr += tmp;
9221 }
9222
9223 expr += ")";
9224
9225 return expr;
9226}
9227
9228std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
9229 const SPIRType &target_type, uint32_t offset,
9230 uint32_t matrix_stride, bool need_transpose)
9231{
9232 assert(matrix_stride);
9233 SPIRType tmp_type = target_type;
9234 if (need_transpose)
9235 swap(tmp_type.vecsize, tmp_type.columns);
9236
9237 std::string expr;
9238
9239 expr += type_to_glsl_constructor(tmp_type);
9240 expr += "(";
9241
9242 for (uint32_t i = 0; i < tmp_type.columns; i++)
9243 {
9244 if (i != 0)
9245 expr += ", ";
9246
9247 expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride,
9248 /* need_transpose= */ false);
9249 }
9250
9251 expr += ")";
9252
9253 return expr;
9254}
9255
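// The flattened buffer is declared as an array of 4-component vectors, so the byte offset is
// converted into a scalar index, then split into an element index (index / 4) and a component
// swizzle (index % 4); e.g. a 32-bit float at byte offset 20 ends up as buf[1].y.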
9256std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count,
9257 const SPIRType &target_type, uint32_t offset,
9258 uint32_t matrix_stride, bool need_transpose)
9259{
9260 auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16);
9261
9262 auto buffer_name = to_name(expression_type(base).self);
9263
9264 if (need_transpose)
9265 {
9266 std::string expr;
9267
9268 if (target_type.vecsize > 1)
9269 {
9270 expr += type_to_glsl_constructor(target_type);
9271 expr += "(";
9272 }
9273
9274 for (uint32_t i = 0; i < target_type.vecsize; ++i)
9275 {
9276 if (i != 0)
9277 expr += ", ";
9278
9279 uint32_t component_offset = result.second + i * matrix_stride;
9280
9281 assert(component_offset % (target_type.width / 8) == 0);
9282 uint32_t index = component_offset / (target_type.width / 8);
9283
9284 expr += buffer_name;
9285 expr += "[";
9286 expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
9287 expr += convert_to_string(index / 4);
9288 expr += "]";
9289
9290 expr += vector_swizzle(1, index % 4);
9291 }
9292
9293 if (target_type.vecsize > 1)
9294 {
9295 expr += ")";
9296 }
9297
9298 return expr;
9299 }
9300 else
9301 {
9302 assert(result.second % (target_type.width / 8) == 0);
9303 uint32_t index = result.second / (target_type.width / 8);
9304
9305 std::string expr;
9306
9307 expr += buffer_name;
9308 expr += "[";
9309 expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
9310 expr += convert_to_string(index / 4);
9311 expr += "]";
9312
9313 expr += vector_swizzle(target_type.vecsize, index % 4);
9314
9315 return expr;
9316 }
9317}
9318
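// Walks the access chain and returns a pair of (dynamic index expression, constant byte offset)
// for a buffer block that has been flattened into a plain vector array; the string part is a sum
// of dynamic stride terms which is either empty or ends with " + ".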
9319std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(
9320 const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride,
9321 bool *need_transpose, uint32_t *out_matrix_stride, uint32_t *out_array_stride, bool ptr_chain)
9322{
9323 // Start traversing type hierarchy at the proper non-pointer types.
9324 const auto *type = &get_pointee_type(basetype);
9325
9326 std::string expr;
9327
9328	// Inherit matrix information in case we are access chaining a vector which might have come from a row-major layout.
9329 bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false;
9330 uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0;
9331 uint32_t array_stride = out_array_stride ? *out_array_stride : 0;
9332
9333 for (uint32_t i = 0; i < count; i++)
9334 {
9335 uint32_t index = indices[i];
9336
9337 // Pointers
9338 if (ptr_chain && i == 0)
9339 {
9340 // Here, the pointer type will be decorated with an array stride.
9341 array_stride = get_decoration(basetype.self, DecorationArrayStride);
9342 if (!array_stride)
9343 SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");
9344
9345 auto *constant = maybe_get<SPIRConstant>(index);
9346 if (constant)
9347 {
9348 // Constant array access.
9349 offset += constant->scalar() * array_stride;
9350 }
9351 else
9352 {
9353 // Dynamic array access.
9354 if (array_stride % word_stride)
9355 {
9356 SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
9357 "of a 4-component vector. "
9358 "Likely culprit here is a float or vec2 array inside a push "
9359 "constant block which is std430. "
9360 "This cannot be flattened. Try using std140 layout instead.");
9361 }
9362
9363 expr += to_enclosed_expression(index);
9364 expr += " * ";
9365 expr += convert_to_string(array_stride / word_stride);
9366 expr += " + ";
9367 }
9368 }
9369 // Arrays
9370 else if (!type->array.empty())
9371 {
9372 auto *constant = maybe_get<SPIRConstant>(index);
9373 if (constant)
9374 {
9375 // Constant array access.
9376 offset += constant->scalar() * array_stride;
9377 }
9378 else
9379 {
9380 // Dynamic array access.
9381 if (array_stride % word_stride)
9382 {
9383 SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
9384 "of a 4-component vector. "
9385 "Likely culprit here is a float or vec2 array inside a push "
9386 "constant block which is std430. "
9387 "This cannot be flattened. Try using std140 layout instead.");
9388 }
9389
9390 expr += to_enclosed_expression(index, false);
9391 expr += " * ";
9392 expr += convert_to_string(array_stride / word_stride);
9393 expr += " + ";
9394 }
9395
9396 uint32_t parent_type = type->parent_type;
9397 type = &get<SPIRType>(parent_type);
9398
9399 if (!type->array.empty())
9400 array_stride = get_decoration(parent_type, DecorationArrayStride);
9401 }
9402 // For structs, the index refers to a constant, which indexes into the members.
9403 // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
9404 else if (type->basetype == SPIRType::Struct)
9405 {
9406 index = evaluate_constant_u32(index);
9407
9408 if (index >= type->member_types.size())
9409 SPIRV_CROSS_THROW("Member index is out of bounds!");
9410
9411 offset += type_struct_member_offset(*type, index);
9412
9413 auto &struct_type = *type;
9414 type = &get<SPIRType>(type->member_types[index]);
9415
9416 if (type->columns > 1)
9417 {
9418 matrix_stride = type_struct_member_matrix_stride(struct_type, index);
9419 row_major_matrix_needs_conversion =
9420 combined_decoration_for_member(struct_type, index).get(DecorationRowMajor);
9421 }
9422 else
9423 row_major_matrix_needs_conversion = false;
9424
9425 if (!type->array.empty())
9426 array_stride = type_struct_member_array_stride(struct_type, index);
9427 }
9428 // Matrix -> Vector
9429 else if (type->columns > 1)
9430 {
9431 auto *constant = maybe_get<SPIRConstant>(index);
9432 if (constant)
9433 {
9434 index = evaluate_constant_u32(index);
9435 offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride);
9436 }
9437 else
9438 {
9439 uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride;
9440 // Dynamic array access.
9441 if (indexing_stride % word_stride)
9442 {
9443 SPIRV_CROSS_THROW("Matrix stride for dynamic indexing must be divisible by the size of a "
9444 "4-component vector. "
9445 "Likely culprit here is a row-major matrix being accessed dynamically. "
9446 "This cannot be flattened. Try using std140 layout instead.");
9447 }
9448
9449 expr += to_enclosed_expression(index, false);
9450 expr += " * ";
9451 expr += convert_to_string(indexing_stride / word_stride);
9452 expr += " + ";
9453 }
9454
9455 type = &get<SPIRType>(type->parent_type);
9456 }
9457 // Vector -> Scalar
9458 else if (type->vecsize > 1)
9459 {
9460 auto *constant = maybe_get<SPIRConstant>(index);
9461 if (constant)
9462 {
9463 index = evaluate_constant_u32(index);
9464 offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8));
9465 }
9466 else
9467 {
9468 uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8);
9469
9470 // Dynamic array access.
9471 if (indexing_stride % word_stride)
9472 {
9473 SPIRV_CROSS_THROW("Stride for dynamic vector indexing must be divisible by the "
9474 "size of a 4-component vector. "
9475 "This cannot be flattened in legacy targets.");
9476 }
9477
9478 expr += to_enclosed_expression(index, false);
9479 expr += " * ";
9480 expr += convert_to_string(indexing_stride / word_stride);
9481 expr += " + ";
9482 }
9483
9484 type = &get<SPIRType>(type->parent_type);
9485 }
9486 else
9487 SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
9488 }
9489
9490 if (need_transpose)
9491 *need_transpose = row_major_matrix_needs_conversion;
9492 if (out_matrix_stride)
9493 *out_matrix_stride = matrix_stride;
9494 if (out_array_stride)
9495 *out_array_stride = array_stride;
9496
9497 return std::make_pair(expr, offset);
9498}
9499
9500bool CompilerGLSL::should_dereference(uint32_t id)
9501{
9502 const auto &type = expression_type(id);
9503 // Non-pointer expressions don't need to be dereferenced.
9504 if (!type.pointer)
9505 return false;
9506
9507 // Handles shouldn't be dereferenced either.
9508 if (!expression_is_lvalue(id))
9509 return false;
9510
9511 // If id is a variable but not a phi variable, we should not dereference it.
9512 if (auto *var = maybe_get<SPIRVariable>(id))
9513 return var->phi_variable;
9514
9515 // If id is an access chain, we should not dereference it.
9516 if (auto *expr = maybe_get<SPIRExpression>(id))
9517 return !expr->access_chain;
9518
9519 // Otherwise, we should dereference this pointer expression.
9520 return true;
9521}
9522
9523bool CompilerGLSL::should_forward(uint32_t id) const
9524{
9525	// If id is a variable, we will try to forward it regardless of the force_temporary check below.
9526	// This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL.
9527
9528 auto *var = maybe_get<SPIRVariable>(id);
9529 if (var && var->forwardable)
9530 return true;
9531
9532 // For debugging emit temporary variables for all expressions
9533 if (options.force_temporary)
9534 return false;
9535
9536 // If an expression carries enough dependencies we need to stop forwarding at some point,
9537 // or we explode compilers. There are usually limits to how much we can nest expressions.
9538 auto *expr = maybe_get<SPIRExpression>(id);
9539 const uint32_t max_expression_dependencies = 64;
9540 if (expr && expr->expression_dependencies.size() >= max_expression_dependencies)
9541 return false;
9542
9543 // Immutable expression can always be forwarded.
9544 if (is_immutable(id))
9545 return true;
9546
9547 return false;
9548}
9549
9550bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const
9551{
9552	// Used only by opcodes which don't do any real "work"; they just swizzle data in some fashion.
9553 return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id);
9554}
9555
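// Counts reads of forwarded expressions. If an expression ends up being read more than once
// (or is read inside a loop after being created outside it), it is forced into a temporary
// and a recompile is triggered so it is not forwarded on the next pass.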
9556void CompilerGLSL::track_expression_read(uint32_t id)
9557{
9558 switch (ir.ids[id].get_type())
9559 {
9560 case TypeExpression:
9561 {
9562 auto &e = get<SPIRExpression>(id);
9563 for (auto implied_read : e.implied_read_expressions)
9564 track_expression_read(implied_read);
9565 break;
9566 }
9567
9568 case TypeAccessChain:
9569 {
9570 auto &e = get<SPIRAccessChain>(id);
9571 for (auto implied_read : e.implied_read_expressions)
9572 track_expression_read(implied_read);
9573 break;
9574 }
9575
9576 default:
9577 break;
9578 }
9579
9580 // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice.
9581 // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice.
9582 if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id))
9583 {
9584 auto &v = expression_usage_counts[id];
9585 v++;
9586
9587 // If we create an expression outside a loop,
9588 // but access it inside a loop, we're implicitly reading it multiple times.
9589 // If the expression in question is expensive, we should hoist it out to avoid relying on loop-invariant code motion
9590 // working inside the backend compiler.
9591 if (expression_read_implies_multiple_reads(id))
9592 v++;
9593
9594 if (v >= 2)
9595 {
9596 //if (v == 2)
9597 // fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id);
9598
9599 forced_temporaries.insert(id);
9600 // Force a recompile after this pass to avoid forwarding this variable.
9601 force_recompile();
9602 }
9603 }
9604}
9605
9606bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure)
9607{
9608 if (forced_temporaries.find(id) != end(forced_temporaries))
9609 return false;
9610
9611 for (uint32_t i = 0; i < num_args; i++)
9612 if (!should_forward(args[i]))
9613 return false;
9614
9615 // We need to forward globals as well.
9616 if (!pure)
9617 {
9618 for (auto global : global_variables)
9619 if (!should_forward(global))
9620 return false;
9621 for (auto aliased : aliased_variables)
9622 if (!should_forward(aliased))
9623 return false;
9624 }
9625
9626 return true;
9627}
9628
9629void CompilerGLSL::register_impure_function_call()
9630{
9631 // Impure functions can modify globals and aliased variables, so invalidate them as well.
9632 for (auto global : global_variables)
9633 flush_dependees(get<SPIRVariable>(global));
9634 for (auto aliased : aliased_variables)
9635 flush_dependees(get<SPIRVariable>(aliased));
9636}
9637
9638void CompilerGLSL::register_call_out_argument(uint32_t id)
9639{
9640 register_write(id);
9641
9642 auto *var = maybe_get<SPIRVariable>(id);
9643 if (var)
9644 flush_variable_declaration(var->self);
9645}
9646
9647string CompilerGLSL::variable_decl_function_local(SPIRVariable &var)
9648{
9649 // These variables are always function local,
9650 // so make sure we emit the variable without storage qualifiers.
9651 // Some backends will inject custom variables locally in a function
9652 // with a storage qualifier which is not function-local.
9653 auto old_storage = var.storage;
9654 var.storage = StorageClassFunction;
9655 auto expr = variable_decl(var);
9656 var.storage = old_storage;
9657 return expr;
9658}
9659
9660void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var)
9661{
9662 // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
9663 if (var.allocate_temporary_copy && !flushed_phi_variables.count(var.self))
9664 {
9665 auto &type = get<SPIRType>(var.basetype);
9666 auto &flags = get_decoration_bitset(var.self);
9667 statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";");
9668 flushed_phi_variables.insert(var.self);
9669 }
9670}
9671
9672void CompilerGLSL::flush_variable_declaration(uint32_t id)
9673{
9674	// Emit any deferred declaration for this variable, and make sure phi-variable copies are declared as well.
9675 auto *var = maybe_get<SPIRVariable>(id);
9676 if (var && var->deferred_declaration)
9677 {
9678 string initializer;
9679 if (options.force_zero_initialized_variables &&
9680 (var->storage == StorageClassFunction || var->storage == StorageClassGeneric ||
9681 var->storage == StorageClassPrivate) &&
9682 !var->initializer && type_can_zero_initialize(get_variable_data_type(*var)))
9683 {
9684 initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(*var)));
9685 }
9686
9687 statement(variable_decl_function_local(*var), initializer, ";");
9688 var->deferred_declaration = false;
9689 }
9690 if (var)
9691 {
9692 emit_variable_temporary_copies(*var);
9693 }
9694}
9695
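// Collapses chained swizzles where the trailing swizzle is an identity prefix (.x, .xy, .xyz, .xyzw),
// e.g. (roughly) "foo.yxz.xy" folds into "foo.yx". Returns true if the trailing swizzle is such a
// prefix applied on top of another swizzle, even when nothing ends up being erased.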
9696bool CompilerGLSL::remove_duplicate_swizzle(string &op)
9697{
9698 auto pos = op.find_last_of('.');
9699 if (pos == string::npos || pos == 0)
9700 return false;
9701
9702 string final_swiz = op.substr(pos + 1, string::npos);
9703
9704 if (backend.swizzle_is_function)
9705 {
9706 if (final_swiz.size() < 2)
9707 return false;
9708
9709 if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
9710 final_swiz.erase(final_swiz.size() - 2, string::npos);
9711 else
9712 return false;
9713 }
9714
9715 // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
9716 // If so, and previous swizzle is of same length,
9717 // we can drop the final swizzle altogether.
9718 for (uint32_t i = 0; i < final_swiz.size(); i++)
9719 {
9720 static const char expected[] = { 'x', 'y', 'z', 'w' };
9721 if (i >= 4 || final_swiz[i] != expected[i])
9722 return false;
9723 }
9724
9725 auto prevpos = op.find_last_of('.', pos - 1);
9726 if (prevpos == string::npos)
9727 return false;
9728
9729 prevpos++;
9730
9731 // Make sure there are only swizzles here ...
9732 for (auto i = prevpos; i < pos; i++)
9733 {
9734 if (op[i] < 'w' || op[i] > 'z')
9735 {
9736 // If swizzles are foo.xyz() like in C++ backend for example, check for that.
9737 if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')')
9738 break;
9739 return false;
9740 }
9741 }
9742
9743 // If original swizzle is large enough, just carve out the components we need.
9744 // E.g. foobar.wyx.xy will turn into foobar.wy.
9745 if (pos - prevpos >= final_swiz.size())
9746 {
9747 op.erase(prevpos + final_swiz.size(), string::npos);
9748
9749 // Add back the function call ...
9750 if (backend.swizzle_is_function)
9751 op += "()";
9752 }
9753 return true;
9754}
9755
9756// Optimizes away vector swizzles where we have something like
9757// vec3 foo;
9758// foo.xyz <-- swizzle expression does nothing.
9759// This is a very common pattern after OpCompositeConstruct.
9760bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op)
9761{
9762 auto pos = op.find_last_of('.');
9763 if (pos == string::npos || pos == 0)
9764 return false;
9765
9766 string final_swiz = op.substr(pos + 1, string::npos);
9767
9768 if (backend.swizzle_is_function)
9769 {
9770 if (final_swiz.size() < 2)
9771 return false;
9772
9773 if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
9774 final_swiz.erase(final_swiz.size() - 2, string::npos);
9775 else
9776 return false;
9777 }
9778
9779 // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
9780 // If so, and previous swizzle is of same length,
9781 // we can drop the final swizzle altogether.
9782 for (uint32_t i = 0; i < final_swiz.size(); i++)
9783 {
9784 static const char expected[] = { 'x', 'y', 'z', 'w' };
9785 if (i >= 4 || final_swiz[i] != expected[i])
9786 return false;
9787 }
9788
9789 auto &type = expression_type(base);
9790
9791 // Sanity checking ...
9792 assert(type.columns == 1 && type.array.empty());
9793
9794 if (type.vecsize == final_swiz.size())
9795 op.erase(pos, string::npos);
9796 return true;
9797}
9798
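// Builds the comma-separated argument list for a composite constructor, merging per-component
// extracts from the same base vector back into a single swizzle where possible. As a rough example,
// constructing a vec4 from foo.x, foo.y, foo.z and 1.0 should ideally come out as
// "foo.xyz, 1.0" rather than "foo.x, foo.y, foo.z, 1.0".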
9799string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length)
9800{
9801 ID base = 0;
9802 string op;
9803 string subop;
9804
9805 // Can only merge swizzles for vectors.
9806 auto &type = get<SPIRType>(return_type);
9807 bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1;
9808 bool swizzle_optimization = false;
9809
9810 for (uint32_t i = 0; i < length; i++)
9811 {
9812 auto *e = maybe_get<SPIRExpression>(elems[i]);
9813
9814 // If we're merging another scalar which belongs to the same base
9815		// object, just merge the swizzles so we trigger as few extra expression reads as possible.
9816 if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base)
9817 {
9818 // Only supposed to be used for vector swizzle -> scalar.
9819 assert(!e->expression.empty() && e->expression.front() == '.');
9820 subop += e->expression.substr(1, string::npos);
9821 swizzle_optimization = true;
9822 }
9823 else
9824 {
9825 // We'll likely end up with duplicated swizzles, e.g.
9826 // foobar.xyz.xyz from patterns like
9827 // OpVectorShuffle
9828 // OpCompositeExtract x 3
9829 // OpCompositeConstruct 3x + other scalar.
9830 // Just modify op in-place.
9831 if (swizzle_optimization)
9832 {
9833 if (backend.swizzle_is_function)
9834 subop += "()";
9835
9836 // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles.
9837 // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on.
9838 // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize.
9839 // Essentially, we can only remove one set of swizzles, since that's what we have control over ...
9840 // Case 1:
9841 // foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done.
9842 // foo.yxz was the result of OpVectorShuffle and we don't know the type of foo.
9843 // Case 2:
9844 // foo.xyz: Duplicate swizzle won't kick in.
9845 // If foo is vec3, we can remove xyz, giving just foo.
9846 if (!remove_duplicate_swizzle(subop))
9847 remove_unity_swizzle(base, subop);
9848
9849 // Strips away redundant parens if we created them during component extraction.
9850 strip_enclosed_expression(subop);
9851 swizzle_optimization = false;
9852 op += subop;
9853 }
9854 else
9855 op += subop;
9856
9857 if (i)
9858 op += ", ";
9859
9860 bool uses_buffer_offset =
9861 type.basetype == SPIRType::Struct && has_member_decoration(type.self, i, DecorationOffset);
9862 subop = to_composite_constructor_expression(elems[i], uses_buffer_offset);
9863 }
9864
9865 base = e ? e->base_expression : ID(0);
9866 }
9867
9868 if (swizzle_optimization)
9869 {
9870 if (backend.swizzle_is_function)
9871 subop += "()";
9872
9873 if (!remove_duplicate_swizzle(subop))
9874 remove_unity_swizzle(base, subop);
9875 // Strips away redundant parens if we created them during component extraction.
9876 strip_enclosed_expression(subop);
9877 }
9878
9879 op += subop;
9880 return op;
9881}
9882
9883bool CompilerGLSL::skip_argument(uint32_t id) const
9884{
9885 if (!combined_image_samplers.empty() || !options.vulkan_semantics)
9886 {
9887 auto &type = expression_type(id);
9888 if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1))
9889 return true;
9890 }
9891 return false;
9892}
9893
9894bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs)
9895{
9896 // Do this with strings because we have a very clear pattern we can check for and it avoids
9897 // adding lots of special cases to the code emission.
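	// Illustrative sketch of the pattern (hypothetical strings):
	//   lhs == "a.x", rhs == "a.x + b"  ->  emits "a.x += b;"
	//   lhs == "a.x", rhs == "a.x + 1"  ->  emits "a.x++;"
	// Anything that does not match this "<lhs> <op> <expr>" shape falls through and returns false.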
9898 if (rhs.size() < lhs.size() + 3)
9899 return false;
9900
9901 // Do not optimize matrices. They are a bit awkward to reason about in general
9902	// (in which order do the operations happen?), and it does not work in MSL anyway.
9903 if (type.vecsize > 1 && type.columns > 1)
9904 return false;
9905
9906 auto index = rhs.find(lhs);
9907 if (index != 0)
9908 return false;
9909
9910 // TODO: Shift operators, but it's not important for now.
9911 auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1);
9912 if (op != lhs.size() + 1)
9913 return false;
9914
9915 // Check that the op is followed by space. This excludes && and ||.
9916 if (rhs[op + 1] != ' ')
9917 return false;
9918
9919 char bop = rhs[op];
9920 auto expr = rhs.substr(lhs.size() + 3);
9921	// Try to find increments and decrements. It looks neater, since += 1 / -= 1 is fairly rare to see in real code.
9922 // Find some common patterns which are equivalent.
9923 if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)"))
9924 statement(lhs, bop, bop, ";");
9925 else
9926 statement(lhs, " ", bop, "= ", expr, ";");
9927 return true;
9928}
9929
9930void CompilerGLSL::register_control_dependent_expression(uint32_t expr)
9931{
9932 if (forwarded_temporaries.find(expr) == end(forwarded_temporaries))
9933 return;
9934
9935 assert(current_emitting_block);
9936 current_emitting_block->invalidate_expressions.push_back(expr);
9937}
9938
9939void CompilerGLSL::emit_block_instructions(SPIRBlock &block)
9940{
9941 current_emitting_block = &block;
9942 for (auto &op : block.ops)
9943 emit_instruction(op);
9944 current_emitting_block = nullptr;
9945}
9946
9947void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr)
9948{
9949 // Allow trivially forwarded expressions like OpLoad or trivial shuffles,
9950 // these will be marked as having suppressed usage tracking.
9951 // Our only concern is to make sure arithmetic operations are done in similar ways.
9952 if (expression_is_forwarded(expr.self) && !expression_suppresses_usage_tracking(expr.self) &&
9953 forced_invariant_temporaries.count(expr.self) == 0)
9954 {
9955 forced_temporaries.insert(expr.self);
9956 forced_invariant_temporaries.insert(expr.self);
9957 force_recompile();
9958
9959 for (auto &dependent : expr.expression_dependencies)
9960 disallow_forwarding_in_expression_chain(get<SPIRExpression>(dependent));
9961 }
9962}
9963
9964void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id)
9965{
9966 // Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to
9967 // this variable is consistent. The failure case for SPIRV-Cross is when an expression is forced to a temporary
9968 // in one translation unit, but not another, e.g. due to multiple use of an expression.
9969 // This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent
9970 // expressions to be temporaries.
9971 // It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough
9972 // for all reasonable uses of invariant.
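	// Hypothetical example of the hazard: two entry points both compute an invariant gl_Position
	// from the same "a * b + c" subexpression; if one keeps it as a forwarded expression while the
	// other is forced to a temporary, the backend compiler may schedule the math differently and
	// break the invariance guarantee.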
9973 if (!has_decoration(store_id, DecorationInvariant))
9974 return;
9975
9976 auto *expr = maybe_get<SPIRExpression>(value_id);
9977 if (!expr)
9978 return;
9979
9980 disallow_forwarding_in_expression_chain(*expr);
9981}
9982
9983void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression)
9984{
9985 auto rhs = to_pointer_expression(rhs_expression);
9986
9987 // Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null.
9988 if (!rhs.empty())
9989 {
9990 handle_store_to_invariant_variable(lhs_expression, rhs_expression);
9991
9992 if (!unroll_array_to_complex_store(lhs_expression, rhs_expression))
9993 {
9994 auto lhs = to_dereferenced_expression(lhs_expression);
9995 if (has_decoration(lhs_expression, DecorationNonUniform))
9996 convert_non_uniform_expression(lhs, lhs_expression);
9997
9998 // We might need to cast in order to store to a builtin.
9999 cast_to_variable_store(lhs_expression, rhs, expression_type(rhs_expression));
10000
10001 // Tries to optimize assignments like "<lhs> = <lhs> op expr".
10002 // While this is purely cosmetic, this is important for legacy ESSL where loop
10003 // variable increments must be in either i++ or i += const-expr.
10004 // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
10005 if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
10006 statement(lhs, " = ", rhs, ";");
10007 }
10008 register_write(lhs_expression);
10009 }
10010}
10011
10012uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const
10013{
10014 if (instr.length < 3)
10015 return 32;
10016
10017 auto *ops = stream(instr);
10018
10019 switch (instr.op)
10020 {
10021 case OpSConvert:
10022 case OpConvertSToF:
10023 case OpUConvert:
10024 case OpConvertUToF:
10025 case OpIEqual:
10026 case OpINotEqual:
10027 case OpSLessThan:
10028 case OpSLessThanEqual:
10029 case OpSGreaterThan:
10030 case OpSGreaterThanEqual:
10031 case OpULessThan:
10032 case OpULessThanEqual:
10033 case OpUGreaterThan:
10034 case OpUGreaterThanEqual:
10035 return expression_type(ops[2]).width;
10036
10037 default:
10038 {
10039		// We can look at the result type instead, which is more robust.
10040 auto *type = maybe_get<SPIRType>(ops[0]);
10041 if (type && type_is_integral(*type))
10042 return type->width;
10043 else
10044 return 32;
10045 }
10046 }
10047}
10048
10049uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const
10050{
10051 if (length < 1)
10052 return 32;
10053
10054 switch (op)
10055 {
10056 case GLSLstd450SAbs:
10057 case GLSLstd450SSign:
10058 case GLSLstd450UMin:
10059 case GLSLstd450SMin:
10060 case GLSLstd450UMax:
10061 case GLSLstd450SMax:
10062 case GLSLstd450UClamp:
10063 case GLSLstd450SClamp:
10064 case GLSLstd450FindSMsb:
10065 case GLSLstd450FindUMsb:
10066 return expression_type(ops[0]).width;
10067
10068 default:
10069 {
10070 // We don't need to care about other opcodes, just return 32.
10071 return 32;
10072 }
10073 }
10074}
10075
10076void CompilerGLSL::emit_instruction(const Instruction &instruction)
10077{
10078 auto ops = stream(instruction);
10079 auto opcode = static_cast<Op>(instruction.op);
10080 uint32_t length = instruction.length;
10081
10082#define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
10083#define GLSL_BOP_CAST(op, type) \
10084 emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
10085#define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
10086#define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
10087#define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
10088#define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
10089#define GLSL_BFOP_CAST(op, type) \
10090 emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
10092#define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
10093
10094 // If we need to do implicit bitcasts, make sure we do it with the correct type.
10095 uint32_t integer_width = get_integer_width_for_instruction(instruction);
10096 auto int_type = to_signed_basetype(integer_width);
10097 auto uint_type = to_unsigned_basetype(integer_width);
10098
10099 switch (opcode)
10100 {
10101 // Dealing with memory
10102 case OpLoad:
10103 {
10104 uint32_t result_type = ops[0];
10105 uint32_t id = ops[1];
10106 uint32_t ptr = ops[2];
10107
10108 flush_variable_declaration(ptr);
10109
10110 // If we're loading from memory that cannot be changed by the shader,
10111 // just forward the expression directly to avoid needless temporaries.
10112 // If an expression is mutable and forwardable, we speculate that it is immutable.
10113 bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);
10114
10115 // If loading a non-native row-major matrix, mark the expression as need_transpose.
10116 bool need_transpose = false;
10117 bool old_need_transpose = false;
10118
10119 auto *ptr_expression = maybe_get<SPIRExpression>(ptr);
10120
10121 if (forward)
10122 {
10123 // If we're forwarding the load, we're also going to forward transpose state, so don't transpose while
10124 // taking the expression.
10125 if (ptr_expression && ptr_expression->need_transpose)
10126 {
10127 old_need_transpose = true;
10128 ptr_expression->need_transpose = false;
10129 need_transpose = true;
10130 }
10131 else if (is_non_native_row_major_matrix(ptr))
10132 need_transpose = true;
10133 }
10134
10135 // If we are forwarding this load,
10136 // don't register the read to access chain here, defer that to when we actually use the expression,
10137 // using the add_implied_read_expression mechanism.
10138 string expr;
10139
10140 bool is_packed = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked);
10141 bool is_remapped = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID);
10142 if (forward || (!is_packed && !is_remapped))
10143 {
10144 // For the simple case, we do not need to deal with repacking.
10145 expr = to_dereferenced_expression(ptr, false);
10146 }
10147 else
10148 {
10149 // If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before
10150 // storing the expression to a temporary.
10151 expr = to_unpacked_expression(ptr);
10152 }
10153
10154 auto &type = get<SPIRType>(result_type);
10155 auto &expr_type = expression_type(ptr);
10156
10157 // If the expression has more vector components than the result type, insert
10158 // a swizzle. This shouldn't happen normally on valid SPIR-V, but it might
10159 // happen with e.g. the MSL backend replacing the type of an input variable.
10160 if (expr_type.vecsize > type.vecsize)
10161 expr = enclose_expression(expr + vector_swizzle(type.vecsize, 0));
10162
10163 // We might need to cast in order to load from a builtin.
10164 cast_from_variable_load(ptr, expr, type);
10165
10166 // We might be trying to load a gl_Position[N], where we should be
10167 // doing float4[](gl_in[i].gl_Position, ...) instead.
10168 // Similar workarounds are required for input arrays in tessellation.
10169 // Also, loading from gl_SampleMask array needs special unroll.
10170 unroll_array_from_complex_load(id, ptr, expr);
10171
10172 if (!type_is_opaque_value(type) && has_decoration(ptr, DecorationNonUniform))
10173 {
10174 // If we're loading something non-opaque, we need to handle non-uniform descriptor access.
10175 convert_non_uniform_expression(expr, ptr);
10176 }
10177
10178 if (forward && ptr_expression)
10179 ptr_expression->need_transpose = old_need_transpose;
10180
10181 bool flattened = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0;
10182
10183 if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(ptr) && !flattened)
10184 rewrite_load_for_wrapped_row_major(expr, result_type, ptr);
10185
10186 // By default, suppress usage tracking since using same expression multiple times does not imply any extra work.
10187 // However, if we try to load a complex, composite object from a flattened buffer,
10188 // we should avoid emitting the same code over and over and lower the result to a temporary.
10189 bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1));
10190
10191 SPIRExpression *e = nullptr;
10192 if (!forward && expression_is_non_value_type_array(ptr))
10193 {
10194 // Complicated load case where we need to make a copy of ptr, but we cannot, because
10195 // it is an array, and our backend does not support arrays as value types.
10196 // Emit the temporary, and copy it explicitly.
10197 e = &emit_uninitialized_temporary_expression(result_type, id);
10198 emit_array_copy(to_expression(id), id, ptr, StorageClassFunction, get_expression_effective_storage_class(ptr));
10199 }
10200 else
10201 e = &emit_op(result_type, id, expr, forward, !usage_tracking);
10202
10203 e->need_transpose = need_transpose;
10204 register_read(id, ptr, forward);
10205
10206 if (forward)
10207 {
10208 // Pass through whether the result is of a packed type and the physical type ID.
10209 if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked))
10210 set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
10211 if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID))
10212 {
10213 set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID,
10214 get_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID));
10215 }
10216 }
10217 else
10218 {
10219 // This might have been set on an earlier compilation iteration, force it to be unset.
10220 unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
10221 unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
10222 }
10223
10224 inherit_expression_dependencies(id, ptr);
10225 if (forward)
10226 add_implied_read_expression(*e, ptr);
10227 break;
10228 }
10229
10230 case OpInBoundsAccessChain:
10231 case OpAccessChain:
10232 case OpPtrAccessChain:
10233 {
10234 auto *var = maybe_get<SPIRVariable>(ops[2]);
10235 if (var)
10236 flush_variable_declaration(var->self);
10237
10238 // If the base is immutable, the access chain pointer must also be.
10239 // If an expression is mutable and forwardable, we speculate that it is immutable.
10240 AccessChainMeta meta;
10241 bool ptr_chain = opcode == OpPtrAccessChain;
10242 auto e = access_chain(ops[2], &ops[3], length - 3, get<SPIRType>(ops[0]), &meta, ptr_chain);
10243
10244 auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2]));
10245
10246 auto *backing_variable = maybe_get_backing_variable(ops[2]);
10247 expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]);
10248 expr.need_transpose = meta.need_transpose;
10249 expr.access_chain = true;
10250
10251		// Mark the result as being packed. Some platforms handle packed vectors differently from non-packed ones.
10252 if (meta.storage_is_packed)
10253 set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked);
10254 if (meta.storage_physical_type != 0)
10255 set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
10256 if (meta.storage_is_invariant)
10257 set_decoration(ops[1], DecorationInvariant);
10258 if (meta.flattened_struct)
10259 flattened_structs[ops[1]] = true;
10260
10261 // If we have some expression dependencies in our access chain, this access chain is technically a forwarded
10262 // temporary which could be subject to invalidation.
10263		// Need to assume we're forwarded while calling inherit_expression_dependencies.
10264 forwarded_temporaries.insert(ops[1]);
10265 // The access chain itself is never forced to a temporary, but its dependencies might.
10266 suppressed_usage_tracking.insert(ops[1]);
10267
10268 for (uint32_t i = 2; i < length; i++)
10269 {
10270 inherit_expression_dependencies(ops[1], ops[i]);
10271 add_implied_read_expression(expr, ops[i]);
10272 }
10273
10274		// If it turns out we have no dependencies, i.e., all indices in the access chain are immutable temporaries,
10275 // we're not forwarded after all.
10276 if (expr.expression_dependencies.empty())
10277 forwarded_temporaries.erase(ops[1]);
10278
10279 break;
10280 }
10281
10282 case OpStore:
10283 {
10284 auto *var = maybe_get<SPIRVariable>(ops[0]);
10285
10286 if (var && var->statically_assigned)
10287 var->static_expression = ops[1];
10288 else if (var && var->loop_variable && !var->loop_variable_enable)
10289 var->static_expression = ops[1];
10290 else if (var && var->remapped_variable && var->static_expression)
10291 {
10292 // Skip the write.
10293 }
10294 else if (flattened_structs.count(ops[0]))
10295 {
10296 store_flattened_struct(ops[0], ops[1]);
10297 register_write(ops[0]);
10298 }
10299 else
10300 {
10301 emit_store_statement(ops[0], ops[1]);
10302 }
10303
10304 // Storing a pointer results in a variable pointer, so we must conservatively assume
10305 // we can write through it.
10306 if (expression_type(ops[1]).pointer)
10307 register_write(ops[1]);
10308 break;
10309 }
10310
10311 case OpArrayLength:
10312 {
10313 uint32_t result_type = ops[0];
10314 uint32_t id = ops[1];
10315 auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
10316 if (has_decoration(ops[2], DecorationNonUniform))
10317 convert_non_uniform_expression(e, ops[2]);
10318 set<SPIRExpression>(id, join(type_to_glsl(get<SPIRType>(result_type)), "(", e, ".length())"), result_type,
10319 true);
10320 break;
10321 }
10322
10323 // Function calls
10324 case OpFunctionCall:
10325 {
10326 uint32_t result_type = ops[0];
10327 uint32_t id = ops[1];
10328 uint32_t func = ops[2];
10329 const auto *arg = &ops[3];
10330 length -= 3;
10331
10332 auto &callee = get<SPIRFunction>(func);
10333 auto &return_type = get<SPIRType>(callee.return_type);
10334 bool pure = function_is_pure(callee);
10335
10336 bool callee_has_out_variables = false;
10337 bool emit_return_value_as_argument = false;
10338
10339 // Invalidate out variables passed to functions since they can be OpStore'd to.
10340 for (uint32_t i = 0; i < length; i++)
10341 {
10342 if (callee.arguments[i].write_count)
10343 {
10344 register_call_out_argument(arg[i]);
10345 callee_has_out_variables = true;
10346 }
10347
10348 flush_variable_declaration(arg[i]);
10349 }
10350
10351 if (!return_type.array.empty() && !backend.can_return_array)
10352 {
10353 callee_has_out_variables = true;
10354 emit_return_value_as_argument = true;
10355 }
10356
10357 if (!pure)
10358 register_impure_function_call();
10359
10360 string funexpr;
10361 SmallVector<string> arglist;
10362 funexpr += to_name(func) + "(";
10363
10364 if (emit_return_value_as_argument)
10365 {
10366 statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type), ";");
10367 arglist.push_back(to_name(id));
10368 }
10369
10370 for (uint32_t i = 0; i < length; i++)
10371 {
10372 // Do not pass in separate images or samplers if we're remapping
10373 // to combined image samplers.
10374 if (skip_argument(arg[i]))
10375 continue;
10376
10377 arglist.push_back(to_func_call_arg(callee.arguments[i], arg[i]));
10378 }
10379
10380 for (auto &combined : callee.combined_parameters)
10381 {
10382 auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]);
10383 auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]);
10384 arglist.push_back(to_combined_image_sampler(image_id, sampler_id));
10385 }
10386
10387 append_global_func_args(callee, length, arglist);
10388
10389 funexpr += merge(arglist);
10390 funexpr += ")";
10391
10392 // Check for function call constraints.
10393 check_function_call_constraints(arg, length);
10394
10395 if (return_type.basetype != SPIRType::Void)
10396 {
10397 // If the function actually writes to an out variable,
10398 // take the conservative route and do not forward.
10399			// The problem is that we might not read the function
10400			// result (and thus emit the call) before an out variable
10401			// is read (a common case when the return value is ignored!).
10402			// To avoid having to start tracking invalidated variables,
10403			// just sidestep the forwarding problem altogether.
10404 bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure &&
10405 (forced_temporaries.find(id) == end(forced_temporaries));
10406
10407 if (emit_return_value_as_argument)
10408 {
10409 statement(funexpr, ";");
10410 set<SPIRExpression>(id, to_name(id), result_type, true);
10411 }
10412 else
10413 emit_op(result_type, id, funexpr, forward);
10414
10415 // Function calls are implicit loads from all variables in question.
10416 // Set dependencies for them.
10417 for (uint32_t i = 0; i < length; i++)
10418 register_read(id, arg[i], forward);
10419
10420 // If we're going to forward the temporary result,
10421 // put dependencies on every variable that must not change.
10422 if (forward)
10423 register_global_read_dependencies(callee, id);
10424 }
10425 else
10426 statement(funexpr, ";");
10427
10428 break;
10429 }
10430
10431 // Composite munging
10432 case OpCompositeConstruct:
10433 {
10434 uint32_t result_type = ops[0];
10435 uint32_t id = ops[1];
10436 const auto *const elems = &ops[2];
10437 length -= 2;
10438
10439 bool forward = true;
10440 for (uint32_t i = 0; i < length; i++)
10441 forward = forward && should_forward(elems[i]);
10442
10443 auto &out_type = get<SPIRType>(result_type);
10444 auto *in_type = length > 0 ? &expression_type(elems[0]) : nullptr;
10445
10446 // Only splat if we have vector constructors.
10447 // Arrays and structs must be initialized properly in full.
10448 bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct;
10449
10450 bool splat = false;
10451 bool swizzle_splat = false;
10452
10453 if (in_type)
10454 {
10455 splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting;
10456 swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar;
10457
10458 if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(*in_type))
10459 {
10460				// As a special case, we cannot swizzle literal integers.
10461 swizzle_splat = false;
10462 }
10463 }
10464
10465 if (splat || swizzle_splat)
10466 {
10467 uint32_t input = elems[0];
10468 for (uint32_t i = 0; i < length; i++)
10469 {
10470 if (input != elems[i])
10471 {
10472 splat = false;
10473 swizzle_splat = false;
10474 }
10475 }
10476 }
10477
10478 if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
10479 forward = false;
10480 if (!out_type.array.empty() && !backend.can_declare_arrays_inline)
10481 forward = false;
10482 if (type_is_empty(out_type) && !backend.supports_empty_struct)
10483 forward = false;
10484
10485 string constructor_op;
10486 if (backend.use_initializer_list && composite)
10487 {
10488			bool needs_trailing_bracket = false;
10489 // Only use this path if we are building composites.
10490 // This path cannot be used for arithmetic.
10491 if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty())
10492 constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type));
10493 else if (backend.use_typed_initializer_list && backend.array_is_value_type && !out_type.array.empty())
10494 {
10495 // MSL path. Array constructor is baked into type here, do not use _constructor variant.
10496 constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
10497				needs_trailing_bracket = true;
10498 }
10499 constructor_op += "{ ";
10500
10501 if (type_is_empty(out_type) && !backend.supports_empty_struct)
10502 constructor_op += "0";
10503 else if (splat)
10504 constructor_op += to_unpacked_expression(elems[0]);
10505 else
10506 constructor_op += build_composite_combiner(result_type, elems, length);
10507 constructor_op += " }";
10508			if (needs_trailing_bracket)
10509 constructor_op += ")";
10510 }
10511 else if (swizzle_splat && !composite)
10512 {
10513 constructor_op = remap_swizzle(get<SPIRType>(result_type), 1, to_unpacked_expression(elems[0]));
10514 }
10515 else
10516 {
10517 constructor_op = type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
10518 if (type_is_empty(out_type) && !backend.supports_empty_struct)
10519 constructor_op += "0";
10520 else if (splat)
10521 constructor_op += to_unpacked_expression(elems[0]);
10522 else
10523 constructor_op += build_composite_combiner(result_type, elems, length);
10524 constructor_op += ")";
10525 }
10526
10527 if (!constructor_op.empty())
10528 {
10529 emit_op(result_type, id, constructor_op, forward);
10530 for (uint32_t i = 0; i < length; i++)
10531 inherit_expression_dependencies(id, elems[i]);
10532 }
10533 break;
10534 }
10535
10536 case OpVectorInsertDynamic:
10537 {
10538 uint32_t result_type = ops[0];
10539 uint32_t id = ops[1];
10540 uint32_t vec = ops[2];
10541 uint32_t comp = ops[3];
10542 uint32_t index = ops[4];
10543
10544 flush_variable_declaration(vec);
10545
10546 // Make a copy, then use access chain to store the variable.
10547 statement(declare_temporary(result_type, id), to_expression(vec), ";");
10548 set<SPIRExpression>(id, to_name(id), result_type, true);
10549 auto chain = access_chain_internal(id, &index, 1, 0, nullptr);
10550 statement(chain, " = ", to_unpacked_expression(comp), ";");
10551 break;
10552 }
10553
10554 case OpVectorExtractDynamic:
10555 {
10556 uint32_t result_type = ops[0];
10557 uint32_t id = ops[1];
10558
10559 auto expr = access_chain_internal(ops[2], &ops[3], 1, 0, nullptr);
10560 emit_op(result_type, id, expr, should_forward(ops[2]));
10561 inherit_expression_dependencies(id, ops[2]);
10562 inherit_expression_dependencies(id, ops[3]);
10563 break;
10564 }
10565
10566 case OpCompositeExtract:
10567 {
10568 uint32_t result_type = ops[0];
10569 uint32_t id = ops[1];
10570 length -= 3;
10571
10572 auto &type = get<SPIRType>(result_type);
10573
10574 // We can only split the expression here if our expression is forwarded as a temporary.
10575 bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries);
10576
10577 // Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case.
10578 auto &composite_type = expression_type(ops[2]);
10579 bool composite_type_is_complex = composite_type.basetype == SPIRType::Struct || !composite_type.array.empty();
10580 if (composite_type_is_complex)
10581 allow_base_expression = false;
10582
10583 // Packed expressions or physical ID mapped expressions cannot be split up.
10584 if (has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypePacked) ||
10585 has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypeID))
10586 allow_base_expression = false;
10587
10588 // Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern
10589 // into the base expression.
10590 if (is_non_native_row_major_matrix(ops[2]))
10591 allow_base_expression = false;
10592
10593 AccessChainMeta meta;
10594 SPIRExpression *e = nullptr;
10595 auto *c = maybe_get<SPIRConstant>(ops[2]);
10596
10597 if (c && !c->specialization && !composite_type_is_complex)
10598 {
10599 auto expr = to_extract_constant_composite_expression(result_type, *c, ops + 3, length);
10600 e = &emit_op(result_type, id, expr, true, true);
10601 }
10602 else if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1)
10603 {
10604 // Only apply this optimization if result is scalar.
10605
10606 // We want to split the access chain from the base.
10607 // This is so we can later combine different CompositeExtract results
10608 // with CompositeConstruct without emitting code like
10609 //
10610 // vec3 temp = texture(...).xyz
10611 // vec4(temp.x, temp.y, temp.z, 1.0).
10612 //
10613 // when we actually wanted to emit this
10614 // vec4(texture(...).xyz, 1.0).
10615 //
10616 // Including the base will prevent this and would trigger multiple reads
10617 // from expression causing it to be forced to an actual temporary in GLSL.
10618 auto expr = access_chain_internal(ops[2], &ops[3], length,
10619 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT |
10620 ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta);
10621 e = &emit_op(result_type, id, expr, true, should_suppress_usage_tracking(ops[2]));
10622 inherit_expression_dependencies(id, ops[2]);
10623 e->base_expression = ops[2];
10624 }
10625 else
10626 {
10627 auto expr = access_chain_internal(ops[2], &ops[3], length,
10628 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta);
10629 e = &emit_op(result_type, id, expr, should_forward(ops[2]), should_suppress_usage_tracking(ops[2]));
10630 inherit_expression_dependencies(id, ops[2]);
10631 }
10632
10633 // Pass through some meta information to the loaded expression.
10634 // We can still end up loading a buffer type to a variable, then CompositeExtract from it
10635 // instead of loading everything through an access chain.
10636 e->need_transpose = meta.need_transpose;
10637 if (meta.storage_is_packed)
10638 set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
10639 if (meta.storage_physical_type != 0)
10640 set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
10641 if (meta.storage_is_invariant)
10642 set_decoration(id, DecorationInvariant);
10643
10644 break;
10645 }
10646
10647 case OpCompositeInsert:
10648 {
10649 uint32_t result_type = ops[0];
10650 uint32_t id = ops[1];
10651 uint32_t obj = ops[2];
10652 uint32_t composite = ops[3];
10653 const auto *elems = &ops[4];
10654 length -= 4;
10655
10656 flush_variable_declaration(composite);
10657
10658 // Make a copy, then use access chain to store the variable.
10659 statement(declare_temporary(result_type, id), to_expression(composite), ";");
10660 set<SPIRExpression>(id, to_name(id), result_type, true);
10661 auto chain = access_chain_internal(id, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
10662 statement(chain, " = ", to_unpacked_expression(obj), ";");
10663
10664 break;
10665 }
10666
10667 case OpCopyMemory:
10668 {
10669 uint32_t lhs = ops[0];
10670 uint32_t rhs = ops[1];
10671 if (lhs != rhs)
10672 {
10673 uint32_t &tmp_id = extra_sub_expressions[instruction.offset | EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET];
10674 if (!tmp_id)
10675 tmp_id = ir.increase_bound_by(1);
10676 uint32_t tmp_type_id = expression_type(rhs).parent_type;
10677
10678 EmbeddedInstruction fake_load, fake_store;
10679 fake_load.op = OpLoad;
10680 fake_load.length = 3;
10681 fake_load.ops.push_back(tmp_type_id);
10682 fake_load.ops.push_back(tmp_id);
10683 fake_load.ops.push_back(rhs);
10684
10685 fake_store.op = OpStore;
10686 fake_store.length = 2;
10687 fake_store.ops.push_back(lhs);
10688 fake_store.ops.push_back(tmp_id);
10689
10690 // Load and Store do a *lot* of workarounds, and we'd like to reuse them as much as possible.
10691 // Synthesize a fake Load and Store pair for CopyMemory.
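			// Conceptually, "OpCopyMemory dst, src" is handled here as if it were:
			//   %tmp = OpLoad  <type> src
			//          OpStore dst, %tmp
			// so the temporary picks up all the existing load/store special cases.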
10692 emit_instruction(fake_load);
10693 emit_instruction(fake_store);
10694 }
10695 break;
10696 }
10697
10698 case OpCopyLogical:
10699 {
10700		// This is used for copying objects of different types, arrays, and structs.
10701 // We need to unroll the copy, element-by-element.
10702 uint32_t result_type = ops[0];
10703 uint32_t id = ops[1];
10704 uint32_t rhs = ops[2];
10705
10706 emit_uninitialized_temporary_expression(result_type, id);
10707 emit_copy_logical_type(id, result_type, rhs, expression_type_id(rhs), {});
10708 break;
10709 }
10710
10711 case OpCopyObject:
10712 {
10713 uint32_t result_type = ops[0];
10714 uint32_t id = ops[1];
10715 uint32_t rhs = ops[2];
10716 bool pointer = get<SPIRType>(result_type).pointer;
10717
10718 auto *chain = maybe_get<SPIRAccessChain>(rhs);
10719 auto *imgsamp = maybe_get<SPIRCombinedImageSampler>(rhs);
10720 if (chain)
10721 {
10722 // Cannot lower to a SPIRExpression, just copy the object.
10723 auto &e = set<SPIRAccessChain>(id, *chain);
10724 e.self = id;
10725 }
10726 else if (imgsamp)
10727 {
10728 // Cannot lower to a SPIRExpression, just copy the object.
10729 // GLSL does not currently use this type and will never get here, but MSL does.
10730 // Handled here instead of CompilerMSL for better integration and general handling,
10731 // and in case GLSL or other subclasses require it in the future.
10732 auto &e = set<SPIRCombinedImageSampler>(id, *imgsamp);
10733 e.self = id;
10734 }
10735 else if (expression_is_lvalue(rhs) && !pointer)
10736 {
10737 // Need a copy.
10738 // For pointer types, we copy the pointer itself.
10739 statement(declare_temporary(result_type, id), to_unpacked_expression(rhs), ";");
10740 set<SPIRExpression>(id, to_name(id), result_type, true);
10741 }
10742 else
10743 {
10744 // RHS expression is immutable, so just forward it.
10745			// Copying these things really makes no sense, but
10746			// it seems to be allowed anyway.
10747 auto &e = set<SPIRExpression>(id, to_expression(rhs), result_type, true);
10748 if (pointer)
10749 {
10750 auto *var = maybe_get_backing_variable(rhs);
10751 e.loaded_from = var ? var->self : ID(0);
10752 }
10753
10754 // If we're copying an access chain, need to inherit the read expressions.
10755 auto *rhs_expr = maybe_get<SPIRExpression>(rhs);
10756 if (rhs_expr)
10757 {
10758 e.implied_read_expressions = rhs_expr->implied_read_expressions;
10759 e.expression_dependencies = rhs_expr->expression_dependencies;
10760 }
10761 }
10762 break;
10763 }
10764
10765 case OpVectorShuffle:
10766 {
10767 uint32_t result_type = ops[0];
10768 uint32_t id = ops[1];
10769 uint32_t vec0 = ops[2];
10770 uint32_t vec1 = ops[3];
10771 const auto *elems = &ops[4];
10772 length -= 4;
10773
10774 auto &type0 = expression_type(vec0);
10775
10776 // If we have the undefined swizzle index -1, we need to swizzle in undefined data,
10777 // or in our case, T(0).
10778 bool shuffle = false;
10779 for (uint32_t i = 0; i < length; i++)
10780 if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu)
10781 shuffle = true;
10782
10783 // Cannot use swizzles with packed expressions, force shuffle path.
10784 if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPhysicalTypePacked))
10785 shuffle = true;
10786
10787 string expr;
10788 bool should_fwd, trivial_forward;
10789
10790 if (shuffle)
10791 {
10792 should_fwd = should_forward(vec0) && should_forward(vec1);
10793 trivial_forward = should_suppress_usage_tracking(vec0) && should_suppress_usage_tracking(vec1);
10794
10795 // Constructor style and shuffling from two different vectors.
10796 SmallVector<string> args;
10797 for (uint32_t i = 0; i < length; i++)
10798 {
10799 if (elems[i] == 0xffffffffu)
10800 {
10801 // Use a constant 0 here.
10802 // We could use the first component or similar, but then we risk propagating
10803					// a value we might not need, and bogging down codegen.
10804 SPIRConstant c;
10805 c.constant_type = type0.parent_type;
10806 assert(type0.parent_type != ID(0));
10807 args.push_back(constant_expression(c));
10808 }
10809 else if (elems[i] >= type0.vecsize)
10810 args.push_back(to_extract_component_expression(vec1, elems[i] - type0.vecsize));
10811 else
10812 args.push_back(to_extract_component_expression(vec0, elems[i]));
10813 }
10814 expr += join(type_to_glsl_constructor(get<SPIRType>(result_type)), "(", merge(args), ")");
10815 }
10816 else
10817 {
10818 should_fwd = should_forward(vec0);
10819 trivial_forward = should_suppress_usage_tracking(vec0);
10820
10821 // We only source from first vector, so can use swizzle.
10822 // If the vector is packed, unpack it before applying a swizzle (needed for MSL)
10823 expr += to_enclosed_unpacked_expression(vec0);
10824 expr += ".";
10825 for (uint32_t i = 0; i < length; i++)
10826 {
10827 assert(elems[i] != 0xffffffffu);
10828 expr += index_to_swizzle(elems[i]);
10829 }
10830
10831 if (backend.swizzle_is_function && length > 1)
10832 expr += "()";
10833 }
10834
10835 // A shuffle is trivial in that it doesn't actually *do* anything.
10836 // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.
10837
10838 emit_op(result_type, id, expr, should_fwd, trivial_forward);
10839
10840 inherit_expression_dependencies(id, vec0);
10841 if (vec0 != vec1)
10842 inherit_expression_dependencies(id, vec1);
10843 break;
10844 }
10845
10846 // ALU
10847 case OpIsNan:
10848 GLSL_UFOP(isnan);
10849 break;
10850
10851 case OpIsInf:
10852 GLSL_UFOP(isinf);
10853 break;
10854
10855 case OpSNegate:
10856 case OpFNegate:
10857 GLSL_UOP(-);
10858 break;
10859
10860 case OpIAdd:
10861 {
10862 // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts.
10863 auto type = get<SPIRType>(ops[0]).basetype;
10864 GLSL_BOP_CAST(+, type);
10865 break;
10866 }
10867
10868 case OpFAdd:
10869 GLSL_BOP(+);
10870 break;
10871
10872 case OpISub:
10873 {
10874 auto type = get<SPIRType>(ops[0]).basetype;
10875 GLSL_BOP_CAST(-, type);
10876 break;
10877 }
10878
10879 case OpFSub:
10880 GLSL_BOP(-);
10881 break;
10882
10883 case OpIMul:
10884 {
10885 auto type = get<SPIRType>(ops[0]).basetype;
10886 GLSL_BOP_CAST(*, type);
10887 break;
10888 }
10889
10890 case OpVectorTimesMatrix:
10891 case OpMatrixTimesVector:
10892 {
10893 // If the matrix needs transpose, just flip the multiply order.
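		// This relies on the GLSL identities M * v == v * transpose(M) and v * M == transpose(M) * v,
		// so a matrix tagged as needing a transpose can simply be multiplied from the other side.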
10894 auto *e = maybe_get<SPIRExpression>(ops[opcode == OpMatrixTimesVector ? 2 : 3]);
10895 if (e && e->need_transpose)
10896 {
10897 e->need_transpose = false;
10898 string expr;
10899
10900 if (opcode == OpMatrixTimesVector)
10901 expr = join(to_enclosed_unpacked_expression(ops[3]), " * ",
10902 enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
10903 else
10904 expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
10905 to_enclosed_unpacked_expression(ops[2]));
10906
10907 bool forward = should_forward(ops[2]) && should_forward(ops[3]);
10908 emit_op(ops[0], ops[1], expr, forward);
10909 e->need_transpose = true;
10910 inherit_expression_dependencies(ops[1], ops[2]);
10911 inherit_expression_dependencies(ops[1], ops[3]);
10912 }
10913 else
10914 GLSL_BOP(*);
10915 break;
10916 }
10917
10918 case OpMatrixTimesMatrix:
10919 {
10920 auto *a = maybe_get<SPIRExpression>(ops[2]);
10921 auto *b = maybe_get<SPIRExpression>(ops[3]);
10922
10923 // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed.
10924 // a^T * b^T = (b * a)^T.
10925 if (a && b && a->need_transpose && b->need_transpose)
10926 {
10927 a->need_transpose = false;
10928 b->need_transpose = false;
10929 auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
10930 enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
10931 bool forward = should_forward(ops[2]) && should_forward(ops[3]);
10932 auto &e = emit_op(ops[0], ops[1], expr, forward);
10933 e.need_transpose = true;
10934 a->need_transpose = true;
10935 b->need_transpose = true;
10936 inherit_expression_dependencies(ops[1], ops[2]);
10937 inherit_expression_dependencies(ops[1], ops[3]);
10938 }
10939 else
10940 GLSL_BOP(*);
10941
10942 break;
10943 }
10944
10945 case OpFMul:
10946 case OpMatrixTimesScalar:
10947 case OpVectorTimesScalar:
10948 GLSL_BOP(*);
10949 break;
10950
10951 case OpOuterProduct:
10952 GLSL_BFOP(outerProduct);
10953 break;
10954
10955 case OpDot:
10956 GLSL_BFOP(dot);
10957 break;
10958
10959 case OpTranspose:
10960 if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00
10961 {
10962 // transpose() is not available, so instead, flip need_transpose,
10963 // which can later be turned into an emulated transpose op by
10964 // convert_row_major_matrix(), if necessary.
10965 uint32_t result_type = ops[0];
10966 uint32_t result_id = ops[1];
10967 uint32_t input = ops[2];
10968
10969 // Force need_transpose to false temporarily to prevent
10970 // to_expression() from doing the transpose.
10971 bool need_transpose = false;
10972 auto *input_e = maybe_get<SPIRExpression>(input);
10973 if (input_e)
10974 swap(need_transpose, input_e->need_transpose);
10975
10976 bool forward = should_forward(input);
10977 auto &e = emit_op(result_type, result_id, to_expression(input), forward);
10978 e.need_transpose = !need_transpose;
10979
10980 // Restore the old need_transpose flag.
10981 if (input_e)
10982 input_e->need_transpose = need_transpose;
10983 }
10984 else
10985 GLSL_UFOP(transpose);
10986 break;
10987
10988 case OpSRem:
10989 {
10990 uint32_t result_type = ops[0];
10991 uint32_t result_id = ops[1];
10992 uint32_t op0 = ops[2];
10993 uint32_t op1 = ops[3];
10994
10995 // Needs special handling.
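		// Rough rationale: OpSRem takes the sign of the dividend (truncated division), which
		// "a - b * (a / b)" reproduces with integer division, whereas GLSL's "%" is not guaranteed
		// to have those semantics for negative operands.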
10996 bool forward = should_forward(op0) && should_forward(op1);
10997 auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(",
10998 to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
10999
11000 emit_op(result_type, result_id, expr, forward);
11001 inherit_expression_dependencies(result_id, op0);
11002 inherit_expression_dependencies(result_id, op1);
11003 break;
11004 }
11005
11006 case OpSDiv:
11007 GLSL_BOP_CAST(/, int_type);
11008 break;
11009
11010 case OpUDiv:
11011 GLSL_BOP_CAST(/, uint_type);
11012 break;
11013
11014 case OpIAddCarry:
11015 case OpISubBorrow:
11016 {
11017 if (options.es && options.version < 310)
11018 SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
11019 else if (!options.es && options.version < 400)
11020 SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
11021
11022 uint32_t result_type = ops[0];
11023 uint32_t result_id = ops[1];
11024 uint32_t op0 = ops[2];
11025 uint32_t op1 = ops[3];
11026 auto &type = get<SPIRType>(result_type);
11027 emit_uninitialized_temporary_expression(result_type, result_id);
11028 const char *op = opcode == OpIAddCarry ? "uaddCarry" : "usubBorrow";
11029
11030 statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", op, "(", to_expression(op0), ", ",
11031 to_expression(op1), ", ", to_expression(result_id), ".", to_member_name(type, 1), ");");
11032 break;
11033 }
11034
11035 case OpUMulExtended:
11036 case OpSMulExtended:
11037 {
11038 if (options.es && options.version < 310)
11039 SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
11040 else if (!options.es && options.version < 400)
11041			SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
11042
11043 uint32_t result_type = ops[0];
11044 uint32_t result_id = ops[1];
11045 uint32_t op0 = ops[2];
11046 uint32_t op1 = ops[3];
11047 auto &type = get<SPIRType>(result_type);
11048 emit_uninitialized_temporary_expression(result_type, result_id);
11049 const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended";
11050
11051 statement(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(result_id), ".",
11052 to_member_name(type, 1), ", ", to_expression(result_id), ".", to_member_name(type, 0), ");");
11053 break;
11054 }
11055
11056 case OpFDiv:
11057 GLSL_BOP(/);
11058 break;
11059
11060 case OpShiftRightLogical:
11061 GLSL_BOP_CAST(>>, uint_type);
11062 break;
11063
11064 case OpShiftRightArithmetic:
11065 GLSL_BOP_CAST(>>, int_type);
11066 break;
11067
11068 case OpShiftLeftLogical:
11069 {
11070 auto type = get<SPIRType>(ops[0]).basetype;
11071 GLSL_BOP_CAST(<<, type);
11072 break;
11073 }
11074
11075 case OpBitwiseOr:
11076 {
11077 auto type = get<SPIRType>(ops[0]).basetype;
11078 GLSL_BOP_CAST(|, type);
11079 break;
11080 }
11081
11082 case OpBitwiseXor:
11083 {
11084 auto type = get<SPIRType>(ops[0]).basetype;
11085 GLSL_BOP_CAST(^, type);
11086 break;
11087 }
11088
11089 case OpBitwiseAnd:
11090 {
11091 auto type = get<SPIRType>(ops[0]).basetype;
11092 GLSL_BOP_CAST(&, type);
11093 break;
11094 }
11095
11096 case OpNot:
11097 GLSL_UOP(~);
11098 break;
11099
11100 case OpUMod:
11101 GLSL_BOP_CAST(%, uint_type);
11102 break;
11103
11104 case OpSMod:
11105 GLSL_BOP_CAST(%, int_type);
11106 break;
11107
11108 case OpFMod:
11109 GLSL_BFOP(mod);
11110 break;
11111
11112 case OpFRem:
11113 {
11114 if (is_legacy())
11115 SPIRV_CROSS_THROW("OpFRem requires trunc() and is only supported on non-legacy targets. A workaround is "
11116 "needed for legacy.");
11117
11118 uint32_t result_type = ops[0];
11119 uint32_t result_id = ops[1];
11120 uint32_t op0 = ops[2];
11121 uint32_t op1 = ops[3];
11122
11123 // Needs special handling.
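		// Rough rationale: GLSL's mod(x, y) == x - y * floor(x / y) follows the sign of the divisor
		// (matching OpFMod), while OpFRem follows the sign of the dividend, i.e. x - y * trunc(x / y),
		// hence the explicit trunc() here.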
11124 bool forward = should_forward(op0) && should_forward(op1);
11125 auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "trunc(",
11126 to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
11127
11128 emit_op(result_type, result_id, expr, forward);
11129 inherit_expression_dependencies(result_id, op0);
11130 inherit_expression_dependencies(result_id, op1);
11131 break;
11132 }
11133
11134 // Relational
11135 case OpAny:
11136 GLSL_UFOP(any);
11137 break;
11138
11139 case OpAll:
11140 GLSL_UFOP(all);
11141 break;
11142
11143 case OpSelect:
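		// Note the argument order: SPIR-V OpSelect takes (condition, true_value, false_value),
		// while the operands here are passed as (false_value, true_value, condition) for mix()-style selection.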
11144 emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]);
11145 break;
11146
11147 case OpLogicalOr:
11148 {
11149 // No vector variant in GLSL for logical OR.
11150 auto result_type = ops[0];
11151 auto id = ops[1];
11152 auto &type = get<SPIRType>(result_type);
11153
11154 if (type.vecsize > 1)
11155 emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false, SPIRType::Unknown);
11156 else
11157 GLSL_BOP(||);
11158 break;
11159 }
11160
11161 case OpLogicalAnd:
11162 {
11163 // No vector variant in GLSL for logical AND.
11164 auto result_type = ops[0];
11165 auto id = ops[1];
11166 auto &type = get<SPIRType>(result_type);
11167
11168 if (type.vecsize > 1)
11169 emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false, SPIRType::Unknown);
11170 else
11171 GLSL_BOP(&&);
11172 break;
11173 }
11174
11175 case OpLogicalNot:
11176 {
11177 auto &type = get<SPIRType>(ops[0]);
11178 if (type.vecsize > 1)
11179 GLSL_UFOP(not );
11180 else
11181 GLSL_UOP(!);
11182 break;
11183 }
11184
11185 case OpIEqual:
11186 {
11187 if (expression_type(ops[2]).vecsize > 1)
11188 GLSL_BFOP_CAST(equal, int_type);
11189 else
11190 GLSL_BOP_CAST(==, int_type);
11191 break;
11192 }
11193
11194 case OpLogicalEqual:
11195 case OpFOrdEqual:
11196 {
11197 if (expression_type(ops[2]).vecsize > 1)
11198 GLSL_BFOP(equal);
11199 else
11200 GLSL_BOP(==);
11201 break;
11202 }
11203
11204 case OpINotEqual:
11205 {
11206 if (expression_type(ops[2]).vecsize > 1)
11207 GLSL_BFOP_CAST(notEqual, int_type);
11208 else
11209 GLSL_BOP_CAST(!=, int_type);
11210 break;
11211 }
11212
11213 case OpLogicalNotEqual:
11214 case OpFOrdNotEqual:
11215 {
11216 if (expression_type(ops[2]).vecsize > 1)
11217 GLSL_BFOP(notEqual);
11218 else
11219 GLSL_BOP(!=);
11220 break;
11221 }
11222
11223 case OpUGreaterThan:
11224 case OpSGreaterThan:
11225 {
11226 auto type = opcode == OpUGreaterThan ? uint_type : int_type;
11227 if (expression_type(ops[2]).vecsize > 1)
11228 GLSL_BFOP_CAST(greaterThan, type);
11229 else
11230 GLSL_BOP_CAST(>, type);
11231 break;
11232 }
11233
11234 case OpFOrdGreaterThan:
11235 {
11236 if (expression_type(ops[2]).vecsize > 1)
11237 GLSL_BFOP(greaterThan);
11238 else
11239 GLSL_BOP(>);
11240 break;
11241 }
11242
11243 case OpUGreaterThanEqual:
11244 case OpSGreaterThanEqual:
11245 {
11246 auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type;
11247 if (expression_type(ops[2]).vecsize > 1)
11248 GLSL_BFOP_CAST(greaterThanEqual, type);
11249 else
11250 GLSL_BOP_CAST(>=, type);
11251 break;
11252 }
11253
11254 case OpFOrdGreaterThanEqual:
11255 {
11256 if (expression_type(ops[2]).vecsize > 1)
11257 GLSL_BFOP(greaterThanEqual);
11258 else
11259 GLSL_BOP(>=);
11260 break;
11261 }
11262
11263 case OpULessThan:
11264 case OpSLessThan:
11265 {
11266 auto type = opcode == OpULessThan ? uint_type : int_type;
11267 if (expression_type(ops[2]).vecsize > 1)
11268 GLSL_BFOP_CAST(lessThan, type);
11269 else
11270 GLSL_BOP_CAST(<, type);
11271 break;
11272 }
11273
11274 case OpFOrdLessThan:
11275 {
11276 if (expression_type(ops[2]).vecsize > 1)
11277 GLSL_BFOP(lessThan);
11278 else
11279 GLSL_BOP(<);
11280 break;
11281 }
11282
11283 case OpULessThanEqual:
11284 case OpSLessThanEqual:
11285 {
11286 auto type = opcode == OpULessThanEqual ? uint_type : int_type;
11287 if (expression_type(ops[2]).vecsize > 1)
11288 GLSL_BFOP_CAST(lessThanEqual, type);
11289 else
11290 GLSL_BOP_CAST(<=, type);
11291 break;
11292 }
11293
11294 case OpFOrdLessThanEqual:
11295 {
11296 if (expression_type(ops[2]).vecsize > 1)
11297 GLSL_BFOP(lessThanEqual);
11298 else
11299 GLSL_BOP(<=);
11300 break;
11301 }
11302
11303 // Conversion
11304 case OpSConvert:
11305 case OpConvertSToF:
11306 case OpUConvert:
11307 case OpConvertUToF:
11308 {
11309 auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? int_type : uint_type;
11310 uint32_t result_type = ops[0];
11311 uint32_t id = ops[1];
11312
11313 auto &type = get<SPIRType>(result_type);
11314 auto &arg_type = expression_type(ops[2]);
11315 auto func = type_to_glsl_constructor(type);
11316
11317 if (arg_type.width < type.width || type_is_floating_point(type))
11318 emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), input_type, type.basetype);
11319 else
11320 emit_unary_func_op(result_type, id, ops[2], func.c_str());
11321 break;
11322 }
11323
11324 case OpConvertFToU:
11325 case OpConvertFToS:
11326 {
11327 // Cast to expected arithmetic type, then potentially bitcast away to desired signedness.
11328 uint32_t result_type = ops[0];
11329 uint32_t id = ops[1];
11330 auto &type = get<SPIRType>(result_type);
11331 auto expected_type = type;
11332 auto &float_type = expression_type(ops[2]);
11333 expected_type.basetype =
11334 opcode == OpConvertFToS ? to_signed_basetype(type.width) : to_unsigned_basetype(type.width);
11335
11336 auto func = type_to_glsl_constructor(expected_type);
11337 emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), float_type.basetype, expected_type.basetype);
11338 break;
11339 }
11340
11341 case OpFConvert:
11342 {
11343 uint32_t result_type = ops[0];
11344 uint32_t id = ops[1];
11345
11346 auto func = type_to_glsl_constructor(get<SPIRType>(result_type));
11347 emit_unary_func_op(result_type, id, ops[2], func.c_str());
11348 break;
11349 }
11350
11351 case OpBitcast:
11352 {
11353 uint32_t result_type = ops[0];
11354 uint32_t id = ops[1];
11355 uint32_t arg = ops[2];
11356
11357 if (!emit_complex_bitcast(result_type, id, arg))
11358 {
11359 auto op = bitcast_glsl_op(get<SPIRType>(result_type), expression_type(arg));
11360 emit_unary_func_op(result_type, id, arg, op.c_str());
11361 }
11362 break;
11363 }
11364
11365 case OpQuantizeToF16:
11366 {
11367 uint32_t result_type = ops[0];
11368 uint32_t id = ops[1];
11369 uint32_t arg = ops[2];
11370
11371 string op;
11372 auto &type = get<SPIRType>(result_type);
11373
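		// There is no direct quantize operation in GLSL, so round-trip through 16-bit packing:
		// e.g. a scalar argument v is emitted roughly as unpackHalf2x16(packHalf2x16(vec2(v))).x,
		// and wider vectors quantize two components per pack/unpack pair as built below.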
11374 switch (type.vecsize)
11375 {
11376 case 1:
11377 op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x");
11378 break;
11379 case 2:
11380 op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))");
11381 break;
11382 case 3:
11383 {
11384 auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
11385 auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x");
11386 op = join("vec3(", op0, ", ", op1, ")");
11387 break;
11388 }
11389 case 4:
11390 {
11391 auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
11392 auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))");
11393 op = join("vec4(", op0, ", ", op1, ")");
11394 break;
11395 }
11396 default:
11397 SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16.");
11398 }
11399
11400 emit_op(result_type, id, op, should_forward(arg));
11401 inherit_expression_dependencies(id, arg);
11402 break;
11403 }
11404
11405 // Derivatives
11406 case OpDPdx:
11407 GLSL_UFOP(dFdx);
11408 if (is_legacy_es())
11409 require_extension_internal("GL_OES_standard_derivatives");
11410 register_control_dependent_expression(ops[1]);
11411 break;
11412
11413 case OpDPdy:
11414 GLSL_UFOP(dFdy);
11415 if (is_legacy_es())
11416 require_extension_internal("GL_OES_standard_derivatives");
11417 register_control_dependent_expression(ops[1]);
11418 break;
11419
11420 case OpDPdxFine:
11421 GLSL_UFOP(dFdxFine);
11422 if (options.es)
11423 {
11424 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11425 }
11426 if (options.version < 450)
11427 require_extension_internal("GL_ARB_derivative_control");
11428 register_control_dependent_expression(ops[1]);
11429 break;
11430
11431 case OpDPdyFine:
11432 GLSL_UFOP(dFdyFine);
11433 if (options.es)
11434 {
11435 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11436 }
11437 if (options.version < 450)
11438 require_extension_internal("GL_ARB_derivative_control");
11439 register_control_dependent_expression(ops[1]);
11440 break;
11441
11442 case OpDPdxCoarse:
11443 if (options.es)
11444 {
11445 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11446 }
11447 GLSL_UFOP(dFdxCoarse);
11448 if (options.version < 450)
11449 require_extension_internal("GL_ARB_derivative_control");
11450 register_control_dependent_expression(ops[1]);
11451 break;
11452
11453 case OpDPdyCoarse:
11454 GLSL_UFOP(dFdyCoarse);
11455 if (options.es)
11456 {
11457 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11458 }
11459 if (options.version < 450)
11460 require_extension_internal("GL_ARB_derivative_control");
11461 register_control_dependent_expression(ops[1]);
11462 break;
11463
11464 case OpFwidth:
11465 GLSL_UFOP(fwidth);
11466 if (is_legacy_es())
11467 require_extension_internal("GL_OES_standard_derivatives");
11468 register_control_dependent_expression(ops[1]);
11469 break;
11470
11471 case OpFwidthCoarse:
11472 GLSL_UFOP(fwidthCoarse);
11473 if (options.es)
11474 {
11475 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11476 }
11477 if (options.version < 450)
11478 require_extension_internal("GL_ARB_derivative_control");
11479 register_control_dependent_expression(ops[1]);
11480 break;
11481
11482 case OpFwidthFine:
11483 GLSL_UFOP(fwidthFine);
11484 if (options.es)
11485 {
11486 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11487 }
11488 if (options.version < 450)
11489 require_extension_internal("GL_ARB_derivative_control");
11490 register_control_dependent_expression(ops[1]);
11491 break;
11492
11493 // Bitfield
11494 case OpBitFieldInsert:
11495 {
11496 emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "bitfieldInsert", SPIRType::Int);
11497 break;
11498 }
11499
11500 case OpBitFieldSExtract:
11501 {
11502 emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", int_type, int_type,
11503 SPIRType::Int, SPIRType::Int);
11504 break;
11505 }
11506
11507 case OpBitFieldUExtract:
11508 {
11509 emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", uint_type, uint_type,
11510 SPIRType::Int, SPIRType::Int);
11511 break;
11512 }
11513
11514 case OpBitReverse:
11515 // BitReverse does not have issues with sign since result type must match input type.
11516 GLSL_UFOP(bitfieldReverse);
11517 break;
11518
11519 case OpBitCount:
11520 {
11521 auto basetype = expression_type(ops[2]).basetype;
11522 emit_unary_func_op_cast(ops[0], ops[1], ops[2], "bitCount", basetype, int_type);
11523 break;
11524 }
11525
11526 // Atomics
11527 case OpAtomicExchange:
11528 {
11529 uint32_t result_type = ops[0];
11530 uint32_t id = ops[1];
11531 uint32_t ptr = ops[2];
11532 // Ignore semantics for now, probably only relevant to CL.
11533 uint32_t val = ops[5];
11534 const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
11535
11536 emit_atomic_func_op(result_type, id, ptr, val, op);
11537 break;
11538 }
11539
11540 case OpAtomicCompareExchange:
11541 {
11542 uint32_t result_type = ops[0];
11543 uint32_t id = ops[1];
11544 uint32_t ptr = ops[2];
11545 uint32_t val = ops[6];
11546 uint32_t comp = ops[7];
11547 const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap";
11548
11549 emit_atomic_func_op(result_type, id, ptr, comp, val, op);
11550 break;
11551 }
11552
11553 case OpAtomicLoad:
11554 {
11555		// In plain GLSL, we have no atomic loads, so emulate this by atomically adding 0 and hoping the compiler figures it out.
11556 // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
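		// For example, an atomic load from an unsigned SSBO member u is emitted roughly as atomicAdd(u, 0u),
		// with the result forced into a temporary.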
11557 auto &type = expression_type(ops[2]);
11558 forced_temporaries.insert(ops[1]);
11559 bool atomic_image = check_atomic_image(ops[2]);
11560 bool unsigned_type = (type.basetype == SPIRType::UInt) ||
11561 (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
11562 const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
11563 const char *increment = unsigned_type ? "0u" : "0";
11564 emit_op(ops[0], ops[1],
11565 join(op, "(",
11566 to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false);
11567 flush_all_atomic_capable_variables();
11568 break;
11569 }
11570
11571 case OpAtomicStore:
11572 {
11573 // In plain GLSL, we have no atomic stores, so emulate this with an atomic exchange where we don't consume the result.
11574 // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
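		// For example, an atomic store of value v through pointer p is emitted as a statement
		// like atomicExchange(p, v); with the return value simply ignored.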
11575 uint32_t ptr = ops[0];
11576 // Ignore semantics for now, probably only relevant to CL.
11577 uint32_t val = ops[3];
11578 const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
11579 statement(op, "(", to_non_uniform_aware_expression(ptr), ", ", to_expression(val), ");");
11580 flush_all_atomic_capable_variables();
11581 break;
11582 }
11583
11584 case OpAtomicIIncrement:
11585 case OpAtomicIDecrement:
11586 {
11587 forced_temporaries.insert(ops[1]);
11588 auto &type = expression_type(ops[2]);
11589 if (type.storage == StorageClassAtomicCounter)
11590 {
11591			// Legacy GLSL atomic counter (atomic_uint) path; unclear whether supporting this is still relevant.
11592 if (opcode == OpAtomicIIncrement)
11593 GLSL_UFOP(atomicCounterIncrement);
11594 else
11595 GLSL_UFOP(atomicCounterDecrement);
11596 }
11597 else
11598 {
11599 bool atomic_image = check_atomic_image(ops[2]);
11600 bool unsigned_type = (type.basetype == SPIRType::UInt) ||
11601 (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
11602 const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
11603
11604 const char *increment = nullptr;
11605 if (opcode == OpAtomicIIncrement && unsigned_type)
11606 increment = "1u";
11607 else if (opcode == OpAtomicIIncrement)
11608 increment = "1";
11609 else if (unsigned_type)
11610 increment = "uint(-1)";
11611 else
11612 increment = "-1";
11613
11614 emit_op(ops[0], ops[1],
11615 join(op, "(", to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false);
11616 }
11617
11618 flush_all_atomic_capable_variables();
11619 break;
11620 }
11621
11622 case OpAtomicIAdd:
11623 {
11624 const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
11625 emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
11626 break;
11627 }
11628
11629 case OpAtomicISub:
11630 {
11631 const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
11632 forced_temporaries.insert(ops[1]);
11633 auto expr = join(op, "(", to_non_uniform_aware_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")");
11634 emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5]));
11635 flush_all_atomic_capable_variables();
11636 break;
11637 }
11638
11639 case OpAtomicSMin:
11640 case OpAtomicUMin:
11641 {
11642 const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin";
11643 emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
11644 break;
11645 }
11646
11647 case OpAtomicSMax:
11648 case OpAtomicUMax:
11649 {
11650 const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax";
11651 emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
11652 break;
11653 }
11654
11655 case OpAtomicAnd:
11656 {
11657 const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd";
11658 emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
11659 break;
11660 }
11661
11662 case OpAtomicOr:
11663 {
11664 const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr";
11665 emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
11666 break;
11667 }
11668
11669 case OpAtomicXor:
11670 {
11671 const char *op = check_atomic_image(ops[2]) ? "imageAtomicXor" : "atomicXor";
11672 emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
11673 break;
11674 }
11675
11676 // Geometry shaders
11677 case OpEmitVertex:
11678 statement("EmitVertex();");
11679 break;
11680
11681 case OpEndPrimitive:
11682 statement("EndPrimitive();");
11683 break;
11684
11685 case OpEmitStreamVertex:
11686 {
11687 if (options.es)
11688 SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
11689 else if (!options.es && options.version < 400)
11690 SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
11691
11692 auto stream_expr = to_expression(ops[0]);
11693 if (expression_type(ops[0]).basetype != SPIRType::Int)
11694 stream_expr = join("int(", stream_expr, ")");
11695 statement("EmitStreamVertex(", stream_expr, ");");
11696 break;
11697 }
11698
11699 case OpEndStreamPrimitive:
11700 {
11701 if (options.es)
11702 SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
11703 else if (!options.es && options.version < 400)
11704 SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
11705
11706 auto stream_expr = to_expression(ops[0]);
11707 if (expression_type(ops[0]).basetype != SPIRType::Int)
11708 stream_expr = join("int(", stream_expr, ")");
11709 statement("EndStreamPrimitive(", stream_expr, ");");
11710 break;
11711 }
11712
11713 // Textures
11714 case OpImageSampleExplicitLod:
11715 case OpImageSampleProjExplicitLod:
11716 case OpImageSampleDrefExplicitLod:
11717 case OpImageSampleProjDrefExplicitLod:
11718 case OpImageSampleImplicitLod:
11719 case OpImageSampleProjImplicitLod:
11720 case OpImageSampleDrefImplicitLod:
11721 case OpImageSampleProjDrefImplicitLod:
11722 case OpImageFetch:
11723 case OpImageGather:
11724 case OpImageDrefGather:
11725		// Gets a bit hairy, so handle this in a separate function.
11726 emit_texture_op(instruction, false);
11727 break;
11728
11729 case OpImageSparseSampleExplicitLod:
11730 case OpImageSparseSampleProjExplicitLod:
11731 case OpImageSparseSampleDrefExplicitLod:
11732 case OpImageSparseSampleProjDrefExplicitLod:
11733 case OpImageSparseSampleImplicitLod:
11734 case OpImageSparseSampleProjImplicitLod:
11735 case OpImageSparseSampleDrefImplicitLod:
11736 case OpImageSparseSampleProjDrefImplicitLod:
11737 case OpImageSparseFetch:
11738 case OpImageSparseGather:
11739 case OpImageSparseDrefGather:
11740		// Gets a bit hairy, so handle this in a separate function.
11741 emit_texture_op(instruction, true);
11742 break;
11743
11744 case OpImageSparseTexelsResident:
11745 if (options.es)
11746 SPIRV_CROSS_THROW("Sparse feedback is not supported in GLSL.");
11747 require_extension_internal("GL_ARB_sparse_texture2");
11748 emit_unary_func_op_cast(ops[0], ops[1], ops[2], "sparseTexelsResidentARB", int_type, SPIRType::Boolean);
11749 break;
11750
11751 case OpImage:
11752 {
11753 uint32_t result_type = ops[0];
11754 uint32_t id = ops[1];
11755
11756 // Suppress usage tracking.
11757 auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true);
11758
11759 // When using the image, we need to know which variable it is actually loaded from.
11760 auto *var = maybe_get_backing_variable(ops[2]);
11761 e.loaded_from = var ? var->self : ID(0);
11762 break;
11763 }
11764
11765 case OpImageQueryLod:
11766 {
11767 const char *op = nullptr;
11768 if (!options.es && options.version < 400)
11769 {
11770 require_extension_internal("GL_ARB_texture_query_lod");
11771 // For some reason, the ARB spec is all-caps.
11772 op = "textureQueryLOD";
11773 }
11774 else if (options.es)
11775 SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile.");
11776 else
11777 op = "textureQueryLod";
11778
11779 auto sampler_expr = to_expression(ops[2]);
11780 if (has_decoration(ops[2], DecorationNonUniform))
11781 {
11782 if (maybe_get_backing_variable(ops[2]))
11783 convert_non_uniform_expression(sampler_expr, ops[2]);
11784 else if (*backend.nonuniform_qualifier != '\0')
11785 sampler_expr = join(backend.nonuniform_qualifier, "(", sampler_expr, ")");
11786 }
11787
11788 bool forward = should_forward(ops[3]);
11789 emit_op(ops[0], ops[1],
11790 join(op, "(", sampler_expr, ", ", to_unpacked_expression(ops[3]), ")"),
11791 forward);
11792 inherit_expression_dependencies(ops[1], ops[2]);
11793 inherit_expression_dependencies(ops[1], ops[3]);
11794 register_control_dependent_expression(ops[1]);
11795 break;
11796 }
11797
11798 case OpImageQueryLevels:
11799 {
11800 uint32_t result_type = ops[0];
11801 uint32_t id = ops[1];
11802
11803 if (!options.es && options.version < 430)
11804 require_extension_internal("GL_ARB_texture_query_levels");
11805 if (options.es)
11806 SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile.");
11807
11808 auto expr = join("textureQueryLevels(", convert_separate_image_to_expression(ops[2]), ")");
11809 auto &restype = get<SPIRType>(ops[0]);
11810 expr = bitcast_expression(restype, SPIRType::Int, expr);
11811 emit_op(result_type, id, expr, true);
11812 break;
11813 }
11814
11815 case OpImageQuerySamples:
11816 {
11817 auto &type = expression_type(ops[2]);
11818 uint32_t result_type = ops[0];
11819 uint32_t id = ops[1];
11820
11821 string expr;
11822 if (type.image.sampled == 2)
11823 expr = join("imageSamples(", to_non_uniform_aware_expression(ops[2]), ")");
11824 else
11825 expr = join("textureSamples(", convert_separate_image_to_expression(ops[2]), ")");
11826
11827 auto &restype = get<SPIRType>(ops[0]);
11828 expr = bitcast_expression(restype, SPIRType::Int, expr);
11829 emit_op(result_type, id, expr, true);
11830 break;
11831 }
11832
11833 case OpSampledImage:
11834 {
11835 uint32_t result_type = ops[0];
11836 uint32_t id = ops[1];
11837 emit_sampled_image_op(result_type, id, ops[2], ops[3]);
11838 inherit_expression_dependencies(id, ops[2]);
11839 inherit_expression_dependencies(id, ops[3]);
11840 break;
11841 }
11842
11843 case OpImageQuerySizeLod:
11844 {
11845 uint32_t result_type = ops[0];
11846 uint32_t id = ops[1];
11847 uint32_t img = ops[2];
11848
11849 std::string fname = "textureSize";
11850 if (is_legacy_desktop())
11851 {
11852 auto &type = expression_type(img);
11853 auto &imgtype = get<SPIRType>(type.self);
11854 fname = legacy_tex_op(fname, imgtype, img);
11855 }
11856 else if (is_legacy_es())
11857 SPIRV_CROSS_THROW("textureSize is not supported in ESSL 100.");
11858
11859 auto expr = join(fname, "(", convert_separate_image_to_expression(img), ", ",
11860 bitcast_expression(SPIRType::Int, ops[3]), ")");
11861 auto &restype = get<SPIRType>(ops[0]);
11862 expr = bitcast_expression(restype, SPIRType::Int, expr);
11863 emit_op(result_type, id, expr, true);
11864 break;
11865 }
11866
11867 // Image load/store
11868 case OpImageRead:
11869 case OpImageSparseRead:
11870 {
11871 // We added Nonreadable speculatively to the OpImage variable due to glslangValidator
11872 // not adding the proper qualifiers.
11873 // If it turns out we need to read the image after all, remove the qualifier and recompile.
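		// In practice, clearing NonReadable below drops the speculative "writeonly" qualifier from the
		// image declaration on the recompile pass, so the read emitted further down becomes legal GLSL.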
11874 auto *var = maybe_get_backing_variable(ops[2]);
11875 if (var)
11876 {
11877 auto &flags = ir.meta[var->self].decoration.decoration_flags;
11878 if (flags.get(DecorationNonReadable))
11879 {
11880 flags.clear(DecorationNonReadable);
11881 force_recompile();
11882 }
11883 }
11884
11885 uint32_t result_type = ops[0];
11886 uint32_t id = ops[1];
11887
11888 bool pure;
11889 string imgexpr;
11890 auto &type = expression_type(ops[2]);
11891
11892 if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code
11893 {
11894 if (type.image.ms)
11895 SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible.");
11896
11897 auto itr =
11898 find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; });
11899
11900 if (itr == end(pls_inputs))
11901 {
11902 // For non-PLS inputs, we rely on subpass type remapping information to get it right
11903 // since ImageRead always returns 4-component vectors and the backing type is opaque.
11904 if (!var->remapped_components)
11905 SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly.");
11906 imgexpr = remap_swizzle(get<SPIRType>(result_type), var->remapped_components, to_expression(ops[2]));
11907 }
11908 else
11909 {
11910				// A PLS input could have a different number of components than what the SPIR-V expects, so swizzle to
11911				// the appropriate vector size.
11912 uint32_t components = pls_format_to_components(itr->format);
11913 imgexpr = remap_swizzle(get<SPIRType>(result_type), components, to_expression(ops[2]));
11914 }
11915 pure = true;
11916 }
11917 else if (type.image.dim == DimSubpassData)
11918 {
11919 if (var && subpass_input_is_framebuffer_fetch(var->self))
11920 {
11921 imgexpr = to_expression(var->self);
11922 }
11923 else if (options.vulkan_semantics)
11924 {
11925 // With Vulkan semantics, use the proper Vulkan GLSL construct.
11926 if (type.image.ms)
11927 {
11928 uint32_t operands = ops[4];
11929 if (operands != ImageOperandsSampleMask || length != 6)
11930 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
11931 "operand mask was used.");
11932
11933 uint32_t samples = ops[5];
11934 imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ", ", to_expression(samples), ")");
11935 }
11936 else
11937 imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ")");
11938 }
11939 else
11940 {
11941 if (type.image.ms)
11942 {
11943 uint32_t operands = ops[4];
11944 if (operands != ImageOperandsSampleMask || length != 6)
11945 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
11946 "operand mask was used.");
11947
11948 uint32_t samples = ops[5];
11949 imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ",
11950 to_expression(samples), ")");
11951 }
11952 else
11953 {
11954 // Implement subpass loads via texture barrier style sampling.
11955 imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)");
11956 }
11957 }
11958 imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
11959 pure = true;
11960 }
11961 else
11962 {
11963 bool sparse = opcode == OpImageSparseRead;
11964 uint32_t sparse_code_id = 0;
11965 uint32_t sparse_texel_id = 0;
11966 if (sparse)
11967 emit_sparse_feedback_temporaries(ops[0], ops[1], sparse_code_id, sparse_texel_id);
11968
11969 // imageLoad only accepts int coords, not uint.
11970 auto coord_expr = to_expression(ops[3]);
11971 auto target_coord_type = expression_type(ops[3]);
11972 target_coord_type.basetype = SPIRType::Int;
11973 coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
11974
11975 // Plain image load/store.
11976 if (sparse)
11977 {
11978 if (type.image.ms)
11979 {
11980 uint32_t operands = ops[4];
11981 if (operands != ImageOperandsSampleMask || length != 6)
11982 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
11983 "operand mask was used.");
11984
11985 uint32_t samples = ops[5];
11986 statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ",
11987 coord_expr, ", ", to_expression(samples), ", ", to_expression(sparse_texel_id), ");");
11988 }
11989 else
11990 {
11991 statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ",
11992 coord_expr, ", ", to_expression(sparse_texel_id), ");");
11993 }
11994 imgexpr = join(type_to_glsl(get<SPIRType>(result_type)), "(", to_expression(sparse_code_id), ", ",
11995 to_expression(sparse_texel_id), ")");
11996 }
11997 else
11998 {
11999 if (type.image.ms)
12000 {
12001 uint32_t operands = ops[4];
12002 if (operands != ImageOperandsSampleMask || length != 6)
12003 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
12004 "operand mask was used.");
12005
12006 uint32_t samples = ops[5];
12007 imgexpr =
12008 join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")");
12009 }
12010 else
12011 imgexpr = join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ")");
12012 }
12013
12014 if (!sparse)
12015 imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
12016 pure = false;
12017 }
12018
12019 if (var && var->forwardable)
12020 {
12021 bool forward = forced_temporaries.find(id) == end(forced_temporaries);
12022 auto &e = emit_op(result_type, id, imgexpr, forward);
12023
12024 // We only need to track dependencies if we're reading from image load/store.
12025 if (!pure)
12026 {
12027 e.loaded_from = var->self;
12028 if (forward)
12029 var->dependees.push_back(id);
12030 }
12031 }
12032 else
12033 emit_op(result_type, id, imgexpr, false);
12034
12035 inherit_expression_dependencies(id, ops[2]);
12036 if (type.image.ms)
12037 inherit_expression_dependencies(id, ops[5]);
12038 break;
12039 }
12040
12041 case OpImageTexelPointer:
12042 {
12043 uint32_t result_type = ops[0];
12044 uint32_t id = ops[1];
12045
12046 auto coord_expr = to_expression(ops[3]);
12047 auto target_coord_type = expression_type(ops[3]);
12048 target_coord_type.basetype = SPIRType::Int;
12049 coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
12050
12051 auto expr = join(to_expression(ops[2]), ", ", coord_expr);
12052 auto &e = set<SPIRExpression>(id, expr, result_type, true);
12053
12054 // When using the pointer, we need to know which variable it is actually loaded from.
12055 auto *var = maybe_get_backing_variable(ops[2]);
12056 e.loaded_from = var ? var->self : ID(0);
12057 inherit_expression_dependencies(id, ops[3]);
12058 break;
12059 }
12060
12061 case OpImageWrite:
12062 {
12063 // We added Nonwritable speculatively to the OpImage variable due to glslangValidator
12064 // not adding the proper qualifiers.
12065 // If it turns out we need to write to the image after all, remove the qualifier and recompile.
12066 auto *var = maybe_get_backing_variable(ops[0]);
12067 if (var)
12068 {
12069 auto &flags = ir.meta[var->self].decoration.decoration_flags;
12070 if (flags.get(DecorationNonWritable))
12071 {
12072 flags.clear(DecorationNonWritable);
12073 force_recompile();
12074 }
12075 }
12076
12077 auto &type = expression_type(ops[0]);
12078 auto &value_type = expression_type(ops[2]);
12079 auto store_type = value_type;
12080 store_type.vecsize = 4;
12081
12082 // imageStore only accepts int coords, not uint.
12083 auto coord_expr = to_expression(ops[1]);
12084 auto target_coord_type = expression_type(ops[1]);
12085 target_coord_type.basetype = SPIRType::Int;
12086 coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr);
12087
12088 if (type.image.ms)
12089 {
12090 uint32_t operands = ops[3];
12091 if (operands != ImageOperandsSampleMask || length != 5)
12092 SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used.");
12093 uint32_t samples = ops[4];
12094 statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ",
12095 remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
12096 }
12097 else
12098 statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ",
12099 remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
12100
12101 if (var && variable_storage_is_aliased(*var))
12102 flush_all_aliased_variables();
12103 break;
12104 }
12105
12106 case OpImageQuerySize:
12107 {
12108 auto &type = expression_type(ops[2]);
12109 uint32_t result_type = ops[0];
12110 uint32_t id = ops[1];
12111
12112 if (type.basetype == SPIRType::Image)
12113 {
12114 string expr;
12115 if (type.image.sampled == 2)
12116 {
12117 if (!options.es && options.version < 430)
12118 require_extension_internal("GL_ARB_shader_image_size");
12119 else if (options.es && options.version < 310)
12120 SPIRV_CROSS_THROW("At least ESSL 3.10 required for imageSize.");
12121
12122 // The size of an image is always constant.
12123 expr = join("imageSize(", to_non_uniform_aware_expression(ops[2]), ")");
12124 }
12125 else
12126 {
12127 // This path is hit for samplerBuffers and multisampled images which do not have LOD.
12128 std::string fname = "textureSize";
12129 if (is_legacy())
12130 {
12131 auto &imgtype = get<SPIRType>(type.self);
12132 fname = legacy_tex_op(fname, imgtype, ops[2]);
12133 }
12134 expr = join(fname, "(", convert_separate_image_to_expression(ops[2]), ")");
12135 }
12136
12137 auto &restype = get<SPIRType>(ops[0]);
12138 expr = bitcast_expression(restype, SPIRType::Int, expr);
12139 emit_op(result_type, id, expr, true);
12140 }
12141 else
12142 SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize.");
12143 break;
12144 }
12145
12146 // Compute
12147 case OpControlBarrier:
12148 case OpMemoryBarrier:
12149 {
12150 uint32_t execution_scope = 0;
12151 uint32_t memory;
12152 uint32_t semantics;
12153
12154 if (opcode == OpMemoryBarrier)
12155 {
12156 memory = evaluate_constant_u32(ops[0]);
12157 semantics = evaluate_constant_u32(ops[1]);
12158 }
12159 else
12160 {
12161 execution_scope = evaluate_constant_u32(ops[0]);
12162 memory = evaluate_constant_u32(ops[1]);
12163 semantics = evaluate_constant_u32(ops[2]);
12164 }
12165
12166 if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup)
12167 {
12168 // OpControlBarrier with ScopeSubgroup is subgroupBarrier()
12169 if (opcode != OpControlBarrier)
12170 {
12171 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMemBarrier);
12172 }
12173 else
12174 {
12175 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBarrier);
12176 }
12177 }
12178
12179 if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl)
12180 {
12181			// Tessellation control shaders only have barrier(), and it implies memory barriers.
12182 if (opcode == OpControlBarrier)
12183 statement("barrier();");
12184 break;
12185 }
12186
12187		// We only care about these flags; acquire/release and friends are not relevant to GLSL.
12188 semantics = mask_relevant_memory_semantics(semantics);
12189
12190 if (opcode == OpMemoryBarrier)
12191 {
12192 // If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier
12193 // does what we need, so we avoid redundant barriers.
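			// For example, a workgroup-scope OpMemoryBarrier directly followed by an OpControlBarrier with
			// matching semantics would otherwise emit a redundant memoryBarrierShared() right before
			// barrier(), so we skip the memory barrier here and let the control barrier cover it.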
12194 const Instruction *next = get_next_instruction_in_block(instruction);
12195 if (next && next->op == OpControlBarrier)
12196 {
12197 auto *next_ops = stream(*next);
12198 uint32_t next_memory = evaluate_constant_u32(next_ops[1]);
12199 uint32_t next_semantics = evaluate_constant_u32(next_ops[2]);
12200 next_semantics = mask_relevant_memory_semantics(next_semantics);
12201
12202 bool memory_scope_covered = false;
12203 if (next_memory == memory)
12204 memory_scope_covered = true;
12205 else if (next_semantics == MemorySemanticsWorkgroupMemoryMask)
12206 {
12207 // If we only care about workgroup memory, either Device or Workgroup scope is fine,
12208 // scope does not have to match.
12209 if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) &&
12210 (memory == ScopeDevice || memory == ScopeWorkgroup))
12211 {
12212 memory_scope_covered = true;
12213 }
12214 }
12215 else if (memory == ScopeWorkgroup && next_memory == ScopeDevice)
12216 {
12217 // The control barrier has device scope, but the memory barrier just has workgroup scope.
12218 memory_scope_covered = true;
12219 }
12220
12221 // If we have the same memory scope, and all memory types are covered, we're good.
12222 if (memory_scope_covered && (semantics & next_semantics) == semantics)
12223 break;
12224 }
12225 }
12226
12227 // We are synchronizing some memory or syncing execution,
12228 // so we cannot forward any loads beyond the memory barrier.
12229 if (semantics || opcode == OpControlBarrier)
12230 {
12231 assert(current_emitting_block);
12232 flush_control_dependent_expressions(current_emitting_block->self);
12233 flush_all_active_variables();
12234 }
12235
12236 if (memory == ScopeWorkgroup) // Only need to consider memory within a group
12237 {
12238 if (semantics == MemorySemanticsWorkgroupMemoryMask)
12239 {
12240 // OpControlBarrier implies a memory barrier for shared memory as well.
12241 bool implies_shared_barrier = opcode == OpControlBarrier && execution_scope == ScopeWorkgroup;
12242 if (!implies_shared_barrier)
12243 statement("memoryBarrierShared();");
12244 }
12245 else if (semantics != 0)
12246 statement("groupMemoryBarrier();");
12247 }
12248 else if (memory == ScopeSubgroup)
12249 {
12250 const uint32_t all_barriers =
12251 MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
12252
12253 if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
12254 {
12255				// These are not relevant for GLSL, but assume they mean subgroupMemoryBarrier().
12256				// subgroupMemoryBarrier() covers everything at subgroup scope, so no need to test anything else.
12257 statement("subgroupMemoryBarrier();");
12258 }
12259 else if ((semantics & all_barriers) == all_barriers)
12260 {
12261 // Short-hand instead of emitting 3 barriers.
12262 statement("subgroupMemoryBarrier();");
12263 }
12264 else
12265 {
12266 // Pick out individual barriers.
12267 if (semantics & MemorySemanticsWorkgroupMemoryMask)
12268 statement("subgroupMemoryBarrierShared();");
12269 if (semantics & MemorySemanticsUniformMemoryMask)
12270 statement("subgroupMemoryBarrierBuffer();");
12271 if (semantics & MemorySemanticsImageMemoryMask)
12272 statement("subgroupMemoryBarrierImage();");
12273 }
12274 }
12275 else
12276 {
12277 const uint32_t all_barriers =
12278 MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
12279
12280 if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
12281 {
12282 // These are not relevant for GLSL, but assume it means memoryBarrier().
12283 // memoryBarrier() does everything, so no need to test anything else.
12284 statement("memoryBarrier();");
12285 }
12286 else if ((semantics & all_barriers) == all_barriers)
12287 {
12288				// Short-hand instead of emitting 3 barriers.
12289 statement("memoryBarrier();");
12290 }
12291 else
12292 {
12293 // Pick out individual barriers.
12294 if (semantics & MemorySemanticsWorkgroupMemoryMask)
12295 statement("memoryBarrierShared();");
12296 if (semantics & MemorySemanticsUniformMemoryMask)
12297 statement("memoryBarrierBuffer();");
12298 if (semantics & MemorySemanticsImageMemoryMask)
12299 statement("memoryBarrierImage();");
12300 }
12301 }
12302
12303 if (opcode == OpControlBarrier)
12304 {
12305 if (execution_scope == ScopeSubgroup)
12306 statement("subgroupBarrier();");
12307 else
12308 statement("barrier();");
12309 }
12310 break;
12311 }
12312
12313 case OpExtInst:
12314 {
12315 uint32_t extension_set = ops[2];
12316
12317 if (get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
12318 {
12319 emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
12320 }
12321 else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_ballot)
12322 {
12323 emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
12324 }
12325 else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter)
12326 {
12327 emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
12328 }
12329 else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_trinary_minmax)
12330 {
12331 emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
12332 }
12333 else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_gcn_shader)
12334 {
12335 emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
12336 }
12337 else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_debug_info)
12338 {
12339 break; // Ignore SPIR-V debug information extended instructions.
12340 }
12341 else
12342 {
12343 statement("// unimplemented ext op ", instruction.op);
12344 break;
12345 }
12346
12347 break;
12348 }
12349
12350 // Legacy sub-group stuff ...
12351 case OpSubgroupBallotKHR:
12352 {
12353 uint32_t result_type = ops[0];
12354 uint32_t id = ops[1];
12355 string expr;
12356		expr = join("uvec4(unpackUint2x32(ballotARB(", to_expression(ops[2]), ")), 0u, 0u)");
12357 emit_op(result_type, id, expr, should_forward(ops[2]));
12358
12359 require_extension_internal("GL_ARB_shader_ballot");
12360 inherit_expression_dependencies(id, ops[2]);
12361 register_control_dependent_expression(ops[1]);
12362 break;
12363 }
12364
12365 case OpSubgroupFirstInvocationKHR:
12366 {
12367 uint32_t result_type = ops[0];
12368 uint32_t id = ops[1];
12369 emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB");
12370
12371 require_extension_internal("GL_ARB_shader_ballot");
12372 register_control_dependent_expression(ops[1]);
12373 break;
12374 }
12375
12376 case OpSubgroupReadInvocationKHR:
12377 {
12378 uint32_t result_type = ops[0];
12379 uint32_t id = ops[1];
12380 emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB");
12381
12382 require_extension_internal("GL_ARB_shader_ballot");
12383 register_control_dependent_expression(ops[1]);
12384 break;
12385 }
12386
12387 case OpSubgroupAllKHR:
12388 {
12389 uint32_t result_type = ops[0];
12390 uint32_t id = ops[1];
12391 emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB");
12392
12393 require_extension_internal("GL_ARB_shader_group_vote");
12394 register_control_dependent_expression(ops[1]);
12395 break;
12396 }
12397
12398 case OpSubgroupAnyKHR:
12399 {
12400 uint32_t result_type = ops[0];
12401 uint32_t id = ops[1];
12402 emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB");
12403
12404 require_extension_internal("GL_ARB_shader_group_vote");
12405 register_control_dependent_expression(ops[1]);
12406 break;
12407 }
12408
12409 case OpSubgroupAllEqualKHR:
12410 {
12411 uint32_t result_type = ops[0];
12412 uint32_t id = ops[1];
12413 emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB");
12414
12415 require_extension_internal("GL_ARB_shader_group_vote");
12416 register_control_dependent_expression(ops[1]);
12417 break;
12418 }
12419
12420 case OpGroupIAddNonUniformAMD:
12421 case OpGroupFAddNonUniformAMD:
12422 {
12423 uint32_t result_type = ops[0];
12424 uint32_t id = ops[1];
12425 emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD");
12426
12427 require_extension_internal("GL_AMD_shader_ballot");
12428 register_control_dependent_expression(ops[1]);
12429 break;
12430 }
12431
12432 case OpGroupFMinNonUniformAMD:
12433 case OpGroupUMinNonUniformAMD:
12434 case OpGroupSMinNonUniformAMD:
12435 {
12436 uint32_t result_type = ops[0];
12437 uint32_t id = ops[1];
12438 emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD");
12439
12440 require_extension_internal("GL_AMD_shader_ballot");
12441 register_control_dependent_expression(ops[1]);
12442 break;
12443 }
12444
12445 case OpGroupFMaxNonUniformAMD:
12446 case OpGroupUMaxNonUniformAMD:
12447 case OpGroupSMaxNonUniformAMD:
12448 {
12449 uint32_t result_type = ops[0];
12450 uint32_t id = ops[1];
12451 emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD");
12452
12453 require_extension_internal("GL_AMD_shader_ballot");
12454 register_control_dependent_expression(ops[1]);
12455 break;
12456 }
12457
12458 case OpFragmentMaskFetchAMD:
12459 {
12460 auto &type = expression_type(ops[2]);
12461 uint32_t result_type = ops[0];
12462 uint32_t id = ops[1];
12463
12464 if (type.image.dim == spv::DimSubpassData)
12465 {
12466 emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD");
12467 }
12468 else
12469 {
12470 emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD");
12471 }
12472
12473 require_extension_internal("GL_AMD_shader_fragment_mask");
12474 break;
12475 }
12476
12477 case OpFragmentFetchAMD:
12478 {
12479 auto &type = expression_type(ops[2]);
12480 uint32_t result_type = ops[0];
12481 uint32_t id = ops[1];
12482
12483 if (type.image.dim == spv::DimSubpassData)
12484 {
12485 emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD");
12486 }
12487 else
12488 {
12489 emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD");
12490 }
12491
12492 require_extension_internal("GL_AMD_shader_fragment_mask");
12493 break;
12494 }
12495
12496 // Vulkan 1.1 sub-group stuff ...
12497 case OpGroupNonUniformElect:
12498 case OpGroupNonUniformBroadcast:
12499 case OpGroupNonUniformBroadcastFirst:
12500 case OpGroupNonUniformBallot:
12501 case OpGroupNonUniformInverseBallot:
12502 case OpGroupNonUniformBallotBitExtract:
12503 case OpGroupNonUniformBallotBitCount:
12504 case OpGroupNonUniformBallotFindLSB:
12505 case OpGroupNonUniformBallotFindMSB:
12506 case OpGroupNonUniformShuffle:
12507 case OpGroupNonUniformShuffleXor:
12508 case OpGroupNonUniformShuffleUp:
12509 case OpGroupNonUniformShuffleDown:
12510 case OpGroupNonUniformAll:
12511 case OpGroupNonUniformAny:
12512 case OpGroupNonUniformAllEqual:
12513 case OpGroupNonUniformFAdd:
12514 case OpGroupNonUniformIAdd:
12515 case OpGroupNonUniformFMul:
12516 case OpGroupNonUniformIMul:
12517 case OpGroupNonUniformFMin:
12518 case OpGroupNonUniformFMax:
12519 case OpGroupNonUniformSMin:
12520 case OpGroupNonUniformSMax:
12521 case OpGroupNonUniformUMin:
12522 case OpGroupNonUniformUMax:
12523 case OpGroupNonUniformBitwiseAnd:
12524 case OpGroupNonUniformBitwiseOr:
12525 case OpGroupNonUniformBitwiseXor:
12526 case OpGroupNonUniformLogicalAnd:
12527 case OpGroupNonUniformLogicalOr:
12528 case OpGroupNonUniformLogicalXor:
12529 case OpGroupNonUniformQuadSwap:
12530 case OpGroupNonUniformQuadBroadcast:
12531 emit_subgroup_op(instruction);
12532 break;
12533
12534 case OpFUnordEqual:
12535 case OpFUnordNotEqual:
12536 case OpFUnordLessThan:
12537 case OpFUnordGreaterThan:
12538 case OpFUnordLessThanEqual:
12539 case OpFUnordGreaterThanEqual:
12540 {
12541 // GLSL doesn't specify if floating point comparisons are ordered or unordered,
12542 // but glslang always emits ordered floating point compares for GLSL.
12543 // To get unordered compares, we can test the opposite thing and invert the result.
12544		// This way, the result is forced to true whenever a NaN is present.
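		// For example, OpFUnordLessThan(a, b) is emitted as !(a >= b) for scalars
		// (not(greaterThanEqual(a, b)) for vectors), which evaluates to true whenever either operand is NaN.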
12545 uint32_t op0 = ops[2];
12546 uint32_t op1 = ops[3];
12547
12548 string expr;
12549 if (expression_type(op0).vecsize > 1)
12550 {
12551 const char *comp_op = nullptr;
12552 switch (opcode)
12553 {
12554 case OpFUnordEqual:
12555 comp_op = "notEqual";
12556 break;
12557
12558 case OpFUnordNotEqual:
12559 comp_op = "equal";
12560 break;
12561
12562 case OpFUnordLessThan:
12563 comp_op = "greaterThanEqual";
12564 break;
12565
12566 case OpFUnordLessThanEqual:
12567 comp_op = "greaterThan";
12568 break;
12569
12570 case OpFUnordGreaterThan:
12571 comp_op = "lessThanEqual";
12572 break;
12573
12574 case OpFUnordGreaterThanEqual:
12575 comp_op = "lessThan";
12576 break;
12577
12578 default:
12579 assert(0);
12580 break;
12581 }
12582
12583 expr = join("not(", comp_op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "))");
12584 }
12585 else
12586 {
12587 const char *comp_op = nullptr;
12588 switch (opcode)
12589 {
12590 case OpFUnordEqual:
12591 comp_op = " != ";
12592 break;
12593
12594 case OpFUnordNotEqual:
12595 comp_op = " == ";
12596 break;
12597
12598 case OpFUnordLessThan:
12599 comp_op = " >= ";
12600 break;
12601
12602 case OpFUnordLessThanEqual:
12603 comp_op = " > ";
12604 break;
12605
12606 case OpFUnordGreaterThan:
12607 comp_op = " <= ";
12608 break;
12609
12610 case OpFUnordGreaterThanEqual:
12611 comp_op = " < ";
12612 break;
12613
12614 default:
12615 assert(0);
12616 break;
12617 }
12618
12619 expr = join("!(", to_enclosed_unpacked_expression(op0), comp_op, to_enclosed_unpacked_expression(op1), ")");
12620 }
12621
12622 emit_op(ops[0], ops[1], expr, should_forward(op0) && should_forward(op1));
12623 inherit_expression_dependencies(ops[1], op0);
12624 inherit_expression_dependencies(ops[1], op1);
12625 break;
12626 }
12627
12628 case OpReportIntersectionKHR:
12629 // NV is same opcode.
12630 forced_temporaries.insert(ops[1]);
12631 if (ray_tracing_is_khr)
12632 GLSL_BFOP(reportIntersectionEXT);
12633 else
12634 GLSL_BFOP(reportIntersectionNV);
12635 flush_control_dependent_expressions(current_emitting_block->self);
12636 break;
12637 case OpIgnoreIntersectionNV:
12638 // KHR variant is a terminator.
12639 statement("ignoreIntersectionNV();");
12640 flush_control_dependent_expressions(current_emitting_block->self);
12641 break;
12642 case OpTerminateRayNV:
12643 // KHR variant is a terminator.
12644 statement("terminateRayNV();");
12645 flush_control_dependent_expressions(current_emitting_block->self);
12646 break;
12647 case OpTraceNV:
12648 statement("traceNV(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
12649 to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
12650 to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
12651 to_expression(ops[9]), ", ", to_expression(ops[10]), ");");
12652 flush_control_dependent_expressions(current_emitting_block->self);
12653 break;
12654 case OpTraceRayKHR:
12655 if (!has_decoration(ops[10], DecorationLocation))
12656 SPIRV_CROSS_THROW("A memory declaration object must be used in TraceRayKHR.");
12657 statement("traceRayEXT(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
12658 to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
12659 to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
12660 to_expression(ops[9]), ", ", get_decoration(ops[10], DecorationLocation), ");");
12661 flush_control_dependent_expressions(current_emitting_block->self);
12662 break;
12663 case OpExecuteCallableNV:
12664 statement("executeCallableNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
12665 flush_control_dependent_expressions(current_emitting_block->self);
12666 break;
12667 case OpExecuteCallableKHR:
12668 if (!has_decoration(ops[1], DecorationLocation))
12669 SPIRV_CROSS_THROW("A memory declaration object must be used in ExecuteCallableKHR.");
12670 statement("executeCallableEXT(", to_expression(ops[0]), ", ", get_decoration(ops[1], DecorationLocation), ");");
12671 flush_control_dependent_expressions(current_emitting_block->self);
12672 break;
12673
12674 // Don't bother forwarding temporaries. Avoids having to test expression invalidation with ray query objects.
12675 case OpRayQueryInitializeKHR:
12676 flush_variable_declaration(ops[0]);
12677 statement("rayQueryInitializeEXT(",
12678 to_expression(ops[0]), ", ", to_expression(ops[1]), ", ",
12679 to_expression(ops[2]), ", ", to_expression(ops[3]), ", ",
12680 to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
12681 to_expression(ops[6]), ", ", to_expression(ops[7]), ");");
12682 break;
12683 case OpRayQueryProceedKHR:
12684 flush_variable_declaration(ops[0]);
12685 emit_op(ops[0], ops[1], join("rayQueryProceedEXT(", to_expression(ops[2]), ")"), false);
12686 break;
12687 case OpRayQueryTerminateKHR:
12688 flush_variable_declaration(ops[0]);
12689 statement("rayQueryTerminateEXT(", to_expression(ops[0]), ");");
12690 break;
12691 case OpRayQueryGenerateIntersectionKHR:
12692 flush_variable_declaration(ops[0]);
12693 statement("rayQueryGenerateIntersectionEXT(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
12694 break;
12695 case OpRayQueryConfirmIntersectionKHR:
12696 flush_variable_declaration(ops[0]);
12697 statement("rayQueryConfirmIntersectionEXT(", to_expression(ops[0]), ");");
12698 break;
12699#define GLSL_RAY_QUERY_GET_OP(op) \
12700 case OpRayQueryGet##op##KHR: \
12701 flush_variable_declaration(ops[2]); \
12702 emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ")"), false); \
12703 break
12704#define GLSL_RAY_QUERY_GET_OP2(op) \
12705 case OpRayQueryGet##op##KHR: \
12706 flush_variable_declaration(ops[2]); \
12707 emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ", ", "bool(", to_expression(ops[3]), "))"), false); \
12708 break
12709 GLSL_RAY_QUERY_GET_OP(RayTMin);
12710 GLSL_RAY_QUERY_GET_OP(RayFlags);
12711 GLSL_RAY_QUERY_GET_OP(WorldRayOrigin);
12712 GLSL_RAY_QUERY_GET_OP(WorldRayDirection);
12713 GLSL_RAY_QUERY_GET_OP(IntersectionCandidateAABBOpaque);
12714 GLSL_RAY_QUERY_GET_OP2(IntersectionType);
12715 GLSL_RAY_QUERY_GET_OP2(IntersectionT);
12716 GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceCustomIndex);
12717 GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceId);
12718 GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceShaderBindingTableRecordOffset);
12719 GLSL_RAY_QUERY_GET_OP2(IntersectionGeometryIndex);
12720 GLSL_RAY_QUERY_GET_OP2(IntersectionPrimitiveIndex);
12721 GLSL_RAY_QUERY_GET_OP2(IntersectionBarycentrics);
12722 GLSL_RAY_QUERY_GET_OP2(IntersectionFrontFace);
12723 GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayDirection);
12724 GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayOrigin);
12725 GLSL_RAY_QUERY_GET_OP2(IntersectionObjectToWorld);
12726 GLSL_RAY_QUERY_GET_OP2(IntersectionWorldToObject);
12727#undef GLSL_RAY_QUERY_GET_OP
12728#undef GLSL_RAY_QUERY_GET_OP2
12729
12730 case OpConvertUToAccelerationStructureKHR:
12731 require_extension_internal("GL_EXT_ray_tracing");
12732 GLSL_UFOP(accelerationStructureEXT);
12733 break;
12734
12735 case OpConvertUToPtr:
12736 {
12737 auto &type = get<SPIRType>(ops[0]);
12738 if (type.storage != StorageClassPhysicalStorageBufferEXT)
12739 SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr.");
12740
12741 auto &in_type = expression_type(ops[2]);
12742 if (in_type.vecsize == 2)
12743 require_extension_internal("GL_EXT_buffer_reference_uvec2");
12744
12745 auto op = type_to_glsl(type);
12746 emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
12747 break;
12748 }
12749
12750 case OpConvertPtrToU:
12751 {
12752 auto &type = get<SPIRType>(ops[0]);
12753 auto &ptr_type = expression_type(ops[2]);
12754 if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT)
12755 SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU.");
12756
12757 if (type.vecsize == 2)
12758 require_extension_internal("GL_EXT_buffer_reference_uvec2");
12759
12760 auto op = type_to_glsl(type);
12761 emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
12762 break;
12763 }
12764
12765 case OpUndef:
12766 // Undefined value has been declared.
12767 break;
12768
12769 case OpLine:
12770 {
12771 emit_line_directive(ops[0], ops[1]);
12772 break;
12773 }
12774
12775 case OpNoLine:
12776 break;
12777
12778 case OpDemoteToHelperInvocationEXT:
12779 if (!options.vulkan_semantics)
12780 SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
12781 require_extension_internal("GL_EXT_demote_to_helper_invocation");
12782 statement(backend.demote_literal, ";");
12783 break;
12784
12785 case OpIsHelperInvocationEXT:
12786 if (!options.vulkan_semantics)
12787 SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
12788 require_extension_internal("GL_EXT_demote_to_helper_invocation");
12789 emit_op(ops[0], ops[1], "helperInvocationEXT()", false);
12790 break;
12791
12792 case OpBeginInvocationInterlockEXT:
12793 // If the interlock is complex, we emit this elsewhere.
12794 if (!interlocked_is_complex)
12795 {
12796 statement("SPIRV_Cross_beginInvocationInterlock();");
12797 flush_all_active_variables();
12798 // Make sure forwarding doesn't propagate outside interlock region.
12799 }
12800 break;
12801
12802 case OpEndInvocationInterlockEXT:
12803 // If the interlock is complex, we emit this elsewhere.
12804 if (!interlocked_is_complex)
12805 {
12806 statement("SPIRV_Cross_endInvocationInterlock();");
12807 flush_all_active_variables();
12808 // Make sure forwarding doesn't propagate outside interlock region.
12809 }
12810 break;
12811
12812 default:
12813 statement("// unimplemented op ", instruction.op);
12814 break;
12815 }
12816}
12817
12818// Appends function arguments, mapped from global variables, beyond the specified arg index.
12819// This is used when a function call uses fewer arguments than the function defines.
12820// This situation may occur if the function signature has been dynamically modified to
12821// extract global variables referenced from within the function, and convert them to
12822// function arguments. This is necessary for shader languages that do not support global
12823// access to shader input content from within a function (e.g. Metal). Each additional
12824// function arg uses the name of the global variable. Function nesting will modify the
12825// functions and function calls all the way up the nesting chain.
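// For example (names purely illustrative), a call that was parsed as foo(x) may be emitted as
// foo(x, shared_data) when the global shared_data had to be hoisted into foo's parameter list.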
12826void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<string> &arglist)
12827{
12828 auto &args = func.arguments;
12829 uint32_t arg_cnt = uint32_t(args.size());
12830 for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++)
12831 {
12832 auto &arg = args[arg_idx];
12833 assert(arg.alias_global_variable);
12834
12835 // If the underlying variable needs to be declared
12836		// (i.e. a local variable with deferred declaration), do so now.
12837 uint32_t var_id = get<SPIRVariable>(arg.id).basevariable;
12838 if (var_id)
12839 flush_variable_declaration(var_id);
12840
12841 arglist.push_back(to_func_call_arg(arg, arg.id));
12842 }
12843}
12844
12845string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index)
12846{
12847 if (type.type_alias != TypeID(0) &&
12848 !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
12849 {
12850 return to_member_name(get<SPIRType>(type.type_alias), index);
12851 }
12852
12853 auto &memb = ir.meta[type.self].members;
12854 if (index < memb.size() && !memb[index].alias.empty())
12855 return memb[index].alias;
12856 else
12857 return join("_m", index);
12858}
12859
12860string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool)
12861{
12862 return join(".", to_member_name(type, index));
12863}
12864
12865string CompilerGLSL::to_multi_member_reference(const SPIRType &type, const SmallVector<uint32_t> &indices)
12866{
12867 string ret;
12868 auto *member_type = &type;
12869 for (auto &index : indices)
12870 {
12871 ret += join(".", to_member_name(*member_type, index));
12872 member_type = &get<SPIRType>(member_type->member_types[index]);
12873 }
12874 return ret;
12875}
12876
12877void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index)
12878{
12879 auto &memb = ir.meta[type.self].members;
12880 if (index < memb.size() && !memb[index].alias.empty())
12881 {
12882 auto &name = memb[index].alias;
12883 if (name.empty())
12884 return;
12885
12886 ParsedIR::sanitize_identifier(name, true, true);
12887 update_name_cache(type.member_name_cache, name);
12888 }
12889}
12890
12891// Checks whether the ID is a row_major matrix that requires conversion before use
12892bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id)
12893{
12894 // Natively supported row-major matrices do not need to be converted.
12895 // Legacy targets do not support row major.
12896 if (backend.native_row_major_matrix && !is_legacy())
12897 return false;
12898
12899 auto *e = maybe_get<SPIRExpression>(id);
12900 if (e)
12901 return e->need_transpose;
12902 else
12903 return has_decoration(id, DecorationRowMajor);
12904}
12905
12906// Checks whether the member is a row_major matrix that requires conversion before use
12907bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
12908{
12909 // Natively supported row-major matrices do not need to be converted.
12910 if (backend.native_row_major_matrix && !is_legacy())
12911 return false;
12912
12913 // Non-matrix or column-major matrix types do not need to be converted.
12914 if (!has_member_decoration(type.self, index, DecorationRowMajor))
12915 return false;
12916
12917 // Only square row-major matrices can be converted at this time.
12918	// Converting non-square matrices will require defining a custom GLSL function that
12919 // swaps matrix elements while retaining the original dimensional form of the matrix.
12920 const auto mbr_type = get<SPIRType>(type.member_types[index]);
12921 if (mbr_type.columns != mbr_type.vecsize)
12922 SPIRV_CROSS_THROW("Row-major matrices must be square on this platform.");
12923
12924 return true;
12925}
12926
12927// Checks if we need to remap physical type IDs when declaring the type in a buffer.
12928bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const
12929{
12930 return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID);
12931}
12932
12933// Checks whether the member is a packed data type that might need to be unpacked.
12934bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const
12935{
12936 return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
12937}
12938
12939// Wraps the expression string in a function call that converts the
12940// row_major matrix result of the expression to a column_major matrix.
12941// Base implementation uses the standard library transpose() function.
12942// Subclasses may override to use a different function.
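// As an illustrative sketch (member names here are hypothetical): a load of a whole row-major
// mat3 member "m" is emitted as "transpose(m)", while loading a single column such as m[1]
// from a row-major matrix is unrolled into a constructor along the lines of
//   vec3(m[0][1], m[1][1], m[2][1])
// and on legacy targets without transpose(), an emitted spvTranspose() helper is used instead.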
12943string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */,
12944 bool /*is_packed*/)
12945{
12946 strip_enclosed_expression(exp_str);
12947 if (!is_matrix(exp_type))
12948 {
12949 auto column_index = exp_str.find_last_of('[');
12950 if (column_index == string::npos)
12951 return exp_str;
12952
12953 auto column_expr = exp_str.substr(column_index);
12954 exp_str.resize(column_index);
12955
12956 auto transposed_expr = type_to_glsl_constructor(exp_type) + "(";
12957
12958 // Loading a column from a row-major matrix. Unroll the load.
12959 for (uint32_t c = 0; c < exp_type.vecsize; c++)
12960 {
12961 transposed_expr += join(exp_str, '[', c, ']', column_expr);
12962 if (c + 1 < exp_type.vecsize)
12963 transposed_expr += ", ";
12964 }
12965
12966 transposed_expr += ")";
12967 return transposed_expr;
12968 }
12969 else if (options.version < 120)
12970 {
12971 // GLSL 110, ES 100 do not have transpose(), so emulate it. Note that
12972 // these GLSL versions do not support non-square matrices.
12973 if (exp_type.vecsize == 2 && exp_type.columns == 2)
12974 {
12975 if (!requires_transpose_2x2)
12976 {
12977 requires_transpose_2x2 = true;
12978 force_recompile();
12979 }
12980 }
12981 else if (exp_type.vecsize == 3 && exp_type.columns == 3)
12982 {
12983 if (!requires_transpose_3x3)
12984 {
12985 requires_transpose_3x3 = true;
12986 force_recompile();
12987 }
12988 }
12989 else if (exp_type.vecsize == 4 && exp_type.columns == 4)
12990 {
12991 if (!requires_transpose_4x4)
12992 {
12993 requires_transpose_4x4 = true;
12994 force_recompile();
12995 }
12996 }
12997 else
12998 SPIRV_CROSS_THROW("Non-square matrices are not supported in legacy GLSL, cannot transpose.");
12999 return join("spvTranspose(", exp_str, ")");
13000 }
13001 else
13002 return join("transpose(", exp_str, ")");
13003}
13004
13005string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id)
13006{
13007 string type_name = type_to_glsl(type, id);
13008 remap_variable_type_name(type, name, type_name);
13009 return join(type_name, " ", name, type_to_array_glsl(type));
13010}
13011
13012bool CompilerGLSL::variable_decl_is_remapped_storage(const SPIRVariable &var, StorageClass storage) const
13013{
13014 return var.storage == storage;
13015}
13016
13017// Emit a structure member. Subclasses may override to modify output,
13018// or to dynamically add a padding member if needed.
13019void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
13020 const string &qualifier, uint32_t)
13021{
13022 auto &membertype = get<SPIRType>(member_type_id);
13023
13024 Bitset memberflags;
13025 auto &memb = ir.meta[type.self].members;
13026 if (index < memb.size())
13027 memberflags = memb[index].decoration_flags;
13028
13029 string qualifiers;
13030 bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
13031 ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
13032
13033 if (is_block)
13034 qualifiers = to_interpolation_qualifiers(memberflags);
13035
13036 statement(layout_for_member(type, index), qualifiers, qualifier, flags_to_qualifiers_glsl(membertype, memberflags),
13037 variable_decl(membertype, to_member_name(type, index)), ";");
13038}
13039
13040void CompilerGLSL::emit_struct_padding_target(const SPIRType &)
13041{
13042}
13043
13044string CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags)
13045{
13046 // GL_EXT_buffer_reference variables can be marked as restrict.
13047 if (flags.get(DecorationRestrictPointerEXT))
13048 return "restrict ";
13049
13050 string qual;
13051
13052 if (type_is_floating_point(type) && flags.get(DecorationNoContraction) && backend.support_precise_qualifier)
13053 qual = "precise ";
13054
	// Structs do not have precision qualifiers, and neither do doubles (desktop-only anyway, so no mediump/highp).
13056 bool type_supports_precision =
13057 type.basetype == SPIRType::Float || type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt ||
13058 type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage ||
13059 type.basetype == SPIRType::Sampler;
13060
13061 if (!type_supports_precision)
13062 return qual;
13063
13064 if (options.es)
13065 {
13066 auto &execution = get_entry_point();
13067
13068 if (flags.get(DecorationRelaxedPrecision))
13069 {
13070 bool implied_fmediump = type.basetype == SPIRType::Float &&
13071 options.fragment.default_float_precision == Options::Mediump &&
13072 execution.model == ExecutionModelFragment;
13073
13074 bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
13075 options.fragment.default_int_precision == Options::Mediump &&
13076 execution.model == ExecutionModelFragment;
13077
13078 qual += (implied_fmediump || implied_imediump) ? "" : "mediump ";
13079 }
13080 else
13081 {
13082 bool implied_fhighp =
13083 type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp &&
13084 execution.model == ExecutionModelFragment) ||
13085 (execution.model != ExecutionModelFragment));
13086
13087 bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
13088 ((options.fragment.default_int_precision == Options::Highp &&
13089 execution.model == ExecutionModelFragment) ||
13090 (execution.model != ExecutionModelFragment));
13091
13092 qual += (implied_fhighp || implied_ihighp) ? "" : "highp ";
13093 }
13094 }
13095 else if (backend.allow_precision_qualifiers)
13096 {
		// Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient.
		// The default is highp, however, so only emit mediump in the rare case that a shader uses RelaxedPrecision.
13099 if (flags.get(DecorationRelaxedPrecision))
13100 qual += "mediump ";
13101 }
13102
13103 return qual;
13104}
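// Rough sketch of the qualifiers produced above (hypothetical declarations, assuming an ESSL
// fragment shader whose default float precision is mediump):
//   RelaxedPrecision float -> no qualifier needed, it matches the default:  "float x;"
//   full-precision float   -> "highp float x;"
//   NoContraction float    -> "precise highp float x;" (when the backend supports precise)
// On desktop Vulkan GLSL (allow_precision_qualifiers), only "mediump " is emitted for
// RelaxedPrecision, since highp is already the default there.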
13105
13106string CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
13107{
13108 auto &type = expression_type(id);
13109 bool use_precision_qualifiers = backend.allow_precision_qualifiers;
13110 if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
13111 {
13112 // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.
13113 auto &result_type = get<SPIRType>(type.image.type);
13114 if (result_type.width < 32)
13115 return "mediump ";
13116 }
13117 return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags);
13118}
13119
13120void CompilerGLSL::fixup_io_block_patch_qualifiers(const SPIRVariable &var)
13121{
	// Works around weird behavior in glslangValidator where
	// a patch out block is translated such that only the block members receive the Patch decoration.
	// To make glslang not complain when we compile again, we have to transform this back to a case where
	// the variable itself has the Patch decoration, and not the members.
13126 auto &type = get<SPIRType>(var.basetype);
13127 if (has_decoration(type.self, DecorationBlock))
13128 {
13129 uint32_t member_count = uint32_t(type.member_types.size());
13130 for (uint32_t i = 0; i < member_count; i++)
13131 {
13132 if (has_member_decoration(type.self, i, DecorationPatch))
13133 {
13134 set_decoration(var.self, DecorationPatch);
13135 break;
13136 }
13137 }
13138
13139 if (has_decoration(var.self, DecorationPatch))
13140 for (uint32_t i = 0; i < member_count; i++)
13141 unset_member_decoration(type.self, i, DecorationPatch);
13142 }
13143}
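// As a hedged sketch of the transform above (block and member names hypothetical), a block like
//   out TessBlock { patch vec4 v; } blk;   // Patch decoration only on the member
// is effectively rewritten so the whole variable carries the qualifier:
//   patch out TessBlock { vec4 v; } blk;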
13144
13145string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
13146{
13147 auto &flags = ir.meta[id].decoration.decoration_flags;
13148 string res;
13149
13150 auto *var = maybe_get<SPIRVariable>(id);
13151
13152 if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied)
13153 res += "shared ";
13154
13155 res += to_interpolation_qualifiers(flags);
13156 if (var)
13157 res += to_storage_qualifiers_glsl(*var);
13158
13159 auto &type = expression_type(id);
13160 if (type.image.dim != DimSubpassData && type.image.sampled == 2)
13161 {
13162 if (flags.get(DecorationCoherent))
13163 res += "coherent ";
13164 if (flags.get(DecorationRestrict))
13165 res += "restrict ";
13166
13167 if (flags.get(DecorationNonWritable))
13168 res += "readonly ";
13169
13170 bool formatted_load = type.image.format == ImageFormatUnknown;
13171 if (flags.get(DecorationNonReadable))
13172 {
13173 res += "writeonly ";
13174 formatted_load = false;
13175 }
13176
13177 if (formatted_load)
13178 {
13179 if (!options.es)
13180 require_extension_internal("GL_EXT_shader_image_load_formatted");
13181 else
13182 SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_image_load_formatted in ESSL.");
13183 }
13184 }
13185
13186 res += to_precision_qualifiers_glsl(id);
13187
13188 return res;
13189}
13190
13191string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg)
13192{
	// glslangValidator seems to make all arguments pointers no matter what, which is rather bizarre ...
13194 auto &type = expression_type(arg.id);
13195 const char *direction = "";
13196
13197 if (type.pointer)
13198 {
13199 if (arg.write_count && arg.read_count)
13200 direction = "inout ";
13201 else if (arg.write_count)
13202 direction = "out ";
13203 }
13204
13205 return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id));
13206}
13207
13208string CompilerGLSL::to_initializer_expression(const SPIRVariable &var)
13209{
13210 return to_unpacked_expression(var.initializer);
13211}
13212
13213string CompilerGLSL::to_zero_initialized_expression(uint32_t type_id)
13214{
13215#ifndef NDEBUG
13216 auto &type = get<SPIRType>(type_id);
13217 assert(type.storage == StorageClassPrivate || type.storage == StorageClassFunction ||
13218 type.storage == StorageClassGeneric);
13219#endif
13220 uint32_t id = ir.increase_bound_by(1);
13221 ir.make_constant_null(id, type_id, false);
13222 return constant_expression(get<SPIRConstant>(id));
13223}
13224
13225bool CompilerGLSL::type_can_zero_initialize(const SPIRType &type) const
13226{
13227 if (type.pointer)
13228 return false;
13229
13230 if (!type.array.empty() && options.flatten_multidimensional_arrays)
13231 return false;
13232
13233 for (auto &literal : type.array_size_literal)
13234 if (!literal)
13235 return false;
13236
13237 for (auto &memb : type.member_types)
13238 if (!type_can_zero_initialize(get<SPIRType>(memb)))
13239 return false;
13240
13241 return true;
13242}
13243
13244string CompilerGLSL::variable_decl(const SPIRVariable &variable)
13245{
13246 // Ignore the pointer type since GLSL doesn't have pointers.
13247 auto &type = get_variable_data_type(variable);
13248
13249 if (type.pointer_depth > 1 && !backend.support_pointer_to_pointer)
13250 SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types.");
13251
13252 auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self), variable.self));
13253
13254 if (variable.loop_variable && variable.static_expression)
13255 {
13256 uint32_t expr = variable.static_expression;
13257 if (ir.ids[expr].get_type() != TypeUndef)
13258 res += join(" = ", to_unpacked_expression(variable.static_expression));
13259 else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
13260 res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
13261 }
13262 else if (variable.initializer && !variable_decl_is_remapped_storage(variable, StorageClassWorkgroup))
13263 {
13264 uint32_t expr = variable.initializer;
13265 if (ir.ids[expr].get_type() != TypeUndef)
13266 res += join(" = ", to_initializer_expression(variable));
13267 else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
13268 res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
13269 }
13270
13271 return res;
13272}
13273
13274const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable)
13275{
13276 auto &flags = ir.meta[variable.self].decoration.decoration_flags;
13277 if (flags.get(DecorationRelaxedPrecision))
13278 return "mediump ";
13279 else
13280 return "highp ";
13281}
13282
13283string CompilerGLSL::pls_decl(const PlsRemap &var)
13284{
13285 auto &variable = get<SPIRVariable>(var.id);
13286
13287 SPIRType type;
13288 type.vecsize = pls_format_to_components(var.format);
13289 type.basetype = pls_format_to_basetype(var.format);
13290
13291 return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ",
13292 to_name(variable.self));
13293}
13294
13295uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const
13296{
13297 return to_array_size_literal(type, uint32_t(type.array.size() - 1));
13298}
13299
13300uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const
13301{
13302 assert(type.array.size() == type.array_size_literal.size());
13303
13304 if (type.array_size_literal[index])
13305 {
13306 return type.array[index];
13307 }
13308 else
13309 {
13310 // Use the default spec constant value.
13311 // This is the best we can do.
13312 return evaluate_constant_u32(type.array[index]);
13313 }
13314}
13315
13316string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index)
13317{
13318 assert(type.array.size() == type.array_size_literal.size());
13319
13320 auto &size = type.array[index];
13321 if (!type.array_size_literal[index])
13322 return to_expression(size);
13323 else if (size)
13324 return convert_to_string(size);
13325 else if (!backend.unsized_array_supported)
13326 {
13327 // For runtime-sized arrays, we can work around
13328 // lack of standard support for this by simply having
13329 // a single element array.
13330 //
13331 // Runtime length arrays must always be the last element
13332 // in an interface block.
13333 return "1";
13334 }
13335 else
13336 return "";
13337}
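// Note on the fallback above: on targets where unsized arrays are not supported, a runtime-sized
// array is emitted with a dummy size of 1, so a hypothetical SSBO member "float data[];"
// would be declared as "float data[1];" instead.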
13338
13339string CompilerGLSL::type_to_array_glsl(const SPIRType &type)
13340{
13341 if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
13342 {
13343 // We are using a wrapped pointer type, and we should not emit any array declarations here.
13344 return "";
13345 }
13346
13347 if (type.array.empty())
13348 return "";
13349
13350 if (options.flatten_multidimensional_arrays)
13351 {
13352 string res;
13353 res += "[";
13354 for (auto i = uint32_t(type.array.size()); i; i--)
13355 {
13356 res += enclose_expression(to_array_size(type, i - 1));
13357 if (i > 1)
13358 res += " * ";
13359 }
13360 res += "]";
13361 return res;
13362 }
13363 else
13364 {
13365 if (type.array.size() > 1)
13366 {
13367 if (!options.es && options.version < 430)
13368 require_extension_internal("GL_ARB_arrays_of_arrays");
13369 else if (options.es && options.version < 310)
13370 SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. "
13371 "Try using --flatten-multidimensional-arrays or set "
13372 "options.flatten_multidimensional_arrays to true.");
13373 }
13374
13375 string res;
13376 for (auto i = uint32_t(type.array.size()); i; i--)
13377 {
13378 res += "[";
13379 res += to_array_size(type, i - 1);
13380 res += "]";
13381 }
13382 return res;
13383 }
13384}
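// Illustrative sketch (hypothetical sizes): a two-dimensional array declared as "float v[3][4]"
// in plain GLSL is emitted as "float v[3 * 4]" when options.flatten_multidimensional_arrays is
// enabled; the corresponding index calculations are flattened elsewhere in the compiler to match.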
13385
13386string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id)
13387{
13388 auto &imagetype = get<SPIRType>(type.image.type);
13389 string res;
13390
13391 switch (imagetype.basetype)
13392 {
13393 case SPIRType::Int:
13394 case SPIRType::Short:
13395 case SPIRType::SByte:
13396 res = "i";
13397 break;
13398 case SPIRType::UInt:
13399 case SPIRType::UShort:
13400 case SPIRType::UByte:
13401 res = "u";
13402 break;
13403 default:
13404 break;
13405 }
13406
	// For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation.
	// We cannot express a true half texture type in GLSL, nor short integer formats for that matter.
13409
13410 if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
13411 return res + "subpassInput" + (type.image.ms ? "MS" : "");
13412 else if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData &&
13413 subpass_input_is_framebuffer_fetch(id))
13414 {
13415 SPIRType sampled_type = get<SPIRType>(type.image.type);
13416 sampled_type.vecsize = 4;
13417 return type_to_glsl(sampled_type);
13418 }
13419
13420 // If we're emulating subpassInput with samplers, force sampler2D
13421 // so we don't have to specify format.
13422 if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData)
13423 {
13424 // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V.
13425 if (type.image.dim == DimBuffer && type.image.sampled == 1)
13426 res += "sampler";
13427 else
13428 res += type.image.sampled == 2 ? "image" : "texture";
13429 }
13430 else
13431 res += "sampler";
13432
13433 switch (type.image.dim)
13434 {
13435 case Dim1D:
13436 res += "1D";
13437 break;
13438 case Dim2D:
13439 res += "2D";
13440 break;
13441 case Dim3D:
13442 res += "3D";
13443 break;
13444 case DimCube:
13445 res += "Cube";
13446 break;
13447 case DimRect:
13448 if (options.es)
13449 SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES.");
13450
13451 if (is_legacy_desktop())
13452 require_extension_internal("GL_ARB_texture_rectangle");
13453
13454 res += "2DRect";
13455 break;
13456
13457 case DimBuffer:
13458 if (options.es && options.version < 320)
13459 require_extension_internal("GL_EXT_texture_buffer");
13460 else if (!options.es && options.version < 300)
13461 require_extension_internal("GL_EXT_texture_buffer_object");
13462 res += "Buffer";
13463 break;
13464
13465 case DimSubpassData:
13466 res += "2D";
13467 break;
13468 default:
13469 SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported.");
13470 }
13471
13472 if (type.image.ms)
13473 res += "MS";
13474 if (type.image.arrayed)
13475 {
13476 if (is_legacy_desktop())
13477 require_extension_internal("GL_EXT_texture_array");
13478 res += "Array";
13479 }
13480
13481 // "Shadow" state in GLSL only exists for samplers and combined image samplers.
13482 if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) &&
13483 is_depth_image(type, id))
13484 {
13485 res += "Shadow";
13486 }
13487
13488 return res;
13489}
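// A few hypothetical examples of the mapping above:
//   combined 2D image + sampler of float      -> "sampler2D"
//   separate sampled 2D image of int          -> "itexture2D"
//   storage 2D image of uint (sampled == 2)   -> "uimage2D"
//   combined cube array with depth comparison -> "samplerCubeArrayShadow"
// The exact result also depends on Vulkan semantics, ES/desktop version and extensions in use.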
13490
13491string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type)
13492{
13493 if (backend.use_array_constructor && type.array.size() > 1)
13494 {
13495 if (options.flatten_multidimensional_arrays)
13496 SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, "
13497 "e.g. float[][]().");
13498 else if (!options.es && options.version < 430)
13499 require_extension_internal("GL_ARB_arrays_of_arrays");
13500 else if (options.es && options.version < 310)
13501 SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310.");
13502 }
13503
13504 auto e = type_to_glsl(type);
13505 if (backend.use_array_constructor)
13506 {
13507 for (uint32_t i = 0; i < type.array.size(); i++)
13508 e += "[]";
13509 }
13510 return e;
13511}
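// Sketch of the constructor names produced above (hypothetical element types): a one-dimensional
// "float[4]" yields "float[]" and a two-dimensional array yields "float[][]", so an emitted
// constructor call ends up looking roughly like "float[][](float[](...), float[](...))"
// on targets that use array constructor syntax.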
13512
13513// The optional id parameter indicates the object whose type we are trying
13514// to find the description for. It is optional. Most type descriptions do not
13515// depend on a specific object's use of that type.
13516string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
13517{
13518 if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
13519 {
13520 // Need to create a magic type name which compacts the entire type information.
13521 string name = type_to_glsl(get_pointee_type(type));
13522 for (size_t i = 0; i < type.array.size(); i++)
13523 {
13524 if (type.array_size_literal[i])
13525 name += join(type.array[i], "_");
13526 else
13527 name += join("id", type.array[i], "_");
13528 }
13529 name += "Pointer";
13530 return name;
13531 }
13532
13533 switch (type.basetype)
13534 {
13535 case SPIRType::Struct:
13536 // Need OpName lookup here to get a "sensible" name for a struct.
13537 if (backend.explicit_struct_type)
13538 return join("struct ", to_name(type.self));
13539 else
13540 return to_name(type.self);
13541
13542 case SPIRType::Image:
13543 case SPIRType::SampledImage:
13544 return image_type_glsl(type, id);
13545
13546 case SPIRType::Sampler:
13547 // The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing
13548 // this distinction into the type system.
13549 return comparison_ids.count(id) ? "samplerShadow" : "sampler";
13550
13551 case SPIRType::AccelerationStructure:
13552 return ray_tracing_is_khr ? "accelerationStructureEXT" : "accelerationStructureNV";
13553
13554 case SPIRType::RayQuery:
13555 return "rayQueryEXT";
13556
13557 case SPIRType::Void:
13558 return "void";
13559
13560 default:
13561 break;
13562 }
13563
13564 if (type.basetype == SPIRType::UInt && is_legacy())
13565 SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
13566
13567 if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
13568 {
13569 switch (type.basetype)
13570 {
13571 case SPIRType::Boolean:
13572 return "bool";
13573 case SPIRType::SByte:
13574 return backend.basic_int8_type;
13575 case SPIRType::UByte:
13576 return backend.basic_uint8_type;
13577 case SPIRType::Short:
13578 return backend.basic_int16_type;
13579 case SPIRType::UShort:
13580 return backend.basic_uint16_type;
13581 case SPIRType::Int:
13582 return backend.basic_int_type;
13583 case SPIRType::UInt:
13584 return backend.basic_uint_type;
13585 case SPIRType::AtomicCounter:
13586 return "atomic_uint";
13587 case SPIRType::Half:
13588 return "float16_t";
13589 case SPIRType::Float:
13590 return "float";
13591 case SPIRType::Double:
13592 return "double";
13593 case SPIRType::Int64:
13594 return "int64_t";
13595 case SPIRType::UInt64:
13596 return "uint64_t";
13597 default:
13598 return "???";
13599 }
13600 }
13601 else if (type.vecsize > 1 && type.columns == 1) // Vector builtin
13602 {
13603 switch (type.basetype)
13604 {
13605 case SPIRType::Boolean:
13606 return join("bvec", type.vecsize);
13607 case SPIRType::SByte:
13608 return join("i8vec", type.vecsize);
13609 case SPIRType::UByte:
13610 return join("u8vec", type.vecsize);
13611 case SPIRType::Short:
13612 return join("i16vec", type.vecsize);
13613 case SPIRType::UShort:
13614 return join("u16vec", type.vecsize);
13615 case SPIRType::Int:
13616 return join("ivec", type.vecsize);
13617 case SPIRType::UInt:
13618 return join("uvec", type.vecsize);
13619 case SPIRType::Half:
13620 return join("f16vec", type.vecsize);
13621 case SPIRType::Float:
13622 return join("vec", type.vecsize);
13623 case SPIRType::Double:
13624 return join("dvec", type.vecsize);
13625 case SPIRType::Int64:
13626 return join("i64vec", type.vecsize);
13627 case SPIRType::UInt64:
13628 return join("u64vec", type.vecsize);
13629 default:
13630 return "???";
13631 }
13632 }
13633 else if (type.vecsize == type.columns) // Simple Matrix builtin
13634 {
13635 switch (type.basetype)
13636 {
13637 case SPIRType::Boolean:
13638 return join("bmat", type.vecsize);
13639 case SPIRType::Int:
13640 return join("imat", type.vecsize);
13641 case SPIRType::UInt:
13642 return join("umat", type.vecsize);
13643 case SPIRType::Half:
13644 return join("f16mat", type.vecsize);
13645 case SPIRType::Float:
13646 return join("mat", type.vecsize);
13647 case SPIRType::Double:
13648 return join("dmat", type.vecsize);
13649 // Matrix types not supported for int64/uint64.
13650 default:
13651 return "???";
13652 }
13653 }
13654 else
13655 {
13656 switch (type.basetype)
13657 {
13658 case SPIRType::Boolean:
13659 return join("bmat", type.columns, "x", type.vecsize);
13660 case SPIRType::Int:
13661 return join("imat", type.columns, "x", type.vecsize);
13662 case SPIRType::UInt:
13663 return join("umat", type.columns, "x", type.vecsize);
13664 case SPIRType::Half:
13665 return join("f16mat", type.columns, "x", type.vecsize);
13666 case SPIRType::Float:
13667 return join("mat", type.columns, "x", type.vecsize);
13668 case SPIRType::Double:
13669 return join("dmat", type.columns, "x", type.vecsize);
13670 // Matrix types not supported for int64/uint64.
13671 default:
13672 return "???";
13673 }
13674 }
13675}
13676
13677void CompilerGLSL::add_variable(unordered_set<string> &variables_primary,
13678 const unordered_set<string> &variables_secondary, string &name)
13679{
13680 if (name.empty())
13681 return;
13682
13683 ParsedIR::sanitize_underscores(name);
13684 if (ParsedIR::is_globally_reserved_identifier(name, true))
13685 {
13686 name.clear();
13687 return;
13688 }
13689
13690 update_name_cache(variables_primary, variables_secondary, name);
13691}
13692
13693void CompilerGLSL::add_local_variable_name(uint32_t id)
13694{
13695 add_variable(local_variable_names, block_names, ir.meta[id].decoration.alias);
13696}
13697
13698void CompilerGLSL::add_resource_name(uint32_t id)
13699{
13700 add_variable(resource_names, block_names, ir.meta[id].decoration.alias);
13701}
13702
13703void CompilerGLSL::add_header_line(const std::string &line)
13704{
13705 header_lines.push_back(line);
13706}
13707
13708bool CompilerGLSL::has_extension(const std::string &ext) const
13709{
13710 auto itr = find(begin(forced_extensions), end(forced_extensions), ext);
13711 return itr != end(forced_extensions);
13712}
13713
13714void CompilerGLSL::require_extension(const std::string &ext)
13715{
13716 if (!has_extension(ext))
13717 forced_extensions.push_back(ext);
13718}
13719
13720void CompilerGLSL::require_extension_internal(const string &ext)
13721{
13722 if (backend.supports_extensions && !has_extension(ext))
13723 {
13724 forced_extensions.push_back(ext);
13725 force_recompile();
13726 }
13727}
13728
13729void CompilerGLSL::flatten_buffer_block(VariableID id)
13730{
13731 auto &var = get<SPIRVariable>(id);
13732 auto &type = get<SPIRType>(var.basetype);
13733 auto name = to_name(type.self, false);
13734 auto &flags = ir.meta[type.self].decoration.decoration_flags;
13735
13736 if (!type.array.empty())
13737 SPIRV_CROSS_THROW(name + " is an array of UBOs.");
13738 if (type.basetype != SPIRType::Struct)
13739 SPIRV_CROSS_THROW(name + " is not a struct.");
13740 if (!flags.get(DecorationBlock))
13741 SPIRV_CROSS_THROW(name + " is not a block.");
13742 if (type.member_types.empty())
13743 SPIRV_CROSS_THROW(name + " is an empty struct.");
13744
13745 flattened_buffer_blocks.insert(id);
13746}
13747
13748bool CompilerGLSL::builtin_translates_to_nonarray(spv::BuiltIn /*builtin*/) const
13749{
13750 return false; // GLSL itself does not need to translate array builtin types to non-array builtin types
13751}
13752
13753bool CompilerGLSL::check_atomic_image(uint32_t id)
13754{
13755 auto &type = expression_type(id);
13756 if (type.storage == StorageClassImage)
13757 {
13758 if (options.es && options.version < 320)
13759 require_extension_internal("GL_OES_shader_image_atomic");
13760
13761 auto *var = maybe_get_backing_variable(id);
13762 if (var)
13763 {
13764 auto &flags = ir.meta[var->self].decoration.decoration_flags;
13765 if (flags.get(DecorationNonWritable) || flags.get(DecorationNonReadable))
13766 {
13767 flags.clear(DecorationNonWritable);
13768 flags.clear(DecorationNonReadable);
13769 force_recompile();
13770 }
13771 }
13772 return true;
13773 }
13774 else
13775 return false;
13776}
13777
13778void CompilerGLSL::add_function_overload(const SPIRFunction &func)
13779{
13780 Hasher hasher;
13781 for (auto &arg : func.arguments)
13782 {
		// Parameters may or may not be pointer types,
		// but that does not change the signature in GLSL/HLSL,
		// so strip the pointer type before hashing.
13786 uint32_t type_id = get_pointee_type_id(arg.type);
13787 auto &type = get<SPIRType>(type_id);
13788
13789 if (!combined_image_samplers.empty())
13790 {
13791 // If we have combined image samplers, we cannot really trust the image and sampler arguments
13792 // we pass down to callees, because they may be shuffled around.
13793 // Ignore these arguments, to make sure that functions need to differ in some other way
13794 // to be considered different overloads.
13795 if (type.basetype == SPIRType::SampledImage ||
13796 (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler)
13797 {
13798 continue;
13799 }
13800 }
13801
13802 hasher.u32(type_id);
13803 }
13804 uint64_t types_hash = hasher.get();
13805
13806 auto function_name = to_name(func.self);
13807 auto itr = function_overloads.find(function_name);
13808 if (itr != end(function_overloads))
13809 {
13810 // There exists a function with this name already.
13811 auto &overloads = itr->second;
13812 if (overloads.count(types_hash) != 0)
13813 {
13814 // Overload conflict, assign a new name.
13815 add_resource_name(func.self);
13816 function_overloads[to_name(func.self)].insert(types_hash);
13817 }
13818 else
13819 {
13820 // Can reuse the name.
13821 overloads.insert(types_hash);
13822 }
13823 }
13824 else
13825 {
13826 // First time we see this function name.
13827 add_resource_name(func.self);
13828 function_overloads[to_name(func.self)].insert(types_hash);
13829 }
13830}
13831
13832void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags)
13833{
13834 if (func.self != ir.default_entry_point)
13835 add_function_overload(func);
13836
13837 // Avoid shadow declarations.
13838 local_variable_names = resource_names;
13839
13840 string decl;
13841
13842 auto &type = get<SPIRType>(func.return_type);
13843 decl += flags_to_qualifiers_glsl(type, return_flags);
13844 decl += type_to_glsl(type);
13845 decl += type_to_array_glsl(type);
13846 decl += " ";
13847
13848 if (func.self == ir.default_entry_point)
13849 {
13850 // If we need complex fallback in GLSL, we just wrap main() in a function
13851 // and interlock the entire shader ...
13852 if (interlocked_is_complex)
13853 decl += "spvMainInterlockedBody";
13854 else
13855 decl += "main";
13856
13857 processing_entry_point = true;
13858 }
13859 else
13860 decl += to_name(func.self);
13861
13862 decl += "(";
13863 SmallVector<string> arglist;
13864 for (auto &arg : func.arguments)
13865 {
13866 // Do not pass in separate images or samplers if we're remapping
13867 // to combined image samplers.
13868 if (skip_argument(arg.id))
13869 continue;
13870
13871 // Might change the variable name if it already exists in this function.
		// SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
		// to use the same name for different variables.
13874 // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
13875 add_local_variable_name(arg.id);
13876
13877 arglist.push_back(argument_decl(arg));
13878
13879 // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
13880 auto *var = maybe_get<SPIRVariable>(arg.id);
13881 if (var)
13882 var->parameter = &arg;
13883 }
13884
13885 for (auto &arg : func.shadow_arguments)
13886 {
13887 // Might change the variable name if it already exists in this function.
		// SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
		// to use the same name for different variables.
13890 // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
13891 add_local_variable_name(arg.id);
13892
13893 arglist.push_back(argument_decl(arg));
13894
13895 // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
13896 auto *var = maybe_get<SPIRVariable>(arg.id);
13897 if (var)
13898 var->parameter = &arg;
13899 }
13900
13901 decl += merge(arglist);
13902 decl += ")";
13903 statement(decl);
13904}
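// As a rough sketch, a SPIR-V function taking a pointer parameter that is both read and written
// ends up with a prototype along the lines of (names hypothetical):
//   void update_color(inout mediump vec4 color, int index)
// while the entry point itself is always emitted as "void main()" (or "spvMainInterlockedBody"
// when the complex interlock fallback is required).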
13905
13906void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
13907{
13908 // Avoid potential cycles.
13909 if (func.active)
13910 return;
13911 func.active = true;
13912
13913 // If we depend on a function, emit that function before we emit our own function.
13914 for (auto block : func.blocks)
13915 {
13916 auto &b = get<SPIRBlock>(block);
13917 for (auto &i : b.ops)
13918 {
13919 auto ops = stream(i);
13920 auto op = static_cast<Op>(i.op);
13921
13922 if (op == OpFunctionCall)
13923 {
13924 // Recursively emit functions which are called.
13925 uint32_t id = ops[2];
13926 emit_function(get<SPIRFunction>(id), ir.meta[ops[1]].decoration.decoration_flags);
13927 }
13928 }
13929 }
13930
13931 if (func.entry_line.file_id != 0)
13932 emit_line_directive(func.entry_line.file_id, func.entry_line.line_literal);
13933 emit_function_prototype(func, return_flags);
13934 begin_scope();
13935
13936 if (func.self == ir.default_entry_point)
13937 emit_entry_point_declarations();
13938
13939 current_function = &func;
13940 auto &entry_block = get<SPIRBlock>(func.entry_block);
13941
13942 sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack));
13943 for (auto &array : func.constant_arrays_needed_on_stack)
13944 {
13945 auto &c = get<SPIRConstant>(array);
13946 auto &type = get<SPIRType>(c.constant_type);
13947 statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";");
13948 }
13949
13950 for (auto &v : func.local_variables)
13951 {
13952 auto &var = get<SPIRVariable>(v);
13953 var.deferred_declaration = false;
13954
13955 if (variable_decl_is_remapped_storage(var, StorageClassWorkgroup))
13956 {
13957 // Special variable type which cannot have initializer,
13958 // need to be declared as standalone variables.
13959 // Comes from MSL which can push global variables as local variables in main function.
13960 add_local_variable_name(var.self);
13961 statement(variable_decl(var), ";");
13962 var.deferred_declaration = false;
13963 }
13964 else if (var.storage == StorageClassPrivate)
13965 {
13966 // These variables will not have had their CFG usage analyzed, so move it to the entry block.
13967 // Comes from MSL which can push global variables as local variables in main function.
13968 // We could just declare them right now, but we would miss out on an important initialization case which is
13969 // LUT declaration in MSL.
13970 // If we don't declare the variable when it is assigned we're forced to go through a helper function
13971 // which copies elements one by one.
13972 add_local_variable_name(var.self);
13973
13974 if (var.initializer)
13975 {
13976 statement(variable_decl(var), ";");
13977 var.deferred_declaration = false;
13978 }
13979 else
13980 {
13981 auto &dominated = entry_block.dominated_variables;
13982 if (find(begin(dominated), end(dominated), var.self) == end(dominated))
13983 entry_block.dominated_variables.push_back(var.self);
13984 var.deferred_declaration = true;
13985 }
13986 }
13987 else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression)
13988 {
13989 // No need to declare this variable, it has a static expression.
13990 var.deferred_declaration = false;
13991 }
13992 else if (expression_is_lvalue(v))
13993 {
13994 add_local_variable_name(var.self);
13995
13996 // Loop variables should never be declared early, they are explicitly emitted in a loop.
13997 if (var.initializer && !var.loop_variable)
13998 statement(variable_decl_function_local(var), ";");
13999 else
14000 {
14001 // Don't declare variable until first use to declutter the GLSL output quite a lot.
14002 // If we don't touch the variable before first branch,
14003 // declare it then since we need variable declaration to be in top scope.
14004 var.deferred_declaration = true;
14005 }
14006 }
14007 else
14008 {
14009 // HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this.
14010 // For these types (non-lvalue), we enforce forwarding through a shadowed variable.
14011 // This means that when we OpStore to these variables, we just write in the expression ID directly.
14012 // This breaks any kind of branching, since the variable must be statically assigned.
14013 // Branching on samplers and images would be pretty much impossible to fake in GLSL.
14014 var.statically_assigned = true;
14015 }
14016
14017 var.loop_variable_enable = false;
14018
14019 // Loop variables are never declared outside their for-loop, so block any implicit declaration.
14020 if (var.loop_variable)
14021 var.deferred_declaration = false;
14022 }
14023
14024 // Enforce declaration order for regression testing purposes.
14025 for (auto &block_id : func.blocks)
14026 {
14027 auto &block = get<SPIRBlock>(block_id);
14028 sort(begin(block.dominated_variables), end(block.dominated_variables));
14029 }
14030
14031 for (auto &line : current_function->fixup_hooks_in)
14032 line();
14033
14034 emit_block_chain(entry_block);
14035
14036 end_scope();
14037 processing_entry_point = false;
14038 statement("");
14039
14040 // Make sure deferred declaration state for local variables is cleared when we are done with function.
14041 // We risk declaring Private/Workgroup variables in places we are not supposed to otherwise.
14042 for (auto &v : func.local_variables)
14043 {
14044 auto &var = get<SPIRVariable>(v);
14045 var.deferred_declaration = false;
14046 }
14047}
14048
14049void CompilerGLSL::emit_fixup()
14050{
14051 if (is_vertex_like_shader())
14052 {
14053 if (options.vertex.fixup_clipspace)
14054 {
14055 const char *suffix = backend.float_literal_suffix ? "f" : "";
14056 statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;");
14057 }
14058
14059 if (options.vertex.flip_vert_y)
14060 statement("gl_Position.y = -gl_Position.y;");
14061 }
14062}
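// Note on fixup_clipspace above: with z' = 2.0 * z - w, a clip-space depth in [0, w]
// (Vulkan/D3D convention) is remapped to [-w, w] (default OpenGL convention), since z = 0
// maps to -w and z = w maps to +w. flip_vert_y simply mirrors the Y axis.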
14063
14064void CompilerGLSL::flush_phi(BlockID from, BlockID to)
14065{
14066 auto &child = get<SPIRBlock>(to);
14067 if (child.ignore_phi_from_block == from)
14068 return;
14069
14070 unordered_set<uint32_t> temporary_phi_variables;
14071
14072 for (auto itr = begin(child.phi_variables); itr != end(child.phi_variables); ++itr)
14073 {
14074 auto &phi = *itr;
14075
14076 if (phi.parent == from)
14077 {
14078 auto &var = get<SPIRVariable>(phi.function_variable);
14079
14080 // A Phi variable might be a loop variable, so flush to static expression.
14081 if (var.loop_variable && !var.loop_variable_enable)
14082 var.static_expression = phi.local_variable;
14083 else
14084 {
14085 flush_variable_declaration(phi.function_variable);
14086
14087 // Check if we are going to write to a Phi variable that another statement will read from
14088 // as part of another Phi node in our target block.
14089 // For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads.
14090 // This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm.
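				// Hypothetical example: if this block has the phis "x = phi([y, from])" and
				// "y = phi([x, from])", writing x first would corrupt the value read for y,
				// so x is first saved into a "_<id>_copy" temporary and y reads the copy instead.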
14091 bool need_saved_temporary =
14092 find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool {
14093 return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from;
14094 }) != end(child.phi_variables);
14095
14096 if (need_saved_temporary)
14097 {
14098 // Need to make sure we declare the phi variable with a copy at the right scope.
14099 // We cannot safely declare a temporary here since we might be inside a continue block.
14100 if (!var.allocate_temporary_copy)
14101 {
14102 var.allocate_temporary_copy = true;
14103 force_recompile();
14104 }
14105 statement("_", phi.function_variable, "_copy", " = ", to_name(phi.function_variable), ";");
14106 temporary_phi_variables.insert(phi.function_variable);
14107 }
14108
14109 // This might be called in continue block, so make sure we
14110 // use this to emit ESSL 1.0 compliant increments/decrements.
14111 auto lhs = to_expression(phi.function_variable);
14112
14113 string rhs;
14114 if (temporary_phi_variables.count(phi.local_variable))
14115 rhs = join("_", phi.local_variable, "_copy");
14116 else
14117 rhs = to_pointer_expression(phi.local_variable);
14118
14119 if (!optimize_read_modify_write(get<SPIRType>(var.basetype), lhs, rhs))
14120 statement(lhs, " = ", rhs, ";");
14121 }
14122
14123 register_write(phi.function_variable);
14124 }
14125 }
14126}
14127
14128void CompilerGLSL::branch_to_continue(BlockID from, BlockID to)
14129{
14130 auto &to_block = get<SPIRBlock>(to);
14131 if (from == to)
14132 return;
14133
14134 assert(is_continue(to));
14135 if (to_block.complex_continue)
14136 {
14137 // Just emit the whole block chain as is.
14138 auto usage_counts = expression_usage_counts;
14139
14140 emit_block_chain(to_block);
14141
14142 // Expression usage counts are moot after returning from the continue block.
14143 expression_usage_counts = usage_counts;
14144 }
14145 else
14146 {
14147 auto &from_block = get<SPIRBlock>(from);
14148 bool outside_control_flow = false;
14149 uint32_t loop_dominator = 0;
14150
14151 // FIXME: Refactor this to not use the old loop_dominator tracking.
14152 if (from_block.merge_block)
14153 {
14154 // If we are a loop header, we don't set the loop dominator,
14155 // so just use "self" here.
14156 loop_dominator = from;
14157 }
14158 else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator))
14159 {
14160 loop_dominator = from_block.loop_dominator;
14161 }
14162
14163 if (loop_dominator != 0)
14164 {
14165 auto &cfg = get_cfg_for_current_function();
14166
14167 // For non-complex continue blocks, we implicitly branch to the continue block
14168 // by having the continue block be part of the loop header in for (; ; continue-block).
14169 outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(loop_dominator, from);
14170 }
14171
14172 // Some simplification for for-loops. We always end up with a useless continue;
14173 // statement since we branch to a loop block.
14174 // Walk the CFG, if we unconditionally execute the block calling continue assuming we're in the loop block,
14175 // we can avoid writing out an explicit continue statement.
14176 // Similar optimization to return statements if we know we're outside flow control.
14177 if (!outside_control_flow)
14178 statement("continue;");
14179 }
14180}
14181
14182void CompilerGLSL::branch(BlockID from, BlockID to)
14183{
14184 flush_phi(from, to);
14185 flush_control_dependent_expressions(from);
14186
14187 bool to_is_continue = is_continue(to);
14188
14189 // This is only a continue if we branch to our loop dominator.
14190 if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get<SPIRBlock>(from).loop_dominator == to)
14191 {
14192 // This can happen if we had a complex continue block which was emitted.
14193 // Once the continue block tries to branch to the loop header, just emit continue;
14194 // and end the chain here.
14195 statement("continue;");
14196 }
14197 else if (from != to && is_break(to))
14198 {
14199 // We cannot break to ourselves, so check explicitly for from != to.
14200 // This case can trigger if a loop header is all three of these things:
14201 // - Continue block
14202 // - Loop header
14203 // - Break merge target all at once ...
14204
14205 // Very dirty workaround.
14206 // Switch constructs are able to break, but they cannot break out of a loop at the same time.
		// The only sensible solution is to make a ladder variable, which we declare at the top of the switch block,
14208 // write to the ladder here, and defer the break.
14209 // The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case.
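		// A hedged sketch of the resulting GLSL (the identifier is derived from the switch block ID,
		// shown here with a made-up ID):
		//   bool _42_ladder_break = false;
		//   switch (...) { case ...: _42_ladder_break = true; break; ... }
		//   if (_42_ladder_break) break;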
14210 if (current_emitting_switch && is_loop_break(to) &&
14211 current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) &&
14212 get<SPIRBlock>(current_emitting_switch->loop_dominator).merge_block == to)
14213 {
14214 if (!current_emitting_switch->need_ladder_break)
14215 {
14216 force_recompile();
14217 current_emitting_switch->need_ladder_break = true;
14218 }
14219
14220 statement("_", current_emitting_switch->self, "_ladder_break = true;");
14221 }
14222 statement("break;");
14223 }
14224 else if (to_is_continue || from == to)
14225 {
		// The from == to case can happen for a do-while loop which branches into itself.
14227 // We don't mark these cases as continue blocks, but the only possible way to branch into
14228 // ourselves is through means of continue blocks.
14229
14230 // If we are merging to a continue block, there is no need to emit the block chain for continue here.
14231 // We can branch to the continue block after we merge execution.
14232
14233 // Here we make use of structured control flow rules from spec:
14234 // 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block
14235 // - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG
14236 // If we are branching to a merge block, we must be inside a construct which dominates the merge block.
14237 auto &block_meta = ir.block_meta[to];
14238 bool branching_to_merge =
14239 (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT |
14240 ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0;
14241 if (!to_is_continue || !branching_to_merge)
14242 branch_to_continue(from, to);
14243 }
14244 else if (!is_conditional(to))
14245 emit_block_chain(get<SPIRBlock>(to));
14246
14247 // It is important that we check for break before continue.
14248 // A block might serve two purposes, a break block for the inner scope, and
14249 // a continue block in the outer scope.
14250 // Inner scope always takes precedence.
14251}
14252
14253void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block)
14254{
14255 auto &from_block = get<SPIRBlock>(from);
14256 BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0);
14257
14258 // If we branch directly to our selection merge target, we don't need a code path.
14259 bool true_block_needs_code = true_block != merge_block || flush_phi_required(from, true_block);
14260 bool false_block_needs_code = false_block != merge_block || flush_phi_required(from, false_block);
14261
14262 if (!true_block_needs_code && !false_block_needs_code)
14263 return;
14264
14265 // We might have a loop merge here. Only consider selection flattening constructs.
14266 // Loop hints are handled explicitly elsewhere.
14267 if (from_block.hint == SPIRBlock::HintFlatten || from_block.hint == SPIRBlock::HintDontFlatten)
14268 emit_block_hints(from_block);
14269
14270 if (true_block_needs_code)
14271 {
14272 statement("if (", to_expression(cond), ")");
14273 begin_scope();
14274 branch(from, true_block);
14275 end_scope();
14276
14277 if (false_block_needs_code)
14278 {
14279 statement("else");
14280 begin_scope();
14281 branch(from, false_block);
14282 end_scope();
14283 }
14284 }
14285 else if (false_block_needs_code)
14286 {
14287 // Only need false path, use negative conditional.
14288 statement("if (!", to_enclosed_expression(cond), ")");
14289 begin_scope();
14290 branch(from, false_block);
14291 end_scope();
14292 }
14293}
14294
14295// FIXME: This currently cannot handle complex continue blocks
14296// as in do-while.
14297// This should be seen as a "trivial" continue block.
14298string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block)
14299{
14300 auto *block = &get<SPIRBlock>(continue_block);
14301
14302 // While emitting the continue block, declare_temporary will check this
14303 // if we have to emit temporaries.
14304 current_continue_block = block;
14305
14306 SmallVector<string> statements;
14307
14308 // Capture all statements into our list.
14309 auto *old = redirect_statement;
14310 redirect_statement = &statements;
14311
14312 // Stamp out all blocks one after each other.
14313 while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0)
14314 {
14315 // Write out all instructions we have in this block.
14316 emit_block_instructions(*block);
14317
14318 // For plain branchless for/while continue blocks.
14319 if (block->next_block)
14320 {
14321 flush_phi(continue_block, block->next_block);
14322 block = &get<SPIRBlock>(block->next_block);
14323 }
		// For do-while blocks, the last block will be a selection block.
14325 else if (block->true_block && follow_true_block)
14326 {
14327 flush_phi(continue_block, block->true_block);
14328 block = &get<SPIRBlock>(block->true_block);
14329 }
14330 else if (block->false_block && follow_false_block)
14331 {
14332 flush_phi(continue_block, block->false_block);
14333 block = &get<SPIRBlock>(block->false_block);
14334 }
14335 else
14336 {
14337 SPIRV_CROSS_THROW("Invalid continue block detected!");
14338 }
14339 }
14340
14341 // Restore old pointer.
14342 redirect_statement = old;
14343
14344 // Somewhat ugly, strip off the last ';' since we use ',' instead.
14345 // Ideally, we should select this behavior in statement().
14346 for (auto &s : statements)
14347 {
14348 if (!s.empty() && s.back() == ';')
14349 s.erase(s.size() - 1, 1);
14350 }
14351
14352 current_continue_block = nullptr;
14353 return merge(statements);
14354}
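// Sketch of how the result is used (hypothetical loop): the captured statements have their
// trailing ';' stripped and are merged with ", ", so a continue block doing "i++;" and "j += 2;"
// becomes the third clause of the loop header:
//   for (int i = 0, j = 0; i < n; i++, j += 2) { ... }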
14355
14356void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block)
14357{
14358 // While loops do not take initializers, so declare all of them outside.
14359 for (auto &loop_var : block.loop_variables)
14360 {
14361 auto &var = get<SPIRVariable>(loop_var);
14362 statement(variable_decl(var), ";");
14363 }
14364}
14365
14366string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block)
14367{
14368 if (block.loop_variables.empty())
14369 return "";
14370
14371 bool same_types = for_loop_initializers_are_same_type(block);
14372 // We can only declare for loop initializers if all variables are of same type.
14373 // If we cannot do this, declare individual variables before the loop header.
14374
14375 // We might have a loop variable candidate which was not assigned to for some reason.
14376 uint32_t missing_initializers = 0;
14377 for (auto &variable : block.loop_variables)
14378 {
14379 uint32_t expr = get<SPIRVariable>(variable).static_expression;
14380
		// Sometimes loop variables are initialized with OpUndef, but we can just declare
		// a plain variable without an initializer in this case.
14383 if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
14384 missing_initializers++;
14385 }
14386
14387 if (block.loop_variables.size() == 1 && missing_initializers == 0)
14388 {
14389 return variable_decl(get<SPIRVariable>(block.loop_variables.front()));
14390 }
14391 else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size()))
14392 {
14393 for (auto &loop_var : block.loop_variables)
14394 statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
14395 return "";
14396 }
14397 else
14398 {
14399 // We have a mix of loop variables, either ones with a clear initializer, or ones without.
14400 // Separate the two streams.
14401 string expr;
14402
14403 for (auto &loop_var : block.loop_variables)
14404 {
14405 uint32_t static_expr = get<SPIRVariable>(loop_var).static_expression;
14406 if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef)
14407 {
14408 statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
14409 }
14410 else
14411 {
14412 auto &var = get<SPIRVariable>(loop_var);
14413 auto &type = get_variable_data_type(var);
14414 if (expr.empty())
14415 {
14416 // For loop initializers are of the form <type id = value, id = value, id = value, etc ...
14417 expr = join(to_qualifiers_glsl(var.self), type_to_glsl(type), " ");
14418 }
14419 else
14420 {
14421 expr += ", ";
14422 // In MSL, being based on C++, the asterisk marking a pointer
14423 // binds to the identifier, not the type.
14424 if (type.pointer)
14425 expr += "* ";
14426 }
14427
14428 expr += join(to_name(loop_var), " = ", to_pointer_expression(var.static_expression));
14429 }
14430 }
14431 return expr;
14432 }
14433}
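// Illustrative outcomes of the logic above (hypothetical variables):
//   a single initialized loop variable     -> "int i = 0" goes straight into the for-header
//   several variables of the same type     -> merged into "int i = 0, j = 10"
//   differing types, or none initialized   -> declared with statement() before the loop instead
//   a mix of initialized and uninitialized -> uninitialized ones are declared before the loop,
//                                             the rest are merged into the header expression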
14434
14435bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block)
14436{
14437 if (block.loop_variables.size() <= 1)
14438 return true;
14439
14440 uint32_t expected = 0;
14441 Bitset expected_flags;
14442 for (auto &var : block.loop_variables)
14443 {
14444 // Don't care about uninitialized variables as they will not be part of the initializers.
14445 uint32_t expr = get<SPIRVariable>(var).static_expression;
14446 if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
14447 continue;
14448
14449 if (expected == 0)
14450 {
14451 expected = get<SPIRVariable>(var).basetype;
14452 expected_flags = get_decoration_bitset(var);
14453 }
14454 else if (expected != get<SPIRVariable>(var).basetype)
14455 return false;
14456
14457 // Precision flags and things like that must also match.
14458 if (expected_flags != get_decoration_bitset(var))
14459 return false;
14460 }
14461
14462 return true;
14463}
14464
14465bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method)
14466{
14467 SPIRBlock::ContinueBlockType continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
14468
14469 if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
14470 {
14471 uint32_t current_count = statement_count;
14472 // If we're trying to create a true for loop,
14473 // we need to make sure that all opcodes before branch statement do not actually emit any code.
14474 // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
14475 emit_block_instructions(block);
14476
14477 bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries);
14478
14479 // This can work! We only did trivial things which could be forwarded in block body!
14480 if (current_count == statement_count && condition_is_temporary)
14481 {
14482 switch (continue_type)
14483 {
14484 case SPIRBlock::ForLoop:
14485 {
14486 // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
14487 flush_undeclared_variables(block);
14488
14489 // Important that we do this in this order because
14490 // emitting the continue block can invalidate the condition expression.
14491 auto initializer = emit_for_loop_initializers(block);
14492 auto condition = to_expression(block.condition);
14493
14494 // Condition might have to be inverted.
14495 if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
14496 condition = join("!", enclose_expression(condition));
14497
14498 emit_block_hints(block);
14499 if (method != SPIRBlock::MergeToSelectContinueForLoop)
14500 {
14501 auto continue_block = emit_continue_block(block.continue_block, false, false);
14502 statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
14503 }
14504 else
14505 statement("for (", initializer, "; ", condition, "; )");
14506 break;
14507 }
14508
14509 case SPIRBlock::WhileLoop:
14510 {
14511 // This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header.
14512 flush_undeclared_variables(block);
14513 emit_while_loop_initializers(block);
14514 emit_block_hints(block);
14515
14516 auto condition = to_expression(block.condition);
14517 // Condition might have to be inverted.
14518 if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
14519 condition = join("!", enclose_expression(condition));
14520
14521 statement("while (", condition, ")");
14522 break;
14523 }
14524
14525 default:
14526 block.disable_block_optimization = true;
14527 force_recompile();
14528 begin_scope(); // We'll see an end_scope() later.
14529 return false;
14530 }
14531
14532 begin_scope();
14533 return true;
14534 }
14535 else
14536 {
14537 block.disable_block_optimization = true;
14538 force_recompile();
14539 begin_scope(); // We'll see an end_scope() later.
14540 return false;
14541 }
14542 }
14543 else if (method == SPIRBlock::MergeToDirectForLoop)
14544 {
14545 auto &child = get<SPIRBlock>(block.next_block);
14546
14547 // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
14548 flush_undeclared_variables(child);
14549
14550 uint32_t current_count = statement_count;
14551
	// If we're trying to create a true for loop,
	// we need to make sure that all opcodes before the branch statement do not actually emit any code.
	// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
14555 emit_block_instructions(child);
14556
14557 bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries);
14558
14559 if (current_count == statement_count && condition_is_temporary)
14560 {
14561 uint32_t target_block = child.true_block;
14562
14563 switch (continue_type)
14564 {
14565 case SPIRBlock::ForLoop:
14566 {
14567 // Important that we do this in this order because
14568 // emitting the continue block can invalidate the condition expression.
14569 auto initializer = emit_for_loop_initializers(block);
14570 auto condition = to_expression(child.condition);
14571
14572 // Condition might have to be inverted.
14573 if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
14574 {
14575 condition = join("!", enclose_expression(condition));
14576 target_block = child.false_block;
14577 }
14578
14579 auto continue_block = emit_continue_block(block.continue_block, false, false);
14580 emit_block_hints(block);
14581 statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
14582 break;
14583 }
14584
14585 case SPIRBlock::WhileLoop:
14586 {
14587 emit_while_loop_initializers(block);
14588 emit_block_hints(block);
14589
14590 auto condition = to_expression(child.condition);
14591 // Condition might have to be inverted.
14592 if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
14593 {
14594 condition = join("!", enclose_expression(condition));
14595 target_block = child.false_block;
14596 }
14597
14598 statement("while (", condition, ")");
14599 break;
14600 }
14601
14602 default:
14603 block.disable_block_optimization = true;
14604 force_recompile();
14605 begin_scope(); // We'll see an end_scope() later.
14606 return false;
14607 }
14608
14609 begin_scope();
14610 branch(child.self, target_block);
14611 return true;
14612 }
14613 else
14614 {
14615 block.disable_block_optimization = true;
14616 force_recompile();
14617 begin_scope(); // We'll see an end_scope() later.
14618 return false;
14619 }
14620 }
14621 else
14622 return false;
14623}
14624
14625void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block)
14626{
14627 for (auto &v : block.dominated_variables)
14628 flush_variable_declaration(v);
14629}
14630
14631void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<TypeID, ID>> &temporaries)
14632{
14633 // If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header.
14634 // Need to sort these to ensure that reference output is stable.
14635 sort(begin(temporaries), end(temporaries),
14636 [](const pair<TypeID, ID> &a, const pair<TypeID, ID> &b) { return a.second < b.second; });
14637
14638 for (auto &tmp : temporaries)
14639 {
14640 add_local_variable_name(tmp.second);
14641 auto &flags = ir.meta[tmp.second].decoration.decoration_flags;
14642 auto &type = get<SPIRType>(tmp.first);
14643
14644 // Not all targets support pointer literals, so don't bother with that case.
14645 string initializer;
14646 if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
14647 initializer = join(" = ", to_zero_initialized_expression(tmp.first));
14648
14649 statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), initializer, ";");
14650
14651 hoisted_temporaries.insert(tmp.second);
14652 forced_temporaries.insert(tmp.second);
14653
14654 // The temporary might be read from before it's assigned, set up the expression now.
14655 set<SPIRExpression>(tmp.second, to_name(tmp.second), tmp.first, true);
14656 }
14657}
14658
14659void CompilerGLSL::emit_block_chain(SPIRBlock &block)
14660{
14661 bool select_branch_to_true_block = false;
14662 bool select_branch_to_false_block = false;
14663 bool skip_direct_branch = false;
14664 bool emitted_loop_header_variables = false;
14665 bool force_complex_continue_block = false;
14666 ValueSaver<uint32_t> loop_level_saver(current_loop_level);
14667
14668 if (block.merge == SPIRBlock::MergeLoop)
14669 add_loop_level();
14670
14671 emit_hoisted_temporaries(block.declare_temporary);
14672
14673 SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone;
14674 if (block.continue_block)
14675 {
14676 continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
14677 // If we know we cannot emit a loop, mark the block early as a complex loop so we don't force unnecessary recompiles.
14678 if (continue_type == SPIRBlock::ComplexLoop)
14679 block.complex_continue = true;
14680 }
14681
	// If we have loop variables, stop masking out access to those variables now.
14683 for (auto var_id : block.loop_variables)
14684 {
14685 auto &var = get<SPIRVariable>(var_id);
14686 var.loop_variable_enable = true;
14687 // We're not going to declare the variable directly, so emit a copy here.
14688 emit_variable_temporary_copies(var);
14689 }
14690
14691 // Remember deferred declaration state. We will restore it before returning.
14692 SmallVector<bool, 64> rearm_dominated_variables(block.dominated_variables.size());
14693 for (size_t i = 0; i < block.dominated_variables.size(); i++)
14694 {
14695 uint32_t var_id = block.dominated_variables[i];
14696 auto &var = get<SPIRVariable>(var_id);
14697 rearm_dominated_variables[i] = var.deferred_declaration;
14698 }
14699
14700 // This is the method often used by spirv-opt to implement loops.
14701 // The loop header goes straight into the continue block.
	// However, don't attempt this on ESSL 1.0: if a loop variable is used in the continue block,
	// ESSL 1.0's restricted loop syntax cannot express the resulting structure, so this loop method will not work.
14704 if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop))
14705 {
14706 flush_undeclared_variables(block);
14707 if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop))
14708 {
14709 if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
14710 select_branch_to_false_block = true;
14711 else
14712 select_branch_to_true_block = true;
14713
14714 emitted_loop_header_variables = true;
14715 force_complex_continue_block = true;
14716 }
14717 }
14718 // This is the older loop behavior in glslang which branches to loop body directly from the loop header.
14719 else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop))
14720 {
14721 flush_undeclared_variables(block);
14722 if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop))
14723 {
	// The body of the while loop is actually just the true (or false) block, so always branch there unconditionally.
14725 if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
14726 select_branch_to_false_block = true;
14727 else
14728 select_branch_to_true_block = true;
14729
14730 emitted_loop_header_variables = true;
14731 }
14732 }
	// This is the newer loop behavior in glslang which branches from the loop header directly to
	// a new block, which in turn has an OpBranchConditional without a selection merge.
14735 else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop))
14736 {
14737 flush_undeclared_variables(block);
14738 if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop))
14739 {
14740 skip_direct_branch = true;
14741 emitted_loop_header_variables = true;
14742 }
14743 }
14744 else if (continue_type == SPIRBlock::DoWhileLoop)
14745 {
14746 flush_undeclared_variables(block);
14747 emit_while_loop_initializers(block);
14748 emitted_loop_header_variables = true;
14749 // We have some temporaries where the loop header is the dominator.
14750 // We risk a case where we have code like:
14751 // for (;;) { create-temporary; break; } consume-temporary;
14752 // so force-declare temporaries here.
14753 emit_hoisted_temporaries(block.potential_declare_temporary);
14754 statement("do");
14755 begin_scope();
14756
14757 emit_block_instructions(block);
14758 }
14759 else if (block.merge == SPIRBlock::MergeLoop)
14760 {
14761 flush_undeclared_variables(block);
14762 emit_while_loop_initializers(block);
14763 emitted_loop_header_variables = true;
14764
14765 // We have a generic loop without any distinguishable pattern like for, while or do while.
14766 get<SPIRBlock>(block.continue_block).complex_continue = true;
14767 continue_type = SPIRBlock::ComplexLoop;
14768
14769 // We have some temporaries where the loop header is the dominator.
14770 // We risk a case where we have code like:
14771 // for (;;) { create-temporary; break; } consume-temporary;
14772 // so force-declare temporaries here.
14773 emit_hoisted_temporaries(block.potential_declare_temporary);
14774 emit_block_hints(block);
14775 statement("for (;;)");
14776 begin_scope();
14777
14778 emit_block_instructions(block);
14779 }
14780 else
14781 {
14782 emit_block_instructions(block);
14783 }
14784
	// If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem:
	// writes to those loop variables might have been masked out, so we need a recompile.
14787 if (!emitted_loop_header_variables && !block.loop_variables.empty())
14788 {
14789 force_recompile_guarantee_forward_progress();
14790 for (auto var : block.loop_variables)
14791 get<SPIRVariable>(var).loop_variable = false;
14792 block.loop_variables.clear();
14793 }
14794
14795 flush_undeclared_variables(block);
14796 bool emit_next_block = true;
14797
14798 // Handle end of block.
14799 switch (block.terminator)
14800 {
14801 case SPIRBlock::Direct:
14802 // True when emitting complex continue block.
14803 if (block.loop_dominator == block.next_block)
14804 {
14805 branch(block.self, block.next_block);
14806 emit_next_block = false;
14807 }
14808 // True if MergeToDirectForLoop succeeded.
14809 else if (skip_direct_branch)
14810 emit_next_block = false;
14811 else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block))
14812 {
14813 branch(block.self, block.next_block);
14814 emit_next_block = false;
14815 }
14816 break;
14817
14818 case SPIRBlock::Select:
14819 // True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded.
14820 if (select_branch_to_true_block)
14821 {
14822 if (force_complex_continue_block)
14823 {
14824 assert(block.true_block == block.continue_block);
14825
14826 // We're going to emit a continue block directly here, so make sure it's marked as complex.
14827 auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
14828 bool old_complex = complex_continue;
14829 complex_continue = true;
14830 branch(block.self, block.true_block);
14831 complex_continue = old_complex;
14832 }
14833 else
14834 branch(block.self, block.true_block);
14835 }
14836 else if (select_branch_to_false_block)
14837 {
14838 if (force_complex_continue_block)
14839 {
14840 assert(block.false_block == block.continue_block);
14841
14842 // We're going to emit a continue block directly here, so make sure it's marked as complex.
14843 auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
14844 bool old_complex = complex_continue;
14845 complex_continue = true;
14846 branch(block.self, block.false_block);
14847 complex_continue = old_complex;
14848 }
14849 else
14850 branch(block.self, block.false_block);
14851 }
14852 else
14853 branch(block.self, block.condition, block.true_block, block.false_block);
14854 break;
14855
14856 case SPIRBlock::MultiSelect:
14857 {
14858 auto &type = expression_type(block.condition);
14859 bool unsigned_case = type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort ||
14860 type.basetype == SPIRType::UByte || type.basetype == SPIRType::UInt64;
14861
14862 if (block.merge == SPIRBlock::MergeNone)
14863 SPIRV_CROSS_THROW("Switch statement is not structured");
14864
14865 if (!backend.support_64bit_switch && (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64))
14866 {
14867 // SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages.
14868 SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors.");
14869 }
14870
14871 const char *label_suffix = "";
14872 if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix)
14873 label_suffix = "u";
14874 else if (type.basetype == SPIRType::Int64 && backend.support_64bit_switch)
14875 label_suffix = "l";
14876 else if (type.basetype == SPIRType::UInt64 && backend.support_64bit_switch)
14877 label_suffix = "ul";
14878 else if (type.basetype == SPIRType::UShort)
14879 label_suffix = backend.uint16_t_literal_suffix;
14880 else if (type.basetype == SPIRType::Short)
14881 label_suffix = backend.int16_t_literal_suffix;
14882
14883 SPIRBlock *old_emitting_switch = current_emitting_switch;
14884 current_emitting_switch = &block;
14885
14886 if (block.need_ladder_break)
14887 statement("bool _", block.self, "_ladder_break = false;");
14888
14889 // Find all unique case constructs.
14890 unordered_map<uint32_t, SmallVector<uint64_t>> case_constructs;
14891 SmallVector<uint32_t> block_declaration_order;
14892 SmallVector<uint64_t> literals_to_merge;
14893
14894 // If a switch case branches to the default block for some reason, we can just remove that literal from consideration
14895 // and let the default: block handle it.
	// Section 2.11 of the SPIR-V spec states that fall-through cases must follow a very strict declaration order, which we can take advantage of here.
	// We only need to consider possible fallthrough if order[i] branches to order[i + 1].
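	// Hypothetical example: an OpSwitch with literals 1 -> %A, 2 -> %A, 3 -> %B is grouped below as
	//   case_constructs[%A] = { 1, 2 }, case_constructs[%B] = { 3 }, block_declaration_order = { %A, %B },
	// assuming neither %A nor %B is the merge block or the default block.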
14898 auto &cases = get_case_list(block);
14899 for (auto &c : cases)
14900 {
14901 if (c.block != block.next_block && c.block != block.default_block)
14902 {
14903 if (!case_constructs.count(c.block))
14904 block_declaration_order.push_back(c.block);
14905 case_constructs[c.block].push_back(c.value);
14906 }
14907 else if (c.block == block.next_block && block.default_block != block.next_block)
14908 {
14909 // We might have to flush phi inside specific case labels.
14910 // If we can piggyback on default:, do so instead.
14911 literals_to_merge.push_back(c.value);
14912 }
14913 }
14914
14915 // Empty literal array -> default.
14916 if (block.default_block != block.next_block)
14917 {
14918 auto &default_block = get<SPIRBlock>(block.default_block);
14919
14920 // We need to slide in the default block somewhere in this chain
14921 // if there are fall-through scenarios since the default is declared separately in OpSwitch.
14922 // Only consider trivial fall-through cases here.
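	// Hypothetical example: if case block %A falls through into the default block, we must end up emitting
	//   case 1: { /* %A */ }  // fall through
	//   default: { /* default block */ }
	// so the default block is spliced into the declaration order right after %A.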
14923 size_t num_blocks = block_declaration_order.size();
14924 bool injected_block = false;
14925
14926 for (size_t i = 0; i < num_blocks; i++)
14927 {
14928 auto &case_block = get<SPIRBlock>(block_declaration_order[i]);
14929 if (execution_is_direct_branch(case_block, default_block))
14930 {
14931 // Fallthrough to default block, we must inject the default block here.
14932 block_declaration_order.insert(begin(block_declaration_order) + i + 1, block.default_block);
14933 injected_block = true;
14934 break;
14935 }
14936 else if (execution_is_direct_branch(default_block, case_block))
14937 {
14938 // Default case is falling through to another case label, we must inject the default block here.
14939 block_declaration_order.insert(begin(block_declaration_order) + i, block.default_block);
14940 injected_block = true;
14941 break;
14942 }
14943 }
14944
14945 // Order does not matter.
14946 if (!injected_block)
14947 block_declaration_order.push_back(block.default_block);
14948 else if (is_legacy_es())
14949 SPIRV_CROSS_THROW("Default case label fallthrough to other case label is not supported in ESSL 1.0.");
14950
14951 case_constructs[block.default_block] = {};
14952 }
14953
14954 size_t num_blocks = block_declaration_order.size();
14955
14956 const auto to_case_label = [](uint64_t literal, uint32_t width, bool is_unsigned_case) -> string
14957 {
14958 if (is_unsigned_case)
14959 return convert_to_string(literal);
14960
	// For smaller widths, the literals are compiled as 32-bit wide
	// literals, so we don't need to handle every size specifically.
14963 if (width <= 32)
14964 {
14965 return convert_to_string(int64_t(int32_t(literal)));
14966 }
14967
14968 return convert_to_string(int64_t(literal));
14969 };
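	// For example, with a signed 32-bit selector the literal 0xFFFFFFFF is emitted as "-1",
	// while the same literal with an unsigned selector is emitted as "4294967295".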
14970
14971 const auto to_legacy_case_label = [&](uint32_t condition, const SmallVector<uint64_t> &labels,
14972 const char *suffix) -> string {
14973 string ret;
14974 size_t count = labels.size();
14975 for (size_t i = 0; i < count; i++)
14976 {
14977 if (i)
14978 ret += " || ";
14979 ret += join(count > 1 ? "(" : "", to_enclosed_expression(condition), " == ", labels[i], suffix,
14980 count > 1 ? ")" : "");
14981 }
14982 return ret;
14983 };
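	// For example, labels { 4, 7 } on a simple condition expression "x" with an empty suffix yield
	//   "(x == 4) || (x == 7)"
	// which feeds the if/else-if chain emitted for legacy targets instead of a real switch.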
14984
14985 // We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture,
14986 // we need to flush phi nodes outside the switch block in a branch,
14987 // and skip any Phi handling inside the case label to make fall-through work as expected.
14988 // This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this
14989 // inside the case label if at all possible.
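	// A minimal sketch of the resulting codegen (names hypothetical): for a fall-through target with literals { 3, 4 },
	//   if (x == 3 || x == 4) { /* flush phi for that case */ }
	// is emitted before the switch itself, and the case label then skips its own phi flush via ignore_phi_from_block.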
14990 for (size_t i = 1; backend.support_case_fallthrough && i < num_blocks; i++)
14991 {
14992 if (flush_phi_required(block.self, block_declaration_order[i]) &&
14993 flush_phi_required(block_declaration_order[i - 1], block_declaration_order[i]))
14994 {
14995 uint32_t target_block = block_declaration_order[i];
14996
14997 // Make sure we flush Phi, it might have been marked to be ignored earlier.
14998 get<SPIRBlock>(target_block).ignore_phi_from_block = 0;
14999
15000 auto &literals = case_constructs[target_block];
15001
15002 if (literals.empty())
15003 {
15004 // Oh boy, gotta make a complete negative test instead! o.o
15005 // Find all possible literals that would *not* make us enter the default block.
15006 // If none of those literals match, we flush Phi ...
15007 SmallVector<string> conditions;
15008 for (size_t j = 0; j < num_blocks; j++)
15009 {
15010 auto &negative_literals = case_constructs[block_declaration_order[j]];
15011 for (auto &case_label : negative_literals)
15012 conditions.push_back(join(to_enclosed_expression(block.condition),
15013 " != ", to_case_label(case_label, type.width, unsigned_case)));
15014 }
15015
15016 statement("if (", merge(conditions, " && "), ")");
15017 begin_scope();
15018 flush_phi(block.self, target_block);
15019 end_scope();
15020 }
15021 else
15022 {
15023 SmallVector<string> conditions;
15024 conditions.reserve(literals.size());
15025 for (auto &case_label : literals)
15026 conditions.push_back(join(to_enclosed_expression(block.condition),
15027 " == ", to_case_label(case_label, type.width, unsigned_case)));
15028 statement("if (", merge(conditions, " || "), ")");
15029 begin_scope();
15030 flush_phi(block.self, target_block);
15031 end_scope();
15032 }
15033
15034 // Mark the block so that we don't flush Phi from header to case label.
15035 get<SPIRBlock>(target_block).ignore_phi_from_block = block.self;
15036 }
15037 }
15038
15039 // If there is only one default block, and no cases, this is a case where SPIRV-opt decided to emulate
15040 // non-structured exits with the help of a switch block.
15041 // This is buggy on FXC, so just emit the logical equivalent of a do { } while(false), which is more idiomatic.
15042 bool degenerate_switch = block.default_block != block.merge_block && cases.empty();
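	// Sketch of the fallback emitted for such a degenerate switch (counter value hypothetical):
	//   do { /* default block */ } while(false);                                      // non-legacy targets
	//   for (int spvDummy7 = 0; spvDummy7 < 1; spvDummy7++) { /* default block */ }   // ESSL 1.0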
15043
15044 if (degenerate_switch || is_legacy_es())
15045 {
15046 // ESSL 1.0 is not guaranteed to support do/while.
15047 if (is_legacy_es())
15048 {
15049 uint32_t counter = statement_count;
15050 statement("for (int spvDummy", counter, " = 0; spvDummy", counter,
15051 " < 1; spvDummy", counter, "++)");
15052 }
15053 else
15054 statement("do");
15055 }
15056 else
15057 {
15058 emit_block_hints(block);
15059 statement("switch (", to_unpacked_expression(block.condition), ")");
15060 }
15061 begin_scope();
15062
15063 for (size_t i = 0; i < num_blocks; i++)
15064 {
15065 uint32_t target_block = block_declaration_order[i];
15066 auto &literals = case_constructs[target_block];
15067
15068 if (literals.empty())
15069 {
15070 // Default case.
15071 if (!degenerate_switch)
15072 {
15073 if (is_legacy_es())
15074 statement("else");
15075 else
15076 statement("default:");
15077 }
15078 }
15079 else
15080 {
15081 if (is_legacy_es())
15082 {
15083 statement((i ? "else " : ""), "if (", to_legacy_case_label(block.condition, literals, label_suffix),
15084 ")");
15085 }
15086 else
15087 {
15088 for (auto &case_literal : literals)
15089 {
15090 // The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here.
15091 statement("case ", to_case_label(case_literal, type.width, unsigned_case), label_suffix, ":");
15092 }
15093 }
15094 }
15095
15096 auto &case_block = get<SPIRBlock>(target_block);
15097 if (backend.support_case_fallthrough && i + 1 < num_blocks &&
15098 execution_is_direct_branch(case_block, get<SPIRBlock>(block_declaration_order[i + 1])))
15099 {
15100 // We will fall through here, so just terminate the block chain early.
15101 // We still need to deal with Phi potentially.
	// No need for a stack-like thing here since we only do fall-through when there is a
	// single trivial branch to the fall-through target.
15104 current_emitting_switch_fallthrough = true;
15105 }
15106 else
15107 current_emitting_switch_fallthrough = false;
15108
15109 if (!degenerate_switch)
15110 begin_scope();
15111 branch(block.self, target_block);
15112 if (!degenerate_switch)
15113 end_scope();
15114
15115 current_emitting_switch_fallthrough = false;
15116 }
15117
15118 // Might still have to flush phi variables if we branch from loop header directly to merge target.
15119 // This is supposed to emit all cases where we branch from header to merge block directly.
	// There are two main scenarios where we cannot rely on default fallthrough.
15121 // - There is an explicit default: label already.
15122 // In this case, literals_to_merge need to form their own "default" case, so that we avoid executing that block.
15123 // - Header -> Merge requires flushing PHI. In this case, we need to collect all cases and flush PHI there.
15124 bool header_merge_requires_phi = flush_phi_required(block.self, block.next_block);
15125 bool need_fallthrough_block = block.default_block == block.next_block || !literals_to_merge.empty();
15126 if ((header_merge_requires_phi && need_fallthrough_block) || !literals_to_merge.empty())
15127 {
15128 for (auto &case_literal : literals_to_merge)
15129 statement("case ", to_case_label(case_literal, type.width, unsigned_case), label_suffix, ":");
15130
15131 if (block.default_block == block.next_block)
15132 {
15133 if (is_legacy_es())
15134 statement("else");
15135 else
15136 statement("default:");
15137 }
15138
15139 begin_scope();
15140 flush_phi(block.self, block.next_block);
15141 statement("break;");
15142 end_scope();
15143 }
15144
15145 if (degenerate_switch && !is_legacy_es())
15146 end_scope_decl("while(false)");
15147 else
15148 end_scope();
15149
15150 if (block.need_ladder_break)
15151 {
15152 statement("if (_", block.self, "_ladder_break)");
15153 begin_scope();
15154 statement("break;");
15155 end_scope();
15156 }
15157
15158 current_emitting_switch = old_emitting_switch;
15159 break;
15160 }
15161
15162 case SPIRBlock::Return:
15163 {
15164 for (auto &line : current_function->fixup_hooks_out)
15165 line();
15166
15167 if (processing_entry_point)
15168 emit_fixup();
15169
15170 auto &cfg = get_cfg_for_current_function();
15171
15172 if (block.return_value)
15173 {
15174 auto &type = expression_type(block.return_value);
15175 if (!type.array.empty() && !backend.can_return_array)
15176 {
15177 // If we cannot return arrays, we will have a special out argument we can write to instead.
	// The backend is responsible for setting this up and redirecting the return values as appropriate.
15179 if (ir.ids[block.return_value].get_type() != TypeUndef)
15180 {
15181 emit_array_copy("spvReturnValue", 0, block.return_value, StorageClassFunction,
15182 get_expression_effective_storage_class(block.return_value));
15183 }
15184
15185 if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
15186 block.loop_dominator != BlockID(SPIRBlock::NoDominator))
15187 {
15188 statement("return;");
15189 }
15190 }
15191 else
15192 {
15193 // OpReturnValue can return Undef, so don't emit anything for this case.
15194 if (ir.ids[block.return_value].get_type() != TypeUndef)
15195 statement("return ", to_unpacked_expression(block.return_value), ";");
15196 }
15197 }
15198 else if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
15199 block.loop_dominator != BlockID(SPIRBlock::NoDominator))
15200 {
15201 // If this block is the very final block and not called from control flow,
15202 // we do not need an explicit return which looks out of place. Just end the function here.
15203 // In the very weird case of for(;;) { return; } executing return is unconditional,
15204 // but we actually need a return here ...
15205 statement("return;");
15206 }
15207 break;
15208 }
15209
15210 // If the Kill is terminating a block with a (probably synthetic) return value, emit a return value statement.
15211 case SPIRBlock::Kill:
15212 statement(backend.discard_literal, ";");
15213 if (block.return_value)
15214 statement("return ", to_unpacked_expression(block.return_value), ";");
15215 break;
15216
15217 case SPIRBlock::Unreachable:
15218 emit_next_block = false;
15219 break;
15220
15221 case SPIRBlock::IgnoreIntersection:
15222 statement("ignoreIntersectionEXT;");
15223 break;
15224
15225 case SPIRBlock::TerminateRay:
15226 statement("terminateRayEXT;");
15227 break;
15228
15229 default:
15230 SPIRV_CROSS_THROW("Unimplemented block terminator.");
15231 }
15232
15233 if (block.next_block && emit_next_block)
15234 {
15235 // If we hit this case, we're dealing with an unconditional branch, which means we will output
15236 // that block after this. If we had selection merge, we already flushed phi variables.
15237 if (block.merge != SPIRBlock::MergeSelection)
15238 {
15239 flush_phi(block.self, block.next_block);
15240 // For a direct branch, need to remember to invalidate expressions in the next linear block instead.
15241 get<SPIRBlock>(block.next_block).invalidate_expressions = block.invalidate_expressions;
15242 }
15243
15244 // For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi.
15245 if (!current_emitting_switch_fallthrough)
15246 {
15247 // For merge selects we might have ignored the fact that a merge target
15248 // could have been a break; or continue;
15249 // We will need to deal with it here.
15250 if (is_loop_break(block.next_block))
15251 {
15252 // Cannot check for just break, because switch statements will also use break.
15253 assert(block.merge == SPIRBlock::MergeSelection);
15254 statement("break;");
15255 }
15256 else if (is_continue(block.next_block))
15257 {
15258 assert(block.merge == SPIRBlock::MergeSelection);
15259 branch_to_continue(block.self, block.next_block);
15260 }
15261 else if (BlockID(block.self) != block.next_block)
15262 emit_block_chain(get<SPIRBlock>(block.next_block));
15263 }
15264 }
15265
15266 if (block.merge == SPIRBlock::MergeLoop)
15267 {
15268 if (continue_type == SPIRBlock::DoWhileLoop)
15269 {
15270 // Make sure that we run the continue block to get the expressions set, but this
15271 // should become an empty string.
15272 // We have no fallbacks if we cannot forward everything to temporaries ...
15273 const auto &continue_block = get<SPIRBlock>(block.continue_block);
15274 bool positive_test = execution_is_noop(get<SPIRBlock>(continue_block.true_block),
15275 get<SPIRBlock>(continue_block.loop_dominator));
15276
15277 uint32_t current_count = statement_count;
15278 auto statements = emit_continue_block(block.continue_block, positive_test, !positive_test);
15279 if (statement_count != current_count)
15280 {
15281 // The DoWhile block has side effects, force ComplexLoop pattern next pass.
15282 get<SPIRBlock>(block.continue_block).complex_continue = true;
15283 force_recompile();
15284 }
15285
15286 // Might have to invert the do-while test here.
15287 auto condition = to_expression(continue_block.condition);
15288 if (!positive_test)
15289 condition = join("!", enclose_expression(condition));
15290
15291 end_scope_decl(join("while (", condition, ")"));
15292 }
15293 else
15294 end_scope();
15295
15296 loop_level_saver.release();
15297
15298 // We cannot break out of two loops at once, so don't check for break; here.
15299 // Using block.self as the "from" block isn't quite right, but it has the same scope
15300 // and dominance structure, so it's fine.
15301 if (is_continue(block.merge_block))
15302 branch_to_continue(block.self, block.merge_block);
15303 else
15304 emit_block_chain(get<SPIRBlock>(block.merge_block));
15305 }
15306
15307 // Forget about control dependent expressions now.
15308 block.invalidate_expressions.clear();
15309
15310 // After we return, we must be out of scope, so if we somehow have to re-emit this function,
15311 // re-declare variables if necessary.
15312 assert(rearm_dominated_variables.size() == block.dominated_variables.size());
15313 for (size_t i = 0; i < block.dominated_variables.size(); i++)
15314 {
15315 uint32_t var = block.dominated_variables[i];
15316 get<SPIRVariable>(var).deferred_declaration = rearm_dominated_variables[i];
15317 }
15318
15319 // Just like for deferred declaration, we need to forget about loop variable enable
15320 // if our block chain is reinstantiated later.
15321 for (auto &var_id : block.loop_variables)
15322 get<SPIRVariable>(var_id).loop_variable_enable = false;
15323}
15324
15325void CompilerGLSL::begin_scope()
15326{
15327 statement("{");
15328 indent++;
15329}
15330
15331void CompilerGLSL::end_scope()
15332{
15333 if (!indent)
15334 SPIRV_CROSS_THROW("Popping empty indent stack.");
15335 indent--;
15336 statement("}");
15337}
15338
15339void CompilerGLSL::end_scope(const string &trailer)
15340{
15341 if (!indent)
15342 SPIRV_CROSS_THROW("Popping empty indent stack.");
15343 indent--;
15344 statement("}", trailer);
15345}
15346
15347void CompilerGLSL::end_scope_decl()
15348{
15349 if (!indent)
15350 SPIRV_CROSS_THROW("Popping empty indent stack.");
15351 indent--;
15352 statement("};");
15353}
15354
15355void CompilerGLSL::end_scope_decl(const string &decl)
15356{
15357 if (!indent)
15358 SPIRV_CROSS_THROW("Popping empty indent stack.");
15359 indent--;
15360 statement("} ", decl, ";");
15361}
15362
15363void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length)
15364{
15365 // If our variable is remapped, and we rely on type-remapping information as
15366 // well, then we cannot pass the variable as a function parameter.
15367 // Fixing this is non-trivial without stamping out variants of the same function,
15368 // so for now warn about this and suggest workarounds instead.
15369 for (uint32_t i = 0; i < length; i++)
15370 {
15371 auto *var = maybe_get<SPIRVariable>(args[i]);
15372 if (!var || !var->remapped_variable)
15373 continue;
15374
15375 auto &type = get<SPIRType>(var->basetype);
15376 if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
15377 {
15378 SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. "
15379 "This will not work correctly because type-remapping information is lost. "
15380 "To workaround, please consider not passing the subpass input as a function parameter, "
15381 "or use in/out variables instead which do not need type remapping information.");
15382 }
15383 }
15384}
15385
15386const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr)
15387{
15388 // FIXME: This is kind of hacky. There should be a cleaner way.
15389 auto offset = uint32_t(&instr - current_emitting_block->ops.data());
15390 if ((offset + 1) < current_emitting_block->ops.size())
15391 return &current_emitting_block->ops[offset + 1];
15392 else
15393 return nullptr;
15394}
15395
15396uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics)
15397{
15398 return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask |
15399 MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask |
15400 MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask);
15401}
15402
15403void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t, uint32_t rhs_id, StorageClass, StorageClass)
15404{
15405 statement(lhs, " = ", to_expression(rhs_id), ";");
15406}
15407
15408bool CompilerGLSL::unroll_array_to_complex_store(uint32_t target_id, uint32_t source_id)
15409{
15410 if (!backend.force_gl_in_out_block)
15411 return false;
15412 // This path is only relevant for GL backends.
15413
15414 auto *var = maybe_get<SPIRVariable>(target_id);
15415 if (!var || var->storage != StorageClassOutput)
15416 return false;
15417
15418 if (!is_builtin_variable(*var) || BuiltIn(get_decoration(var->self, DecorationBuiltIn)) != BuiltInSampleMask)
15419 return false;
15420
15421 auto &type = expression_type(source_id);
15422 string array_expr;
15423 if (type.array_size_literal.back())
15424 {
15425 array_expr = convert_to_string(type.array.back());
15426 if (type.array.back() == 0)
15427 SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
15428 }
15429 else
15430 array_expr = to_expression(type.array.back());
15431
15432 SPIRType target_type;
15433 target_type.basetype = SPIRType::Int;
15434
15435 statement("for (int i = 0; i < int(", array_expr, "); i++)");
15436 begin_scope();
15437 statement(to_expression(target_id), "[i] = ",
15438 bitcast_expression(target_type, type.basetype, join(to_expression(source_id), "[i]")),
15439 ";");
15440 end_scope();
15441
15442 return true;
15443}
15444
15445void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr)
15446{
15447 if (!backend.force_gl_in_out_block)
15448 return;
15449 // This path is only relevant for GL backends.
15450
15451 auto *var = maybe_get<SPIRVariable>(source_id);
15452 if (!var)
15453 return;
15454
15455 if (var->storage != StorageClassInput && var->storage != StorageClassOutput)
15456 return;
15457
15458 auto &type = get_variable_data_type(*var);
15459 if (type.array.empty())
15460 return;
15461
15462 auto builtin = BuiltIn(get_decoration(var->self, DecorationBuiltIn));
15463 bool is_builtin = is_builtin_variable(*var) &&
15464 (builtin == BuiltInPointSize ||
15465 builtin == BuiltInPosition ||
15466 builtin == BuiltInSampleMask);
15467 bool is_tess = is_tessellation_shader();
15468 bool is_patch = has_decoration(var->self, DecorationPatch);
15469 bool is_sample_mask = is_builtin && builtin == BuiltInSampleMask;
15470
	// Tessellation input arrays are special in that they are unsized, so we cannot directly copy from them.
15472 // We must unroll the array load.
15473 // For builtins, we couldn't catch this case normally,
15474 // because this is resolved in the OpAccessChain in most cases.
15475 // If we load the entire array, we have no choice but to unroll here.
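	// A rough sketch of the unrolled load emitted below (identifiers are illustrative):
	//   vec4 _74_unrolled[N];
	//   for (int i = 0; i < int(N); i++)
	//       _74_unrolled[i] = gl_in[i].gl_Position;   // builtin case; plain inputs copy expr[i] instead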
15476 if (!is_patch && (is_builtin || is_tess))
15477 {
15478 auto new_expr = join("_", target_id, "_unrolled");
15479 statement(variable_decl(type, new_expr, target_id), ";");
15480 string array_expr;
15481 if (type.array_size_literal.back())
15482 {
15483 array_expr = convert_to_string(type.array.back());
15484 if (type.array.back() == 0)
15485 SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
15486 }
15487 else
15488 array_expr = to_expression(type.array.back());
15489
15490 // The array size might be a specialization constant, so use a for-loop instead.
15491 statement("for (int i = 0; i < int(", array_expr, "); i++)");
15492 begin_scope();
15493 if (is_builtin && !is_sample_mask)
15494 statement(new_expr, "[i] = gl_in[i].", expr, ";");
15495 else if (is_sample_mask)
15496 {
15497 SPIRType target_type;
15498 target_type.basetype = SPIRType::Int;
15499 statement(new_expr, "[i] = ", bitcast_expression(target_type, type.basetype, join(expr, "[i]")), ";");
15500 }
15501 else
15502 statement(new_expr, "[i] = ", expr, "[i];");
15503 end_scope();
15504
15505 expr = move(new_expr);
15506 }
15507}
15508
15509void CompilerGLSL::cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
15510{
15511 // We will handle array cases elsewhere.
15512 if (!expr_type.array.empty())
15513 return;
15514
15515 auto *var = maybe_get_backing_variable(source_id);
15516 if (var)
15517 source_id = var->self;
15518
15519 // Only interested in standalone builtin variables.
15520 if (!has_decoration(source_id, DecorationBuiltIn))
15521 return;
15522
15523 auto builtin = static_cast<BuiltIn>(get_decoration(source_id, DecorationBuiltIn));
15524 auto expected_type = expr_type.basetype;
15525
15526 // TODO: Fill in for more builtins.
15527 switch (builtin)
15528 {
15529 case BuiltInLayer:
15530 case BuiltInPrimitiveId:
15531 case BuiltInViewportIndex:
15532 case BuiltInInstanceId:
15533 case BuiltInInstanceIndex:
15534 case BuiltInVertexId:
15535 case BuiltInVertexIndex:
15536 case BuiltInSampleId:
15537 case BuiltInBaseVertex:
15538 case BuiltInBaseInstance:
15539 case BuiltInDrawIndex:
15540 case BuiltInFragStencilRefEXT:
15541 case BuiltInInstanceCustomIndexNV:
15542 case BuiltInSampleMask:
15543 case BuiltInPrimitiveShadingRateKHR:
15544 case BuiltInShadingRateKHR:
15545 expected_type = SPIRType::Int;
15546 break;
15547
15548 case BuiltInGlobalInvocationId:
15549 case BuiltInLocalInvocationId:
15550 case BuiltInWorkgroupId:
15551 case BuiltInLocalInvocationIndex:
15552 case BuiltInWorkgroupSize:
15553 case BuiltInNumWorkgroups:
15554 case BuiltInIncomingRayFlagsNV:
15555 case BuiltInLaunchIdNV:
15556 case BuiltInLaunchSizeNV:
15557 expected_type = SPIRType::UInt;
15558 break;
15559
15560 default:
15561 break;
15562 }
15563
15564 if (expected_type != expr_type.basetype)
15565 expr = bitcast_expression(expr_type, expected_type, expr);
15566}
15567
15568void CompilerGLSL::cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
15569{
15570 auto *var = maybe_get_backing_variable(target_id);
15571 if (var)
15572 target_id = var->self;
15573
15574 // Only interested in standalone builtin variables.
15575 if (!has_decoration(target_id, DecorationBuiltIn))
15576 return;
15577
15578 auto builtin = static_cast<BuiltIn>(get_decoration(target_id, DecorationBuiltIn));
15579 auto expected_type = expr_type.basetype;
15580
15581 // TODO: Fill in for more builtins.
15582 switch (builtin)
15583 {
15584 case BuiltInLayer:
15585 case BuiltInPrimitiveId:
15586 case BuiltInViewportIndex:
15587 case BuiltInFragStencilRefEXT:
15588 case BuiltInSampleMask:
15589 case BuiltInPrimitiveShadingRateKHR:
15590 case BuiltInShadingRateKHR:
15591 expected_type = SPIRType::Int;
15592 break;
15593
15594 default:
15595 break;
15596 }
15597
15598 if (expected_type != expr_type.basetype)
15599 {
15600 auto type = expr_type;
15601 type.basetype = expected_type;
15602 expr = bitcast_expression(type, expr_type.basetype, expr);
15603 }
15604}
15605
15606void CompilerGLSL::convert_non_uniform_expression(string &expr, uint32_t ptr_id)
15607{
15608 if (*backend.nonuniform_qualifier == '\0')
15609 return;
15610
15611 auto *var = maybe_get_backing_variable(ptr_id);
15612 if (!var)
15613 return;
15614
15615 if (var->storage != StorageClassUniformConstant &&
15616 var->storage != StorageClassStorageBuffer &&
15617 var->storage != StorageClassUniform)
15618 return;
15619
15620 auto &backing_type = get<SPIRType>(var->basetype);
15621 if (backing_type.array.empty())
15622 return;
15623
15624 // If we get here, we know we're accessing an arrayed resource which
15625 // might require nonuniform qualifier.
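	// For example (assuming the GLSL backend, where the qualifier is nonuniformEXT), an expression like
	//   uTextures[index]  ->  uTextures[nonuniformEXT(index)]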
15626
15627 auto start_array_index = expr.find_first_of('[');
15628
15629 if (start_array_index == string::npos)
15630 return;
15631
15632 // We've opened a bracket, track expressions until we can close the bracket.
15633 // This must be our resource index.
15634 size_t end_array_index = string::npos;
15635 unsigned bracket_count = 1;
15636 for (size_t index = start_array_index + 1; index < expr.size(); index++)
15637 {
15638 if (expr[index] == ']')
15639 {
15640 if (--bracket_count == 0)
15641 {
15642 end_array_index = index;
15643 break;
15644 }
15645 }
15646 else if (expr[index] == '[')
15647 bracket_count++;
15648 }
15649
15650 assert(bracket_count == 0);
15651
15652 // Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's
15653 // nothing we can do here to express that.
15654 if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index)
15655 return;
15656
15657 start_array_index++;
15658
15659 expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(",
15660 expr.substr(start_array_index, end_array_index - start_array_index), ")",
15661 expr.substr(end_array_index, string::npos));
15662}
15663
15664void CompilerGLSL::emit_block_hints(const SPIRBlock &block)
15665{
15666 if ((options.es && options.version < 310) || (!options.es && options.version < 140))
15667 return;
15668
15669 switch (block.hint)
15670 {
15671 case SPIRBlock::HintFlatten:
15672 require_extension_internal("GL_EXT_control_flow_attributes");
15673 statement("SPIRV_CROSS_FLATTEN");
15674 break;
15675 case SPIRBlock::HintDontFlatten:
15676 require_extension_internal("GL_EXT_control_flow_attributes");
15677 statement("SPIRV_CROSS_BRANCH");
15678 break;
15679 case SPIRBlock::HintUnroll:
15680 require_extension_internal("GL_EXT_control_flow_attributes");
15681 statement("SPIRV_CROSS_UNROLL");
15682 break;
15683 case SPIRBlock::HintDontUnroll:
15684 require_extension_internal("GL_EXT_control_flow_attributes");
15685 statement("SPIRV_CROSS_LOOP");
15686 break;
15687 default:
15688 break;
15689 }
15690}
15691
15692void CompilerGLSL::preserve_alias_on_reset(uint32_t id)
15693{
15694 preserved_aliases[id] = get_name(id);
15695}
15696
15697void CompilerGLSL::reset_name_caches()
15698{
15699 for (auto &preserved : preserved_aliases)
15700 set_name(preserved.first, preserved.second);
15701
15702 preserved_aliases.clear();
15703 resource_names.clear();
15704 block_input_names.clear();
15705 block_output_names.clear();
15706 block_ubo_names.clear();
15707 block_ssbo_names.clear();
15708 block_names.clear();
15709 function_overloads.clear();
15710}
15711
15712void CompilerGLSL::fixup_type_alias()
15713{
15714 // Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists.
15715 ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
15716 if (!type.type_alias)
15717 return;
15718
15719 if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))
15720 {
15721 // Top-level block types should never alias anything else.
15722 type.type_alias = 0;
15723 }
15724 else if (type_is_block_like(type) && type.self == ID(self))
15725 {
15726 // A block-like type is any type which contains Offset decoration, but not top-level blocks,
15727 // i.e. blocks which are placed inside buffers.
15728 // Become the master.
15729 ir.for_each_typed_id<SPIRType>([&](uint32_t other_id, SPIRType &other_type) {
15730 if (other_id == self)
15731 return;
15732
15733 if (other_type.type_alias == type.type_alias)
15734 other_type.type_alias = self;
15735 });
15736
15737 this->get<SPIRType>(type.type_alias).type_alias = self;
15738 type.type_alias = 0;
15739 }
15740 });
15741}
15742
15743void CompilerGLSL::reorder_type_alias()
15744{
15745 // Reorder declaration of types so that the master of the type alias is always emitted first.
	// We need this in case a type B depends on type A (A must come before it in the vector), but A is an alias of a type ABuffer, which
	// means the declaration of A doesn't happen (yet), and the order would be B, ABuffer and not ABuffer, B. Fix this up here.
15748 auto loop_lock = ir.create_loop_hard_lock();
15749
15750 auto &type_ids = ir.ids_for_type[TypeType];
15751 for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr)
15752 {
15753 auto &type = get<SPIRType>(*alias_itr);
15754 if (type.type_alias != TypeID(0) &&
15755 !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
15756 {
15757 // We will skip declaring this type, so make sure the type_alias type comes before.
15758 auto master_itr = find(begin(type_ids), end(type_ids), ID(type.type_alias));
15759 assert(master_itr != end(type_ids));
15760
15761 if (alias_itr < master_itr)
15762 {
15763 // Must also swap the type order for the constant-type joined array.
15764 auto &joined_types = ir.ids_for_constant_or_type;
15765 auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr);
15766 auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr);
15767 assert(alt_alias_itr != end(joined_types));
15768 assert(alt_master_itr != end(joined_types));
15769
15770 swap(*alias_itr, *master_itr);
15771 swap(*alt_alias_itr, *alt_master_itr);
15772 }
15773 }
15774 }
15775}
15776
15777void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal)
15778{
15779 // If we are redirecting statements, ignore the line directive.
15780 // Common case here is continue blocks.
15781 if (redirect_statement)
15782 return;
15783
15784 if (options.emit_line_directives)
15785 {
15786 require_extension_internal("GL_GOOGLE_cpp_style_line_directive");
15787 statement_no_indent("#line ", line_literal, " \"", get<SPIRString>(file_id).str, "\"");
15788 }
15789}
15790
15791void CompilerGLSL::emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id,
15792 SmallVector<uint32_t> chain)
15793{
15794 // Fully unroll all member/array indices one by one.
15795
15796 auto &lhs_type = get<SPIRType>(lhs_type_id);
15797 auto &rhs_type = get<SPIRType>(rhs_type_id);
15798
15799 if (!lhs_type.array.empty())
15800 {
	// Could use a loop here to support specialization constants, but it gets rather complicated with nested array types,
	// and this is a rather obscure opcode anyway, so keep it simple unless we are forced to.
15803 uint32_t array_size = to_array_size_literal(lhs_type);
15804 chain.push_back(0);
15805
15806 for (uint32_t i = 0; i < array_size; i++)
15807 {
15808 chain.back() = i;
15809 emit_copy_logical_type(lhs_id, lhs_type.parent_type, rhs_id, rhs_type.parent_type, chain);
15810 }
15811 }
15812 else if (lhs_type.basetype == SPIRType::Struct)
15813 {
15814 chain.push_back(0);
15815 uint32_t member_count = uint32_t(lhs_type.member_types.size());
15816 for (uint32_t i = 0; i < member_count; i++)
15817 {
15818 chain.back() = i;
15819 emit_copy_logical_type(lhs_id, lhs_type.member_types[i], rhs_id, rhs_type.member_types[i], chain);
15820 }
15821 }
15822 else
15823 {
15824 // Need to handle unpack/packing fixups since this can differ wildly between the logical types,
15825 // particularly in MSL.
15826 // To deal with this, we emit access chains and go through emit_store_statement
15827 // to deal with all the special cases we can encounter.
15828
15829 AccessChainMeta lhs_meta, rhs_meta;
15830 auto lhs = access_chain_internal(lhs_id, chain.data(), uint32_t(chain.size()),
15831 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &lhs_meta);
15832 auto rhs = access_chain_internal(rhs_id, chain.data(), uint32_t(chain.size()),
15833 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &rhs_meta);
15834
15835 uint32_t id = ir.increase_bound_by(2);
15836 lhs_id = id;
15837 rhs_id = id + 1;
15838
15839 {
15840 auto &lhs_expr = set<SPIRExpression>(lhs_id, move(lhs), lhs_type_id, true);
15841 lhs_expr.need_transpose = lhs_meta.need_transpose;
15842
15843 if (lhs_meta.storage_is_packed)
15844 set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypePacked);
15845 if (lhs_meta.storage_physical_type != 0)
15846 set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypeID, lhs_meta.storage_physical_type);
15847
15848 forwarded_temporaries.insert(lhs_id);
15849 suppressed_usage_tracking.insert(lhs_id);
15850 }
15851
15852 {
15853 auto &rhs_expr = set<SPIRExpression>(rhs_id, move(rhs), rhs_type_id, true);
15854 rhs_expr.need_transpose = rhs_meta.need_transpose;
15855
15856 if (rhs_meta.storage_is_packed)
15857 set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypePacked);
15858 if (rhs_meta.storage_physical_type != 0)
15859 set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypeID, rhs_meta.storage_physical_type);
15860
15861 forwarded_temporaries.insert(rhs_id);
15862 suppressed_usage_tracking.insert(rhs_id);
15863 }
15864
15865 emit_store_statement(lhs_id, rhs_id);
15866 }
15867}
15868
15869bool CompilerGLSL::subpass_input_is_framebuffer_fetch(uint32_t id) const
15870{
15871 if (!has_decoration(id, DecorationInputAttachmentIndex))
15872 return false;
15873
15874 uint32_t input_attachment_index = get_decoration(id, DecorationInputAttachmentIndex);
15875 for (auto &remap : subpass_to_framebuffer_fetch_attachment)
15876 if (remap.first == input_attachment_index)
15877 return true;
15878
15879 return false;
15880}
15881
15882const SPIRVariable *CompilerGLSL::find_subpass_input_by_attachment_index(uint32_t index) const
15883{
15884 const SPIRVariable *ret = nullptr;
15885 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
15886 if (has_decoration(var.self, DecorationInputAttachmentIndex) &&
15887 get_decoration(var.self, DecorationInputAttachmentIndex) == index)
15888 {
15889 ret = &var;
15890 }
15891 });
15892 return ret;
15893}
15894
15895const SPIRVariable *CompilerGLSL::find_color_output_by_location(uint32_t location) const
15896{
15897 const SPIRVariable *ret = nullptr;
15898 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
15899 if (var.storage == StorageClassOutput && get_decoration(var.self, DecorationLocation) == location)
15900 ret = &var;
15901 });
15902 return ret;
15903}
15904
15905void CompilerGLSL::emit_inout_fragment_outputs_copy_to_subpass_inputs()
15906{
15907 for (auto &remap : subpass_to_framebuffer_fetch_attachment)
15908 {
15909 auto *subpass_var = find_subpass_input_by_attachment_index(remap.first);
15910 auto *output_var = find_color_output_by_location(remap.second);
15911 if (!subpass_var)
15912 continue;
15913 if (!output_var)
15914 SPIRV_CROSS_THROW("Need to declare the corresponding fragment output variable to be able "
15915 "to read from it.");
15916 if (is_array(get<SPIRType>(output_var->basetype)))
15917 SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_framebuffer_fetch with arrays of color outputs.");
15918
15919 auto &func = get<SPIRFunction>(get_entry_point().self);
15920 func.fixup_hooks_in.push_back([=]() {
15921 if (is_legacy())
15922 {
15923 statement(to_expression(subpass_var->self), " = ", "gl_LastFragData[",
15924 get_decoration(output_var->self, DecorationLocation), "];");
15925 }
15926 else
15927 {
15928 uint32_t num_rt_components = this->get<SPIRType>(output_var->basetype).vecsize;
15929 statement(to_expression(subpass_var->self), vector_swizzle(num_rt_components, 0), " = ",
15930 to_expression(output_var->self), ";");
15931 }
15932 });
15933 }
15934}
15935
15936bool CompilerGLSL::variable_is_depth_or_compare(VariableID id) const
15937{
15938 return is_depth_image(get<SPIRType>(get<SPIRVariable>(id).basetype), id);
15939}
15940
15941const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c)
15942{
15943 static const char *const retval[CandidateCount] = { "GL_KHR_shader_subgroup_ballot",
15944 "GL_KHR_shader_subgroup_basic",
15945 "GL_KHR_shader_subgroup_vote",
15946 "GL_NV_gpu_shader_5",
15947 "GL_NV_shader_thread_group",
15948 "GL_NV_shader_thread_shuffle",
15949 "GL_ARB_shader_ballot",
15950 "GL_ARB_shader_group_vote",
15951 "GL_AMD_gcn_shader" };
15952 return retval[c];
15953}
15954
15955SmallVector<std::string> CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_names(Candidate c)
15956{
15957 switch (c)
15958 {
15959 case ARB_shader_ballot:
15960 return { "GL_ARB_shader_int64" };
15961 case AMD_gcn_shader:
15962 return { "GL_AMD_gpu_shader_int64", "GL_NV_gpu_shader5" };
15963 default:
15964 return {};
15965 }
15966}
15967
15968const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_predicate(Candidate c)
15969{
15970 switch (c)
15971 {
15972 case ARB_shader_ballot:
15973 return "defined(GL_ARB_shader_int64)";
15974 case AMD_gcn_shader:
15975 return "(defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))";
15976 default:
15977 return "";
15978 }
15979}
15980
15981CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSubgroupSupportHelper::
15982 get_feature_dependencies(Feature feature)
15983{
15984 switch (feature)
15985 {
15986 case SubgroupAllEqualT:
15987 return { SubgroupBroadcast_First, SubgroupAll_Any_AllEqualBool };
15988 case SubgroupElect:
15989 return { SubgroupBallotFindLSB_MSB, SubgroupBallot, SubgroupInvocationID };
15990 case SubgroupInverseBallot_InclBitCount_ExclBitCout:
15991 return { SubgroupMask };
15992 case SubgroupBallotBitCount:
15993 return { SubgroupBallot };
15994 default:
15995 return {};
15996 }
15997}
15998
15999CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::
16000 get_feature_dependency_mask(Feature feature)
16001{
16002 return build_mask(get_feature_dependencies(feature));
16003}
16004
16005bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature)
16006{
16007 static const bool retval[FeatureCount] = { false, false, false, false, false, false,
	                                           true, // SubgroupBallotFindLSB_MSB
16009 false, false, false, false,
16010 true, // SubgroupMemBarrier - replaced with workgroup memory barriers
16011 false, false, true, false };
16012
16013 return retval[feature];
16014}
16015
16016CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgroupSupportHelper::
16017 get_KHR_extension_for_feature(Feature feature)
16018{
16019 static const Candidate extensions[FeatureCount] = {
16020 KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
16021 KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote,
16022 KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
16023 KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot
16024 };
16025
16026 return extensions[feature];
16027}
16028
16029void CompilerGLSL::ShaderSubgroupSupportHelper::request_feature(Feature feature)
16030{
16031 feature_mask |= (FeatureMask(1) << feature) | get_feature_dependency_mask(feature);
16032}
16033
16034bool CompilerGLSL::ShaderSubgroupSupportHelper::is_feature_requested(Feature feature) const
16035{
16036 return (feature_mask & (1u << feature)) != 0;
16037}
16038
16039CompilerGLSL::ShaderSubgroupSupportHelper::Result CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const
16040{
16041 Result res;
16042
16043 for (uint32_t i = 0u; i < FeatureCount; ++i)
16044 {
16045 if (feature_mask & (1u << i))
16046 {
16047 auto feature = static_cast<Feature>(i);
16048 std::unordered_set<uint32_t> unique_candidates;
16049
16050 auto candidates = get_candidates_for_feature(feature);
16051 unique_candidates.insert(candidates.begin(), candidates.end());
16052
16053 auto deps = get_feature_dependencies(feature);
16054 for (Feature d : deps)
16055 {
16056 candidates = get_candidates_for_feature(d);
16057 if (!candidates.empty())
16058 unique_candidates.insert(candidates.begin(), candidates.end());
16059 }
16060
16061 for (uint32_t c : unique_candidates)
16062 ++res.weights[static_cast<Candidate>(c)];
16063 }
16064 }
16065
16066 return res;
16067}
16068
16069CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
16070 get_candidates_for_feature(Feature ft, const Result &r)
16071{
16072 auto c = get_candidates_for_feature(ft);
16073 auto cmp = [&r](Candidate a, Candidate b) {
16074 if (r.weights[a] == r.weights[b])
16075 return a < b; // Prefer candidates with lower enum value
16076 return r.weights[a] > r.weights[b];
16077 };
16078 std::sort(c.begin(), c.end(), cmp);
16079 return c;
16080}
16081
16082CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
16083 get_candidates_for_feature(Feature feature)
16084{
16085 switch (feature)
16086 {
16087 case SubgroupMask:
16088 return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
16089 case SubgroupSize:
16090 return { KHR_shader_subgroup_basic, NV_shader_thread_group, AMD_gcn_shader, ARB_shader_ballot };
16091 case SubgroupInvocationID:
16092 return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot };
16093 case SubgroupID:
16094 return { KHR_shader_subgroup_basic, NV_shader_thread_group };
16095 case NumSubgroups:
16096 return { KHR_shader_subgroup_basic, NV_shader_thread_group };
16097 case SubgroupBroadcast_First:
16098 return { KHR_shader_subgroup_ballot, NV_shader_thread_shuffle, ARB_shader_ballot };
16099 case SubgroupBallotFindLSB_MSB:
16100 return { KHR_shader_subgroup_ballot, NV_shader_thread_group };
16101 case SubgroupAll_Any_AllEqualBool:
16102 return { KHR_shader_subgroup_vote, NV_gpu_shader_5, ARB_shader_group_vote, AMD_gcn_shader };
16103 case SubgroupAllEqualT:
16104 return {}; // depends on other features only
16105 case SubgroupElect:
16106 return {}; // depends on other features only
16107 case SubgroupBallot:
16108 return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
16109 case SubgroupBarrier:
16110 return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot, AMD_gcn_shader };
16111 case SubgroupMemBarrier:
16112 return { KHR_shader_subgroup_basic };
16113 case SubgroupInverseBallot_InclBitCount_ExclBitCout:
16114 return {};
16115 case SubgroupBallotBitExtract:
16116 return { NV_shader_thread_group };
16117 case SubgroupBallotBitCount:
16118 return {};
16119 default:
16120 return {};
16121 }
16122}
16123
16124CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::build_mask(
16125 const SmallVector<Feature> &features)
16126{
16127 FeatureMask mask = 0;
16128 for (Feature f : features)
16129 mask |= FeatureMask(1) << f;
16130 return mask;
16131}
16132
16133CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result()
16134{
16135 for (auto &weight : weights)
16136 weight = 0;
16137
	// Make sure KHR_shader_subgroup extensions are always preferred.
16139 const uint32_t big_num = FeatureCount;
16140 weights[KHR_shader_subgroup_ballot] = big_num;
16141 weights[KHR_shader_subgroup_basic] = big_num;
16142 weights[KHR_shader_subgroup_vote] = big_num;
16143}
16144
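// Records a loaded type which must go through the spvWorkaroundRowMajor() wrapper and
// forces a recompile so the wrapper overload can be declared on the next pass.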
16145void CompilerGLSL::request_workaround_wrapper_overload(TypeID id)
16146{
16147 // Must be ordered to maintain deterministic output, so vector is appropriate.
16148 if (find(begin(workaround_ubo_load_overload_types), end(workaround_ubo_load_overload_types), id) ==
16149 end(workaround_ubo_load_overload_types))
16150 {
16151 force_recompile();
16152 workaround_ubo_load_overload_types.push_back(id);
16153 }
16154}
16155
16156void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr)
16157{
16158 // Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic.
	// To load these types correctly, we must first wrap them in a dummy function whose only purpose is to
	// ensure the row_major decoration is actually respected.
16161 auto *var = maybe_get_backing_variable(ptr);
16162 if (!var)
16163 return;
16164
16165 auto &backing_type = get<SPIRType>(var->basetype);
16166 bool is_ubo = backing_type.basetype == SPIRType::Struct && backing_type.storage == StorageClassUniform &&
16167 has_decoration(backing_type.self, DecorationBlock);
16168 if (!is_ubo)
16169 return;
16170
16171 auto *type = &get<SPIRType>(loaded_type);
16172 bool rewrite = false;
16173
16174 if (is_matrix(*type))
16175 {
16176 // To avoid adding a lot of unnecessary meta tracking to forward the row_major state,
16177 // we will simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state.
16178 // If there is any row-major action going on, we apply the workaround.
16179 // It is harmless to apply the workaround to column-major matrices, so this is still a valid solution.
		// If an access chain occurred, the workaround is not required, so loads of vectors or scalars do not need the workaround.
16181 type = &backing_type;
16182 }
16183
16184 if (type->basetype == SPIRType::Struct)
16185 {
16186 // If we're loading a struct where any member is a row-major matrix, apply the workaround.
16187 for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++)
16188 {
16189 if (combined_decoration_for_member(*type, i).get(DecorationRowMajor))
16190 {
16191 rewrite = true;
16192 break;
16193 }
16194 }
16195 }
16196
16197 if (rewrite)
16198 {
16199 request_workaround_wrapper_overload(loaded_type);
16200 expr = join("spvWorkaroundRowMajor(", expr, ")");
16201 }
16202}
16203
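// Stage output masking: outputs registered here (by location/component or by builtin)
// are reported as masked by the is_stage_output_*_masked() queries below.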
16204void CompilerGLSL::mask_stage_output_by_location(uint32_t location, uint32_t component)
16205{
16206 masked_output_locations.insert({ location, component });
16207}
16208
16209void CompilerGLSL::mask_stage_output_by_builtin(BuiltIn builtin)
16210{
16211 masked_output_builtins.insert(builtin);
16212}
16213
16214bool CompilerGLSL::is_stage_output_variable_masked(const SPIRVariable &var) const
16215{
16216 auto &type = get<SPIRType>(var.basetype);
16217 bool is_block = has_decoration(type.self, DecorationBlock);
16218 // Blocks by themselves are never masked. Must be masked per-member.
16219 if (is_block)
16220 return false;
16221
16222 bool is_builtin = has_decoration(var.self, DecorationBuiltIn);
16223
16224 if (is_builtin)
16225 {
16226 return is_stage_output_builtin_masked(BuiltIn(get_decoration(var.self, DecorationBuiltIn)));
16227 }
16228 else
16229 {
16230 if (!has_decoration(var.self, DecorationLocation))
16231 return false;
16232
16233 return is_stage_output_location_masked(
16234 get_decoration(var.self, DecorationLocation),
16235 get_decoration(var.self, DecorationComponent));
16236 }
16237}
16238
16239bool CompilerGLSL::is_stage_output_block_member_masked(const SPIRVariable &var, uint32_t index, bool strip_array) const
16240{
16241 auto &type = get<SPIRType>(var.basetype);
16242 bool is_block = has_decoration(type.self, DecorationBlock);
16243 if (!is_block)
16244 return false;
16245
16246 BuiltIn builtin = BuiltInMax;
16247 if (is_member_builtin(type, index, &builtin))
16248 {
16249 return is_stage_output_builtin_masked(builtin);
16250 }
16251 else
16252 {
16253 uint32_t location = get_declared_member_location(var, index, strip_array);
16254 uint32_t component = get_member_decoration(type.self, index, DecorationComponent);
16255 return is_stage_output_location_masked(location, component);
16256 }
16257}
16258
16259bool CompilerGLSL::is_stage_output_location_masked(uint32_t location, uint32_t component) const
16260{
16261 return masked_output_locations.count({ location, component }) != 0;
16262}
16263
16264bool CompilerGLSL::is_stage_output_builtin_masked(spv::BuiltIn builtin) const
16265{
16266 return masked_output_builtins.count(builtin) != 0;
16267}
16268
16269uint32_t CompilerGLSL::get_declared_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const
16270{
16271 auto &block_type = get<SPIRType>(var.basetype);
16272 if (has_member_decoration(block_type.self, mbr_idx, DecorationLocation))
16273 return get_member_decoration(block_type.self, mbr_idx, DecorationLocation);
16274 else
16275 return get_accumulated_member_location(var, mbr_idx, strip_array);
16276}
16277
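// For a block member without an explicit Location decoration, derive its location by
// starting from the variable's Location and accumulating the location counts of all
// preceding members, restarting whenever an earlier member has its own Location.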
16278uint32_t CompilerGLSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const
16279{
16280 auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var);
16281 uint32_t location = get_decoration(var.self, DecorationLocation);
16282
16283 for (uint32_t i = 0; i < mbr_idx; i++)
16284 {
16285 auto &mbr_type = get<SPIRType>(type.member_types[i]);
16286
16287 // Start counting from any place we have a new location decoration.
		if (has_member_decoration(type.self, i, DecorationLocation))
			location = get_member_decoration(type.self, i, DecorationLocation);
16290
16291 uint32_t location_count = type_to_location_count(mbr_type);
16292 location += location_count;
16293 }
16294
16295 return location;
16296}
16297
16298StorageClass CompilerGLSL::get_expression_effective_storage_class(uint32_t ptr)
16299{
16300 auto *var = maybe_get_backing_variable(ptr);
16301
16302 // If the expression has been lowered to a temporary, we need to use the Generic storage class.
16303 // We're looking for the effective storage class of a given expression.
16304 // An access chain or forwarded OpLoads from such access chains
16305 // will generally have the storage class of the underlying variable, but if the load was not forwarded
16306 // we have lost any address space qualifiers.
16307 bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get<SPIRExpression>(ptr).access_chain &&
16308 (forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0);
16309
16310 if (var && !forced_temporary)
16311 {
16312 if (variable_decl_is_remapped_storage(*var, StorageClassWorkgroup))
16313 return StorageClassWorkgroup;
16314 if (variable_decl_is_remapped_storage(*var, StorageClassStorageBuffer))
16315 return StorageClassStorageBuffer;
16316
16317 // Normalize SSBOs to StorageBuffer here.
16318 if (var->storage == StorageClassUniform &&
16319 has_decoration(get<SPIRType>(var->basetype).self, DecorationBufferBlock))
16320 return StorageClassStorageBuffer;
16321 else
16322 return var->storage;
16323 }
16324 else
16325 return expression_type(ptr).storage;
16326}
16327
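// Location counting as used above: structs sum the counts of their members, matrices
// consume one location per column, any other type consumes one, and each array
// dimension multiplies the result by its literal size.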
16328uint32_t CompilerGLSL::type_to_location_count(const SPIRType &type) const
16329{
16330 uint32_t count;
16331 if (type.basetype == SPIRType::Struct)
16332 {
16333 uint32_t mbr_count = uint32_t(type.member_types.size());
16334 count = 0;
16335 for (uint32_t i = 0; i < mbr_count; i++)
16336 count += type_to_location_count(get<SPIRType>(type.member_types[i]));
16337 }
16338 else
16339 {
16340 count = type.columns > 1 ? type.columns : 1;
16341 }
16342
16343 uint32_t dim_count = uint32_t(type.array.size());
16344 for (uint32_t i = 0; i < dim_count; i++)
16345 count *= to_array_size_literal(type, i);
16346
16347 return count;
16348}
16349