/*
 * Copyright 2015-2021 Arm Limited
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * At your option, you may choose to accept this material under either:
 *  1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
 *  2. The MIT License, found at <http://opensource.org/licenses/MIT>.
 */

#include "spirv_glsl.hpp"
#include "GLSL.std.450.h"
#include "spirv_common.hpp"
#include <algorithm>
#include <assert.h>
#include <cmath>
#include <limits>
#include <locale.h>
#include <utility>

#ifndef _WIN32
#include <langinfo.h>
#endif
#include <locale.h>

using namespace spv;
using namespace SPIRV_CROSS_NAMESPACE;
using namespace std;

enum ExtraSubExpressionType
{
    // Create masks above any legal ID range to allow multiple address spaces into the extra_sub_expressions map.
    EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET = 0x10000000,
    EXTRA_SUB_EXPRESSION_TYPE_AUX = 0x20000000
};

static bool is_unsigned_opcode(Op op)
{
    // Don't have to be exhaustive, only relevant for legacy target checking ...
    switch (op)
    {
    case OpShiftRightLogical:
    case OpUGreaterThan:
    case OpUGreaterThanEqual:
    case OpULessThan:
    case OpULessThanEqual:
    case OpUConvert:
    case OpUDiv:
    case OpUMod:
    case OpUMulExtended:
    case OpConvertUToF:
    case OpConvertFToU:
        return true;

    default:
        return false;
    }
}

static bool is_unsigned_glsl_opcode(GLSLstd450 op)
{
    // Don't have to be exhaustive, only relevant for legacy target checking ...
    switch (op)
    {
    case GLSLstd450UClamp:
    case GLSLstd450UMin:
    case GLSLstd450UMax:
    case GLSLstd450FindUMsb:
        return true;

    default:
        return false;
    }
}

static bool packing_is_vec4_padded(BufferPackingStandard packing)
{
    switch (packing)
    {
    case BufferPackingHLSLCbuffer:
    case BufferPackingHLSLCbufferPackOffset:
    case BufferPackingStd140:
    case BufferPackingStd140EnhancedLayout:
        return true;

    default:
        return false;
    }
}

static bool packing_is_hlsl(BufferPackingStandard packing)
{
    switch (packing)
    {
    case BufferPackingHLSLCbuffer:
    case BufferPackingHLSLCbufferPackOffset:
        return true;

    default:
        return false;
    }
}

static bool packing_has_flexible_offset(BufferPackingStandard packing)
{
    switch (packing)
    {
    case BufferPackingStd140:
    case BufferPackingStd430:
    case BufferPackingScalar:
    case BufferPackingHLSLCbuffer:
        return false;

    default:
        return true;
    }
}

static bool packing_is_scalar(BufferPackingStandard packing)
{
    switch (packing)
    {
    case BufferPackingScalar:
    case BufferPackingScalarEnhancedLayout:
        return true;

    default:
        return false;
    }
}

static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing)
{
    switch (packing)
    {
    case BufferPackingStd140EnhancedLayout:
        return BufferPackingStd140;
    case BufferPackingStd430EnhancedLayout:
        return BufferPackingStd430;
    case BufferPackingHLSLCbufferPackOffset:
        return BufferPackingHLSLCbuffer;
    case BufferPackingScalarEnhancedLayout:
        return BufferPackingScalar;
    default:
        return packing;
    }
}

void CompilerGLSL::init()
{
    if (ir.source.known)
    {
        options.es = ir.source.es;
        options.version = ir.source.version;
    }

    // Query the locale to see what the decimal point is.
    // We'll rely on fixing it up ourselves in the rare case we have a comma-as-decimal locale
    // rather than setting locales ourselves. Setting locales in a safe and isolated way is rather
    // tricky.
#ifdef _WIN32
    // On Windows, localeconv uses thread-local storage, so it should be fine.
    const struct lconv *conv = localeconv();
    if (conv && conv->decimal_point)
        current_locale_radix_character = *conv->decimal_point;
#elif defined(__ANDROID__) && __ANDROID_API__ < 26
    // nl_langinfo is not supported on this platform, fall back to the worse alternative.
    const struct lconv *conv = localeconv();
    if (conv && conv->decimal_point)
        current_locale_radix_character = *conv->decimal_point;
#else
    // localeconv, the portable function, is not MT safe ...
    const char *decimal_point = nl_langinfo(RADIXCHAR);
    if (decimal_point && *decimal_point != '\0')
        current_locale_radix_character = *decimal_point;
#endif
}

static const char *to_pls_layout(PlsFormat format)
{
    switch (format)
    {
    case PlsR11FG11FB10F:
        return "layout(r11f_g11f_b10f) ";
    case PlsR32F:
        return "layout(r32f) ";
    case PlsRG16F:
        return "layout(rg16f) ";
    case PlsRGB10A2:
        return "layout(rgb10_a2) ";
    case PlsRGBA8:
        return "layout(rgba8) ";
    case PlsRG16:
        return "layout(rg16) ";
    case PlsRGBA8I:
        return "layout(rgba8i)";
    case PlsRG16I:
        return "layout(rg16i) ";
    case PlsRGB10A2UI:
        return "layout(rgb10_a2ui) ";
    case PlsRGBA8UI:
        return "layout(rgba8ui) ";
    case PlsRG16UI:
        return "layout(rg16ui) ";
    case PlsR32UI:
        return "layout(r32ui) ";
    default:
        return "";
    }
}

static SPIRType::BaseType pls_format_to_basetype(PlsFormat format)
{
    switch (format)
    {
    default:
    case PlsR11FG11FB10F:
    case PlsR32F:
    case PlsRG16F:
    case PlsRGB10A2:
    case PlsRGBA8:
    case PlsRG16:
        return SPIRType::Float;

    case PlsRGBA8I:
    case PlsRG16I:
        return SPIRType::Int;

    case PlsRGB10A2UI:
    case PlsRGBA8UI:
    case PlsRG16UI:
    case PlsR32UI:
        return SPIRType::UInt;
    }
}

static uint32_t pls_format_to_components(PlsFormat format)
{
    switch (format)
    {
    default:
    case PlsR32F:
    case PlsR32UI:
        return 1;

    case PlsRG16F:
    case PlsRG16:
    case PlsRG16UI:
    case PlsRG16I:
        return 2;

    case PlsR11FG11FB10F:
        return 3;

    case PlsRGB10A2:
    case PlsRGBA8:
    case PlsRGBA8I:
    case PlsRGB10A2UI:
    case PlsRGBA8UI:
        return 4;
    }
}

const char *CompilerGLSL::vector_swizzle(int vecsize, int index)
{
    static const char *const swizzle[4][4] = {
        { ".x", ".y", ".z", ".w" },
        { ".xy", ".yz", ".zw", nullptr },
        { ".xyz", ".yzw", nullptr, nullptr },
#if defined(__GNUC__) && (__GNUC__ == 9)
        // This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947.
        // This array ends up being compiled as all nullptrs, tripping the assertions below.
        { "", nullptr, nullptr, "$" },
#else
        { "", nullptr, nullptr, nullptr },
#endif
    };

    assert(vecsize >= 1 && vecsize <= 4);
    assert(index >= 0 && index < 4);
    assert(swizzle[vecsize - 1][index]);

    return swizzle[vecsize - 1][index];
}

void CompilerGLSL::reset(uint32_t iteration_count)
{
    // Sanity check the iteration count to be robust against a certain class of bugs where
    // we keep forcing recompilations without making clear forward progress.
    // In buggy situations we will loop forever, or loop for an unbounded number of iterations.
    // Certain types of recompilations are considered to make forward progress,
    // but in almost all situations, we'll never see more than 3 iterations.
    // It is highly context-sensitive when we need to force recompilation,
    // and it is not practical with the current architecture
    // to resolve everything up front.
    if (iteration_count >= 3 && !is_force_recompile_forward_progress)
        SPIRV_CROSS_THROW("Over 3 compilation loops detected and no forward progress was made. Must be a bug!");

    // We do some speculative optimizations which should pretty much always work out,
    // but just in case the SPIR-V is rather weird, recompile until it's happy.
    // This typically only means one extra pass.
    clear_force_recompile();

    // Clear invalid expression tracking.
    invalid_expressions.clear();
    current_function = nullptr;

    // Clear temporary usage tracking.
    expression_usage_counts.clear();
    forwarded_temporaries.clear();
    suppressed_usage_tracking.clear();

    // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
    flushed_phi_variables.clear();

    reset_name_caches();

    ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) {
        func.active = false;
        func.flush_undeclared = true;
    });

    ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { var.dependees.clear(); });

    ir.reset_all_of_type<SPIRExpression>();
    ir.reset_all_of_type<SPIRAccessChain>();

    statement_count = 0;
    indent = 0;
    current_loop_level = 0;
}

void CompilerGLSL::remap_pls_variables()
{
    for (auto &input : pls_inputs)
    {
        auto &var = get<SPIRVariable>(input.id);

        bool input_is_target = false;
        if (var.storage == StorageClassUniformConstant)
        {
            auto &type = get<SPIRType>(var.basetype);
            input_is_target = type.image.dim == DimSubpassData;
        }

        if (var.storage != StorageClassInput && !input_is_target)
            SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs.");
        var.remapped_variable = true;
    }

    for (auto &output : pls_outputs)
    {
        auto &var = get<SPIRVariable>(output.id);
        if (var.storage != StorageClassOutput)
            SPIRV_CROSS_THROW("Can only use out variables for PLS outputs.");
        var.remapped_variable = true;
    }
}

void CompilerGLSL::remap_ext_framebuffer_fetch(uint32_t input_attachment_index, uint32_t color_location, bool coherent)
{
    subpass_to_framebuffer_fetch_attachment.push_back({ input_attachment_index, color_location });
    inout_color_attachments.push_back({ color_location, coherent });
}

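// inout_color_attachments holds (location, coherent) pairs registered via remap_ext_framebuffer_fetch().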
bool CompilerGLSL::location_is_framebuffer_fetch(uint32_t location) const
{
    return std::find_if(begin(inout_color_attachments), end(inout_color_attachments),
                        [&](const std::pair<uint32_t, bool> &elem) {
                            return elem.first == location;
                        }) != end(inout_color_attachments);
}

bool CompilerGLSL::location_is_non_coherent_framebuffer_fetch(uint32_t location) const
{
    return std::find_if(begin(inout_color_attachments), end(inout_color_attachments),
                        [&](const std::pair<uint32_t, bool> &elem) {
                            return elem.first == location && !elem.second;
                        }) != end(inout_color_attachments);
}

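// Scans declared types, capabilities and the execution model up front to figure out which
// GLSL extensions the emitted shader must require.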
void CompilerGLSL::find_static_extensions()
{
    ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
        if (type.basetype == SPIRType::Double)
        {
            if (options.es)
                SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
            if (!options.es && options.version < 400)
                require_extension_internal("GL_ARB_gpu_shader_fp64");
        }
        else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64)
        {
            if (options.es)
                SPIRV_CROSS_THROW("64-bit integers not supported in ES profile.");
            if (!options.es)
                require_extension_internal("GL_ARB_gpu_shader_int64");
        }
        else if (type.basetype == SPIRType::Half)
        {
            require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_float16");
            if (options.vulkan_semantics)
                require_extension_internal("GL_EXT_shader_16bit_storage");
        }
        else if (type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte)
        {
            require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int8");
            if (options.vulkan_semantics)
                require_extension_internal("GL_EXT_shader_8bit_storage");
        }
        else if (type.basetype == SPIRType::Short || type.basetype == SPIRType::UShort)
        {
            require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int16");
            if (options.vulkan_semantics)
                require_extension_internal("GL_EXT_shader_16bit_storage");
        }
    });

    auto &execution = get_entry_point();
    switch (execution.model)
    {
    case ExecutionModelGLCompute:
        if (!options.es && options.version < 430)
            require_extension_internal("GL_ARB_compute_shader");
        if (options.es && options.version < 310)
            SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders.");
        break;

    case ExecutionModelGeometry:
        if (options.es && options.version < 320)
            require_extension_internal("GL_EXT_geometry_shader");
        if (!options.es && options.version < 150)
            require_extension_internal("GL_ARB_geometry_shader4");

        if (execution.flags.get(ExecutionModeInvocations) && execution.invocations != 1)
        {
            // Instanced GS is part of 400 core or this extension.
            if (!options.es && options.version < 400)
                require_extension_internal("GL_ARB_gpu_shader5");
        }
        break;

    case ExecutionModelTessellationEvaluation:
    case ExecutionModelTessellationControl:
        if (options.es && options.version < 320)
            require_extension_internal("GL_EXT_tessellation_shader");
        if (!options.es && options.version < 400)
            require_extension_internal("GL_ARB_tessellation_shader");
        break;

    case ExecutionModelRayGenerationKHR:
    case ExecutionModelIntersectionKHR:
    case ExecutionModelAnyHitKHR:
    case ExecutionModelClosestHitKHR:
    case ExecutionModelMissKHR:
    case ExecutionModelCallableKHR:
        // NV enums are aliases.
        if (options.es || options.version < 460)
            SPIRV_CROSS_THROW("Ray tracing shaders require non-es profile with version 460 or above.");
        if (!options.vulkan_semantics)
            SPIRV_CROSS_THROW("Ray tracing requires Vulkan semantics.");

        // Need to figure out if we should target KHR or NV extension based on capabilities.
        for (auto &cap : ir.declared_capabilities)
        {
            if (cap == CapabilityRayTracingKHR || cap == CapabilityRayQueryKHR ||
                cap == CapabilityRayTraversalPrimitiveCullingKHR)
            {
                ray_tracing_is_khr = true;
                break;
            }
        }

        if (ray_tracing_is_khr)
        {
            // In KHR ray tracing we pass payloads by pointer instead of location,
            // so make sure we assign locations properly.
            ray_tracing_khr_fixup_locations();
            require_extension_internal("GL_EXT_ray_tracing");
        }
        else
            require_extension_internal("GL_NV_ray_tracing");
        break;

    default:
        break;
    }

    if (!pls_inputs.empty() || !pls_outputs.empty())
    {
        if (execution.model != ExecutionModelFragment)
            SPIRV_CROSS_THROW("Can only use GL_EXT_shader_pixel_local_storage in fragment shaders.");
        require_extension_internal("GL_EXT_shader_pixel_local_storage");
    }

    if (!inout_color_attachments.empty())
    {
        if (execution.model != ExecutionModelFragment)
            SPIRV_CROSS_THROW("Can only use GL_EXT_shader_framebuffer_fetch in fragment shaders.");
        if (options.vulkan_semantics)
            SPIRV_CROSS_THROW("Cannot use EXT_shader_framebuffer_fetch in Vulkan GLSL.");

        bool has_coherent = false;
        bool has_incoherent = false;

        for (auto &att : inout_color_attachments)
        {
            if (att.second)
                has_coherent = true;
            else
                has_incoherent = true;
        }

        if (has_coherent)
            require_extension_internal("GL_EXT_shader_framebuffer_fetch");
        if (has_incoherent)
            require_extension_internal("GL_EXT_shader_framebuffer_fetch_non_coherent");
    }

    if (options.separate_shader_objects && !options.es && options.version < 410)
        require_extension_internal("GL_ARB_separate_shader_objects");

    if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
    {
        if (!options.vulkan_semantics)
            SPIRV_CROSS_THROW("GL_EXT_buffer_reference is only supported in Vulkan GLSL.");
        if (options.es && options.version < 320)
            SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires ESSL 320.");
        else if (!options.es && options.version < 450)
            SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires GLSL 450.");
        require_extension_internal("GL_EXT_buffer_reference");
    }
    else if (ir.addressing_model != AddressingModelLogical)
    {
        SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64EXT addressing models are supported.");
    }

    // Check for nonuniform qualifier and passthrough.
    // Instead of looping over all decorations to find this, just look at capabilities.
    for (auto &cap : ir.declared_capabilities)
    {
        switch (cap)
        {
        case CapabilityShaderNonUniformEXT:
            if (!options.vulkan_semantics)
                require_extension_internal("GL_NV_gpu_shader5");
            else
                require_extension_internal("GL_EXT_nonuniform_qualifier");
            break;
        case CapabilityRuntimeDescriptorArrayEXT:
            if (!options.vulkan_semantics)
                SPIRV_CROSS_THROW("GL_EXT_nonuniform_qualifier is only supported in Vulkan GLSL.");
            require_extension_internal("GL_EXT_nonuniform_qualifier");
            break;

        case CapabilityGeometryShaderPassthroughNV:
            if (execution.model == ExecutionModelGeometry)
            {
                require_extension_internal("GL_NV_geometry_shader_passthrough");
                execution.geometry_passthrough = true;
            }
            break;

        case CapabilityVariablePointers:
        case CapabilityVariablePointersStorageBuffer:
            SPIRV_CROSS_THROW("VariablePointers capability is not supported in GLSL.");

        case CapabilityMultiView:
            if (options.vulkan_semantics)
                require_extension_internal("GL_EXT_multiview");
            else
            {
                require_extension_internal("GL_OVR_multiview2");
                if (options.ovr_multiview_view_count == 0)
                    SPIRV_CROSS_THROW("ovr_multiview_view_count must be non-zero when using GL_OVR_multiview2.");
                if (get_execution_model() != ExecutionModelVertex)
                    SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders.");
            }
            break;

        case CapabilityRayQueryKHR:
            if (options.es || options.version < 460 || !options.vulkan_semantics)
                SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460.");
            require_extension_internal("GL_EXT_ray_query");
            ray_tracing_is_khr = true;
            break;

        case CapabilityRayTraversalPrimitiveCullingKHR:
            if (options.es || options.version < 460 || !options.vulkan_semantics)
                SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460.");
            require_extension_internal("GL_EXT_ray_flags_primitive_culling");
            ray_tracing_is_khr = true;
            break;

        default:
            break;
        }
    }

    if (options.ovr_multiview_view_count)
    {
        if (options.vulkan_semantics)
            SPIRV_CROSS_THROW("OVR_multiview2 cannot be used with Vulkan semantics.");
        if (get_execution_model() != ExecutionModelVertex)
            SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders.");
        require_extension_internal("GL_OVR_multiview2");
    }
}

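// KHR ray tracing passes payloads by pointer rather than by location, so synthesize
// sequential Location decorations for all payload and callable-data variables.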
void CompilerGLSL::ray_tracing_khr_fixup_locations()
{
    uint32_t location = 0;
    ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
        // Incoming payload storage can also be used for tracing.
        if (var.storage != StorageClassRayPayloadKHR && var.storage != StorageClassCallableDataKHR &&
            var.storage != StorageClassIncomingRayPayloadKHR && var.storage != StorageClassIncomingCallableDataKHR)
            return;
        if (is_hidden_variable(var))
            return;
        set_decoration(var.self, DecorationLocation, location++);
    });
}

string CompilerGLSL::compile()
{
    ir.fixup_reserved_names();

    if (!options.vulkan_semantics)
    {
        // only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers
        backend.nonuniform_qualifier = "";
        backend.needs_row_major_load_workaround = true;
    }
    backend.allow_precision_qualifiers = options.vulkan_semantics || options.es;
    backend.force_gl_in_out_block = true;
    backend.supports_extensions = true;
    backend.use_array_constructor = true;
    backend.workgroup_size_is_hidden = true;

    backend.support_precise_qualifier = (!options.es && options.version >= 400) || (options.es && options.version >= 320);

    if (is_legacy_es())
        backend.support_case_fallthrough = false;

    // Scan the SPIR-V to find trivial uses of extensions.
    fixup_type_alias();
    reorder_type_alias();
    build_function_control_flow_graphs_and_analyze();
    find_static_extensions();
    fixup_image_load_store_access();
    update_active_builtins();
    analyze_image_and_sampler_usage();
    analyze_interlocked_resource_usage();
    if (!inout_color_attachments.empty())
        emit_inout_fragment_outputs_copy_to_subpass_inputs();

    // Shaders might cast unrelated data to pointers of non-block types.
    // Find all such instances and make sure we can cast the pointers to a synthesized block type.
    if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
        analyze_non_block_pointer_types();

    uint32_t pass_count = 0;
    do
    {
        reset(pass_count);

        buffer.reset();

        emit_header();
        emit_resources();
        emit_extension_workarounds(get_execution_model());

        emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());

        pass_count++;
    } while (is_forcing_recompilation());

    // Implement the interlocked wrapper function at the end.
    // The body was implemented in lieu of main().
    if (interlocked_is_complex)
    {
        statement("void main()");
        begin_scope();
        statement("// Interlocks were used in a way not compatible with GLSL, this is very slow.");
        statement("SPIRV_Cross_beginInvocationInterlock();");
        statement("spvMainInterlockedBody();");
        statement("SPIRV_Cross_endInvocationInterlock();");
        end_scope();
    }

    // Entry point in GLSL is always main().
    get_entry_point().name = "main";

    return buffer.str();
}

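// Returns whatever has been emitted to the conversion buffer so far, which is useful when
// diagnosing a failed compile().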
std::string CompilerGLSL::get_partial_source()
{
    return buffer.str();
}

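// Builds the layout(local_size_*) arguments for the compute entry point. Specialization constants
// become constant_id references in Vulkan GLSL or constant macro names in plain GLSL; otherwise
// the literal workgroup size (or LocalSizeId constants) is emitted.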
void CompilerGLSL::build_workgroup_size(SmallVector<string> &arguments, const SpecializationConstant &wg_x,
                                        const SpecializationConstant &wg_y, const SpecializationConstant &wg_z)
{
    auto &execution = get_entry_point();
    bool builtin_workgroup = execution.workgroup_size.constant != 0;
    bool use_local_size_id = !builtin_workgroup && execution.flags.get(ExecutionModeLocalSizeId);

    if (wg_x.id)
    {
        if (options.vulkan_semantics)
            arguments.push_back(join("local_size_x_id = ", wg_x.constant_id));
        else
            arguments.push_back(join("local_size_x = ", get<SPIRConstant>(wg_x.id).specialization_constant_macro_name));
    }
    else if (use_local_size_id && execution.workgroup_size.id_x)
        arguments.push_back(join("local_size_x = ", get<SPIRConstant>(execution.workgroup_size.id_x).scalar()));
    else
        arguments.push_back(join("local_size_x = ", execution.workgroup_size.x));

    if (wg_y.id)
    {
        if (options.vulkan_semantics)
            arguments.push_back(join("local_size_y_id = ", wg_y.constant_id));
        else
            arguments.push_back(join("local_size_y = ", get<SPIRConstant>(wg_y.id).specialization_constant_macro_name));
    }
    else if (use_local_size_id && execution.workgroup_size.id_y)
        arguments.push_back(join("local_size_y = ", get<SPIRConstant>(execution.workgroup_size.id_y).scalar()));
    else
        arguments.push_back(join("local_size_y = ", execution.workgroup_size.y));

    if (wg_z.id)
    {
        if (options.vulkan_semantics)
            arguments.push_back(join("local_size_z_id = ", wg_z.constant_id));
        else
            arguments.push_back(join("local_size_z = ", get<SPIRConstant>(wg_z.id).specialization_constant_macro_name));
    }
    else if (use_local_size_id && execution.workgroup_size.id_z)
        arguments.push_back(join("local_size_z = ", get<SPIRConstant>(execution.workgroup_size.id_z).scalar()));
    else
        arguments.push_back(join("local_size_z = ", execution.workgroup_size.z));
}

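// In Vulkan GLSL the matching KHR subgroup extension can be required directly. In plain GLSL the
// request is recorded and a recompile is forced so emit_header() can emit vendor extension fallbacks.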
void CompilerGLSL::request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature)
{
    if (options.vulkan_semantics)
    {
        auto khr_extension = ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(feature);
        require_extension_internal(ShaderSubgroupSupportHelper::get_extension_name(khr_extension));
    }
    else
    {
        if (!shader_subgroup_supporter.is_feature_requested(feature))
            force_recompile();
        shader_subgroup_supporter.request_feature(feature);
    }
}

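// Emits the #version directive, the #extension blocks (including conditional vendor fallbacks),
// and the layout(...) in/out declarations derived from the entry point's execution modes.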
void CompilerGLSL::emit_header()
{
    auto &execution = get_entry_point();
    statement("#version ", options.version, options.es && options.version > 100 ? " es" : "");

    if (!options.es && options.version < 420)
    {
        // Needed for binding = # on UBOs, etc.
        if (options.enable_420pack_extension)
        {
            statement("#ifdef GL_ARB_shading_language_420pack");
            statement("#extension GL_ARB_shading_language_420pack : require");
            statement("#endif");
        }
        // Needed for: layout(early_fragment_tests) in;
        if (execution.flags.get(ExecutionModeEarlyFragmentTests))
            require_extension_internal("GL_ARB_shader_image_load_store");
    }

    // Needed for: layout(post_depth_coverage) in;
    if (execution.flags.get(ExecutionModePostDepthCoverage))
        require_extension_internal("GL_ARB_post_depth_coverage");

    // Needed for: layout({pixel,sample}_interlock_[un]ordered) in;
    bool interlock_used = execution.flags.get(ExecutionModePixelInterlockOrderedEXT) ||
                          execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
                          execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
                          execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT);

    if (interlock_used)
    {
        if (options.es)
        {
            if (options.version < 310)
                SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock.");
            require_extension_internal("GL_NV_fragment_shader_interlock");
        }
        else
        {
            if (options.version < 420)
                require_extension_internal("GL_ARB_shader_image_load_store");
            require_extension_internal("GL_ARB_fragment_shader_interlock");
        }
    }

    for (auto &ext : forced_extensions)
    {
        if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16")
        {
            // Special case, this extension has a potential fallback to another vendor extension in normal GLSL.
            // GL_AMD_gpu_shader_half_float is a superset, so try that first.
            statement("#if defined(GL_AMD_gpu_shader_half_float)");
            statement("#extension GL_AMD_gpu_shader_half_float : require");
            if (!options.vulkan_semantics)
            {
                statement("#elif defined(GL_NV_gpu_shader5)");
                statement("#extension GL_NV_gpu_shader5 : require");
            }
            else
            {
                statement("#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)");
                statement("#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require");
            }
            statement("#else");
            statement("#error No extension available for FP16.");
            statement("#endif");
        }
        else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16")
        {
            if (options.vulkan_semantics)
                statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require");
            else
            {
                statement("#if defined(GL_AMD_gpu_shader_int16)");
                statement("#extension GL_AMD_gpu_shader_int16 : require");
                statement("#elif defined(GL_NV_gpu_shader5)");
                statement("#extension GL_NV_gpu_shader5 : require");
                statement("#else");
                statement("#error No extension available for Int16.");
                statement("#endif");
            }
        }
        else if (ext == "GL_ARB_post_depth_coverage")
        {
            if (options.es)
                statement("#extension GL_EXT_post_depth_coverage : require");
            else
            {
                statement("#if defined(GL_ARB_post_depth_coverage)");
                statement("#extension GL_ARB_post_depth_coverage : require");
                statement("#else");
                statement("#extension GL_EXT_post_depth_coverage : require");
                statement("#endif");
            }
        }
        else if (!options.vulkan_semantics && ext == "GL_ARB_shader_draw_parameters")
        {
            // Soft-enable this extension on plain GLSL.
            statement("#ifdef ", ext);
            statement("#extension ", ext, " : enable");
            statement("#endif");
        }
        else if (ext == "GL_EXT_control_flow_attributes")
        {
            // These are just hints so we can conditionally enable and fallback in the shader.
            statement("#if defined(GL_EXT_control_flow_attributes)");
            statement("#extension GL_EXT_control_flow_attributes : require");
            statement("#define SPIRV_CROSS_FLATTEN [[flatten]]");
            statement("#define SPIRV_CROSS_BRANCH [[dont_flatten]]");
            statement("#define SPIRV_CROSS_UNROLL [[unroll]]");
            statement("#define SPIRV_CROSS_LOOP [[dont_unroll]]");
            statement("#else");
            statement("#define SPIRV_CROSS_FLATTEN");
            statement("#define SPIRV_CROSS_BRANCH");
            statement("#define SPIRV_CROSS_UNROLL");
            statement("#define SPIRV_CROSS_LOOP");
            statement("#endif");
        }
        else if (ext == "GL_NV_fragment_shader_interlock")
        {
            statement("#extension GL_NV_fragment_shader_interlock : require");
            statement("#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockNV()");
            statement("#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockNV()");
        }
        else if (ext == "GL_ARB_fragment_shader_interlock")
        {
            statement("#ifdef GL_ARB_fragment_shader_interlock");
            statement("#extension GL_ARB_fragment_shader_interlock : enable");
            statement("#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB()");
            statement("#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB()");
            statement("#elif defined(GL_INTEL_fragment_shader_ordering)");
            statement("#extension GL_INTEL_fragment_shader_ordering : enable");
            statement("#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL()");
            statement("#define SPIRV_Cross_endInvocationInterlock()");
            statement("#endif");
        }
        else
            statement("#extension ", ext, " : require");
    }

    if (!options.vulkan_semantics)
    {
        using Supp = ShaderSubgroupSupportHelper;
        auto result = shader_subgroup_supporter.resolve();

        for (uint32_t feature_index = 0; feature_index < Supp::FeatureCount; feature_index++)
        {
            auto feature = static_cast<Supp::Feature>(feature_index);
            if (!shader_subgroup_supporter.is_feature_requested(feature))
                continue;

            auto exts = Supp::get_candidates_for_feature(feature, result);
            if (exts.empty())
                continue;

            statement("");

            for (auto &ext : exts)
            {
                const char *name = Supp::get_extension_name(ext);
                const char *extra_predicate = Supp::get_extra_required_extension_predicate(ext);
                auto extra_names = Supp::get_extra_required_extension_names(ext);
                statement(&ext != &exts.front() ? "#elif" : "#if", " defined(", name, ")",
                          (*extra_predicate != '\0' ? " && " : ""), extra_predicate);
                for (const auto &e : extra_names)
                    statement("#extension ", e, " : enable");
                statement("#extension ", name, " : require");
            }

            if (!Supp::can_feature_be_implemented_without_extensions(feature))
            {
                statement("#else");
                statement("#error No extensions available to emulate requested subgroup feature.");
            }

            statement("#endif");
        }
    }

    for (auto &header : header_lines)
        statement(header);

    SmallVector<string> inputs;
    SmallVector<string> outputs;

    switch (execution.model)
    {
    case ExecutionModelVertex:
        if (options.ovr_multiview_view_count)
            inputs.push_back(join("num_views = ", options.ovr_multiview_view_count));
        break;
    case ExecutionModelGeometry:
        if ((execution.flags.get(ExecutionModeInvocations)) && execution.invocations != 1)
            inputs.push_back(join("invocations = ", execution.invocations));
        if (execution.flags.get(ExecutionModeInputPoints))
            inputs.push_back("points");
        if (execution.flags.get(ExecutionModeInputLines))
            inputs.push_back("lines");
        if (execution.flags.get(ExecutionModeInputLinesAdjacency))
            inputs.push_back("lines_adjacency");
        if (execution.flags.get(ExecutionModeTriangles))
            inputs.push_back("triangles");
        if (execution.flags.get(ExecutionModeInputTrianglesAdjacency))
            inputs.push_back("triangles_adjacency");

        if (!execution.geometry_passthrough)
        {
            // For passthrough, these are implied and cannot be declared in the shader.
            outputs.push_back(join("max_vertices = ", execution.output_vertices));
            if (execution.flags.get(ExecutionModeOutputTriangleStrip))
                outputs.push_back("triangle_strip");
            if (execution.flags.get(ExecutionModeOutputPoints))
                outputs.push_back("points");
            if (execution.flags.get(ExecutionModeOutputLineStrip))
                outputs.push_back("line_strip");
        }
        break;

    case ExecutionModelTessellationControl:
        if (execution.flags.get(ExecutionModeOutputVertices))
            outputs.push_back(join("vertices = ", execution.output_vertices));
        break;

    case ExecutionModelTessellationEvaluation:
        if (execution.flags.get(ExecutionModeQuads))
            inputs.push_back("quads");
        if (execution.flags.get(ExecutionModeTriangles))
            inputs.push_back("triangles");
        if (execution.flags.get(ExecutionModeIsolines))
            inputs.push_back("isolines");
        if (execution.flags.get(ExecutionModePointMode))
            inputs.push_back("point_mode");

        if (!execution.flags.get(ExecutionModeIsolines))
        {
            if (execution.flags.get(ExecutionModeVertexOrderCw))
                inputs.push_back("cw");
            if (execution.flags.get(ExecutionModeVertexOrderCcw))
                inputs.push_back("ccw");
        }

        if (execution.flags.get(ExecutionModeSpacingFractionalEven))
            inputs.push_back("fractional_even_spacing");
        if (execution.flags.get(ExecutionModeSpacingFractionalOdd))
            inputs.push_back("fractional_odd_spacing");
        if (execution.flags.get(ExecutionModeSpacingEqual))
            inputs.push_back("equal_spacing");
        break;

    case ExecutionModelGLCompute:
    {
        if (execution.workgroup_size.constant != 0 || execution.flags.get(ExecutionModeLocalSizeId))
        {
            SpecializationConstant wg_x, wg_y, wg_z;
            get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);

            // If there are any spec constants on legacy GLSL, defer declaration, we need to set up macro
            // declarations before we can emit the work group size.
            if (options.vulkan_semantics ||
                ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0))))
                build_workgroup_size(inputs, wg_x, wg_y, wg_z);
        }
        else
        {
            inputs.push_back(join("local_size_x = ", execution.workgroup_size.x));
            inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
            inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
        }
        break;
    }

    case ExecutionModelFragment:
        if (options.es)
        {
            switch (options.fragment.default_float_precision)
            {
            case Options::Lowp:
                statement("precision lowp float;");
                break;

            case Options::Mediump:
                statement("precision mediump float;");
                break;

            case Options::Highp:
                statement("precision highp float;");
                break;

            default:
                break;
            }

            switch (options.fragment.default_int_precision)
            {
            case Options::Lowp:
                statement("precision lowp int;");
                break;

            case Options::Mediump:
                statement("precision mediump int;");
                break;

            case Options::Highp:
                statement("precision highp int;");
                break;

            default:
                break;
            }
        }

        if (execution.flags.get(ExecutionModeEarlyFragmentTests))
            inputs.push_back("early_fragment_tests");
        if (execution.flags.get(ExecutionModePostDepthCoverage))
            inputs.push_back("post_depth_coverage");

        if (interlock_used)
            statement("#if defined(GL_ARB_fragment_shader_interlock)");

        if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT))
            statement("layout(pixel_interlock_ordered) in;");
        else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT))
            statement("layout(pixel_interlock_unordered) in;");
        else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT))
            statement("layout(sample_interlock_ordered) in;");
        else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
            statement("layout(sample_interlock_unordered) in;");

        if (interlock_used)
        {
            statement("#elif !defined(GL_INTEL_fragment_shader_ordering)");
            statement("#error Fragment Shader Interlock/Ordering extension missing!");
            statement("#endif");
        }

        if (!options.es && execution.flags.get(ExecutionModeDepthGreater))
            statement("layout(depth_greater) out float gl_FragDepth;");
        else if (!options.es && execution.flags.get(ExecutionModeDepthLess))
            statement("layout(depth_less) out float gl_FragDepth;");

        break;

    default:
        break;
    }

    for (auto &cap : ir.declared_capabilities)
        if (cap == CapabilityRayTraversalPrimitiveCullingKHR)
            statement("layout(primitive_culling);");

    if (!inputs.empty())
        statement("layout(", merge(inputs), ") in;");
    if (!outputs.empty())
        statement("layout(", merge(outputs), ") out;");

    statement("");
}

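// GLSL does not allow empty struct declarations, so emit_struct() adds a dummy member for these.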
bool CompilerGLSL::type_is_empty(const SPIRType &type)
{
    return type.basetype == SPIRType::Struct && type.member_types.empty();
}

void CompilerGLSL::emit_struct(SPIRType &type)
{
    // Struct types can be stamped out multiple times
    // with just different offsets, matrix layouts, etc ...
    // Type-punning with these types is legal, which complicates things
    // when we are storing struct and array types in an SSBO for example.
    // If the type master is packed however, we can no longer assume that the struct declaration will be redundant.
    if (type.type_alias != TypeID(0) &&
        !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
        return;

    add_resource_name(type.self);
    auto name = type_to_glsl(type);

    statement(!backend.explicit_struct_type ? "struct " : "", name);
    begin_scope();

    type.member_name_cache.clear();

    uint32_t i = 0;
    bool emitted = false;
    for (auto &member : type.member_types)
    {
        add_member_name(type, i);
        emit_struct_member(type, member, i);
        i++;
        emitted = true;
    }

    // Don't declare empty structs in GLSL, this is not allowed.
    if (type_is_empty(type) && !backend.supports_empty_struct)
    {
        statement("int empty_struct_member;");
        emitted = true;
    }

    if (has_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget))
        emit_struct_padding_target(type);

    end_scope_decl();

    if (emitted)
        statement("");
}

string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
{
    string res;
    //if (flags & (1ull << DecorationSmooth))
    //    res += "smooth ";
    if (flags.get(DecorationFlat))
        res += "flat ";
    if (flags.get(DecorationNoPerspective))
        res += "noperspective ";
    if (flags.get(DecorationCentroid))
        res += "centroid ";
    if (flags.get(DecorationPatch))
        res += "patch ";
    if (flags.get(DecorationSample))
        res += "sample ";
    if (flags.get(DecorationInvariant))
        res += "invariant ";

    if (flags.get(DecorationExplicitInterpAMD))
    {
        require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");
        res += "__explicitInterpAMD ";
    }

    if (flags.get(DecorationPerVertexNV))
    {
        if (options.es && options.version < 320)
            SPIRV_CROSS_THROW("pervertexNV requires ESSL 320.");
        else if (!options.es && options.version < 450)
            SPIRV_CROSS_THROW("pervertexNV requires GLSL 450.");
        require_extension_internal("GL_NV_fragment_shader_barycentric");
        res += "pervertexNV ";
    }

    return res;
}

string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
{
    if (is_legacy())
        return "";

    bool is_block = has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
    if (!is_block)
        return "";

    auto &memb = ir.meta[type.self].members;
    if (index >= memb.size())
        return "";
    auto &dec = memb[index];

    SmallVector<string> attr;

    if (has_member_decoration(type.self, index, DecorationPassthroughNV))
        attr.push_back("passthrough");

    // We can only apply layouts on members in block interfaces.
    // This is a bit problematic because in SPIR-V decorations are applied on the struct types directly.
    // This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct
    // has a decoration, it was originally caused by a top-level layout() qualifier in GLSL.
    //
    // We would like to go from (SPIR-V style):
    //
    // struct Foo { layout(row_major) mat4 matrix; };
    // buffer UBO { Foo foo; };
    //
    // to
    //
    // struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations.
    // buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level.
    auto flags = combined_decoration_for_member(type, index);

    if (flags.get(DecorationRowMajor))
        attr.push_back("row_major");
    // We don't emit any global layouts, so column_major is default.
    //if (flags & (1ull << DecorationColMajor))
    //    attr.push_back("column_major");

    if (dec.decoration_flags.get(DecorationLocation) && can_use_io_location(type.storage, true))
        attr.push_back(join("location = ", dec.location));

    // Can only declare component if we can declare location.
    if (dec.decoration_flags.get(DecorationComponent) && can_use_io_location(type.storage, true))
    {
        if (!options.es)
        {
            if (options.version < 440 && options.version >= 140)
                require_extension_internal("GL_ARB_enhanced_layouts");
            else if (options.version < 140)
                SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40.");
            attr.push_back(join("component = ", dec.component));
        }
        else
            SPIRV_CROSS_THROW("Component decoration is not supported in ES targets.");
    }

    // SPIRVCrossDecorationPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers.
    // This is only done selectively in GLSL as needed.
    if (has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) &&
        dec.decoration_flags.get(DecorationOffset))
        attr.push_back(join("offset = ", dec.offset));
    else if (type.storage == StorageClassOutput && dec.decoration_flags.get(DecorationOffset))
        attr.push_back(join("xfb_offset = ", dec.offset));

    if (attr.empty())
        return "";

    string res = "layout(";
    res += merge(attr);
    res += ") ";
    return res;
}

const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format)
{
    if (options.es && is_desktop_only_format(format))
        SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile.");

    switch (format)
    {
    case ImageFormatRgba32f:
        return "rgba32f";
    case ImageFormatRgba16f:
        return "rgba16f";
    case ImageFormatR32f:
        return "r32f";
    case ImageFormatRgba8:
        return "rgba8";
    case ImageFormatRgba8Snorm:
        return "rgba8_snorm";
    case ImageFormatRg32f:
        return "rg32f";
    case ImageFormatRg16f:
        return "rg16f";
    case ImageFormatRgba32i:
        return "rgba32i";
    case ImageFormatRgba16i:
        return "rgba16i";
    case ImageFormatR32i:
        return "r32i";
    case ImageFormatRgba8i:
        return "rgba8i";
    case ImageFormatRg32i:
        return "rg32i";
    case ImageFormatRg16i:
        return "rg16i";
    case ImageFormatRgba32ui:
        return "rgba32ui";
    case ImageFormatRgba16ui:
        return "rgba16ui";
    case ImageFormatR32ui:
        return "r32ui";
    case ImageFormatRgba8ui:
        return "rgba8ui";
    case ImageFormatRg32ui:
        return "rg32ui";
    case ImageFormatRg16ui:
        return "rg16ui";
    case ImageFormatR11fG11fB10f:
        return "r11f_g11f_b10f";
    case ImageFormatR16f:
        return "r16f";
    case ImageFormatRgb10A2:
        return "rgb10_a2";
    case ImageFormatR8:
        return "r8";
    case ImageFormatRg8:
        return "rg8";
    case ImageFormatR16:
        return "r16";
    case ImageFormatRg16:
        return "rg16";
    case ImageFormatRgba16:
        return "rgba16";
    case ImageFormatR16Snorm:
        return "r16_snorm";
    case ImageFormatRg16Snorm:
        return "rg16_snorm";
    case ImageFormatRgba16Snorm:
        return "rgba16_snorm";
    case ImageFormatR8Snorm:
        return "r8_snorm";
    case ImageFormatRg8Snorm:
        return "rg8_snorm";
    case ImageFormatR8ui:
        return "r8ui";
    case ImageFormatRg8ui:
        return "rg8ui";
    case ImageFormatR16ui:
        return "r16ui";
    case ImageFormatRgb10a2ui:
        return "rgb10_a2ui";
    case ImageFormatR8i:
        return "r8i";
    case ImageFormatRg8i:
        return "rg8i";
    case ImageFormatR16i:
        return "r16i";
    default:
    case ImageFormatUnknown:
        return nullptr;
    }
}

uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard)
{
    switch (type.basetype)
    {
    case SPIRType::Double:
    case SPIRType::Int64:
    case SPIRType::UInt64:
        return 8;
    case SPIRType::Float:
    case SPIRType::Int:
    case SPIRType::UInt:
        return 4;
    case SPIRType::Half:
    case SPIRType::Short:
    case SPIRType::UShort:
        return 2;
    case SPIRType::SByte:
    case SPIRType::UByte:
        return 1;

    default:
        SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size.");
    }
}

uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags,
                                                BufferPackingStandard packing)
{
    // If using PhysicalStorageBufferEXT storage class, this is a pointer,
    // and is 64-bit.
    if (type.storage == StorageClassPhysicalStorageBufferEXT)
    {
        if (!type.pointer)
            SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");

        if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
        {
            if (packing_is_vec4_padded(packing) && type_is_array_of_pointers(type))
                return 16;
            else
                return 8;
        }
        else
            SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
    }

    if (!type.array.empty())
    {
        uint32_t minimum_alignment = 1;
        if (packing_is_vec4_padded(packing))
            minimum_alignment = 16;

        auto *tmp = &get<SPIRType>(type.parent_type);
        while (!tmp->array.empty())
            tmp = &get<SPIRType>(tmp->parent_type);

        // Get the alignment of the base type, then maybe round up.
        return max(minimum_alignment, type_to_packed_alignment(*tmp, flags, packing));
    }

    if (type.basetype == SPIRType::Struct)
    {
        // Rule 9. A struct's alignment is the maximum alignment of its members.
        uint32_t alignment = 1;
        for (uint32_t i = 0; i < type.member_types.size(); i++)
        {
            auto member_flags = ir.meta[type.self].members[i].decoration_flags;
            alignment =
                max(alignment, type_to_packed_alignment(get<SPIRType>(type.member_types[i]), member_flags, packing));
        }

        // In std140, struct alignment is rounded up to 16.
        if (packing_is_vec4_padded(packing))
            alignment = max(alignment, 16u);

        return alignment;
    }
    else
    {
        const uint32_t base_alignment = type_to_packed_base_size(type, packing);

        // Alignment requirement for scalar block layout is always the alignment for the most basic component.
        if (packing_is_scalar(packing))
            return base_alignment;

        // Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle
        // a vec4; this is handled outside since that part knows our current offset.
1473 if (type.columns == 1 && packing_is_hlsl(packing))
1474 return base_alignment;
1475
1476 // From 7.6.2.2 in GL 4.5 core spec.
1477 // Rule 1
1478 if (type.vecsize == 1 && type.columns == 1)
1479 return base_alignment;
1480
1481 // Rule 2
1482 if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1)
1483 return type.vecsize * base_alignment;
1484
1485 // Rule 3
1486 if (type.vecsize == 3 && type.columns == 1)
1487 return 4 * base_alignment;
1488
1489 // Rule 4 implied. Alignment does not change in std430.
1490
1491 // Rule 5. Column-major matrices are stored as arrays of
1492 // vectors.
1493 if (flags.get(DecorationColMajor) && type.columns > 1)
1494 {
1495 if (packing_is_vec4_padded(packing))
1496 return 4 * base_alignment;
1497 else if (type.vecsize == 3)
1498 return 4 * base_alignment;
1499 else
1500 return type.vecsize * base_alignment;
1501 }
1502
1503 // Rule 6 implied.
1504
1505 // Rule 7.
1506 if (flags.get(DecorationRowMajor) && type.vecsize > 1)
1507 {
1508 if (packing_is_vec4_padded(packing))
1509 return 4 * base_alignment;
1510 else if (type.columns == 3)
1511 return 4 * base_alignment;
1512 else
1513 return type.columns * base_alignment;
1514 }
1515
1516 // Rule 8 implied.
1517 }
1518
1519 SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?");
1520}
1521
1522uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const Bitset &flags,
1523 BufferPackingStandard packing)
1524{
1525 // Array stride is equal to aligned size of the underlying type.
1526 uint32_t parent = type.parent_type;
1527 assert(parent);
1528
1529 auto &tmp = get<SPIRType>(parent);
1530
1531 uint32_t size = type_to_packed_size(tmp, flags, packing);
1532 uint32_t alignment = type_to_packed_alignment(type, flags, packing);
1533 return (size + alignment - 1) & ~(alignment - 1);
1534}
1535
1536uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing)
1537{
1538 if (!type.array.empty())
1539 {
1540 uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing);
1541
1542 // For arrays of vectors and matrices in HLSL, the last element has a size which depends on its vector size,
1543 // so that it is possible to pack other vectors into the last element.
1544 if (packing_is_hlsl(packing) && type.basetype != SPIRType::Struct)
1545 packed_size -= (4 - type.vecsize) * (type.width / 8);
1546
1547 return packed_size;
1548 }
1549
1550 // If using PhysicalStorageBufferEXT storage class, this is a pointer,
1551 // and is 64-bit.
1552 if (type.storage == StorageClassPhysicalStorageBufferEXT)
1553 {
1554 if (!type.pointer)
1555 SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
1556
1557 if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
1558 return 8;
1559 else
1560 SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
1561 }
1562
1563 uint32_t size = 0;
1564
1565 if (type.basetype == SPIRType::Struct)
1566 {
1567 uint32_t pad_alignment = 1;
1568
1569 for (uint32_t i = 0; i < type.member_types.size(); i++)
1570 {
1571 auto member_flags = ir.meta[type.self].members[i].decoration_flags;
1572 auto &member_type = get<SPIRType>(type.member_types[i]);
1573
1574 uint32_t packed_alignment = type_to_packed_alignment(member_type, member_flags, packing);
1575 uint32_t alignment = max(packed_alignment, pad_alignment);
1576
1577 // The next member following a struct member is aligned to the base alignment of the struct that came before.
1578 // GL 4.5 spec, 7.6.2.2.
1579 if (member_type.basetype == SPIRType::Struct)
1580 pad_alignment = packed_alignment;
1581 else
1582 pad_alignment = 1;
1583
1584 size = (size + alignment - 1) & ~(alignment - 1);
1585 size += type_to_packed_size(member_type, member_flags, packing);
1586 }
1587 }
1588 else
1589 {
1590 const uint32_t base_alignment = type_to_packed_base_size(type, packing);
1591
1592 if (packing_is_scalar(packing))
1593 {
1594 size = type.vecsize * type.columns * base_alignment;
1595 }
1596 else
1597 {
1598 if (type.columns == 1)
1599 size = type.vecsize * base_alignment;
1600
1601 if (flags.get(DecorationColMajor) && type.columns > 1)
1602 {
1603 if (packing_is_vec4_padded(packing))
1604 size = type.columns * 4 * base_alignment;
1605 else if (type.vecsize == 3)
1606 size = type.columns * 4 * base_alignment;
1607 else
1608 size = type.columns * type.vecsize * base_alignment;
1609 }
1610
1611 if (flags.get(DecorationRowMajor) && type.vecsize > 1)
1612 {
1613 if (packing_is_vec4_padded(packing))
1614 size = type.vecsize * 4 * base_alignment;
1615 else if (type.columns == 3)
1616 size = type.vecsize * 4 * base_alignment;
1617 else
1618 size = type.vecsize * type.columns * base_alignment;
1619 }
1620
1621 // For matrices in HLSL, the last element has a size which depends on its vector size,
1622 // so that it is possible to pack other vectors into the last element.
1623 if (packing_is_hlsl(packing) && type.columns > 1)
1624 size -= (4 - type.vecsize) * (type.width / 8);
1625 }
1626 }
1627
1628 return size;
1629}
1630
1631bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing,
1632 uint32_t *failed_validation_index, uint32_t start_offset,
1633 uint32_t end_offset)
1634{
1635 // This is very tricky and error prone, but try to be exhaustive and correct here.
1636 // SPIR-V doesn't directly say if we're using std430 or std140.
1637 // SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters),
1638 // so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information.
1639 // We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyways, and custom offsets would do the same thing).
1640 //
1641 // It is almost certain that we're using std430, but it gets tricky with arrays in particular.
1642 // We will assume std430, but infer std140 if we can prove the struct is not compliant with std430.
1643 //
1644 // The only two differences between std140 and std430 are related to padding alignment/array stride
1645 // in arrays and structs. In std140 they take minimum vec4 alignment.
1646 // std430 only removes the vec4 requirement.
1647
1648 uint32_t offset = 0;
1649 uint32_t pad_alignment = 1;
1650
1651 bool is_top_level_block =
1652 has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);
1653
1654 for (uint32_t i = 0; i < type.member_types.size(); i++)
1655 {
1656 auto &memb_type = get<SPIRType>(type.member_types[i]);
1657 auto member_flags = ir.meta[type.self].members[i].decoration_flags;
1658
1659 // Verify alignment rules.
1660 uint32_t packed_alignment = type_to_packed_alignment(memb_type, member_flags, packing);
1661
1662 // This is a rather dirty workaround to deal with some cases of OpSpecConstantOp used as array size, e.g:
1663 // layout(constant_id = 0) const int s = 10;
1664 // const int S = s + 5; // SpecConstantOp
1665 // buffer Foo { int data[S]; }; // <-- Very hard for us to deduce a fixed value here,
1666 // we would need full implementation of compile-time constant folding. :(
1667 // If we are the last member of a struct, there might be cases where the actual size of that member is irrelevant
1668 // for our analysis (e.g. unsized arrays).
1669 // This lets us simply ignore that there are spec constant op sized arrays in our buffers.
1670 // Querying size of this member will fail, so just don't call it unless we have to.
1671 //
1672 // This is likely "best effort" we can support without going into unacceptably complicated workarounds.
1673 bool member_can_be_unsized =
1674 is_top_level_block && size_t(i + 1) == type.member_types.size() && !memb_type.array.empty();
1675
1676 uint32_t packed_size = 0;
1677 if (!member_can_be_unsized || packing_is_hlsl(packing))
1678 packed_size = type_to_packed_size(memb_type, member_flags, packing);
1679
1680 // We only need to care about this if we have non-array types which can straddle the vec4 boundary.
1681 if (packing_is_hlsl(packing))
1682 {
1683 // If a member straddles across a vec4 boundary, alignment is actually vec4.
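			// e.g. a vec3 at offset 8 occupies bytes 8 through 19 and crosses the 16 byte boundary,
			// so it has to be treated as 16 byte aligned.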
1684 uint32_t begin_word = offset / 16;
1685 uint32_t end_word = (offset + packed_size - 1) / 16;
1686 if (begin_word != end_word)
1687 packed_alignment = max(packed_alignment, 16u);
1688 }
1689
1690 uint32_t actual_offset = type_struct_member_offset(type, i);
1691 // Field is not in the specified range anymore and we can ignore any further fields.
1692 if (actual_offset >= end_offset)
1693 break;
1694
1695 uint32_t alignment = max(packed_alignment, pad_alignment);
1696 offset = (offset + alignment - 1) & ~(alignment - 1);
1697
1698 // The next member following a struct member is aligned to the base alignment of the struct that came before.
1699 // GL 4.5 spec, 7.6.2.2.
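		// e.g. a lone float following a struct member whose base alignment is 16
		// must still be placed at the next 16 byte aligned offset.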
1700 if (memb_type.basetype == SPIRType::Struct && !memb_type.pointer)
1701 pad_alignment = packed_alignment;
1702 else
1703 pad_alignment = 1;
1704
1705 // Only care about packing if we are in the given range
1706 if (actual_offset >= start_offset)
1707 {
1708 // We only care about offsets in std140, std430, etc ...
1709 // For EnhancedLayout variants, we have the flexibility to choose our own offsets.
1710 if (!packing_has_flexible_offset(packing))
1711 {
1712 if (actual_offset != offset) // This cannot be the packing we're looking for.
1713 {
1714 if (failed_validation_index)
1715 *failed_validation_index = i;
1716 return false;
1717 }
1718 }
1719 else if ((actual_offset & (alignment - 1)) != 0)
1720 {
1721 // We still need to verify that alignment rules are observed, even if we have explicit offset.
1722 if (failed_validation_index)
1723 *failed_validation_index = i;
1724 return false;
1725 }
1726
1727 // Verify array stride rules.
1728 if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) !=
1729 type_struct_member_array_stride(type, i))
1730 {
1731 if (failed_validation_index)
1732 *failed_validation_index = i;
1733 return false;
1734 }
1735
1736 // Verify that sub-structs also follow packing rules.
1737 // We cannot use enhanced layouts on substructs, so they better be up to spec.
1738 auto substruct_packing = packing_to_substruct_packing(packing);
1739
1740 if (!memb_type.pointer && !memb_type.member_types.empty() &&
1741 !buffer_is_packing_standard(memb_type, substruct_packing))
1742 {
1743 if (failed_validation_index)
1744 *failed_validation_index = i;
1745 return false;
1746 }
1747 }
1748
1749 // Bump size.
1750 offset = actual_offset + packed_size;
1751 }
1752
1753 return true;
1754}
1755
1756bool CompilerGLSL::can_use_io_location(StorageClass storage, bool block)
1757{
	// Location specifiers are mandatory in SPIR-V, but they aren't really supported in earlier versions of GLSL.
	// Be very explicit here about how to solve the issue.
1760 if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) ||
1761 (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput))
1762 {
1763 uint32_t minimum_desktop_version = block ? 440 : 410;
1764 // ARB_enhanced_layouts vs ARB_separate_shader_objects ...
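		// e.g. a non-block vertex shader output with an explicit location needs GLSL 4.10 (ARB_separate_shader_objects),
		// while an output block needs GLSL 4.40 (ARB_enhanced_layouts), unless separate_shader_objects is enabled.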
1765
1766 if (!options.es && options.version < minimum_desktop_version && !options.separate_shader_objects)
1767 return false;
1768 else if (options.es && options.version < 310)
1769 return false;
1770 }
1771
1772 if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) ||
1773 (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput))
1774 {
1775 if (options.es && options.version < 300)
1776 return false;
1777 else if (!options.es && options.version < 330)
1778 return false;
1779 }
1780
1781 if (storage == StorageClassUniform || storage == StorageClassUniformConstant || storage == StorageClassPushConstant)
1782 {
1783 if (options.es && options.version < 310)
1784 return false;
1785 else if (!options.es && options.version < 430)
1786 return false;
1787 }
1788
1789 return true;
1790}
1791
1792string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
1793{
	// FIXME: Come up with a better solution for when to disable layouts.
	// Whether layouts can be used depends on extensions as well as which kinds of layouts are used.
	// For now, the simple solution is to just disable layouts for legacy versions.
1798 if (is_legacy())
1799 return "";
1800
1801 if (subpass_input_is_framebuffer_fetch(var.self))
1802 return "";
1803
1804 SmallVector<string> attr;
1805
1806 auto &type = get<SPIRType>(var.basetype);
1807 auto &flags = get_decoration_bitset(var.self);
1808 auto &typeflags = get_decoration_bitset(type.self);
1809
1810 if (flags.get(DecorationPassthroughNV))
1811 attr.push_back("passthrough");
1812
1813 if (options.vulkan_semantics && var.storage == StorageClassPushConstant)
1814 attr.push_back("push_constant");
1815 else if (var.storage == StorageClassShaderRecordBufferKHR)
1816 attr.push_back(ray_tracing_is_khr ? "shaderRecordEXT" : "shaderRecordNV");
1817
1818 if (flags.get(DecorationRowMajor))
1819 attr.push_back("row_major");
1820 if (flags.get(DecorationColMajor))
1821 attr.push_back("column_major");
1822
1823 if (options.vulkan_semantics)
1824 {
1825 if (flags.get(DecorationInputAttachmentIndex))
1826 attr.push_back(join("input_attachment_index = ", get_decoration(var.self, DecorationInputAttachmentIndex)));
1827 }
1828
1829 bool is_block = has_decoration(type.self, DecorationBlock);
1830 if (flags.get(DecorationLocation) && can_use_io_location(var.storage, is_block))
1831 {
1832 Bitset combined_decoration;
1833 for (uint32_t i = 0; i < ir.meta[type.self].members.size(); i++)
1834 combined_decoration.merge_or(combined_decoration_for_member(type, i));
1835
1836 // If our members have location decorations, we don't need to
1837 // emit location decorations at the top as well (looks weird).
1838 if (!combined_decoration.get(DecorationLocation))
1839 attr.push_back(join("location = ", get_decoration(var.self, DecorationLocation)));
1840 }
1841
1842 if (get_execution_model() == ExecutionModelFragment && var.storage == StorageClassOutput &&
1843 location_is_non_coherent_framebuffer_fetch(get_decoration(var.self, DecorationLocation)))
1844 {
1845 attr.push_back("noncoherent");
1846 }
1847
1848 // Transform feedback
1849 bool uses_enhanced_layouts = false;
1850 if (is_block && var.storage == StorageClassOutput)
1851 {
1852 // For blocks, there is a restriction where xfb_stride/xfb_buffer must only be declared on the block itself,
1853 // since all members must match the same xfb_buffer. The only thing we will declare for members of the block
1854 // is the xfb_offset.
1855 uint32_t member_count = uint32_t(type.member_types.size());
1856 bool have_xfb_buffer_stride = false;
1857 bool have_any_xfb_offset = false;
1858 bool have_geom_stream = false;
1859 uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
1860
1861 if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride))
1862 {
1863 have_xfb_buffer_stride = true;
1864 xfb_buffer = get_decoration(var.self, DecorationXfbBuffer);
1865 xfb_stride = get_decoration(var.self, DecorationXfbStride);
1866 }
1867
1868 if (flags.get(DecorationStream))
1869 {
1870 have_geom_stream = true;
1871 geom_stream = get_decoration(var.self, DecorationStream);
1872 }
1873
1874 // Verify that none of the members violate our assumption.
1875 for (uint32_t i = 0; i < member_count; i++)
1876 {
1877 if (has_member_decoration(type.self, i, DecorationStream))
1878 {
1879 uint32_t member_geom_stream = get_member_decoration(type.self, i, DecorationStream);
1880 if (have_geom_stream && member_geom_stream != geom_stream)
1881 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
1882 have_geom_stream = true;
1883 geom_stream = member_geom_stream;
1884 }
1885
1886 // Only members with an Offset decoration participate in XFB.
1887 if (!has_member_decoration(type.self, i, DecorationOffset))
1888 continue;
1889 have_any_xfb_offset = true;
1890
1891 if (has_member_decoration(type.self, i, DecorationXfbBuffer))
1892 {
1893 uint32_t buffer_index = get_member_decoration(type.self, i, DecorationXfbBuffer);
1894 if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
1895 SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
1896 have_xfb_buffer_stride = true;
1897 xfb_buffer = buffer_index;
1898 }
1899
1900 if (has_member_decoration(type.self, i, DecorationXfbStride))
1901 {
1902 uint32_t stride = get_member_decoration(type.self, i, DecorationXfbStride);
1903 if (have_xfb_buffer_stride && stride != xfb_stride)
1904 SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
1905 have_xfb_buffer_stride = true;
1906 xfb_stride = stride;
1907 }
1908 }
1909
1910 if (have_xfb_buffer_stride && have_any_xfb_offset)
1911 {
1912 attr.push_back(join("xfb_buffer = ", xfb_buffer));
1913 attr.push_back(join("xfb_stride = ", xfb_stride));
1914 uses_enhanced_layouts = true;
1915 }
1916
1917 if (have_geom_stream)
1918 {
1919 if (get_execution_model() != ExecutionModelGeometry)
1920 SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
1921 if (options.es)
1922 SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
1923 if (options.version < 400)
1924 require_extension_internal("GL_ARB_transform_feedback3");
1925 attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream)));
1926 }
1927 }
1928 else if (var.storage == StorageClassOutput)
1929 {
1930 if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride) && flags.get(DecorationOffset))
1931 {
1932 // XFB for standalone variables, we can emit all decorations.
1933 attr.push_back(join("xfb_buffer = ", get_decoration(var.self, DecorationXfbBuffer)));
1934 attr.push_back(join("xfb_stride = ", get_decoration(var.self, DecorationXfbStride)));
1935 attr.push_back(join("xfb_offset = ", get_decoration(var.self, DecorationOffset)));
1936 uses_enhanced_layouts = true;
1937 }
1938
1939 if (flags.get(DecorationStream))
1940 {
1941 if (get_execution_model() != ExecutionModelGeometry)
1942 SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
1943 if (options.es)
1944 SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
1945 if (options.version < 400)
1946 require_extension_internal("GL_ARB_transform_feedback3");
1947 attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream)));
1948 }
1949 }
1950
1951 // Can only declare Component if we can declare location.
1952 if (flags.get(DecorationComponent) && can_use_io_location(var.storage, is_block))
1953 {
1954 uses_enhanced_layouts = true;
1955 attr.push_back(join("component = ", get_decoration(var.self, DecorationComponent)));
1956 }
1957
	if (uses_enhanced_layouts)
	{
		if (!options.es)
		{
			if (options.version < 140)
				SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in targets below GLSL 1.40.");
			else if (options.version < 440)
				require_extension_internal("GL_ARB_enhanced_layouts");
		}
		else
			SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in ESSL.");
	}
1972
1973 if (flags.get(DecorationIndex))
1974 attr.push_back(join("index = ", get_decoration(var.self, DecorationIndex)));
1975
1976 // Do not emit set = decoration in regular GLSL output, but
1977 // we need to preserve it in Vulkan GLSL mode.
1978 if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferKHR)
1979 {
1980 if (flags.get(DecorationDescriptorSet) && options.vulkan_semantics)
1981 attr.push_back(join("set = ", get_decoration(var.self, DecorationDescriptorSet)));
1982 }
1983
1984 bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant;
1985 bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
1986 (var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock));
1987 bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer;
1988 bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock);
1989
1990 // GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ...
1991 bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140);
1992
	// Pretend we have no UBOs when the options say so.
1994 if (ubo_block && options.emit_uniform_buffer_as_plain_uniforms)
1995 can_use_buffer_blocks = false;
1996
1997 bool can_use_binding;
1998 if (options.es)
1999 can_use_binding = options.version >= 310;
2000 else
2001 can_use_binding = options.enable_420pack_extension || (options.version >= 420);
2002
2003 // Make sure we don't emit binding layout for a classic uniform on GLSL 1.30.
2004 if (!can_use_buffer_blocks && var.storage == StorageClassUniform)
2005 can_use_binding = false;
2006
2007 if (var.storage == StorageClassShaderRecordBufferKHR)
2008 can_use_binding = false;
2009
2010 if (can_use_binding && flags.get(DecorationBinding))
2011 attr.push_back(join("binding = ", get_decoration(var.self, DecorationBinding)));
2012
2013 if (var.storage != StorageClassOutput && flags.get(DecorationOffset))
2014 attr.push_back(join("offset = ", get_decoration(var.self, DecorationOffset)));
2015
2016 // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430.
2017 // If SPIR-V does not comply with either layout, we cannot really work around it.
2018 if (can_use_buffer_blocks && (ubo_block || emulated_ubo))
2019 {
2020 attr.push_back(buffer_to_packing_standard(type, false));
2021 }
2022 else if (can_use_buffer_blocks && (push_constant_block || ssbo_block))
2023 {
2024 attr.push_back(buffer_to_packing_standard(type, true));
2025 }
2026
	// For images, the type itself adds a layout qualifier.
2028 // Only emit the format for storage images.
2029 if (type.basetype == SPIRType::Image && type.image.sampled == 2)
2030 {
2031 const char *fmt = format_to_glsl(type.image.format);
2032 if (fmt)
2033 attr.push_back(fmt);
2034 }
2035
2036 if (attr.empty())
2037 return "";
2038
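	// e.g. for a Vulkan UBO this can end up as something like: layout(set = 0, binding = 1, std140)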
2039 string res = "layout(";
2040 res += merge(attr);
2041 res += ") ";
2042 return res;
2043}
2044
2045string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout)
2046{
2047 if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, BufferPackingStd430))
2048 return "std430";
2049 else if (buffer_is_packing_standard(type, BufferPackingStd140))
2050 return "std140";
2051 else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalar))
2052 {
2053 require_extension_internal("GL_EXT_scalar_block_layout");
2054 return "scalar";
2055 }
2056 else if (support_std430_without_scalar_layout &&
2057 buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
2058 {
2059 if (options.es && !options.vulkan_semantics)
			SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES-targets do "
			                  "not support GL_ARB_enhanced_layouts.");
2062 if (!options.es && !options.vulkan_semantics && options.version < 440)
2063 require_extension_internal("GL_ARB_enhanced_layouts");
2064
2065 set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
2066 return "std430";
2067 }
2068 else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout))
2069 {
		// Fallback time. We might be able to use GL_ARB_enhanced_layouts to deal with this difference;
		// however, we can only use layout(offset) on the block itself, not any substructs, so the substructs
		// had better use an appropriate layout on their own.
2072 // Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there.
2073 if (options.es && !options.vulkan_semantics)
			SPIRV_CROSS_THROW("Push constant block cannot be expressed as either std430 or std140. ES-targets do "
			                  "not support GL_ARB_enhanced_layouts.");
2076 if (!options.es && !options.vulkan_semantics && options.version < 440)
2077 require_extension_internal("GL_ARB_enhanced_layouts");
2078
2079 set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
2080 return "std140";
2081 }
2082 else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout))
2083 {
2084 set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
2085 require_extension_internal("GL_EXT_scalar_block_layout");
2086 return "scalar";
2087 }
2088 else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
2089 buffer_is_packing_standard(type, BufferPackingStd430))
2090 {
2091 // UBOs can support std430 with GL_EXT_scalar_block_layout.
2092 require_extension_internal("GL_EXT_scalar_block_layout");
2093 return "std430";
2094 }
2095 else if (!support_std430_without_scalar_layout && options.vulkan_semantics &&
2096 buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
2097 {
2098 // UBOs can support std430 with GL_EXT_scalar_block_layout.
2099 set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset);
2100 require_extension_internal("GL_EXT_scalar_block_layout");
2101 return "std430";
2102 }
2103 else
2104 {
2105 SPIRV_CROSS_THROW("Buffer block cannot be expressed as any of std430, std140, scalar, even with enhanced "
2106 "layouts. You can try flattening this block to support a more flexible layout.");
2107 }
2108}
2109
2110void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var)
2111{
2112 if (flattened_buffer_blocks.count(var.self))
2113 emit_buffer_block_flattened(var);
2114 else if (options.vulkan_semantics)
2115 emit_push_constant_block_vulkan(var);
2116 else if (options.emit_push_constant_as_uniform_buffer)
2117 emit_buffer_block_native(var);
2118 else
2119 emit_push_constant_block_glsl(var);
2120}
2121
2122void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var)
2123{
2124 emit_buffer_block(var);
2125}
2126
2127void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
2128{
	// OpenGL has no concept of push constant blocks, so we implement this as a plain uniform struct.
2130 auto &type = get<SPIRType>(var.basetype);
2131
2132 auto &flags = ir.meta[var.self].decoration.decoration_flags;
2133 flags.clear(DecorationBinding);
2134 flags.clear(DecorationDescriptorSet);
2135
2136#if 0
2137 if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet)))
2138 SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. "
2139 "Remap to location with reflection API first or disable these decorations.");
2140#endif
2141
2142 // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
2143 // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
2144 auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
2145 bool block_flag = block_flags.get(DecorationBlock);
2146 block_flags.clear(DecorationBlock);
2147
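	// The result is a plain struct declaration followed by a uniform of that struct type, e.g.:
	//   struct PushConsts { vec4 v; };
	//   uniform PushConsts push;
	// (names here are purely illustrative)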
2148 emit_struct(type);
2149
2150 if (block_flag)
2151 block_flags.set(DecorationBlock);
2152
2153 emit_uniform(var);
2154 statement("");
2155}
2156
2157void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
2158{
2159 auto &type = get<SPIRType>(var.basetype);
2160 bool ubo_block = var.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock);
2161
2162 if (flattened_buffer_blocks.count(var.self))
2163 emit_buffer_block_flattened(var);
2164 else if (is_legacy() || (!options.es && options.version == 130) ||
2165 (ubo_block && options.emit_uniform_buffer_as_plain_uniforms))
2166 emit_buffer_block_legacy(var);
2167 else
2168 emit_buffer_block_native(var);
2169}
2170
2171void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var)
2172{
2173 auto &type = get<SPIRType>(var.basetype);
2174 bool ssbo = var.storage == StorageClassStorageBuffer ||
2175 ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
2176 if (ssbo)
2177 SPIRV_CROSS_THROW("SSBOs not supported in legacy targets.");
2178
	// We're emitting the buffer block as a regular struct, so disable the block qualifier temporarily.
2180 // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
2181 auto &block_flags = ir.meta[type.self].decoration.decoration_flags;
2182 bool block_flag = block_flags.get(DecorationBlock);
2183 block_flags.clear(DecorationBlock);
2184 emit_struct(type);
2185 if (block_flag)
2186 block_flags.set(DecorationBlock);
2187 emit_uniform(var);
2188 statement("");
2189}
2190
2191void CompilerGLSL::emit_buffer_reference_block(uint32_t type_id, bool forward_declaration)
2192{
2193 auto &type = get<SPIRType>(type_id);
2194 string buffer_name;
2195
2196 if (forward_declaration)
2197 {
2198 // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
		// Allow an aliased name since we might be declaring the block twice: once as a buffer reference
		// (forward declared) and once as the proper declaration. The names must match up.
2201 buffer_name = to_name(type.self, false);
2202
2203 // Shaders never use the block by interface name, so we don't
2204 // have to track this other than updating name caches.
		// If we have a collision for any reason, just fall back immediately.
2206 if (ir.meta[type.self].decoration.alias.empty() ||
2207 block_ssbo_names.find(buffer_name) != end(block_ssbo_names) ||
2208 resource_names.find(buffer_name) != end(resource_names))
2209 {
2210 buffer_name = join("_", type.self);
2211 }
2212
2213 // Make sure we get something unique for both global name scope and block name scope.
2214 // See GLSL 4.5 spec: section 4.3.9 for details.
2215 add_variable(block_ssbo_names, resource_names, buffer_name);
2216
2217 // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
2218 // This cannot conflict with anything else, so we're safe now.
		// We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
2220 if (buffer_name.empty())
2221 buffer_name = join("_", type.self);
2222
2223 block_names.insert(buffer_name);
2224 block_ssbo_names.insert(buffer_name);
2225
2226 // Ensure we emit the correct name when emitting non-forward pointer type.
2227 ir.meta[type.self].decoration.alias = buffer_name;
2228 }
2229 else if (type.basetype != SPIRType::Struct)
2230 buffer_name = type_to_glsl(type);
2231 else
2232 buffer_name = to_name(type.self, false);
2233
2234 if (!forward_declaration)
2235 {
2236 auto itr = physical_storage_type_to_alignment.find(type_id);
2237 uint32_t alignment = 0;
2238 if (itr != physical_storage_type_to_alignment.end())
2239 alignment = itr->second.alignment;
2240
2241 if (type.basetype == SPIRType::Struct)
2242 {
2243 SmallVector<std::string> attributes;
2244 attributes.push_back("buffer_reference");
2245 if (alignment)
2246 attributes.push_back(join("buffer_reference_align = ", alignment));
2247 attributes.push_back(buffer_to_packing_standard(type, true));
2248
2249 auto flags = ir.get_buffer_block_type_flags(type);
2250 string decorations;
2251 if (flags.get(DecorationRestrict))
2252 decorations += " restrict";
2253 if (flags.get(DecorationCoherent))
2254 decorations += " coherent";
2255 if (flags.get(DecorationNonReadable))
2256 decorations += " writeonly";
2257 if (flags.get(DecorationNonWritable))
2258 decorations += " readonly";
2259
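			// Emits something like: layout(buffer_reference, buffer_reference_align = 16, std430) restrict buffer Foo
			// (alignment, packing and qualifiers depend on the type).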
2260 statement("layout(", merge(attributes), ")", decorations, " buffer ", buffer_name);
2261 }
2262 else if (alignment)
2263 statement("layout(buffer_reference, buffer_reference_align = ", alignment, ") buffer ", buffer_name);
2264 else
2265 statement("layout(buffer_reference) buffer ", buffer_name);
2266
2267 begin_scope();
2268
2269 if (type.basetype == SPIRType::Struct)
2270 {
2271 type.member_name_cache.clear();
2272
2273 uint32_t i = 0;
2274 for (auto &member : type.member_types)
2275 {
2276 add_member_name(type, i);
2277 emit_struct_member(type, member, i);
2278 i++;
2279 }
2280 }
2281 else
2282 {
2283 auto &pointee_type = get_pointee_type(type);
2284 statement(type_to_glsl(pointee_type), " value", type_to_array_glsl(pointee_type), ";");
2285 }
2286
2287 end_scope_decl();
2288 statement("");
2289 }
2290 else
2291 {
2292 statement("layout(buffer_reference) buffer ", buffer_name, ";");
2293 }
2294}
2295
2296void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
2297{
2298 auto &type = get<SPIRType>(var.basetype);
2299
2300 Bitset flags = ir.get_buffer_block_flags(var);
2301 bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR ||
2302 ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
2303 bool is_restrict = ssbo && flags.get(DecorationRestrict);
2304 bool is_writeonly = ssbo && flags.get(DecorationNonReadable);
2305 bool is_readonly = ssbo && flags.get(DecorationNonWritable);
2306 bool is_coherent = ssbo && flags.get(DecorationCoherent);
2307
2308 // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
2309 auto buffer_name = to_name(type.self, false);
2310
2311 auto &block_namespace = ssbo ? block_ssbo_names : block_ubo_names;
2312
2313 // Shaders never use the block by interface name, so we don't
2314 // have to track this other than updating name caches.
	// If we have a collision for any reason, just fall back immediately.
2316 if (ir.meta[type.self].decoration.alias.empty() || block_namespace.find(buffer_name) != end(block_namespace) ||
2317 resource_names.find(buffer_name) != end(resource_names))
2318 {
2319 buffer_name = get_block_fallback_name(var.self);
2320 }
2321
2322 // Make sure we get something unique for both global name scope and block name scope.
2323 // See GLSL 4.5 spec: section 4.3.9 for details.
2324 add_variable(block_namespace, resource_names, buffer_name);
2325
2326 // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
2327 // This cannot conflict with anything else, so we're safe now.
	// We cannot reuse this fallback name in either global scope (blocked by block_names) or block name scope.
2329 if (buffer_name.empty())
2330 buffer_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
2331
2332 block_names.insert(buffer_name);
2333 block_namespace.insert(buffer_name);
2334
2335 // Save for post-reflection later.
2336 declared_block_names[var.self] = buffer_name;
2337
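	// In Vulkan GLSL this emits something like: layout(set = 0, binding = 2, std430) readonly buffer SSBO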
2338 statement(layout_for_variable(var), is_coherent ? "coherent " : "", is_restrict ? "restrict " : "",
2339 is_writeonly ? "writeonly " : "", is_readonly ? "readonly " : "", ssbo ? "buffer " : "uniform ",
2340 buffer_name);
2341
2342 begin_scope();
2343
2344 type.member_name_cache.clear();
2345
2346 uint32_t i = 0;
2347 for (auto &member : type.member_types)
2348 {
2349 add_member_name(type, i);
2350 emit_struct_member(type, member, i);
2351 i++;
2352 }
2353
2354 // var.self can be used as a backup name for the block name,
2355 // so we need to make sure we don't disturb the name here on a recompile.
2356 // It will need to be reset if we have to recompile.
2357 preserve_alias_on_reset(var.self);
2358 add_resource_name(var.self);
2359 end_scope_decl(to_name(var.self) + type_to_array_glsl(type));
2360 statement("");
2361}
2362
2363void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var)
2364{
2365 auto &type = get<SPIRType>(var.basetype);
2366
2367 // Block names should never alias.
2368 auto buffer_name = to_name(type.self, false);
2369 size_t buffer_size = (get_declared_struct_size(type) + 15) / 16;
2370
2371 SPIRType::BaseType basic_type;
2372 if (get_common_basic_type(type, basic_type))
2373 {
2374 SPIRType tmp;
2375 tmp.basetype = basic_type;
2376 tmp.vecsize = 4;
2377 if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt)
2378 SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint.");
2379
2380 auto flags = ir.get_buffer_block_flags(var);
2381 statement("uniform ", flags_to_qualifiers_glsl(tmp, flags), type_to_glsl(tmp), " ", buffer_name, "[",
2382 buffer_size, "];");
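		// e.g. a 192 byte block of floats ends up as something like: uniform vec4 UBO[12];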
2383 }
2384 else
2385 SPIRV_CROSS_THROW("All basic types in a flattened block must be the same.");
2386}
2387
2388const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
2389{
2390 auto &execution = get_entry_point();
2391
2392 if (subpass_input_is_framebuffer_fetch(var.self))
2393 return "";
2394
2395 if (var.storage == StorageClassInput || var.storage == StorageClassOutput)
2396 {
2397 if (is_legacy() && execution.model == ExecutionModelVertex)
2398 return var.storage == StorageClassInput ? "attribute " : "varying ";
2399 else if (is_legacy() && execution.model == ExecutionModelFragment)
2400 return "varying "; // Fragment outputs are renamed so they never hit this case.
2401 else if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput)
2402 {
2403 uint32_t loc = get_decoration(var.self, DecorationLocation);
2404 bool is_inout = location_is_framebuffer_fetch(loc);
2405 if (is_inout)
2406 return "inout ";
2407 else
2408 return "out ";
2409 }
2410 else
2411 return var.storage == StorageClassInput ? "in " : "out ";
2412 }
2413 else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform ||
2414 var.storage == StorageClassPushConstant)
2415 {
2416 return "uniform ";
2417 }
2418 else if (var.storage == StorageClassRayPayloadKHR)
2419 {
2420 return ray_tracing_is_khr ? "rayPayloadEXT " : "rayPayloadNV ";
2421 }
2422 else if (var.storage == StorageClassIncomingRayPayloadKHR)
2423 {
2424 return ray_tracing_is_khr ? "rayPayloadInEXT " : "rayPayloadInNV ";
2425 }
2426 else if (var.storage == StorageClassHitAttributeKHR)
2427 {
2428 return ray_tracing_is_khr ? "hitAttributeEXT " : "hitAttributeNV ";
2429 }
2430 else if (var.storage == StorageClassCallableDataKHR)
2431 {
2432 return ray_tracing_is_khr ? "callableDataEXT " : "callableDataNV ";
2433 }
2434 else if (var.storage == StorageClassIncomingCallableDataKHR)
2435 {
2436 return ray_tracing_is_khr ? "callableDataInEXT " : "callableDataInNV ";
2437 }
2438
2439 return "";
2440}
2441
2442void CompilerGLSL::emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual,
2443 const SmallVector<uint32_t> &indices)
2444{
2445 uint32_t member_type_id = type.self;
2446 const SPIRType *member_type = &type;
2447 const SPIRType *parent_type = nullptr;
2448 auto flattened_name = basename;
2449 for (auto &index : indices)
2450 {
2451 flattened_name += "_";
2452 flattened_name += to_member_name(*member_type, index);
2453 parent_type = member_type;
2454 member_type_id = member_type->member_types[index];
2455 member_type = &get<SPIRType>(member_type_id);
2456 }
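	// e.g. a member "color" of a block instance named "VSOut" ends up as a flattened varying "VSOut_color".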
2457
2458 assert(member_type->basetype != SPIRType::Struct);
2459
2460 // We're overriding struct member names, so ensure we do so on the primary type.
2461 if (parent_type->type_alias)
2462 parent_type = &get<SPIRType>(parent_type->type_alias);
2463
2464 // Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row,
2465 // which is not allowed.
2466 ParsedIR::sanitize_underscores(flattened_name);
2467
2468 uint32_t last_index = indices.back();
2469
2470 // Pass in the varying qualifier here so it will appear in the correct declaration order.
2471 // Replace member name while emitting it so it encodes both struct name and member name.
2472 auto backup_name = get_member_name(parent_type->self, last_index);
2473 auto member_name = to_member_name(*parent_type, last_index);
2474 set_member_name(parent_type->self, last_index, flattened_name);
2475 emit_struct_member(*parent_type, member_type_id, last_index, qual);
2476 // Restore member name.
2477 set_member_name(parent_type->self, last_index, member_name);
2478}
2479
2480void CompilerGLSL::emit_flattened_io_block_struct(const std::string &basename, const SPIRType &type, const char *qual,
2481 const SmallVector<uint32_t> &indices)
2482{
2483 auto sub_indices = indices;
2484 sub_indices.push_back(0);
2485
2486 const SPIRType *member_type = &type;
2487 for (auto &index : indices)
2488 member_type = &get<SPIRType>(member_type->member_types[index]);
2489
2490 assert(member_type->basetype == SPIRType::Struct);
2491
2492 if (!member_type->array.empty())
2493 SPIRV_CROSS_THROW("Cannot flatten array of structs in I/O blocks.");
2494
2495 for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
2496 {
2497 sub_indices.back() = i;
2498 if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct)
2499 emit_flattened_io_block_struct(basename, type, qual, sub_indices);
2500 else
2501 emit_flattened_io_block_member(basename, type, qual, sub_indices);
2502 }
2503}
2504
2505void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual)
2506{
2507 auto &var_type = get<SPIRType>(var.basetype);
2508 if (!var_type.array.empty())
2509 SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings.");
2510
2511 // Emit flattened types based on the type alias. Normally, we are never supposed to emit
2512 // struct declarations for aliased types.
2513 auto &type = var_type.type_alias ? get<SPIRType>(var_type.type_alias) : var_type;
2514
2515 auto old_flags = ir.meta[type.self].decoration.decoration_flags;
2516 // Emit the members as if they are part of a block to get all qualifiers.
2517 ir.meta[type.self].decoration.decoration_flags.set(DecorationBlock);
2518
2519 type.member_name_cache.clear();
2520
2521 SmallVector<uint32_t> member_indices;
2522 member_indices.push_back(0);
2523 auto basename = to_name(var.self);
2524
2525 uint32_t i = 0;
2526 for (auto &member : type.member_types)
2527 {
2528 add_member_name(type, i);
2529 auto &membertype = get<SPIRType>(member);
2530
2531 member_indices.back() = i;
2532 if (membertype.basetype == SPIRType::Struct)
2533 emit_flattened_io_block_struct(basename, type, qual, member_indices);
2534 else
2535 emit_flattened_io_block_member(basename, type, qual, member_indices);
2536 i++;
2537 }
2538
2539 ir.meta[type.self].decoration.decoration_flags = old_flags;
2540
2541 // Treat this variable as fully flattened from now on.
2542 flattened_structs[var.self] = true;
2543}
2544
2545void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
2546{
2547 auto &type = get<SPIRType>(var.basetype);
2548
2549 if (var.storage == StorageClassInput && type.basetype == SPIRType::Double &&
2550 !options.es && options.version < 410)
2551 {
2552 require_extension_internal("GL_ARB_vertex_attrib_64bit");
2553 }
2554
	// Either make it a plain in/out variable or an in/out block depending on what the shader is doing ...
2556 bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock);
2557 const char *qual = to_storage_qualifiers_glsl(var);
2558
2559 if (block)
2560 {
2561 // ESSL earlier than 310 and GLSL earlier than 150 did not support
2562 // I/O variables which are struct types.
2563 // To support this, flatten the struct into separate varyings instead.
2564 if (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
2565 (!options.es && options.version < 150))
2566 {
2567 // I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320.
2568 // On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150).
2569 emit_flattened_io_block(var, qual);
2570 }
2571 else
2572 {
2573 if (options.es && options.version < 320)
2574 {
2575 // Geometry and tessellation extensions imply this extension.
2576 if (!has_extension("GL_EXT_geometry_shader") && !has_extension("GL_EXT_tessellation_shader"))
2577 require_extension_internal("GL_EXT_shader_io_blocks");
2578 }
2579
2580 // Workaround to make sure we can emit "patch in/out" correctly.
2581 fixup_io_block_patch_qualifiers(var);
2582
2583 // Block names should never alias.
2584 auto block_name = to_name(type.self, false);
2585
2586 // The namespace for I/O blocks is separate from other variables in GLSL.
2587 auto &block_namespace = type.storage == StorageClassInput ? block_input_names : block_output_names;
2588
2589 // Shaders never use the block by interface name, so we don't
2590 // have to track this other than updating name caches.
2591 if (block_name.empty() || block_namespace.find(block_name) != end(block_namespace))
2592 block_name = get_fallback_name(type.self);
2593 else
2594 block_namespace.insert(block_name);
2595
			// If for some reason block_name is an illegal name, make a final fallback to a workaround name.
2597 // This cannot conflict with anything else, so we're safe now.
2598 if (block_name.empty())
2599 block_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
2600
2601 // Instance names cannot alias block names.
2602 resource_names.insert(block_name);
2603
2604 bool is_patch = has_decoration(var.self, DecorationPatch);
2605 statement(layout_for_variable(var), (is_patch ? "patch " : ""), qual, block_name);
2606 begin_scope();
2607
2608 type.member_name_cache.clear();
2609
2610 uint32_t i = 0;
2611 for (auto &member : type.member_types)
2612 {
2613 add_member_name(type, i);
2614 emit_struct_member(type, member, i);
2615 i++;
2616 }
2617
2618 add_resource_name(var.self);
2619 end_scope_decl(join(to_name(var.self), type_to_array_glsl(type)));
2620 statement("");
2621 }
2622 }
2623 else
2624 {
2625 // ESSL earlier than 310 and GLSL earlier than 150 did not support
2626 // I/O variables which are struct types.
2627 // To support this, flatten the struct into separate varyings instead.
2628 if (type.basetype == SPIRType::Struct &&
2629 (options.force_flattened_io_blocks || (options.es && options.version < 310) ||
2630 (!options.es && options.version < 150)))
2631 {
2632 emit_flattened_io_block(var, qual);
2633 }
2634 else
2635 {
2636 add_resource_name(var.self);
2637
2638 // Tessellation control and evaluation shaders must have either gl_MaxPatchVertices or unsized arrays for input arrays.
2639 // Opt for unsized as it's the more "correct" variant to use.
2640 bool control_point_input_array = type.storage == StorageClassInput && !type.array.empty() &&
2641 !has_decoration(var.self, DecorationPatch) &&
2642 (get_entry_point().model == ExecutionModelTessellationControl ||
2643 get_entry_point().model == ExecutionModelTessellationEvaluation);
2644
2645 uint32_t old_array_size = 0;
2646 bool old_array_size_literal = true;
2647
2648 if (control_point_input_array)
2649 {
2650 swap(type.array.back(), old_array_size);
2651 swap(type.array_size_literal.back(), old_array_size_literal);
2652 }
2653
2654 statement(layout_for_variable(var), to_qualifiers_glsl(var.self),
2655 variable_decl(type, to_name(var.self), var.self), ";");
2656
2657 if (control_point_input_array)
2658 {
2659 swap(type.array.back(), old_array_size);
2660 swap(type.array_size_literal.back(), old_array_size_literal);
2661 }
2662 }
2663 }
2664}
2665
2666void CompilerGLSL::emit_uniform(const SPIRVariable &var)
2667{
2668 auto &type = get<SPIRType>(var.basetype);
2669 if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData)
2670 {
2671 if (!options.es && options.version < 420)
2672 require_extension_internal("GL_ARB_shader_image_load_store");
2673 else if (options.es && options.version < 310)
2674 SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store.");
2675 }
2676
2677 add_resource_name(var.self);
2678 statement(layout_for_variable(var), variable_decl(var), ";");
2679}
2680
2681string CompilerGLSL::constant_value_macro_name(uint32_t id)
2682{
2683 return join("SPIRV_CROSS_CONSTANT_ID_", id);
2684}
2685
2686void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant)
2687{
2688 auto &type = get<SPIRType>(constant.basetype);
2689 add_resource_name(constant.self);
2690 auto name = to_name(constant.self);
2691 statement("const ", variable_decl(type, name), " = ", constant_op_expression(constant), ";");
2692}
2693
2694int CompilerGLSL::get_constant_mapping_to_workgroup_component(const SPIRConstant &c) const
2695{
2696 auto &entry_point = get_entry_point();
2697 int index = -1;
2698
	// Need to redirect specialization constants which are used as WorkGroupSize to the builtin,
	// since the spec constants themselves are never explicitly declared.
2701 if (entry_point.workgroup_size.constant == 0 && entry_point.flags.get(ExecutionModeLocalSizeId))
2702 {
2703 if (c.self == entry_point.workgroup_size.id_x)
2704 index = 0;
2705 else if (c.self == entry_point.workgroup_size.id_y)
2706 index = 1;
2707 else if (c.self == entry_point.workgroup_size.id_z)
2708 index = 2;
2709 }
2710
2711 return index;
2712}
2713
2714void CompilerGLSL::emit_constant(const SPIRConstant &constant)
2715{
2716 auto &type = get<SPIRType>(constant.constant_type);
2717
2718 SpecializationConstant wg_x, wg_y, wg_z;
2719 ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
2720
2721 // This specialization constant is implicitly declared by emitting layout() in;
2722 if (constant.self == workgroup_size_id)
2723 return;
2724
2725 // These specialization constants are implicitly declared by emitting layout() in;
2726 // In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration
2727 // later can use macro overrides for work group size.
2728 bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id ||
2729 ConstantID(constant.self) == wg_z.id;
2730
2731 if (options.vulkan_semantics && is_workgroup_size_constant)
2732 {
2733 // Vulkan GLSL does not need to declare workgroup spec constants explicitly, it is handled in layout().
2734 return;
2735 }
2736 else if (!options.vulkan_semantics && is_workgroup_size_constant &&
2737 !has_decoration(constant.self, DecorationSpecId))
2738 {
2739 // Only bother declaring a workgroup size if it is actually a specialization constant, because we need macros.
2740 return;
2741 }
2742
2743 add_resource_name(constant.self);
2744 auto name = to_name(constant.self);
2745
2746 // Only scalars have constant IDs.
2747 if (has_decoration(constant.self, DecorationSpecId))
2748 {
2749 if (options.vulkan_semantics)
2750 {
2751 statement("layout(constant_id = ", get_decoration(constant.self, DecorationSpecId), ") const ",
2752 variable_decl(type, name), " = ", constant_expression(constant), ";");
2753 }
2754 else
2755 {
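			// Emit an overridable macro for GL targets, e.g.:
			//   #ifndef SPIRV_CROSS_CONSTANT_ID_0
			//   #define SPIRV_CROSS_CONSTANT_ID_0 1
			//   #endif
			//   const int foo = SPIRV_CROSS_CONSTANT_ID_0;
			// (constant ID, type and default value are illustrative)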
2756 const string &macro_name = constant.specialization_constant_macro_name;
2757 statement("#ifndef ", macro_name);
2758 statement("#define ", macro_name, " ", constant_expression(constant));
2759 statement("#endif");
2760
2761 // For workgroup size constants, only emit the macros.
2762 if (!is_workgroup_size_constant)
2763 statement("const ", variable_decl(type, name), " = ", macro_name, ";");
2764 }
2765 }
2766 else
2767 {
2768 statement("const ", variable_decl(type, name), " = ", constant_expression(constant), ";");
2769 }
2770}
2771
2772void CompilerGLSL::emit_entry_point_declarations()
2773{
2774}
2775
2776void CompilerGLSL::replace_illegal_names(const unordered_set<string> &keywords)
2777{
2778 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
2779 if (is_hidden_variable(var))
2780 return;
2781
2782 auto *meta = ir.find_meta(var.self);
2783 if (!meta)
2784 return;
2785
2786 auto &m = meta->decoration;
2787 if (keywords.find(m.alias) != end(keywords))
2788 m.alias = join("_", m.alias);
2789 });
2790
2791 ir.for_each_typed_id<SPIRFunction>([&](uint32_t, const SPIRFunction &func) {
2792 auto *meta = ir.find_meta(func.self);
2793 if (!meta)
2794 return;
2795
2796 auto &m = meta->decoration;
2797 if (keywords.find(m.alias) != end(keywords))
2798 m.alias = join("_", m.alias);
2799 });
2800
2801 ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
2802 auto *meta = ir.find_meta(type.self);
2803 if (!meta)
2804 return;
2805
2806 auto &m = meta->decoration;
2807 if (keywords.find(m.alias) != end(keywords))
2808 m.alias = join("_", m.alias);
2809
2810 for (auto &memb : meta->members)
2811 if (keywords.find(memb.alias) != end(keywords))
2812 memb.alias = join("_", memb.alias);
2813 });
2814}
2815
2816void CompilerGLSL::replace_illegal_names()
2817{
2818 // clang-format off
2819 static const unordered_set<string> keywords = {
2820 "abs", "acos", "acosh", "all", "any", "asin", "asinh", "atan", "atanh",
2821 "atomicAdd", "atomicCompSwap", "atomicCounter", "atomicCounterDecrement", "atomicCounterIncrement",
2822 "atomicExchange", "atomicMax", "atomicMin", "atomicOr", "atomicXor",
2823 "bitCount", "bitfieldExtract", "bitfieldInsert", "bitfieldReverse",
2824 "ceil", "cos", "cosh", "cross", "degrees",
2825 "dFdx", "dFdxCoarse", "dFdxFine",
2826 "dFdy", "dFdyCoarse", "dFdyFine",
2827 "distance", "dot", "EmitStreamVertex", "EmitVertex", "EndPrimitive", "EndStreamPrimitive", "equal", "exp", "exp2",
2828 "faceforward", "findLSB", "findMSB", "float16BitsToInt16", "float16BitsToUint16", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract",
2829 "frexp", "fwidth", "fwidthCoarse", "fwidthFine",
2830 "greaterThan", "greaterThanEqual", "groupMemoryBarrier",
2831 "imageAtomicAdd", "imageAtomicAnd", "imageAtomicCompSwap", "imageAtomicExchange", "imageAtomicMax", "imageAtomicMin", "imageAtomicOr", "imageAtomicXor",
2832 "imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "int16BitsToFloat16", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample",
2833 "inverse", "inversesqrt", "isinf", "isnan", "ldexp", "length", "lessThan", "lessThanEqual", "log", "log2",
2834 "matrixCompMult", "max", "memoryBarrier", "memoryBarrierAtomicCounter", "memoryBarrierBuffer", "memoryBarrierImage", "memoryBarrierShared",
2835 "min", "mix", "mod", "modf", "noise", "noise1", "noise2", "noise3", "noise4", "normalize", "not", "notEqual",
2836 "outerProduct", "packDouble2x32", "packHalf2x16", "packInt2x16", "packInt4x16", "packSnorm2x16", "packSnorm4x8",
2837 "packUint2x16", "packUint4x16", "packUnorm2x16", "packUnorm4x8", "pow",
2838 "radians", "reflect", "refract", "round", "roundEven", "sign", "sin", "sinh", "smoothstep", "sqrt", "step",
2839 "tan", "tanh", "texelFetch", "texelFetchOffset", "texture", "textureGather", "textureGatherOffset", "textureGatherOffsets",
2840 "textureGrad", "textureGradOffset", "textureLod", "textureLodOffset", "textureOffset", "textureProj", "textureProjGrad",
2841 "textureProjGradOffset", "textureProjLod", "textureProjLodOffset", "textureProjOffset", "textureQueryLevels", "textureQueryLod", "textureSamples", "textureSize",
2842 "transpose", "trunc", "uaddCarry", "uint16BitsToFloat16", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackInt2x16", "unpackInt4x16",
2843 "unpackSnorm2x16", "unpackSnorm4x8", "unpackUint2x16", "unpackUint4x16", "unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow",
2844
2845 "active", "asm", "atomic_uint", "attribute", "bool", "break", "buffer",
2846 "bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard",
2847 "dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4",
2848 "do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float",
2849 "for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray",
2850 "iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube",
2851 "iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect",
2852 "image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant",
2853 "isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect",
2854 "isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "long", "lowp",
2855 "mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump",
2856 "namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precise", "precision", "public", "readonly",
2857 "resource", "restrict", "return", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow",
2858 "sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray",
2859 "sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer",
2860 "samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "shared", "short", "sizeof", "smooth", "static",
2861 "struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D",
2862 "uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube",
2863 "uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray",
2864 "usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube",
2865 "usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile",
2866 "while", "writeonly",
2867 };
2868 // clang-format on
2869
2870 replace_illegal_names(keywords);
2871}
2872
2873void CompilerGLSL::replace_fragment_output(SPIRVariable &var)
2874{
2875 auto &m = ir.meta[var.self].decoration;
2876 uint32_t location = 0;
2877 if (m.decoration_flags.get(DecorationLocation))
2878 location = m.location;
2879
	// If our variable is arrayed, we must not emit the array part here;
	// the SPIR-V will do the access chain part of this for us.
2882 auto &type = get<SPIRType>(var.basetype);
2883
2884 if (type.array.empty())
2885 {
2886 // Redirect the write to a specific render target in legacy GLSL.
2887 m.alias = join("gl_FragData[", location, "]");
2888
2889 if (is_legacy_es() && location != 0)
2890 require_extension_internal("GL_EXT_draw_buffers");
2891 }
2892 else if (type.array.size() == 1)
2893 {
2894 // If location is non-zero, we probably have to add an offset.
2895 // This gets really tricky since we'd have to inject an offset in the access chain.
2896 // FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now.
2897 m.alias = "gl_FragData";
2898 if (location != 0)
2899 SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. "
2900 "This is unimplemented in SPIRV-Cross.");
2901
2902 if (is_legacy_es())
2903 require_extension_internal("GL_EXT_draw_buffers");
2904 }
2905 else
2906 SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL.");
2907
2908 var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is.
2909}
2910
2911void CompilerGLSL::replace_fragment_outputs()
2912{
2913 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
2914 auto &type = this->get<SPIRType>(var.basetype);
2915
2916 if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput)
2917 replace_fragment_output(var);
2918 });
2919}
2920
2921string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr)
2922{
2923 if (out_type.vecsize == input_components)
2924 return expr;
2925 else if (input_components == 1 && !backend.can_swizzle_scalar)
2926 return join(type_to_glsl(out_type), "(", expr, ")");
2927 else
2928 {
2929 // FIXME: This will not work with packed expressions.
2930 auto e = enclose_expression(expr) + ".";
2931 // Just clamp the swizzle index if we have more outputs than inputs.
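		// e.g. widening a 2-component expression to a vec4 output yields something like expr.xyyy.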
2932 for (uint32_t c = 0; c < out_type.vecsize; c++)
2933 e += index_to_swizzle(min(c, input_components - 1));
2934 if (backend.swizzle_is_function && out_type.vecsize > 1)
2935 e += "()";
2936
2937 remove_duplicate_swizzle(e);
2938 return e;
2939 }
2940}
2941
2942void CompilerGLSL::emit_pls()
2943{
2944 auto &execution = get_entry_point();
2945 if (execution.model != ExecutionModelFragment)
2946 SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders.");
2947
2948 if (!options.es)
2949 SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES.");
2950
2951 if (options.version < 300)
2952 SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above.");
2953
2954 if (!pls_inputs.empty())
2955 {
2956 statement("__pixel_local_inEXT _PLSIn");
2957 begin_scope();
2958 for (auto &input : pls_inputs)
2959 statement(pls_decl(input), ";");
2960 end_scope_decl();
2961 statement("");
2962 }
2963
2964 if (!pls_outputs.empty())
2965 {
2966 statement("__pixel_local_outEXT _PLSOut");
2967 begin_scope();
2968 for (auto &output : pls_outputs)
2969 statement(pls_decl(output), ";");
2970 end_scope_decl();
2971 statement("");
2972 }
2973}
2974
2975void CompilerGLSL::fixup_image_load_store_access()
2976{
2977 if (!options.enable_storage_image_qualifier_deduction)
2978 return;
2979
2980 ir.for_each_typed_id<SPIRVariable>([&](uint32_t var, const SPIRVariable &) {
2981 auto &vartype = expression_type(var);
2982 if (vartype.basetype == SPIRType::Image && vartype.image.sampled == 2)
2983 {
2984 // Very old glslangValidator and HLSL compilers do not emit required qualifiers here.
2985 // Solve this by making the image access as restricted as possible and loosen up if we need to.
2986 // If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing.
2987
2988 auto &flags = ir.meta[var].decoration.decoration_flags;
2989 if (!flags.get(DecorationNonWritable) && !flags.get(DecorationNonReadable))
2990 {
2991 flags.set(DecorationNonWritable);
2992 flags.set(DecorationNonReadable);
2993 }
2994 }
2995 });
2996}
2997
2998static bool is_block_builtin(BuiltIn builtin)
2999{
3000 return builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance ||
3001 builtin == BuiltInCullDistance;
3002}
3003
3004bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage)
3005{
3006 // If the builtin block uses XFB, we need to force explicit redeclaration of the builtin block.
3007
3008 if (storage != StorageClassOutput)
3009 return false;
3010 bool should_force = false;
3011
3012 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3013 if (should_force)
3014 return;
3015
3016 auto &type = this->get<SPIRType>(var.basetype);
3017 bool block = has_decoration(type.self, DecorationBlock);
3018 if (var.storage == storage && block && is_builtin_variable(var))
3019 {
3020 uint32_t member_count = uint32_t(type.member_types.size());
3021 for (uint32_t i = 0; i < member_count; i++)
3022 {
3023 if (has_member_decoration(type.self, i, DecorationBuiltIn) &&
3024 is_block_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn))) &&
3025 has_member_decoration(type.self, i, DecorationOffset))
3026 {
3027 should_force = true;
3028 }
3029 }
3030 }
3031 else if (var.storage == storage && !block && is_builtin_variable(var))
3032 {
3033 if (is_block_builtin(BuiltIn(get_decoration(type.self, DecorationBuiltIn))) &&
3034 has_decoration(var.self, DecorationOffset))
3035 {
3036 should_force = true;
3037 }
3038 }
3039 });
3040
3041 // If we're declaring clip/cull planes with control points we need to force block declaration.
3042 if (get_execution_model() == ExecutionModelTessellationControl &&
3043 (clip_distance_count || cull_distance_count))
3044 {
3045 should_force = true;
3046 }
3047
3048 return should_force;
3049}
3050
3051void CompilerGLSL::fixup_implicit_builtin_block_names()
3052{
3053 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3054 auto &type = this->get<SPIRType>(var.basetype);
3055 bool block = has_decoration(type.self, DecorationBlock);
3056 if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block &&
3057 is_builtin_variable(var))
3058 {
3059 // Make sure the array has a supported name in the code.
3060 if (var.storage == StorageClassOutput)
3061 set_name(var.self, "gl_out");
3062 else if (var.storage == StorageClassInput)
3063 set_name(var.self, "gl_in");
3064 }
3065 });
3066}
3067
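// Emits an explicit redeclaration of the gl_PerVertex builtin block for the given storage class.
// For a vertex shader which captures gl_Position with transform feedback, the emitted block
// looks roughly like this (illustrative only):
//
//   layout(xfb_buffer = 0, xfb_stride = 16) out gl_PerVertex
//   {
//       layout(xfb_offset = 0) vec4 gl_Position;
//   };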
3068void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model)
3069{
3070 Bitset emitted_builtins;
3071 Bitset global_builtins;
3072 const SPIRVariable *block_var = nullptr;
3073 bool emitted_block = false;
3074 bool builtin_array = false;
3075
	// Need to use the declared size from the type.
	// These variables might have been declared, but not statically used, so we haven't deduced their sizes yet.
3078 uint32_t cull_distance_size = 0;
3079 uint32_t clip_distance_size = 0;
3080
3081 bool have_xfb_buffer_stride = false;
3082 bool have_geom_stream = false;
3083 bool have_any_xfb_offset = false;
3084 uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0;
3085 std::unordered_map<uint32_t, uint32_t> builtin_xfb_offsets;
3086
3087 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3088 auto &type = this->get<SPIRType>(var.basetype);
3089 bool block = has_decoration(type.self, DecorationBlock);
3090 Bitset builtins;
3091
3092 if (var.storage == storage && block && is_builtin_variable(var))
3093 {
3094 uint32_t index = 0;
3095 for (auto &m : ir.meta[type.self].members)
3096 {
3097 if (m.builtin)
3098 {
3099 builtins.set(m.builtin_type);
3100 if (m.builtin_type == BuiltInCullDistance)
3101 cull_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index]));
3102 else if (m.builtin_type == BuiltInClipDistance)
3103 clip_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index]));
3104
3105 if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationOffset))
3106 {
3107 have_any_xfb_offset = true;
3108 builtin_xfb_offsets[m.builtin_type] = m.offset;
3109 }
3110
3111 if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream))
3112 {
3113 uint32_t stream = m.stream;
3114 if (have_geom_stream && geom_stream != stream)
3115 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
3116 have_geom_stream = true;
3117 geom_stream = stream;
3118 }
3119 }
3120 index++;
3121 }
3122
3123 if (storage == StorageClassOutput && has_decoration(var.self, DecorationXfbBuffer) &&
3124 has_decoration(var.self, DecorationXfbStride))
3125 {
3126 uint32_t buffer_index = get_decoration(var.self, DecorationXfbBuffer);
3127 uint32_t stride = get_decoration(var.self, DecorationXfbStride);
3128 if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
3129 SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
3130 if (have_xfb_buffer_stride && stride != xfb_stride)
					SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
3132 have_xfb_buffer_stride = true;
3133 xfb_buffer = buffer_index;
3134 xfb_stride = stride;
3135 }
3136
3137 if (storage == StorageClassOutput && has_decoration(var.self, DecorationStream))
3138 {
3139 uint32_t stream = get_decoration(var.self, DecorationStream);
3140 if (have_geom_stream && geom_stream != stream)
3141 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
3142 have_geom_stream = true;
3143 geom_stream = stream;
3144 }
3145 }
3146 else if (var.storage == storage && !block && is_builtin_variable(var))
3147 {
3148 // While we're at it, collect all declared global builtins (HLSL mostly ...).
3149 auto &m = ir.meta[var.self].decoration;
3150 if (m.builtin)
3151 {
3152 global_builtins.set(m.builtin_type);
3153 if (m.builtin_type == BuiltInCullDistance)
3154 cull_distance_size = to_array_size_literal(type);
3155 else if (m.builtin_type == BuiltInClipDistance)
3156 clip_distance_size = to_array_size_literal(type);
3157
3158 if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationXfbStride) &&
3159 m.decoration_flags.get(DecorationXfbBuffer) && m.decoration_flags.get(DecorationOffset))
3160 {
3161 have_any_xfb_offset = true;
3162 builtin_xfb_offsets[m.builtin_type] = m.offset;
3163 uint32_t buffer_index = m.xfb_buffer;
3164 uint32_t stride = m.xfb_stride;
3165 if (have_xfb_buffer_stride && buffer_index != xfb_buffer)
3166 SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch.");
3167 if (have_xfb_buffer_stride && stride != xfb_stride)
						SPIRV_CROSS_THROW("IO block member XfbStride mismatch.");
3169 have_xfb_buffer_stride = true;
3170 xfb_buffer = buffer_index;
3171 xfb_stride = stride;
3172 }
3173
3174 if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream))
3175 {
3176 uint32_t stream = get_decoration(var.self, DecorationStream);
3177 if (have_geom_stream && geom_stream != stream)
3178 SPIRV_CROSS_THROW("IO block member Stream mismatch.");
3179 have_geom_stream = true;
3180 geom_stream = stream;
3181 }
3182 }
3183 }
3184
3185 if (builtins.empty())
3186 return;
3187
3188 if (emitted_block)
3189 SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block.");
3190
3191 emitted_builtins = builtins;
3192 emitted_block = true;
3193 builtin_array = !type.array.empty();
3194 block_var = &var;
3195 });
3196
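	// Only Position, PointSize, ClipDistance and CullDistance can be members of gl_PerVertex,
	// so mask away any other global builtins collected above.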
3197 global_builtins =
3198 Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << BuiltInPointSize) |
3199 (1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance)));
3200
3201 // Try to collect all other declared builtins.
3202 if (!emitted_block)
3203 emitted_builtins = global_builtins;
3204
3205 // Can't declare an empty interface block.
3206 if (emitted_builtins.empty())
3207 return;
3208
3209 if (storage == StorageClassOutput)
3210 {
3211 SmallVector<string> attr;
3212 if (have_xfb_buffer_stride && have_any_xfb_offset)
3213 {
3214 if (!options.es)
3215 {
				if (options.version < 140)
					SPIRV_CROSS_THROW("Need GLSL 1.40 and GL_ARB_enhanced_layouts for xfb_buffer or xfb_stride.");
				else if (options.version < 440)
					require_extension_internal("GL_ARB_enhanced_layouts");
			}
			else
3224 SPIRV_CROSS_THROW("Need GL_ARB_enhanced_layouts for xfb_stride or xfb_buffer.");
3225 attr.push_back(join("xfb_buffer = ", xfb_buffer, ", xfb_stride = ", xfb_stride));
3226 }
3227
3228 if (have_geom_stream)
3229 {
3230 if (get_execution_model() != ExecutionModelGeometry)
3231 SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders.");
3232 if (options.es)
3233 SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL.");
3234 if (options.version < 400)
3235 require_extension_internal("GL_ARB_transform_feedback3");
3236 attr.push_back(join("stream = ", geom_stream));
3237 }
3238
3239 if (!attr.empty())
3240 statement("layout(", merge(attr), ") out gl_PerVertex");
3241 else
3242 statement("out gl_PerVertex");
3243 }
3244 else
3245 {
		// If the shader uses geometry passthrough, gl_PerVertex must be declared as passthrough as well.
3247 if (get_entry_point().geometry_passthrough)
3248 statement("layout(passthrough) in gl_PerVertex");
3249 else
3250 statement("in gl_PerVertex");
3251 }
3252
3253 begin_scope();
3254 if (emitted_builtins.get(BuiltInPosition))
3255 {
3256 auto itr = builtin_xfb_offsets.find(BuiltInPosition);
3257 if (itr != end(builtin_xfb_offsets))
3258 statement("layout(xfb_offset = ", itr->second, ") vec4 gl_Position;");
3259 else
3260 statement("vec4 gl_Position;");
3261 }
3262
3263 if (emitted_builtins.get(BuiltInPointSize))
3264 {
3265 auto itr = builtin_xfb_offsets.find(BuiltInPointSize);
3266 if (itr != end(builtin_xfb_offsets))
3267 statement("layout(xfb_offset = ", itr->second, ") float gl_PointSize;");
3268 else
3269 statement("float gl_PointSize;");
3270 }
3271
3272 if (emitted_builtins.get(BuiltInClipDistance))
3273 {
3274 auto itr = builtin_xfb_offsets.find(BuiltInClipDistance);
3275 if (itr != end(builtin_xfb_offsets))
3276 statement("layout(xfb_offset = ", itr->second, ") float gl_ClipDistance[", clip_distance_size, "];");
3277 else
3278 statement("float gl_ClipDistance[", clip_distance_size, "];");
3279 }
3280
3281 if (emitted_builtins.get(BuiltInCullDistance))
3282 {
3283 auto itr = builtin_xfb_offsets.find(BuiltInCullDistance);
3284 if (itr != end(builtin_xfb_offsets))
3285 statement("layout(xfb_offset = ", itr->second, ") float gl_CullDistance[", cull_distance_size, "];");
3286 else
3287 statement("float gl_CullDistance[", cull_distance_size, "];");
3288 }
3289
3290 if (builtin_array)
3291 {
3292 if (model == ExecutionModelTessellationControl && storage == StorageClassOutput)
3293 end_scope_decl(join(to_name(block_var->self), "[", get_entry_point().output_vertices, "]"));
3294 else
3295 end_scope_decl(join(to_name(block_var->self), "[]"));
3296 }
3297 else
3298 end_scope_decl();
3299 statement("");
3300}
3301
3302void CompilerGLSL::declare_undefined_values()
3303{
3304 bool emitted = false;
3305 ir.for_each_typed_id<SPIRUndef>([&](uint32_t, const SPIRUndef &undef) {
3306 auto &type = this->get<SPIRType>(undef.basetype);
3307 // OpUndef can be void for some reason ...
3308 if (type.basetype == SPIRType::Void)
3309 return;
3310
3311 string initializer;
3312 if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
3313 initializer = join(" = ", to_zero_initialized_expression(undef.basetype));
3314
3315 statement(variable_decl(type, to_name(undef.self), undef.self), initializer, ";");
3316 emitted = true;
3317 });
3318
3319 if (emitted)
3320 statement("");
3321}
3322
3323bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const
3324{
3325 bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable;
3326
3327 if (statically_assigned)
3328 {
3329 auto *constant = maybe_get<SPIRConstant>(var.static_expression);
3330 if (constant && constant->is_used_as_lut)
3331 return true;
3332 }
3333
3334 return false;
3335}
3336
3337void CompilerGLSL::emit_resources()
3338{
3339 auto &execution = get_entry_point();
3340
3341 replace_illegal_names();
3342
3343 // Legacy GL uses gl_FragData[], redeclare all fragment outputs
3344 // with builtins.
3345 if (execution.model == ExecutionModelFragment && is_legacy())
3346 replace_fragment_outputs();
3347
3348 // Emit PLS blocks if we have such variables.
3349 if (!pls_inputs.empty() || !pls_outputs.empty())
3350 emit_pls();
3351
3352 switch (execution.model)
3353 {
3354 case ExecutionModelGeometry:
3355 case ExecutionModelTessellationControl:
3356 case ExecutionModelTessellationEvaluation:
3357 fixup_implicit_builtin_block_names();
3358 break;
3359
3360 default:
3361 break;
3362 }
3363
3364 // Emit custom gl_PerVertex for SSO compatibility.
3365 if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment)
3366 {
3367 switch (execution.model)
3368 {
3369 case ExecutionModelGeometry:
3370 case ExecutionModelTessellationControl:
3371 case ExecutionModelTessellationEvaluation:
3372 emit_declared_builtin_block(StorageClassInput, execution.model);
3373 emit_declared_builtin_block(StorageClassOutput, execution.model);
3374 break;
3375
3376 case ExecutionModelVertex:
3377 emit_declared_builtin_block(StorageClassOutput, execution.model);
3378 break;
3379
3380 default:
3381 break;
3382 }
3383 }
3384 else if (should_force_emit_builtin_block(StorageClassOutput))
3385 {
3386 emit_declared_builtin_block(StorageClassOutput, execution.model);
3387 }
3388 else if (execution.geometry_passthrough)
3389 {
3390 // Need to declare gl_in with Passthrough.
3391 // If we're doing passthrough, we cannot emit an output block, so the output block test above will never pass.
3392 emit_declared_builtin_block(StorageClassInput, execution.model);
3393 }
3394 else
3395 {
3396 // Need to redeclare clip/cull distance with explicit size to use them.
3397 // SPIR-V mandates these builtins have a size declared.
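		// E.g. a shader using two user clip planes gets "out float gl_ClipDistance[2];".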
3398 const char *storage = execution.model == ExecutionModelFragment ? "in" : "out";
3399 if (clip_distance_count != 0)
3400 statement(storage, " float gl_ClipDistance[", clip_distance_count, "];");
3401 if (cull_distance_count != 0)
3402 statement(storage, " float gl_CullDistance[", cull_distance_count, "];");
3403 if (clip_distance_count != 0 || cull_distance_count != 0)
3404 statement("");
3405 }
3406
3407 if (position_invariant)
3408 {
3409 statement("invariant gl_Position;");
3410 statement("");
3411 }
3412
3413 bool emitted = false;
3414
	// When emitting Vulkan GLSL, emit specialization constants as true specialization constants;
	// spec op expressions will redirect to the constant name.
	// For plain GLSL, they become constants backed by a macro which can be overridden at compile time.
	//
3419 {
3420 auto loop_lock = ir.create_loop_hard_lock();
3421 for (auto &id_ : ir.ids_for_constant_or_type)
3422 {
3423 auto &id = ir.ids[id_];
3424
3425 if (id.get_type() == TypeConstant)
3426 {
3427 auto &c = id.get<SPIRConstant>();
3428
3429 bool needs_declaration = c.specialization || c.is_used_as_lut;
3430
3431 if (needs_declaration)
3432 {
3433 if (!options.vulkan_semantics && c.specialization)
3434 {
3435 c.specialization_constant_macro_name =
3436 constant_value_macro_name(get_decoration(c.self, DecorationSpecId));
3437 }
3438 emit_constant(c);
3439 emitted = true;
3440 }
3441 }
3442 else if (id.get_type() == TypeConstantOp)
3443 {
3444 emit_specialization_constant_op(id.get<SPIRConstantOp>());
3445 emitted = true;
3446 }
3447 else if (id.get_type() == TypeType)
3448 {
3449 auto *type = &id.get<SPIRType>();
3450
3451 bool is_natural_struct = type->basetype == SPIRType::Struct && type->array.empty() && !type->pointer &&
3452 (!has_decoration(type->self, DecorationBlock) &&
3453 !has_decoration(type->self, DecorationBufferBlock));
3454
3455 // Special case, ray payload and hit attribute blocks are not really blocks, just regular structs.
3456 if (type->basetype == SPIRType::Struct && type->pointer &&
3457 has_decoration(type->self, DecorationBlock) &&
3458 (type->storage == StorageClassRayPayloadKHR || type->storage == StorageClassIncomingRayPayloadKHR ||
3459 type->storage == StorageClassHitAttributeKHR))
3460 {
3461 type = &get<SPIRType>(type->parent_type);
3462 is_natural_struct = true;
3463 }
3464
3465 if (is_natural_struct)
3466 {
3467 if (emitted)
3468 statement("");
3469 emitted = false;
3470
3471 emit_struct(*type);
3472 }
3473 }
3474 }
3475 }
3476
3477 if (emitted)
3478 statement("");
3479
3480 // If we needed to declare work group size late, check here.
3481 // If the work group size depends on a specialization constant, we need to declare the layout() block
3482 // after constants (and their macros) have been declared.
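	// For plain GLSL this typically ends up looking something like:
	//   layout(local_size_x = SPIRV_CROSS_CONSTANT_ID_0, local_size_y = 1, local_size_z = 1) in;
	// where the macro carries the specialization constant's default value and can be overridden at compile time.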
3483 if (execution.model == ExecutionModelGLCompute && !options.vulkan_semantics &&
3484 (execution.workgroup_size.constant != 0 || execution.flags.get(ExecutionModeLocalSizeId)))
3485 {
3486 SpecializationConstant wg_x, wg_y, wg_z;
3487 get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
3488
3489 if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0)))
3490 {
3491 SmallVector<string> inputs;
3492 build_workgroup_size(inputs, wg_x, wg_y, wg_z);
3493 statement("layout(", merge(inputs), ") in;");
3494 statement("");
3495 }
3496 }
3497
3498 emitted = false;
3499
3500 if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
3501 {
3502 for (auto type : physical_storage_non_block_pointer_types)
3503 {
3504 emit_buffer_reference_block(type, false);
3505 }
3506
3507 // Output buffer reference blocks.
3508 // Do this in two stages, one with forward declaration,
3509 // and one without. Buffer reference blocks can reference themselves
3510 // to support things like linked lists.
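		// The two passes produce declarations roughly along these lines (illustrative only):
		//   layout(buffer_reference) buffer Node;
		//   layout(buffer_reference, std430) buffer Node { int value; Node next; };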
3511 ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
3512 if (type.basetype == SPIRType::Struct && type.pointer &&
3513 type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
3514 type.storage == StorageClassPhysicalStorageBufferEXT)
3515 {
3516 emit_buffer_reference_block(self, true);
3517 }
3518 });
3519
3520 ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
3521 if (type.basetype == SPIRType::Struct &&
3522 type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
3523 type.storage == StorageClassPhysicalStorageBufferEXT)
3524 {
3525 emit_buffer_reference_block(self, false);
3526 }
3527 });
3528 }
3529
3530 // Output UBOs and SSBOs
3531 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3532 auto &type = this->get<SPIRType>(var.basetype);
3533
3534 bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform ||
3535 type.storage == StorageClassShaderRecordBufferKHR;
3536 bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
3537 ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
3538
3539 if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
3540 has_block_flags)
3541 {
3542 emit_buffer_block(var);
3543 }
3544 });
3545
3546 // Output push constant blocks
3547 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3548 auto &type = this->get<SPIRType>(var.basetype);
3549 if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant &&
3550 !is_hidden_variable(var))
3551 {
3552 emit_push_constant_block(var);
3553 }
3554 });
3555
3556 bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics;
3557
3558 // Output Uniform Constants (values, samplers, images, etc).
3559 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3560 auto &type = this->get<SPIRType>(var.basetype);
3561
3562 // If we're remapping separate samplers and images, only emit the combined samplers.
3563 if (skip_separate_image_sampler)
3564 {
3565 // Sampler buffers are always used without a sampler, and they will also work in regular GL.
3566 bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer;
3567 bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
3568 bool separate_sampler = type.basetype == SPIRType::Sampler;
3569 if (!sampler_buffer && (separate_image || separate_sampler))
3570 return;
3571 }
3572
3573 if (var.storage != StorageClassFunction && type.pointer &&
3574 (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter ||
3575 type.storage == StorageClassRayPayloadKHR || type.storage == StorageClassIncomingRayPayloadKHR ||
3576 type.storage == StorageClassCallableDataKHR || type.storage == StorageClassIncomingCallableDataKHR ||
3577 type.storage == StorageClassHitAttributeKHR) &&
3578 !is_hidden_variable(var))
3579 {
3580 emit_uniform(var);
3581 emitted = true;
3582 }
3583 });
3584
3585 if (emitted)
3586 statement("");
3587 emitted = false;
3588
3589 bool emitted_base_instance = false;
3590
3591 // Output in/out interfaces.
3592 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
3593 auto &type = this->get<SPIRType>(var.basetype);
3594
3595 bool is_hidden = is_hidden_variable(var);
3596
3597 // Unused output I/O variables might still be required to implement framebuffer fetch.
3598 if (var.storage == StorageClassOutput && !is_legacy() &&
3599 location_is_framebuffer_fetch(get_decoration(var.self, DecorationLocation)) != 0)
3600 {
3601 is_hidden = false;
3602 }
3603
3604 if (var.storage != StorageClassFunction && type.pointer &&
3605 (var.storage == StorageClassInput || var.storage == StorageClassOutput) &&
3606 interface_variable_exists_in_entry_point(var.self) && !is_hidden)
3607 {
3608 if (options.es && get_execution_model() == ExecutionModelVertex && var.storage == StorageClassInput &&
3609 type.array.size() == 1)
3610 {
3611 SPIRV_CROSS_THROW("OpenGL ES doesn't support array input variables in vertex shader.");
3612 }
3613 emit_interface_block(var);
3614 emitted = true;
3615 }
3616 else if (is_builtin_variable(var))
3617 {
3618 auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn));
3619 // For gl_InstanceIndex emulation on GLES, the API user needs to
3620 // supply this uniform.
3621
3622 // The draw parameter extension is soft-enabled on GL with some fallbacks.
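			// If the extension is unavailable at compile time, the application is expected to upload
			// the draw call's base instance / base vertex to these uniforms itself.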
3623 if (!options.vulkan_semantics)
3624 {
3625 if (!emitted_base_instance &&
3626 ((options.vertex.support_nonzero_base_instance && builtin == BuiltInInstanceIndex) ||
3627 (builtin == BuiltInBaseInstance)))
3628 {
3629 statement("#ifdef GL_ARB_shader_draw_parameters");
3630 statement("#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB");
3631 statement("#else");
					// A crude but simple workaround which should be good enough for non-indirect draws.
3633 statement("uniform int SPIRV_Cross_BaseInstance;");
3634 statement("#endif");
3635 emitted = true;
3636 emitted_base_instance = true;
3637 }
3638 else if (builtin == BuiltInBaseVertex)
3639 {
3640 statement("#ifdef GL_ARB_shader_draw_parameters");
3641 statement("#define SPIRV_Cross_BaseVertex gl_BaseVertexARB");
3642 statement("#else");
					// A crude but simple workaround which should be good enough for non-indirect draws.
3644 statement("uniform int SPIRV_Cross_BaseVertex;");
3645 statement("#endif");
3646 }
3647 else if (builtin == BuiltInDrawIndex)
3648 {
3649 statement("#ifndef GL_ARB_shader_draw_parameters");
3650 // Cannot really be worked around.
3651 statement("#error GL_ARB_shader_draw_parameters is not supported.");
3652 statement("#endif");
3653 }
3654 }
3655 }
3656 });
3657
3658 // Global variables.
3659 for (auto global : global_variables)
3660 {
3661 auto &var = get<SPIRVariable>(global);
3662 if (is_hidden_variable(var, true))
3663 continue;
3664
3665 if (var.storage != StorageClassOutput)
3666 {
3667 if (!variable_is_lut(var))
3668 {
3669 add_resource_name(var.self);
3670
3671 string initializer;
3672 if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate &&
3673 !var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var)))
3674 {
3675 initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var)));
3676 }
3677
3678 statement(variable_decl(var), initializer, ";");
3679 emitted = true;
3680 }
3681 }
3682 else if (var.initializer && maybe_get<SPIRConstant>(var.initializer) != nullptr)
3683 {
3684 emit_output_variable_initializer(var);
3685 }
3686 }
3687
3688 if (emitted)
3689 statement("");
3690
3691 declare_undefined_values();
3692}
3693
3694void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var)
3695{
3696 // If a StorageClassOutput variable has an initializer, we need to initialize it in main().
3697 auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point);
3698 auto &type = get<SPIRType>(var.basetype);
3699 bool is_patch = has_decoration(var.self, DecorationPatch);
3700 bool is_block = has_decoration(type.self, DecorationBlock);
3701 bool is_control_point = get_execution_model() == ExecutionModelTessellationControl && !is_patch;
3702
3703 if (is_block)
3704 {
3705 uint32_t member_count = uint32_t(type.member_types.size());
3706 bool type_is_array = type.array.size() == 1;
3707 uint32_t array_size = 1;
3708 if (type_is_array)
3709 array_size = to_array_size_literal(type);
3710 uint32_t iteration_count = is_control_point ? 1 : array_size;
3711
3712 // If the initializer is a block, we must initialize each block member one at a time.
3713 for (uint32_t i = 0; i < member_count; i++)
3714 {
3715 // These outputs might not have been properly declared, so don't initialize them in that case.
3716 if (has_member_decoration(type.self, i, DecorationBuiltIn))
3717 {
3718 if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInCullDistance &&
3719 !cull_distance_count)
3720 continue;
3721
3722 if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInClipDistance &&
3723 !clip_distance_count)
3724 continue;
3725 }
3726
3727 // We need to build a per-member array first, essentially transposing from AoS to SoA.
3728 // This code path hits when we have an array of blocks.
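			// E.g. for an array of two blocks, member i is gathered across both array elements into a
			// temporary like "_<var>_<i>_init = T[](c[0].member_i, c[1].member_i)" and copied back element by element below.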
3729 string lut_name;
3730 if (type_is_array)
3731 {
3732 lut_name = join("_", var.self, "_", i, "_init");
3733 uint32_t member_type_id = get<SPIRType>(var.basetype).member_types[i];
3734 auto &member_type = get<SPIRType>(member_type_id);
3735 auto array_type = member_type;
3736 array_type.parent_type = member_type_id;
3737 array_type.array.push_back(array_size);
3738 array_type.array_size_literal.push_back(true);
3739
3740 SmallVector<string> exprs;
3741 exprs.reserve(array_size);
3742 auto &c = get<SPIRConstant>(var.initializer);
3743 for (uint32_t j = 0; j < array_size; j++)
3744 exprs.push_back(to_expression(get<SPIRConstant>(c.subconstants[j]).subconstants[i]));
3745 statement("const ", type_to_glsl(array_type), " ", lut_name, type_to_array_glsl(array_type), " = ",
3746 type_to_glsl_constructor(array_type), "(", merge(exprs, ", "), ");");
3747 }
3748
3749 for (uint32_t j = 0; j < iteration_count; j++)
3750 {
3751 entry_func.fixup_hooks_in.push_back([=, &var]() {
3752 AccessChainMeta meta;
3753 auto &c = this->get<SPIRConstant>(var.initializer);
3754
3755 uint32_t invocation_id = 0;
3756 uint32_t member_index_id = 0;
3757 if (is_control_point)
3758 {
3759 uint32_t ids = ir.increase_bound_by(3);
3760 SPIRType uint_type;
3761 uint_type.basetype = SPIRType::UInt;
3762 uint_type.width = 32;
3763 set<SPIRType>(ids, uint_type);
3764 set<SPIRExpression>(ids + 1, builtin_to_glsl(BuiltInInvocationId, StorageClassInput), ids, true);
3765 set<SPIRConstant>(ids + 2, ids, i, false);
3766 invocation_id = ids + 1;
3767 member_index_id = ids + 2;
3768 }
3769
3770 if (is_patch)
3771 {
3772 statement("if (gl_InvocationID == 0)");
3773 begin_scope();
3774 }
3775
3776 if (type_is_array && !is_control_point)
3777 {
3778 uint32_t indices[2] = { j, i };
3779 auto chain = access_chain_internal(var.self, indices, 2, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
3780 statement(chain, " = ", lut_name, "[", j, "];");
3781 }
3782 else if (is_control_point)
3783 {
3784 uint32_t indices[2] = { invocation_id, member_index_id };
3785 auto chain = access_chain_internal(var.self, indices, 2, 0, &meta);
3786 statement(chain, " = ", lut_name, "[", builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "];");
3787 }
3788 else
3789 {
3790 auto chain =
3791 access_chain_internal(var.self, &i, 1, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta);
3792 statement(chain, " = ", to_expression(c.subconstants[i]), ";");
3793 }
3794
3795 if (is_patch)
3796 end_scope();
3797 });
3798 }
3799 }
3800 }
3801 else if (is_control_point)
3802 {
3803 auto lut_name = join("_", var.self, "_init");
3804 statement("const ", type_to_glsl(type), " ", lut_name, type_to_array_glsl(type),
3805 " = ", to_expression(var.initializer), ";");
3806 entry_func.fixup_hooks_in.push_back([&, lut_name]() {
3807 statement(to_expression(var.self), "[gl_InvocationID] = ", lut_name, "[gl_InvocationID];");
3808 });
3809 }
3810 else if (has_decoration(var.self, DecorationBuiltIn) &&
3811 BuiltIn(get_decoration(var.self, DecorationBuiltIn)) == BuiltInSampleMask)
3812 {
3813 // We cannot copy the array since gl_SampleMask is unsized in GLSL. Unroll time! <_<
3814 entry_func.fixup_hooks_in.push_back([&] {
3815 auto &c = this->get<SPIRConstant>(var.initializer);
3816 uint32_t num_constants = uint32_t(c.subconstants.size());
3817 for (uint32_t i = 0; i < num_constants; i++)
3818 {
3819 // Don't use to_expression on constant since it might be uint, just fish out the raw int.
3820 statement(to_expression(var.self), "[", i, "] = ",
3821 convert_to_string(this->get<SPIRConstant>(c.subconstants[i]).scalar_i32()), ";");
3822 }
3823 });
3824 }
3825 else
3826 {
3827 auto lut_name = join("_", var.self, "_init");
3828 statement("const ", type_to_glsl(type), " ", lut_name,
3829 type_to_array_glsl(type), " = ", to_expression(var.initializer), ";");
3830 entry_func.fixup_hooks_in.push_back([&, lut_name, is_patch]() {
3831 if (is_patch)
3832 {
3833 statement("if (gl_InvocationID == 0)");
3834 begin_scope();
3835 }
3836 statement(to_expression(var.self), " = ", lut_name, ";");
3837 if (is_patch)
3838 end_scope();
3839 });
3840 }
3841}
3842
3843void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model)
3844{
3845 static const char *workaround_types[] = { "int", "ivec2", "ivec3", "ivec4", "uint", "uvec2", "uvec3", "uvec4",
3846 "float", "vec2", "vec3", "vec4", "double", "dvec2", "dvec3", "dvec4" };
3847
3848 if (!options.vulkan_semantics)
3849 {
3850 using Supp = ShaderSubgroupSupportHelper;
3851 auto result = shader_subgroup_supporter.resolve();
3852
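		// Each requested subgroup feature below is emitted as an #if/#elif chain over the candidate
		// extensions, so the generated shader picks the first available fallback at compile time, e.g.:
		//   #if defined(GL_NV_shader_thread_group)
		//   #define gl_SubgroupSize gl_WarpSizeNV
		//   #elif defined(GL_ARB_shader_ballot)
		//   #define gl_SubgroupSize gl_SubGroupSizeARB
		//   #endif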
3853 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMask))
3854 {
3855 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupMask, result);
3856
3857 for (auto &e : exts)
3858 {
3859 const char *name = Supp::get_extension_name(e);
3860 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3861
3862 switch (e)
3863 {
3864 case Supp::NV_shader_thread_group:
3865 statement("#define gl_SubgroupEqMask uvec4(gl_ThreadEqMaskNV, 0u, 0u, 0u)");
3866 statement("#define gl_SubgroupGeMask uvec4(gl_ThreadGeMaskNV, 0u, 0u, 0u)");
3867 statement("#define gl_SubgroupGtMask uvec4(gl_ThreadGtMaskNV, 0u, 0u, 0u)");
3868 statement("#define gl_SubgroupLeMask uvec4(gl_ThreadLeMaskNV, 0u, 0u, 0u)");
3869 statement("#define gl_SubgroupLtMask uvec4(gl_ThreadLtMaskNV, 0u, 0u, 0u)");
3870 break;
3871 case Supp::ARB_shader_ballot:
3872 statement("#define gl_SubgroupEqMask uvec4(unpackUint2x32(gl_SubGroupEqMaskARB), 0u, 0u)");
3873 statement("#define gl_SubgroupGeMask uvec4(unpackUint2x32(gl_SubGroupGeMaskARB), 0u, 0u)");
3874 statement("#define gl_SubgroupGtMask uvec4(unpackUint2x32(gl_SubGroupGtMaskARB), 0u, 0u)");
3875 statement("#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u)");
3876 statement("#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u)");
3877 break;
3878 default:
3879 break;
3880 }
3881 }
3882 statement("#endif");
3883 statement("");
3884 }
3885
3886 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupSize))
3887 {
3888 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupSize, result);
3889
3890 for (auto &e : exts)
3891 {
3892 const char *name = Supp::get_extension_name(e);
3893 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3894
3895 switch (e)
3896 {
3897 case Supp::NV_shader_thread_group:
3898 statement("#define gl_SubgroupSize gl_WarpSizeNV");
3899 break;
3900 case Supp::ARB_shader_ballot:
3901 statement("#define gl_SubgroupSize gl_SubGroupSizeARB");
3902 break;
3903 case Supp::AMD_gcn_shader:
3904 statement("#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)");
3905 break;
3906 default:
3907 break;
3908 }
3909 }
3910 statement("#endif");
3911 statement("");
3912 }
3913
3914 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInvocationID))
3915 {
3916 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupInvocationID, result);
3917
3918 for (auto &e : exts)
3919 {
3920 const char *name = Supp::get_extension_name(e);
3921 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3922
3923 switch (e)
3924 {
3925 case Supp::NV_shader_thread_group:
3926 statement("#define gl_SubgroupInvocationID gl_ThreadInWarpNV");
3927 break;
3928 case Supp::ARB_shader_ballot:
3929 statement("#define gl_SubgroupInvocationID gl_SubGroupInvocationARB");
3930 break;
3931 default:
3932 break;
3933 }
3934 }
3935 statement("#endif");
3936 statement("");
3937 }
3938
3939 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupID))
3940 {
3941 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupID, result);
3942
3943 for (auto &e : exts)
3944 {
3945 const char *name = Supp::get_extension_name(e);
3946 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3947
3948 switch (e)
3949 {
3950 case Supp::NV_shader_thread_group:
3951 statement("#define gl_SubgroupID gl_WarpIDNV");
3952 break;
3953 default:
3954 break;
3955 }
3956 }
3957 statement("#endif");
3958 statement("");
3959 }
3960
3961 if (shader_subgroup_supporter.is_feature_requested(Supp::NumSubgroups))
3962 {
3963 auto exts = Supp::get_candidates_for_feature(Supp::NumSubgroups, result);
3964
3965 for (auto &e : exts)
3966 {
3967 const char *name = Supp::get_extension_name(e);
3968 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3969
3970 switch (e)
3971 {
3972 case Supp::NV_shader_thread_group:
3973 statement("#define gl_NumSubgroups gl_WarpsPerSMNV");
3974 break;
3975 default:
3976 break;
3977 }
3978 }
3979 statement("#endif");
3980 statement("");
3981 }
3982
3983 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBroadcast_First))
3984 {
3985 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBroadcast_First, result);
3986
3987 for (auto &e : exts)
3988 {
3989 const char *name = Supp::get_extension_name(e);
3990 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
3991
3992 switch (e)
3993 {
3994 case Supp::NV_shader_thread_shuffle:
3995 for (const char *t : workaround_types)
3996 {
3997 statement(t, " subgroupBroadcastFirst(", t,
3998 " value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }");
3999 }
4000 for (const char *t : workaround_types)
4001 {
4002 statement(t, " subgroupBroadcast(", t,
4003 " value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }");
4004 }
4005 break;
4006 case Supp::ARB_shader_ballot:
4007 for (const char *t : workaround_types)
4008 {
4009 statement(t, " subgroupBroadcastFirst(", t,
4010 " value) { return readFirstInvocationARB(value); }");
4011 }
4012 for (const char *t : workaround_types)
4013 {
4014 statement(t, " subgroupBroadcast(", t,
4015 " value, uint id) { return readInvocationARB(value, id); }");
4016 }
4017 break;
4018 default:
4019 break;
4020 }
4021 }
4022 statement("#endif");
4023 statement("");
4024 }
4025
4026 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotFindLSB_MSB))
4027 {
4028 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallotFindLSB_MSB, result);
4029
4030 for (auto &e : exts)
4031 {
4032 const char *name = Supp::get_extension_name(e);
4033 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
4034
4035 switch (e)
4036 {
4037 case Supp::NV_shader_thread_group:
4038 statement("uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); }");
4039 statement("uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); }");
4040 break;
4041 default:
4042 break;
4043 }
4044 }
4045 statement("#else");
4046 statement("uint subgroupBallotFindLSB(uvec4 value)");
4047 begin_scope();
4048 statement("int firstLive = findLSB(value.x);");
4049 statement("return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32));");
4050 end_scope();
4051 statement("uint subgroupBallotFindMSB(uvec4 value)");
4052 begin_scope();
4053 statement("int firstLive = findMSB(value.y);");
4054 statement("return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));");
4055 end_scope();
4056 statement("#endif");
4057 statement("");
4058 }
4059
4060 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAll_Any_AllEqualBool))
4061 {
4062 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupAll_Any_AllEqualBool, result);
4063
4064 for (auto &e : exts)
4065 {
4066 const char *name = Supp::get_extension_name(e);
4067 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
4068
4069 switch (e)
4070 {
4071 case Supp::NV_gpu_shader_5:
4072 statement("bool subgroupAll(bool value) { return allThreadsNV(value); }");
4073 statement("bool subgroupAny(bool value) { return anyThreadNV(value); }");
4074 statement("bool subgroupAllEqual(bool value) { return allThreadsEqualNV(value); }");
4075 break;
4076 case Supp::ARB_shader_group_vote:
4077 statement("bool subgroupAll(bool v) { return allInvocationsARB(v); }");
4078 statement("bool subgroupAny(bool v) { return anyInvocationARB(v); }");
4079 statement("bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }");
4080 break;
4081 case Supp::AMD_gcn_shader:
4082 statement("bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); }");
4083 statement("bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; }");
4084 statement("bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || "
4085 "b == ballotAMD(true); }");
4086 break;
4087 default:
4088 break;
4089 }
4090 }
4091 statement("#endif");
4092 statement("");
4093 }
4094
4095 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAllEqualT))
4096 {
4097 statement("#ifndef GL_KHR_shader_subgroup_vote");
4098 statement(
4099 "#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return "
4100 "subgroupAllEqual(subgroupBroadcastFirst(value) == value); }");
4101 for (const char *t : workaround_types)
4102 statement("_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(", t, ")");
4103 statement("#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND");
4104 statement("#endif");
4105 statement("");
4106 }
4107
4108 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallot))
4109 {
4110 auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallot, result);
4111
4112 for (auto &e : exts)
4113 {
4114 const char *name = Supp::get_extension_name(e);
4115 statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")");
4116
4117 switch (e)
4118 {
4119 case Supp::NV_shader_thread_group:
4120 statement("uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); }");
4121 break;
4122 case Supp::ARB_shader_ballot:
4123 statement("uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }");
4124 break;
4125 default:
4126 break;
4127 }
4128 }
4129 statement("#endif");
4130 statement("");
4131 }
4132
4133 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupElect))
4134 {
4135 statement("#ifndef GL_KHR_shader_subgroup_basic");
4136 statement("bool subgroupElect()");
4137 begin_scope();
4138 statement("uvec4 activeMask = subgroupBallot(true);");
4139 statement("uint firstLive = subgroupBallotFindLSB(activeMask);");
4140 statement("return gl_SubgroupInvocationID == firstLive;");
4141 end_scope();
4142 statement("#endif");
4143 statement("");
4144 }
4145
4146 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBarrier))
4147 {
			// The extensions we're using in place of GL_KHR_shader_subgroup_basic state
			// that subgroups execute in lockstep, so this barrier is implicit.
			// However, the GL 4.6 spec also states that `barrier` implies a shared memory barrier,
			// and a specific test of optimizing scans by leveraging lock-step invocation execution
			// has shown that a `memoryBarrierShared` is needed in place of a `subgroupBarrier`.
4153 // https://github.com/buildaworldnet/IrrlichtBAW/commit/d8536857991b89a30a6b65d29441e51b64c2c7ad#diff-9f898d27be1ea6fc79b03d9b361e299334c1a347b6e4dc344ee66110c6aa596aR19
4154 statement("#ifndef GL_KHR_shader_subgroup_basic");
4155 statement("void subgroupBarrier() { memoryBarrierShared(); }");
4156 statement("#endif");
4157 statement("");
4158 }
4159
4160 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMemBarrier))
4161 {
4162 if (model == spv::ExecutionModelGLCompute)
4163 {
4164 statement("#ifndef GL_KHR_shader_subgroup_basic");
4165 statement("void subgroupMemoryBarrier() { groupMemoryBarrier(); }");
4166 statement("void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); }");
4167 statement("void subgroupMemoryBarrierShared() { memoryBarrierShared(); }");
4168 statement("void subgroupMemoryBarrierImage() { groupMemoryBarrier(); }");
4169 statement("#endif");
4170 }
4171 else
4172 {
4173 statement("#ifndef GL_KHR_shader_subgroup_basic");
4174 statement("void subgroupMemoryBarrier() { memoryBarrier(); }");
4175 statement("void subgroupMemoryBarrierBuffer() { memoryBarrierBuffer(); }");
4176 statement("void subgroupMemoryBarrierImage() { memoryBarrierImage(); }");
4177 statement("#endif");
4178 }
4179 statement("");
4180 }
4181
4182 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInverseBallot_InclBitCount_ExclBitCout))
4183 {
4184 statement("#ifndef GL_KHR_shader_subgroup_ballot");
4185 statement("bool subgroupInverseBallot(uvec4 value)");
4186 begin_scope();
4187 statement("return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));");
4188 end_scope();
4189
4190 statement("uint subgroupBallotInclusiveBitCount(uvec4 value)");
4191 begin_scope();
4192 statement("uvec2 v = value.xy & gl_SubgroupLeMask.xy;");
4193 statement("ivec2 c = bitCount(v);");
4194 statement_no_indent("#ifdef GL_NV_shader_thread_group");
4195 statement("return uint(c.x);");
4196 statement_no_indent("#else");
4197 statement("return uint(c.x + c.y);");
4198 statement_no_indent("#endif");
4199 end_scope();
4200
4201 statement("uint subgroupBallotExclusiveBitCount(uvec4 value)");
4202 begin_scope();
4203 statement("uvec2 v = value.xy & gl_SubgroupLtMask.xy;");
4204 statement("ivec2 c = bitCount(v);");
4205 statement_no_indent("#ifdef GL_NV_shader_thread_group");
4206 statement("return uint(c.x);");
4207 statement_no_indent("#else");
4208 statement("return uint(c.x + c.y);");
4209 statement_no_indent("#endif");
4210 end_scope();
4211 statement("#endif");
4212 statement("");
4213 }
4214
4215 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitCount))
4216 {
4217 statement("#ifndef GL_KHR_shader_subgroup_ballot");
4218 statement("uint subgroupBallotBitCount(uvec4 value)");
4219 begin_scope();
4220 statement("ivec2 c = bitCount(value.xy);");
4221 statement_no_indent("#ifdef GL_NV_shader_thread_group");
4222 statement("return uint(c.x);");
4223 statement_no_indent("#else");
4224 statement("return uint(c.x + c.y);");
4225 statement_no_indent("#endif");
4226 end_scope();
4227 statement("#endif");
4228 statement("");
4229 }
4230
4231 if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitExtract))
4232 {
4233 statement("#ifndef GL_KHR_shader_subgroup_ballot");
4234 statement("bool subgroupBallotBitExtract(uvec4 value, uint index)");
4235 begin_scope();
4236 statement_no_indent("#ifdef GL_NV_shader_thread_group");
4237 statement("uint shifted = value.x >> index;");
4238 statement_no_indent("#else");
4239 statement("uint shifted = value[index >> 5u] >> (index & 0x1fu);");
4240 statement_no_indent("#endif");
4241 statement("return (shifted & 1u) != 0u;");
4242 end_scope();
4243 statement("#endif");
4244 statement("");
4245 }
4246 }
4247
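	// Certain drivers are known to miscompile direct loads of row-major matrices from UBOs.
	// Routing such loads through a no-op wrapper function works around the broken optimization.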
4248 if (!workaround_ubo_load_overload_types.empty())
4249 {
4250 for (auto &type_id : workaround_ubo_load_overload_types)
4251 {
4252 auto &type = get<SPIRType>(type_id);
4253 statement(type_to_glsl(type), " spvWorkaroundRowMajor(", type_to_glsl(type),
4254 " wrap) { return wrap; }");
4255 }
4256 statement("");
4257 }
4258
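	// Tiny transpose helpers, emitted only for the matrix dimensions that actually needed them,
	// for targets where the built-in transpose() cannot be used.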
4259 if (requires_transpose_2x2)
4260 {
4261 statement("mat2 spvTranspose(mat2 m)");
4262 begin_scope();
4263 statement("return mat2(m[0][0], m[1][0], m[0][1], m[1][1]);");
4264 end_scope();
4265 statement("");
4266 }
4267
4268 if (requires_transpose_3x3)
4269 {
4270 statement("mat3 spvTranspose(mat3 m)");
4271 begin_scope();
4272 statement("return mat3(m[0][0], m[1][0], m[2][0], m[0][1], m[1][1], m[2][1], m[0][2], m[1][2], m[2][2]);");
4273 end_scope();
4274 statement("");
4275 }
4276
4277 if (requires_transpose_4x4)
4278 {
4279 statement("mat4 spvTranspose(mat4 m)");
4280 begin_scope();
4281 statement("return mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], "
4282 "m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]);");
4283 end_scope();
4284 statement("");
4285 }
4286}
4287
4288// Returns a string representation of the ID, usable as a function arg.
// Default is to simply return the expression representation of the arg ID.
4290// Subclasses may override to modify the return value.
4291string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id)
4292{
4293 // Make sure that we use the name of the original variable, and not the parameter alias.
4294 uint32_t name_id = id;
4295 auto *var = maybe_get<SPIRVariable>(id);
4296 if (var && var->basevariable)
4297 name_id = var->basevariable;
4298 return to_expression(name_id);
4299}
4300
4301void CompilerGLSL::handle_invalid_expression(uint32_t id)
4302{
4303 // We tried to read an invalidated expression.
4304 // This means we need another pass at compilation, but next time, force temporary variables so that they cannot be invalidated.
4305 auto res = forced_temporaries.insert(id);
4306
4307 // Forcing new temporaries guarantees forward progress.
4308 if (res.second)
4309 force_recompile_guarantee_forward_progress();
4310 else
4311 force_recompile();
4312}
4313
// Converts the format of the current expression from packed to unpacked,
// by wrapping the expression in a constructor of the appropriate type.
// GLSL does not support packed formats, so this simply returns the expression as-is.
// Subclasses that do support packed formats will override this.
4318string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool)
4319{
4320 return expr_str;
4321}
4322
// Sometimes we proactively enclose an expression, only to find out later that we did not need the enclosure after all.
4324void CompilerGLSL::strip_enclosed_expression(string &expr)
4325{
4326 if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')')
4327 return;
4328
4329 // Have to make sure that our first and last parens actually enclose everything inside it.
4330 uint32_t paren_count = 0;
4331 for (auto &c : expr)
4332 {
4333 if (c == '(')
4334 paren_count++;
4335 else if (c == ')')
4336 {
4337 paren_count--;
4338
4339 // If we hit 0 and this is not the final char, our first and final parens actually don't
4340 // enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d).
4341 if (paren_count == 0 && &c != &expr.back())
4342 return;
4343 }
4344 }
4345 expr.erase(expr.size() - 1, 1);
4346 expr.erase(begin(expr));
4347}
4348
4349string CompilerGLSL::enclose_expression(const string &expr)
4350{
4351 bool need_parens = false;
4352
4353 // If the expression starts with a unary we need to enclose to deal with cases where we have back-to-back
4354 // unary expressions.
4355 if (!expr.empty())
4356 {
4357 auto c = expr.front();
4358 if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*')
4359 need_parens = true;
4360 }
4361
4362 if (!need_parens)
4363 {
4364 uint32_t paren_count = 0;
4365 for (auto c : expr)
4366 {
4367 if (c == '(' || c == '[')
4368 paren_count++;
4369 else if (c == ')' || c == ']')
4370 {
4371 assert(paren_count);
4372 paren_count--;
4373 }
4374 else if (c == ' ' && paren_count == 0)
4375 {
4376 need_parens = true;
4377 break;
4378 }
4379 }
4380 assert(paren_count == 0);
4381 }
4382
4383 // If this expression contains any spaces which are not enclosed by parentheses,
4384 // we need to enclose it so we can treat the whole string as an expression.
4385 // This happens when two expressions have been part of a binary op earlier.
4386 if (need_parens)
4387 return join('(', expr, ')');
4388 else
4389 return expr;
4390}
4391
4392string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr)
4393{
4394 // If this expression starts with an address-of operator ('&'), then
4395 // just return the part after the operator.
4396 // TODO: Strip parens if unnecessary?
4397 if (expr.front() == '&')
4398 return expr.substr(1);
4399 else if (backend.native_pointers)
4400 return join('*', expr);
4401 else if (expr_type.storage == StorageClassPhysicalStorageBufferEXT && expr_type.basetype != SPIRType::Struct &&
4402 expr_type.pointer_depth == 1)
4403 {
4404 return join(enclose_expression(expr), ".value");
4405 }
4406 else
4407 return expr;
4408}
4409
4410string CompilerGLSL::address_of_expression(const std::string &expr)
4411{
4412 if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')')
4413 {
4414 // If we have an expression which looks like (*foo), taking the address of it is the same as stripping
4415 // the first two and last characters. We might have to enclose the expression.
4416 // This doesn't work for cases like (*foo + 10),
4417 // but this is an r-value expression which we cannot take the address of anyways.
4418 return enclose_expression(expr.substr(2, expr.size() - 3));
4419 }
4420 else if (expr.front() == '*')
4421 {
4422 // If this expression starts with a dereference operator ('*'), then
4423 // just return the part after the operator.
4424 return expr.substr(1);
4425 }
4426 else
4427 return join('&', enclose_expression(expr));
4428}
4429
4430// Just like to_expression except that we enclose the expression inside parentheses if needed.
4431string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read)
4432{
4433 return enclose_expression(to_expression(id, register_expression_read));
4434}
4435
4436// Used explicitly when we want to read a row-major expression, but without any transpose shenanigans.
4437// need_transpose must be forced to false.
4438string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id)
4439{
4440 return unpack_expression_type(to_expression(id), expression_type(id),
4441 get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
4442 has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), true);
4443}
4444
4445string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read)
4446{
4447 // If we need to transpose, it will also take care of unpacking rules.
4448 auto *e = maybe_get<SPIRExpression>(id);
4449 bool need_transpose = e && e->need_transpose;
4450 bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
4451 bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
4452
4453 if (!need_transpose && (is_remapped || is_packed))
4454 {
4455 return unpack_expression_type(to_expression(id, register_expression_read),
4456 get_pointee_type(expression_type_id(id)),
4457 get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID),
4458 has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false);
4459 }
4460 else
4461 return to_expression(id, register_expression_read);
4462}
4463
4464string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read)
4465{
4466 return enclose_expression(to_unpacked_expression(id, register_expression_read));
4467}
4468
4469string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read)
4470{
4471 auto &type = expression_type(id);
4472 if (type.pointer && should_dereference(id))
4473 return dereference_expression(type, to_enclosed_expression(id, register_expression_read));
4474 else
4475 return to_expression(id, register_expression_read);
4476}
4477
4478string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read)
4479{
4480 auto &type = expression_type(id);
4481 if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
4482 return address_of_expression(to_enclosed_expression(id, register_expression_read));
4483 else
4484 return to_unpacked_expression(id, register_expression_read);
4485}
4486
4487string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read)
4488{
4489 auto &type = expression_type(id);
4490 if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
4491 return address_of_expression(to_enclosed_expression(id, register_expression_read));
4492 else
4493 return to_enclosed_unpacked_expression(id, register_expression_read);
4494}
4495
4496string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index)
4497{
4498 auto expr = to_enclosed_expression(id);
4499 if (has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked))
4500 return join(expr, "[", index, "]");
4501 else
4502 return join(expr, ".", index_to_swizzle(index));
4503}
4504
4505string CompilerGLSL::to_extract_constant_composite_expression(uint32_t result_type, const SPIRConstant &c,
4506 const uint32_t *chain, uint32_t length)
4507{
	// It is somewhat silly if an application actually enters this path, since it knows the constant up front.
	// Still, it is useful to be able to extract the plain constant directly here.
4510 SPIRConstant tmp;
4511 tmp.constant_type = result_type;
4512 auto &composite_type = get<SPIRType>(c.constant_type);
4513 assert(composite_type.basetype != SPIRType::Struct && composite_type.array.empty());
4514 assert(!c.specialization);
4515
4516 if (is_matrix(composite_type))
4517 {
4518 if (length == 2)
4519 {
4520 tmp.m.c[0].vecsize = 1;
4521 tmp.m.columns = 1;
4522 tmp.m.c[0].r[0] = c.m.c[chain[0]].r[chain[1]];
4523 }
4524 else
4525 {
4526 assert(length == 1);
4527 tmp.m.c[0].vecsize = composite_type.vecsize;
4528 tmp.m.columns = 1;
4529 tmp.m.c[0] = c.m.c[chain[0]];
4530 }
4531 }
4532 else
4533 {
4534 assert(length == 1);
4535 tmp.m.c[0].vecsize = 1;
4536 tmp.m.columns = 1;
4537 tmp.m.c[0].r[0] = c.m.c[0].r[chain[0]];
4538 }
4539
4540 return constant_expression(tmp);
4541}
4542
4543string CompilerGLSL::to_rerolled_array_expression(const string &base_expr, const SPIRType &type)
4544{
4545 uint32_t size = to_array_size_literal(type);
4546 auto &parent = get<SPIRType>(type.parent_type);
4547 string expr = "{ ";
4548
4549 for (uint32_t i = 0; i < size; i++)
4550 {
4551 auto subexpr = join(base_expr, "[", convert_to_string(i), "]");
4552 if (parent.array.empty())
4553 expr += subexpr;
4554 else
4555 expr += to_rerolled_array_expression(subexpr, parent);
4556
4557 if (i + 1 < size)
4558 expr += ", ";
4559 }
4560
4561 expr += " }";
4562 return expr;
4563}
4564
4565string CompilerGLSL::to_composite_constructor_expression(uint32_t id, bool block_like_type)
4566{
4567 auto &type = expression_type(id);
4568
4569 bool reroll_array = !type.array.empty() &&
4570 (!backend.array_is_value_type ||
4571 (block_like_type && !backend.array_is_value_type_in_buffer_blocks));
4572
4573 if (reroll_array)
4574 {
4575 // For this case, we need to "re-roll" an array initializer from a temporary.
4576 // We cannot simply pass the array directly, since it decays to a pointer and it cannot
4577 // participate in a struct initializer. E.g.
4578 // float arr[2] = { 1.0, 2.0 };
4579 // Foo foo = { arr }; must be transformed to
4580 // Foo foo = { { arr[0], arr[1] } };
4581 // The array sizes cannot be deduced from specialization constants since we cannot use any loops.
4582
4583 // We're only triggering one read of the array expression, but this is fine since arrays have to be declared
4584 // as temporaries anyways.
4585 return to_rerolled_array_expression(to_enclosed_expression(id), type);
4586 }
4587 else
4588 return to_unpacked_expression(id);
4589}
4590
4591string CompilerGLSL::to_non_uniform_aware_expression(uint32_t id)
4592{
4593 string expr = to_expression(id);
4594
4595 if (has_decoration(id, DecorationNonUniform))
4596 convert_non_uniform_expression(expr, id);
4597
4598 return expr;
4599}
4600
4601string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
4602{
4603 auto itr = invalid_expressions.find(id);
4604 if (itr != end(invalid_expressions))
4605 handle_invalid_expression(id);
4606
4607 if (ir.ids[id].get_type() == TypeExpression)
4608 {
4609 // We might have a more complex chain of dependencies.
4610 // A possible scenario is that we
4611 //
4612 // %1 = OpLoad
		// %2 = OpDoSomething %1 %1. Here %2 will have a dependency on %1.
		// %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1, since we don't propagate dependencies like that.
		// OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know, since it's part of invalid_expressions.
		// %4 = OpDoSomethingAnotherTime %3 %3 // If we forwarded all expressions, we would see the %1 expression after the store, not before.
4617 //
4618 // However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store,
4619 // and see that we should not forward reads of the original variable.
4620 auto &expr = get<SPIRExpression>(id);
4621 for (uint32_t dep : expr.expression_dependencies)
4622 if (invalid_expressions.find(dep) != end(invalid_expressions))
4623 handle_invalid_expression(dep);
4624 }
4625
4626 if (register_expression_read)
4627 track_expression_read(id);
4628
4629 switch (ir.ids[id].get_type())
4630 {
4631 case TypeExpression:
4632 {
4633 auto &e = get<SPIRExpression>(id);
4634 if (e.base_expression)
4635 return to_enclosed_expression(e.base_expression) + e.expression;
4636 else if (e.need_transpose)
4637 {
4638 // This should not be reached for access chains, since we always deal explicitly with transpose state
4639 // when consuming an access chain expression.
4640 uint32_t physical_type_id = get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
4641 bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
4642 return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), physical_type_id,
4643 is_packed);
4644 }
4645 else if (flattened_structs.count(id))
4646 {
4647 return load_flattened_struct(e.expression, get<SPIRType>(e.expression_type));
4648 }
4649 else
4650 {
4651 if (is_forcing_recompilation())
4652 {
				// During the first compilation phase, certain expression patterns can trigger exponential memory growth.
4654 // Avoid this by returning dummy expressions during this phase.
4655 // Do not use empty expressions here, because those are sentinels for other cases.
4656 return "_";
4657 }
4658 else
4659 return e.expression;
4660 }
4661 }
4662
4663 case TypeConstant:
4664 {
4665 auto &c = get<SPIRConstant>(id);
4666 auto &type = get<SPIRType>(c.constant_type);
4667
4668 // WorkGroupSize may be a constant.
4669 if (has_decoration(c.self, DecorationBuiltIn))
4670 return builtin_to_glsl(BuiltIn(get_decoration(c.self, DecorationBuiltIn)), StorageClassGeneric);
4671 else if (c.specialization)
4672 {
4673 if (backend.workgroup_size_is_hidden)
4674 {
4675 int wg_index = get_constant_mapping_to_workgroup_component(c);
4676 if (wg_index >= 0)
4677 {
4678 auto wg_size = join(builtin_to_glsl(BuiltInWorkgroupSize, StorageClassInput), vector_swizzle(1, wg_index));
4679 if (type.basetype != SPIRType::UInt)
4680 wg_size = bitcast_expression(type, SPIRType::UInt, wg_size);
4681 return wg_size;
4682 }
4683 }
4684
4685 return to_name(id);
4686 }
4687 else if (c.is_used_as_lut)
4688 return to_name(id);
4689 else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
4690 return to_name(id);
4691 else if (!type.array.empty() && !backend.can_declare_arrays_inline)
4692 return to_name(id);
4693 else
4694 return constant_expression(c);
4695 }
4696
4697 case TypeConstantOp:
4698 return to_name(id);
4699
4700 case TypeVariable:
4701 {
4702 auto &var = get<SPIRVariable>(id);
		// If we try to use a loop variable before the loop header, we have to redirect it to the static expression;
4704 // the variable has not been declared yet.
4705 if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable))
4706 return to_expression(var.static_expression);
4707 else if (var.deferred_declaration)
4708 {
4709 var.deferred_declaration = false;
4710 return variable_decl(var);
4711 }
4712 else if (flattened_structs.count(id))
4713 {
4714 return load_flattened_struct(to_name(id), get<SPIRType>(var.basetype));
4715 }
4716 else
4717 {
4718 auto &dec = ir.meta[var.self].decoration;
4719 if (dec.builtin)
4720 return builtin_to_glsl(dec.builtin_type, var.storage);
4721 else
4722 return to_name(id);
4723 }
4724 }
4725
4726 case TypeCombinedImageSampler:
		// We should never take the expression of this type directly.
4728 // The intention is that texture sampling functions will extract the image and samplers
4729 // separately and take their expressions as needed.
4730 // GLSL does not use this type because OpSampledImage immediately creates a combined image sampler
		// expression, a la sampler2D(texture, sampler).
4732 SPIRV_CROSS_THROW("Combined image samplers have no default expression representation.");
4733
4734 case TypeAccessChain:
4735 // We cannot express this type. They only have meaning in other OpAccessChains, OpStore or OpLoad.
4736 SPIRV_CROSS_THROW("Access chains have no default expression representation.");
4737
4738 default:
4739 return to_name(id);
4740 }
4741}
4742
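// Emits an OpSpecConstantOp as a target-language expression,
// e.g. an IAdd becomes "(a + b)" and SConvert/UConvert/FConvert become constructor casts.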
4743string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop)
4744{
4745 auto &type = get<SPIRType>(cop.basetype);
4746 bool binary = false;
4747 bool unary = false;
4748 string op;
4749
4750 if (is_legacy() && is_unsigned_opcode(cop.opcode))
4751 SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
4752
4753 // TODO: Find a clean way to reuse emit_instruction.
4754 switch (cop.opcode)
4755 {
4756 case OpSConvert:
4757 case OpUConvert:
4758 case OpFConvert:
4759 op = type_to_glsl_constructor(type);
4760 break;
4761
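	// These macros map simple spec constant opcodes straight onto a unary or binary operator token;
	// the full expression (including any required bitcasts) is assembled after the switch.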
4762#define GLSL_BOP(opname, x) \
4763 case Op##opname: \
4764 binary = true; \
4765 op = x; \
4766 break
4767
4768#define GLSL_UOP(opname, x) \
4769 case Op##opname: \
4770 unary = true; \
4771 op = x; \
4772 break
4773
4774 GLSL_UOP(SNegate, "-");
4775 GLSL_UOP(Not, "~");
4776 GLSL_BOP(IAdd, "+");
4777 GLSL_BOP(ISub, "-");
4778 GLSL_BOP(IMul, "*");
4779 GLSL_BOP(SDiv, "/");
4780 GLSL_BOP(UDiv, "/");
4781 GLSL_BOP(UMod, "%");
4782 GLSL_BOP(SMod, "%");
4783 GLSL_BOP(ShiftRightLogical, ">>");
4784 GLSL_BOP(ShiftRightArithmetic, ">>");
4785 GLSL_BOP(ShiftLeftLogical, "<<");
4786 GLSL_BOP(BitwiseOr, "|");
4787 GLSL_BOP(BitwiseXor, "^");
4788 GLSL_BOP(BitwiseAnd, "&");
4789 GLSL_BOP(LogicalOr, "||");
4790 GLSL_BOP(LogicalAnd, "&&");
4791 GLSL_UOP(LogicalNot, "!");
4792 GLSL_BOP(LogicalEqual, "==");
4793 GLSL_BOP(LogicalNotEqual, "!=");
4794 GLSL_BOP(IEqual, "==");
4795 GLSL_BOP(INotEqual, "!=");
4796 GLSL_BOP(ULessThan, "<");
4797 GLSL_BOP(SLessThan, "<");
4798 GLSL_BOP(ULessThanEqual, "<=");
4799 GLSL_BOP(SLessThanEqual, "<=");
4800 GLSL_BOP(UGreaterThan, ">");
4801 GLSL_BOP(SGreaterThan, ">");
4802 GLSL_BOP(UGreaterThanEqual, ">=");
4803 GLSL_BOP(SGreaterThanEqual, ">=");
4804
4805 case OpSRem:
4806 {
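		// OpSRem has no direct operator mapping we can use for spec constants, so expand it manually
		// as a - b * (a / b); with truncating division this keeps the sign of the first operand.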
4807 uint32_t op0 = cop.arguments[0];
4808 uint32_t op1 = cop.arguments[1];
4809 return join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(",
4810 to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
4811 }
4812
4813 case OpSelect:
4814 {
4815 if (cop.arguments.size() < 3)
4816 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4817
		// This one is pretty annoying. It's triggered by
		// uint(bool) and int(bool) casts of spec constants.
4820 // In order to preserve its compile-time constness in Vulkan GLSL,
4821 // we need to reduce the OpSelect expression back to this simplified model.
4822 // If we cannot, fail.
4823 if (to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0]))
4824 {
4825 // Implement as a simple cast down below.
4826 }
4827 else
4828 {
4829 // Implement a ternary and pray the compiler understands it :)
4830 return to_ternary_expression(type, cop.arguments[0], cop.arguments[1], cop.arguments[2]);
4831 }
4832 break;
4833 }
4834
4835 case OpVectorShuffle:
4836 {
4837 string expr = type_to_glsl_constructor(type);
4838 expr += "(";
4839
4840 uint32_t left_components = expression_type(cop.arguments[0]).vecsize;
4841 string left_arg = to_enclosed_expression(cop.arguments[0]);
4842 string right_arg = to_enclosed_expression(cop.arguments[1]);
4843
4844 for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++)
4845 {
4846 uint32_t index = cop.arguments[i];
4847 if (index >= left_components)
4848 expr += right_arg + "." + "xyzw"[index - left_components];
4849 else
4850 expr += left_arg + "." + "xyzw"[index];
4851
4852 if (i + 1 < uint32_t(cop.arguments.size()))
4853 expr += ", ";
4854 }
4855
4856 expr += ")";
4857 return expr;
4858 }
4859
4860 case OpCompositeExtract:
4861 {
4862 auto expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1),
4863 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
4864 return expr;
4865 }
4866
4867 case OpCompositeInsert:
4868 SPIRV_CROSS_THROW("OpCompositeInsert spec constant op is not supported.");
4869
4870 default:
4871 // Some opcodes are unimplemented here, these are currently not possible to test from glslang.
4872 SPIRV_CROSS_THROW("Unimplemented spec constant op.");
4873 }
4874
4875 uint32_t bit_width = 0;
4876 if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert)
4877 bit_width = expression_type(cop.arguments[0]).width;
4878
4879 SPIRType::BaseType input_type;
4880 bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode);
4881
4882 switch (cop.opcode)
4883 {
4884 case OpIEqual:
4885 case OpINotEqual:
4886 input_type = to_signed_basetype(bit_width);
4887 break;
4888
4889 case OpSLessThan:
4890 case OpSLessThanEqual:
4891 case OpSGreaterThan:
4892 case OpSGreaterThanEqual:
4893 case OpSMod:
4894 case OpSDiv:
4895 case OpShiftRightArithmetic:
4896 case OpSConvert:
4897 case OpSNegate:
4898 input_type = to_signed_basetype(bit_width);
4899 break;
4900
4901 case OpULessThan:
4902 case OpULessThanEqual:
4903 case OpUGreaterThan:
4904 case OpUGreaterThanEqual:
4905 case OpUMod:
4906 case OpUDiv:
4907 case OpShiftRightLogical:
4908 case OpUConvert:
4909 input_type = to_unsigned_basetype(bit_width);
4910 break;
4911
4912 default:
4913 input_type = type.basetype;
4914 break;
4915 }
4916
4917#undef GLSL_BOP
4918#undef GLSL_UOP
4919 if (binary)
4920 {
4921 if (cop.arguments.size() < 2)
4922 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4923
4924 string cast_op0;
4925 string cast_op1;
4926 auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0],
4927 cop.arguments[1], skip_cast_if_equal_type);
4928
4929 if (type.basetype != input_type && type.basetype != SPIRType::Boolean)
4930 {
4931 expected_type.basetype = input_type;
4932 auto expr = bitcast_glsl_op(type, expected_type);
4933 expr += '(';
4934 expr += join(cast_op0, " ", op, " ", cast_op1);
4935 expr += ')';
4936 return expr;
4937 }
4938 else
4939 return join("(", cast_op0, " ", op, " ", cast_op1, ")");
4940 }
4941 else if (unary)
4942 {
4943 if (cop.arguments.size() < 1)
4944 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4945
4946 // Auto-bitcast to result type as needed.
4947 // Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants.
4948 return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")");
4949 }
4950 else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert)
4951 {
4952 if (cop.arguments.size() < 1)
4953 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4954
4955 auto &arg_type = expression_type(cop.arguments[0]);
4956 if (arg_type.width < type.width && input_type != arg_type.basetype)
4957 {
4958 auto expected = arg_type;
4959 expected.basetype = input_type;
4960 return join(op, "(", bitcast_glsl(expected, cop.arguments[0]), ")");
4961 }
4962 else
4963 return join(op, "(", to_expression(cop.arguments[0]), ")");
4964 }
4965 else
4966 {
4967 if (cop.arguments.size() < 1)
4968 SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
4969 return join(op, "(", to_expression(cop.arguments[0]), ")");
4970 }
4971}
4972
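// Emits a SPIRConstant as a literal expression: null pointers, struct/array initializers
// (recursing into subconstants), empty structs, single vector columns, or full matrix constructors.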
4973string CompilerGLSL::constant_expression(const SPIRConstant &c, bool inside_block_like_struct_scope)
4974{
4975 auto &type = get<SPIRType>(c.constant_type);
4976
4977 if (type.pointer)
4978 {
4979 return backend.null_pointer_literal;
4980 }
4981 else if (!c.subconstants.empty())
4982 {
4983 // Handles Arrays and structures.
4984 string res;
4985
4986 // Only consider the decay if we are inside a struct scope.
4987 // Outside a struct declaration, we can always bind to a constant array with templated type.
4988 bool array_type_decays = inside_block_like_struct_scope &&
4989 !type.array.empty() && !backend.array_is_value_type_in_buffer_blocks &&
4990 has_decoration(c.constant_type, DecorationArrayStride);
4991
4992 if (type.array.empty() && type.basetype == SPIRType::Struct && type_is_block_like(type))
4993 inside_block_like_struct_scope = true;
4994
		// Allow Metal to use the array<T> template to make arrays a value type.
		bool needs_trailing_bracket = false;
4997 if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct &&
4998 type.array.empty())
4999 {
5000 res = type_to_glsl_constructor(type) + "{ ";
5001 }
5002 else if (backend.use_initializer_list && backend.use_typed_initializer_list && backend.array_is_value_type &&
5003 !type.array.empty() && !array_type_decays)
5004 {
5005 res = type_to_glsl_constructor(type) + "({ ";
			needs_trailing_bracket = true;
5007 }
5008 else if (backend.use_initializer_list)
5009 {
5010 res = "{ ";
5011 }
5012 else
5013 {
5014 res = type_to_glsl_constructor(type) + "(";
5015 }
5016
5017 for (auto &elem : c.subconstants)
5018 {
5019 auto &subc = get<SPIRConstant>(elem);
5020 if (subc.specialization)
5021 res += to_name(elem);
5022 else
5023 res += constant_expression(subc, inside_block_like_struct_scope);
5024
5025 if (&elem != &c.subconstants.back())
5026 res += ", ";
5027 }
5028
5029 res += backend.use_initializer_list ? " }" : ")";
		if (needs_trailing_bracket)
5031 res += ")";
5032
5033 return res;
5034 }
5035 else if (type.basetype == SPIRType::Struct && type.member_types.size() == 0)
5036 {
5037 // Metal tessellation likes empty structs which are then constant expressions.
5038 if (backend.supports_empty_struct)
5039 return "{ }";
5040 else if (backend.use_typed_initializer_list)
5041 return join(type_to_glsl(get<SPIRType>(c.constant_type)), "{ 0 }");
5042 else if (backend.use_initializer_list)
5043 return "{ 0 }";
5044 else
5045 return join(type_to_glsl(get<SPIRType>(c.constant_type)), "(0)");
5046 }
5047 else if (c.columns() == 1)
5048 {
5049 return constant_expression_vector(c, 0);
5050 }
5051 else
5052 {
5053 string res = type_to_glsl(get<SPIRType>(c.constant_type)) + "(";
5054 for (uint32_t col = 0; col < c.columns(); col++)
5055 {
5056 if (c.specialization_constant_id(col) != 0)
5057 res += to_name(c.specialization_constant_id(col));
5058 else
5059 res += constant_expression_vector(c, col);
5060
5061 if (col + 1 < c.columns())
5062 res += ", ";
5063 }
5064 res += ")";
5065 return res;
5066 }
5067}
5068
5069#ifdef _MSC_VER
5070// sprintf warning.
5071// We cannot rely on snprintf existing because, ..., MSVC.
5072#pragma warning(push)
5073#pragma warning(disable : 4996)
5074#endif
5075
5076string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
5077{
5078 string res;
5079 float float_value = c.scalar_f16(col, row);
5080
5081 // There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots
5082 // of complicated workarounds, just value-cast to the half type always.
5083 if (std::isnan(float_value) || std::isinf(float_value))
5084 {
5085 SPIRType type;
5086 type.basetype = SPIRType::Half;
5087 type.vecsize = 1;
5088 type.columns = 1;
5089
5090 if (float_value == numeric_limits<float>::infinity())
5091 res = join(type_to_glsl(type), "(1.0 / 0.0)");
5092 else if (float_value == -numeric_limits<float>::infinity())
5093 res = join(type_to_glsl(type), "(-1.0 / 0.0)");
5094 else if (std::isnan(float_value))
5095 res = join(type_to_glsl(type), "(0.0 / 0.0)");
5096 else
5097 SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
5098 }
5099 else
5100 {
5101 SPIRType type;
5102 type.basetype = SPIRType::Half;
5103 type.vecsize = 1;
5104 type.columns = 1;
5105 res = join(type_to_glsl(type), "(", convert_to_string(float_value, current_locale_radix_character), ")");
5106 }
5107
5108 return res;
5109}
5110
5111string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
5112{
5113 string res;
5114 float float_value = c.scalar_f32(col, row);
5115
5116 if (std::isnan(float_value) || std::isinf(float_value))
5117 {
5118 // Use special representation.
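		// For plain GLSL this ends up looking something like uintBitsToFloat(0x7f800000u /* inf */);
		// backends override bitcast_glsl_op to get their own spelling.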
5119 if (!is_legacy())
5120 {
5121 SPIRType out_type;
5122 SPIRType in_type;
5123 out_type.basetype = SPIRType::Float;
5124 in_type.basetype = SPIRType::UInt;
5125 out_type.vecsize = 1;
5126 in_type.vecsize = 1;
5127 out_type.width = 32;
5128 in_type.width = 32;
5129
5130 char print_buffer[32];
5131 sprintf(print_buffer, "0x%xu", c.scalar(col, row));
5132
5133 const char *comment = "inf";
5134 if (float_value == -numeric_limits<float>::infinity())
5135 comment = "-inf";
5136 else if (std::isnan(float_value))
5137 comment = "nan";
5138 res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, " /* ", comment, " */)");
5139 }
5140 else
5141 {
5142 if (float_value == numeric_limits<float>::infinity())
5143 {
5144 if (backend.float_literal_suffix)
5145 res = "(1.0f / 0.0f)";
5146 else
5147 res = "(1.0 / 0.0)";
5148 }
5149 else if (float_value == -numeric_limits<float>::infinity())
5150 {
5151 if (backend.float_literal_suffix)
5152 res = "(-1.0f / 0.0f)";
5153 else
5154 res = "(-1.0 / 0.0)";
5155 }
5156 else if (std::isnan(float_value))
5157 {
5158 if (backend.float_literal_suffix)
5159 res = "(0.0f / 0.0f)";
5160 else
5161 res = "(0.0 / 0.0)";
5162 }
5163 else
5164 SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
5165 }
5166 }
5167 else
5168 {
5169 res = convert_to_string(float_value, current_locale_radix_character);
5170 if (backend.float_literal_suffix)
5171 res += "f";
5172 }
5173
5174 return res;
5175}
5176
5177std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
5178{
5179 string res;
5180 double double_value = c.scalar_f64(col, row);
5181
5182 if (std::isnan(double_value) || std::isinf(double_value))
5183 {
5184 // Use special representation.
5185 if (!is_legacy())
5186 {
5187 SPIRType out_type;
5188 SPIRType in_type;
5189 out_type.basetype = SPIRType::Double;
5190 in_type.basetype = SPIRType::UInt64;
5191 out_type.vecsize = 1;
5192 in_type.vecsize = 1;
5193 out_type.width = 64;
5194 in_type.width = 64;
5195
5196 uint64_t u64_value = c.scalar_u64(col, row);
5197
5198 if (options.es)
5199 SPIRV_CROSS_THROW("64-bit integers/float not supported in ES profile.");
5200 require_extension_internal("GL_ARB_gpu_shader_int64");
5201
5202 char print_buffer[64];
5203 sprintf(print_buffer, "0x%llx%s", static_cast<unsigned long long>(u64_value),
5204 backend.long_long_literal_suffix ? "ull" : "ul");
5205
5206 const char *comment = "inf";
5207 if (double_value == -numeric_limits<double>::infinity())
5208 comment = "-inf";
5209 else if (std::isnan(double_value))
5210 comment = "nan";
5211 res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, " /* ", comment, " */)");
5212 }
5213 else
5214 {
5215 if (options.es)
5216 SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
5217 if (options.version < 400)
5218 require_extension_internal("GL_ARB_gpu_shader_fp64");
5219
5220 if (double_value == numeric_limits<double>::infinity())
5221 {
5222 if (backend.double_literal_suffix)
5223 res = "(1.0lf / 0.0lf)";
5224 else
5225 res = "(1.0 / 0.0)";
5226 }
5227 else if (double_value == -numeric_limits<double>::infinity())
5228 {
5229 if (backend.double_literal_suffix)
5230 res = "(-1.0lf / 0.0lf)";
5231 else
5232 res = "(-1.0 / 0.0)";
5233 }
5234 else if (std::isnan(double_value))
5235 {
5236 if (backend.double_literal_suffix)
5237 res = "(0.0lf / 0.0lf)";
5238 else
5239 res = "(0.0 / 0.0)";
5240 }
5241 else
5242 SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
5243 }
5244 }
5245 else
5246 {
5247 res = convert_to_string(double_value, current_locale_radix_character);
5248 if (backend.double_literal_suffix)
5249 res += "lf";
5250 }
5251
5252 return res;
5253}
5254
5255#ifdef _MSC_VER
5256#pragma warning(pop)
5257#endif
5258
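// Emits a single column of a constant as a (possibly splatted) vector expression,
// e.g. vec3(1.0) rather than vec3(1.0, 1.0, 1.0) when every component is identical.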
5259string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector)
5260{
5261 auto type = get<SPIRType>(c.constant_type);
5262 type.columns = 1;
5263
5264 auto scalar_type = type;
5265 scalar_type.vecsize = 1;
5266
5267 string res;
5268 bool splat = backend.use_constructor_splatting && c.vector_size() > 1;
5269 bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1;
5270
5271 if (!type_is_floating_point(type))
5272 {
5273 // Cannot swizzle literal integers as a special case.
5274 swizzle_splat = false;
5275 }
5276
5277 if (splat || swizzle_splat)
5278 {
5279 // Cannot use constant splatting if we have specialization constants somewhere in the vector.
5280 for (uint32_t i = 0; i < c.vector_size(); i++)
5281 {
5282 if (c.specialization_constant_id(vector, i) != 0)
5283 {
5284 splat = false;
5285 swizzle_splat = false;
5286 break;
5287 }
5288 }
5289 }
5290
5291 if (splat || swizzle_splat)
5292 {
5293 if (type.width == 64)
5294 {
5295 uint64_t ident = c.scalar_u64(vector, 0);
5296 for (uint32_t i = 1; i < c.vector_size(); i++)
5297 {
5298 if (ident != c.scalar_u64(vector, i))
5299 {
5300 splat = false;
5301 swizzle_splat = false;
5302 break;
5303 }
5304 }
5305 }
5306 else
5307 {
5308 uint32_t ident = c.scalar(vector, 0);
5309 for (uint32_t i = 1; i < c.vector_size(); i++)
5310 {
5311 if (ident != c.scalar(vector, i))
5312 {
5313 splat = false;
5314 swizzle_splat = false;
5315 }
5316 }
5317 }
5318 }
5319
5320 if (c.vector_size() > 1 && !swizzle_splat)
5321 res += type_to_glsl(type) + "(";
5322
5323 switch (type.basetype)
5324 {
5325 case SPIRType::Half:
5326 if (splat || swizzle_splat)
5327 {
5328 res += convert_half_to_string(c, vector, 0);
5329 if (swizzle_splat)
5330 res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
5331 }
5332 else
5333 {
5334 for (uint32_t i = 0; i < c.vector_size(); i++)
5335 {
5336 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5337 res += to_expression(c.specialization_constant_id(vector, i));
5338 else
5339 res += convert_half_to_string(c, vector, i);
5340
5341 if (i + 1 < c.vector_size())
5342 res += ", ";
5343 }
5344 }
5345 break;
5346
5347 case SPIRType::Float:
5348 if (splat || swizzle_splat)
5349 {
5350 res += convert_float_to_string(c, vector, 0);
5351 if (swizzle_splat)
5352 res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
5353 }
5354 else
5355 {
5356 for (uint32_t i = 0; i < c.vector_size(); i++)
5357 {
5358 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5359 res += to_expression(c.specialization_constant_id(vector, i));
5360 else
5361 res += convert_float_to_string(c, vector, i);
5362
5363 if (i + 1 < c.vector_size())
5364 res += ", ";
5365 }
5366 }
5367 break;
5368
5369 case SPIRType::Double:
5370 if (splat || swizzle_splat)
5371 {
5372 res += convert_double_to_string(c, vector, 0);
5373 if (swizzle_splat)
5374 res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
5375 }
5376 else
5377 {
5378 for (uint32_t i = 0; i < c.vector_size(); i++)
5379 {
5380 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5381 res += to_expression(c.specialization_constant_id(vector, i));
5382 else
5383 res += convert_double_to_string(c, vector, i);
5384
5385 if (i + 1 < c.vector_size())
5386 res += ", ";
5387 }
5388 }
5389 break;
5390
5391 case SPIRType::Int64:
5392 {
5393 auto tmp = type;
5394 tmp.vecsize = 1;
5395 tmp.columns = 1;
5396 auto int64_type = type_to_glsl(tmp);
5397
5398 if (splat)
5399 {
5400 res += convert_to_string(c.scalar_i64(vector, 0), int64_type, backend.long_long_literal_suffix);
5401 }
5402 else
5403 {
5404 for (uint32_t i = 0; i < c.vector_size(); i++)
5405 {
5406 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5407 res += to_expression(c.specialization_constant_id(vector, i));
5408 else
5409 res += convert_to_string(c.scalar_i64(vector, i), int64_type, backend.long_long_literal_suffix);
5410
5411 if (i + 1 < c.vector_size())
5412 res += ", ";
5413 }
5414 }
5415 break;
5416 }
5417
5418 case SPIRType::UInt64:
5419 if (splat)
5420 {
5421 res += convert_to_string(c.scalar_u64(vector, 0));
5422 if (backend.long_long_literal_suffix)
5423 res += "ull";
5424 else
5425 res += "ul";
5426 }
5427 else
5428 {
5429 for (uint32_t i = 0; i < c.vector_size(); i++)
5430 {
5431 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5432 res += to_expression(c.specialization_constant_id(vector, i));
5433 else
5434 {
5435 res += convert_to_string(c.scalar_u64(vector, i));
5436 if (backend.long_long_literal_suffix)
5437 res += "ull";
5438 else
5439 res += "ul";
5440 }
5441
5442 if (i + 1 < c.vector_size())
5443 res += ", ";
5444 }
5445 }
5446 break;
5447
5448 case SPIRType::UInt:
5449 if (splat)
5450 {
5451 res += convert_to_string(c.scalar(vector, 0));
5452 if (is_legacy())
5453 {
5454 // Fake unsigned constant literals with signed ones if possible.
5455 // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
5456 if (c.scalar_i32(vector, 0) < 0)
5457 SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative.");
5458 }
5459 else if (backend.uint32_t_literal_suffix)
5460 res += "u";
5461 }
5462 else
5463 {
5464 for (uint32_t i = 0; i < c.vector_size(); i++)
5465 {
5466 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5467 res += to_expression(c.specialization_constant_id(vector, i));
5468 else
5469 {
5470 res += convert_to_string(c.scalar(vector, i));
5471 if (is_legacy())
5472 {
5473 // Fake unsigned constant literals with signed ones if possible.
5474 // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
5475 if (c.scalar_i32(vector, i) < 0)
5476 SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made "
5477 "the literal negative.");
5478 }
5479 else if (backend.uint32_t_literal_suffix)
5480 res += "u";
5481 }
5482
5483 if (i + 1 < c.vector_size())
5484 res += ", ";
5485 }
5486 }
5487 break;
5488
5489 case SPIRType::Int:
5490 if (splat)
5491 res += convert_to_string(c.scalar_i32(vector, 0));
5492 else
5493 {
5494 for (uint32_t i = 0; i < c.vector_size(); i++)
5495 {
5496 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5497 res += to_expression(c.specialization_constant_id(vector, i));
5498 else
5499 res += convert_to_string(c.scalar_i32(vector, i));
5500 if (i + 1 < c.vector_size())
5501 res += ", ";
5502 }
5503 }
5504 break;
5505
5506 case SPIRType::UShort:
5507 if (splat)
5508 {
5509 res += convert_to_string(c.scalar(vector, 0));
5510 }
5511 else
5512 {
5513 for (uint32_t i = 0; i < c.vector_size(); i++)
5514 {
5515 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5516 res += to_expression(c.specialization_constant_id(vector, i));
5517 else
5518 {
5519 if (*backend.uint16_t_literal_suffix)
5520 {
5521 res += convert_to_string(c.scalar_u16(vector, i));
5522 res += backend.uint16_t_literal_suffix;
5523 }
5524 else
5525 {
5526 // If backend doesn't have a literal suffix, we need to value cast.
5527 res += type_to_glsl(scalar_type);
5528 res += "(";
5529 res += convert_to_string(c.scalar_u16(vector, i));
5530 res += ")";
5531 }
5532 }
5533
5534 if (i + 1 < c.vector_size())
5535 res += ", ";
5536 }
5537 }
5538 break;
5539
5540 case SPIRType::Short:
5541 if (splat)
5542 {
5543 res += convert_to_string(c.scalar_i16(vector, 0));
5544 }
5545 else
5546 {
5547 for (uint32_t i = 0; i < c.vector_size(); i++)
5548 {
5549 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5550 res += to_expression(c.specialization_constant_id(vector, i));
5551 else
5552 {
5553 if (*backend.int16_t_literal_suffix)
5554 {
5555 res += convert_to_string(c.scalar_i16(vector, i));
5556 res += backend.int16_t_literal_suffix;
5557 }
5558 else
5559 {
5560 // If backend doesn't have a literal suffix, we need to value cast.
5561 res += type_to_glsl(scalar_type);
5562 res += "(";
5563 res += convert_to_string(c.scalar_i16(vector, i));
5564 res += ")";
5565 }
5566 }
5567
5568 if (i + 1 < c.vector_size())
5569 res += ", ";
5570 }
5571 }
5572 break;
5573
5574 case SPIRType::UByte:
5575 if (splat)
5576 {
5577 res += convert_to_string(c.scalar_u8(vector, 0));
5578 }
5579 else
5580 {
5581 for (uint32_t i = 0; i < c.vector_size(); i++)
5582 {
5583 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5584 res += to_expression(c.specialization_constant_id(vector, i));
5585 else
5586 {
5587 res += type_to_glsl(scalar_type);
5588 res += "(";
5589 res += convert_to_string(c.scalar_u8(vector, i));
5590 res += ")";
5591 }
5592
5593 if (i + 1 < c.vector_size())
5594 res += ", ";
5595 }
5596 }
5597 break;
5598
5599 case SPIRType::SByte:
5600 if (splat)
5601 {
5602 res += convert_to_string(c.scalar_i8(vector, 0));
5603 }
5604 else
5605 {
5606 for (uint32_t i = 0; i < c.vector_size(); i++)
5607 {
5608 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5609 res += to_expression(c.specialization_constant_id(vector, i));
5610 else
5611 {
5612 res += type_to_glsl(scalar_type);
5613 res += "(";
5614 res += convert_to_string(c.scalar_i8(vector, i));
5615 res += ")";
5616 }
5617
5618 if (i + 1 < c.vector_size())
5619 res += ", ";
5620 }
5621 }
5622 break;
5623
5624 case SPIRType::Boolean:
5625 if (splat)
5626 res += c.scalar(vector, 0) ? "true" : "false";
5627 else
5628 {
5629 for (uint32_t i = 0; i < c.vector_size(); i++)
5630 {
5631 if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
5632 res += to_expression(c.specialization_constant_id(vector, i));
5633 else
5634 res += c.scalar(vector, i) ? "true" : "false";
5635
5636 if (i + 1 < c.vector_size())
5637 res += ", ";
5638 }
5639 }
5640 break;
5641
5642 default:
5643 SPIRV_CROSS_THROW("Invalid constant expression basetype.");
5644 }
5645
5646 if (c.vector_size() > 1 && !swizzle_splat)
5647 res += ")";
5648
5649 return res;
5650}
5651
5652SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id)
5653{
5654 forced_temporaries.insert(id);
5655 emit_uninitialized_temporary(type, id);
5656 return set<SPIRExpression>(id, to_name(id), type, true);
5657}
5658
5659void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id)
5660{
5661 // If we're declaring temporaries inside continue blocks,
5662 // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
5663 if (current_continue_block && !hoisted_temporaries.count(result_id))
5664 {
5665 auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
5666 if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
5667 [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
5668 return tmp.first == result_type && tmp.second == result_id;
5669 }) == end(header.declare_temporary))
5670 {
5671 header.declare_temporary.emplace_back(result_type, result_id);
5672 hoisted_temporaries.insert(result_id);
5673 force_recompile();
5674 }
5675 }
5676 else if (hoisted_temporaries.count(result_id) == 0)
5677 {
5678 auto &type = get<SPIRType>(result_type);
5679 auto &flags = ir.meta[result_id].decoration.decoration_flags;
5680
5681 // The result_id has not been made into an expression yet, so use flags interface.
5682 add_local_variable_name(result_id);
5683
5684 string initializer;
5685 if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
5686 initializer = join(" = ", to_zero_initialized_expression(result_type));
5687
5688 statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), initializer, ";");
5689 }
5690}
5691
5692string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
5693{
5694 auto &type = get<SPIRType>(result_type);
5695 auto &flags = ir.meta[result_id].decoration.decoration_flags;
5696
5697 // If we're declaring temporaries inside continue blocks,
5698 // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
5699 if (current_continue_block && !hoisted_temporaries.count(result_id))
5700 {
5701 auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
5702 if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
5703 [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
5704 return tmp.first == result_type && tmp.second == result_id;
5705 }) == end(header.declare_temporary))
5706 {
5707 header.declare_temporary.emplace_back(result_type, result_id);
5708 hoisted_temporaries.insert(result_id);
5709 force_recompile();
5710 }
5711
5712 return join(to_name(result_id), " = ");
5713 }
5714 else if (hoisted_temporaries.count(result_id))
5715 {
5716 // The temporary has already been declared earlier, so just "declare" the temporary by writing to it.
5717 return join(to_name(result_id), " = ");
5718 }
5719 else
5720 {
5721 // The result_id has not been made into an expression yet, so use flags interface.
5722 add_local_variable_name(result_id);
5723 return join(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = ");
5724 }
5725}
5726
5727bool CompilerGLSL::expression_is_forwarded(uint32_t id) const
5728{
5729 return forwarded_temporaries.count(id) != 0;
5730}
5731
5732bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const
5733{
5734 return suppressed_usage_tracking.count(id) != 0;
5735}
5736
5737bool CompilerGLSL::expression_read_implies_multiple_reads(uint32_t id) const
5738{
5739 auto *expr = maybe_get<SPIRExpression>(id);
5740 if (!expr)
5741 return false;
5742
5743 // If we're emitting code at a deeper loop level than when we emitted the expression,
5744 // we're probably reading the same expression over and over.
5745 return current_loop_level > expr->emitted_loop_level;
5746}
5747
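// Core expression emitter: either forwards the RHS as an inline expression (emitting no statement),
// or declares a temporary and assigns the RHS to it when forwarding is disabled or the ID is a forced temporary.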
5748SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding,
5749 bool suppress_usage_tracking)
5750{
5751 if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries)))
5752 {
5753 // Just forward it without temporary.
5754 // If the forward is trivial, we do not force flushing to temporary for this expression.
5755 forwarded_temporaries.insert(result_id);
5756 if (suppress_usage_tracking)
5757 suppressed_usage_tracking.insert(result_id);
5758
5759 return set<SPIRExpression>(result_id, rhs, result_type, true);
5760 }
5761 else
5762 {
5763 // If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are).
5764 statement(declare_temporary(result_type, result_id), rhs, ";");
5765 return set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
5766 }
5767}
5768
5769void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
5770{
5771 bool forward = should_forward(op0);
5772 emit_op(result_type, result_id, join(op, to_enclosed_unpacked_expression(op0)), forward);
5773 inherit_expression_dependencies(result_id, op0);
5774}
5775
5776void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
5777{
5778 // Various FP arithmetic opcodes such as add, sub, mul will hit this.
5779 bool force_temporary_precise = backend.support_precise_qualifier &&
5780 has_decoration(result_id, DecorationNoContraction) &&
5781 type_is_floating_point(get<SPIRType>(result_type));
5782 bool forward = should_forward(op0) && should_forward(op1) && !force_temporary_precise;
5783
5784 emit_op(result_type, result_id,
5785 join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward);
5786
5787 inherit_expression_dependencies(result_id, op0);
5788 inherit_expression_dependencies(result_id, op1);
5789}
5790
5791void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op)
5792{
5793 auto &type = get<SPIRType>(result_type);
5794 auto expr = type_to_glsl_constructor(type);
5795 expr += '(';
5796 for (uint32_t i = 0; i < type.vecsize; i++)
5797 {
5798 // Make sure to call to_expression multiple times to ensure
5799 // that these expressions are properly flushed to temporaries if needed.
5800 expr += op;
5801 expr += to_extract_component_expression(operand, i);
5802
5803 if (i + 1 < type.vecsize)
5804 expr += ", ";
5805 }
5806 expr += ')';
5807 emit_op(result_type, result_id, expr, should_forward(operand));
5808
5809 inherit_expression_dependencies(result_id, operand);
5810}
5811
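// Emits a component-wise expansion for operators which cannot be applied to vectors directly
// in the target language, e.g. bvec2(a.x && b.x, a.y && b.y).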
5812void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5813 const char *op, bool negate, SPIRType::BaseType expected_type)
5814{
5815 auto &type0 = expression_type(op0);
5816 auto &type1 = expression_type(op1);
5817
5818 SPIRType target_type0 = type0;
5819 SPIRType target_type1 = type1;
5820 target_type0.basetype = expected_type;
5821 target_type1.basetype = expected_type;
5822 target_type0.vecsize = 1;
5823 target_type1.vecsize = 1;
5824
5825 auto &type = get<SPIRType>(result_type);
5826 auto expr = type_to_glsl_constructor(type);
5827 expr += '(';
5828 for (uint32_t i = 0; i < type.vecsize; i++)
5829 {
5830 // Make sure to call to_expression multiple times to ensure
5831 // that these expressions are properly flushed to temporaries if needed.
5832 if (negate)
5833 expr += "!(";
5834
5835 if (expected_type != SPIRType::Unknown && type0.basetype != expected_type)
5836 expr += bitcast_expression(target_type0, type0.basetype, to_extract_component_expression(op0, i));
5837 else
5838 expr += to_extract_component_expression(op0, i);
5839
5840 expr += ' ';
5841 expr += op;
5842 expr += ' ';
5843
5844 if (expected_type != SPIRType::Unknown && type1.basetype != expected_type)
5845 expr += bitcast_expression(target_type1, type1.basetype, to_extract_component_expression(op1, i));
5846 else
5847 expr += to_extract_component_expression(op1, i);
5848
5849 if (negate)
5850 expr += ")";
5851
5852 if (i + 1 < type.vecsize)
5853 expr += ", ";
5854 }
5855 expr += ')';
5856 emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
5857
5858 inherit_expression_dependencies(result_id, op0);
5859 inherit_expression_dependencies(result_id, op1);
5860}
5861
5862SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type,
5863 uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type)
5864{
5865 auto &type0 = expression_type(op0);
5866 auto &type1 = expression_type(op1);
5867
5868 // We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs.
5869 // For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected
	// since the equality test is exactly the same.
5871 bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type);
5872
5873 // Create a fake type so we can bitcast to it.
5874 // We only deal with regular arithmetic types here like int, uints and so on.
5875 SPIRType expected_type;
5876 expected_type.basetype = input_type;
5877 expected_type.vecsize = type0.vecsize;
5878 expected_type.columns = type0.columns;
5879 expected_type.width = type0.width;
5880
5881 if (cast)
5882 {
5883 cast_op0 = bitcast_glsl(expected_type, op0);
5884 cast_op1 = bitcast_glsl(expected_type, op1);
5885 }
5886 else
5887 {
5888 // If we don't cast, our actual input type is that of the first (or second) argument.
5889 cast_op0 = to_enclosed_unpacked_expression(op0);
5890 cast_op1 = to_enclosed_unpacked_expression(op1);
5891 input_type = type0.basetype;
5892 }
5893
5894 return expected_type;
5895}
5896
5897bool CompilerGLSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0)
5898{
5899 // Some bitcasts may require complex casting sequences, and are implemented here.
	// Otherwise a simple unary function via bitcast_glsl_op will do.
5901
5902 auto &output_type = get<SPIRType>(result_type);
5903 auto &input_type = expression_type(op0);
5904 string expr;
5905
5906 if (output_type.basetype == SPIRType::Half && input_type.basetype == SPIRType::Float && input_type.vecsize == 1)
5907 expr = join("unpackFloat2x16(floatBitsToUint(", to_unpacked_expression(op0), "))");
5908 else if (output_type.basetype == SPIRType::Float && input_type.basetype == SPIRType::Half &&
5909 input_type.vecsize == 2)
5910 expr = join("uintBitsToFloat(packFloat2x16(", to_unpacked_expression(op0), "))");
5911 else
5912 return false;
5913
5914 emit_op(result_type, id, expr, should_forward(op0));
5915 return true;
5916}
5917
5918void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5919 const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
5920{
5921 string cast_op0, cast_op1;
5922 auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
5923 auto &out_type = get<SPIRType>(result_type);
5924
	// We might have cast away from the result type, so bitcast again.
5926 // For example, arithmetic right shift with uint inputs.
5927 // Special case boolean outputs since relational opcodes output booleans instead of int/uint.
5928 string expr;
5929 if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
5930 {
5931 expected_type.basetype = input_type;
5932 expr = bitcast_glsl_op(out_type, expected_type);
5933 expr += '(';
5934 expr += join(cast_op0, " ", op, " ", cast_op1);
5935 expr += ')';
5936 }
5937 else
5938 expr += join(cast_op0, " ", op, " ", cast_op1);
5939
5940 emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
5941 inherit_expression_dependencies(result_id, op0);
5942 inherit_expression_dependencies(result_id, op1);
5943}
5944
5945void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
5946{
5947 bool forward = should_forward(op0);
5948 emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ")"), forward);
5949 inherit_expression_dependencies(result_id, op0);
5950}
5951
5952void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5953 const char *op)
5954{
5955 bool forward = should_forward(op0) && should_forward(op1);
5956 emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ")"),
5957 forward);
5958 inherit_expression_dependencies(result_id, op0);
5959 inherit_expression_dependencies(result_id, op1);
5960}
5961
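// Atomic operations have side effects, so the result is always forced into a temporary rather than
// forwarded, and all atomic-capable variables are flushed afterwards.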
5962void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
5963 const char *op)
5964{
5965 forced_temporaries.insert(result_id);
5966 emit_op(result_type, result_id,
5967 join(op, "(", to_non_uniform_aware_expression(op0), ", ",
5968 to_unpacked_expression(op1), ")"), false);
5969 flush_all_atomic_capable_variables();
5970}
5971
5972void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
5973 uint32_t op0, uint32_t op1, uint32_t op2,
5974 const char *op)
5975{
5976 forced_temporaries.insert(result_id);
5977 emit_op(result_type, result_id,
5978 join(op, "(", to_non_uniform_aware_expression(op0), ", ",
5979 to_unpacked_expression(op1), ", ", to_unpacked_expression(op2), ")"), false);
5980 flush_all_atomic_capable_variables();
5981}
5982
5983void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op,
5984 SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type)
5985{
5986 auto &out_type = get<SPIRType>(result_type);
5987 auto &expr_type = expression_type(op0);
5988 auto expected_type = out_type;
5989
5990 // Bit-widths might be different in unary cases because we use it for SConvert/UConvert and friends.
5991 expected_type.basetype = input_type;
5992 expected_type.width = expr_type.width;
5993
5994 string cast_op;
5995 if (expr_type.basetype != input_type)
5996 {
5997 if (expr_type.basetype == SPIRType::Boolean)
5998 cast_op = join(type_to_glsl(expected_type), "(", to_unpacked_expression(op0), ")");
5999 else
6000 cast_op = bitcast_glsl(expected_type, op0);
6001 }
6002 else
6003 cast_op = to_unpacked_expression(op0);
6004
6005 string expr;
6006 if (out_type.basetype != expected_result_type)
6007 {
6008 expected_type.basetype = expected_result_type;
6009 expected_type.width = out_type.width;
6010 if (out_type.basetype == SPIRType::Boolean)
6011 expr = type_to_glsl(out_type);
6012 else
6013 expr = bitcast_glsl_op(out_type, expected_type);
6014 expr += '(';
6015 expr += join(op, "(", cast_op, ")");
6016 expr += ')';
6017 }
6018 else
6019 {
6020 expr += join(op, "(", cast_op, ")");
6021 }
6022
6023 emit_op(result_type, result_id, expr, should_forward(op0));
6024 inherit_expression_dependencies(result_id, op0);
6025}
6026
6027// Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs
6028// and different vector sizes all at once. Need a special purpose method here.
6029void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6030 uint32_t op2, const char *op,
6031 SPIRType::BaseType expected_result_type,
6032 SPIRType::BaseType input_type0, SPIRType::BaseType input_type1,
6033 SPIRType::BaseType input_type2)
6034{
6035 auto &out_type = get<SPIRType>(result_type);
6036 auto expected_type = out_type;
6037 expected_type.basetype = input_type0;
6038
6039 string cast_op0 =
6040 expression_type(op0).basetype != input_type0 ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
6041
6042 auto op1_expr = to_unpacked_expression(op1);
6043 auto op2_expr = to_unpacked_expression(op2);
6044
6045 // Use value casts here instead. Input must be exactly int or uint, but SPIR-V might be 16-bit.
6046 expected_type.basetype = input_type1;
6047 expected_type.vecsize = 1;
6048 string cast_op1 = expression_type(op1).basetype != input_type1 ?
6049 join(type_to_glsl_constructor(expected_type), "(", op1_expr, ")") :
6050 op1_expr;
6051
6052 expected_type.basetype = input_type2;
6053 expected_type.vecsize = 1;
6054 string cast_op2 = expression_type(op2).basetype != input_type2 ?
6055 join(type_to_glsl_constructor(expected_type), "(", op2_expr, ")") :
6056 op2_expr;
6057
6058 string expr;
6059 if (out_type.basetype != expected_result_type)
6060 {
6061 expected_type.vecsize = out_type.vecsize;
6062 expected_type.basetype = expected_result_type;
6063 expr = bitcast_glsl_op(out_type, expected_type);
6064 expr += '(';
6065 expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
6066 expr += ')';
6067 }
6068 else
6069 {
6070 expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
6071 }
6072
6073 emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
6074 inherit_expression_dependencies(result_id, op0);
6075 inherit_expression_dependencies(result_id, op1);
6076 inherit_expression_dependencies(result_id, op2);
6077}
6078
6079void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6080 uint32_t op2, const char *op, SPIRType::BaseType input_type)
6081{
6082 auto &out_type = get<SPIRType>(result_type);
6083 auto expected_type = out_type;
6084 expected_type.basetype = input_type;
6085 string cast_op0 =
6086 expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
6087 string cast_op1 =
6088 expression_type(op1).basetype != input_type ? bitcast_glsl(expected_type, op1) : to_unpacked_expression(op1);
6089 string cast_op2 =
6090 expression_type(op2).basetype != input_type ? bitcast_glsl(expected_type, op2) : to_unpacked_expression(op2);
6091
6092 string expr;
6093 if (out_type.basetype != input_type)
6094 {
6095 expr = bitcast_glsl_op(out_type, expected_type);
6096 expr += '(';
6097 expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
6098 expr += ')';
6099 }
6100 else
6101 {
6102 expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")");
6103 }
6104
6105 emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2));
6106 inherit_expression_dependencies(result_id, op0);
6107 inherit_expression_dependencies(result_id, op1);
6108 inherit_expression_dependencies(result_id, op2);
6109}
6110
6111void CompilerGLSL::emit_binary_func_op_cast_clustered(uint32_t result_type, uint32_t result_id, uint32_t op0,
6112 uint32_t op1, const char *op, SPIRType::BaseType input_type)
6113{
6114 // Special purpose method for implementing clustered subgroup opcodes.
	// The main difference is that op1 does not participate in any casting; it must be a literal.
6116 auto &out_type = get<SPIRType>(result_type);
6117 auto expected_type = out_type;
6118 expected_type.basetype = input_type;
6119 string cast_op0 =
6120 expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0);
6121
6122 string expr;
6123 if (out_type.basetype != input_type)
6124 {
6125 expr = bitcast_glsl_op(out_type, expected_type);
6126 expr += '(';
6127 expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")");
6128 expr += ')';
6129 }
6130 else
6131 {
6132 expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")");
6133 }
6134
6135 emit_op(result_type, result_id, expr, should_forward(op0));
6136 inherit_expression_dependencies(result_id, op0);
6137}
6138
6139void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6140 const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
6141{
6142 string cast_op0, cast_op1;
6143 auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
6144 auto &out_type = get<SPIRType>(result_type);
6145
6146 // Special case boolean outputs since relational opcodes output booleans instead of int/uint.
6147 string expr;
6148 if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
6149 {
6150 expected_type.basetype = input_type;
6151 expr = bitcast_glsl_op(out_type, expected_type);
6152 expr += '(';
6153 expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
6154 expr += ')';
6155 }
6156 else
6157 {
6158 expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
6159 }
6160
6161 emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
6162 inherit_expression_dependencies(result_id, op0);
6163 inherit_expression_dependencies(result_id, op1);
6164}
6165
6166void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6167 uint32_t op2, const char *op)
6168{
6169 bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2);
6170 emit_op(result_type, result_id,
6171 join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
6172 to_unpacked_expression(op2), ")"),
6173 forward);
6174
6175 inherit_expression_dependencies(result_id, op0);
6176 inherit_expression_dependencies(result_id, op1);
6177 inherit_expression_dependencies(result_id, op2);
6178}
6179
6180void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6181 uint32_t op2, uint32_t op3, const char *op)
6182{
6183 bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
6184 emit_op(result_type, result_id,
6185 join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ",
6186 to_unpacked_expression(op2), ", ", to_unpacked_expression(op3), ")"),
6187 forward);
6188
6189 inherit_expression_dependencies(result_id, op0);
6190 inherit_expression_dependencies(result_id, op1);
6191 inherit_expression_dependencies(result_id, op2);
6192 inherit_expression_dependencies(result_id, op3);
6193}
6194
6195void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
6196 uint32_t op2, uint32_t op3, const char *op,
6197 SPIRType::BaseType offset_count_type)
6198{
6199 // Only need to cast offset/count arguments. Types of base/insert must be same as result type,
6200 // and bitfieldInsert is sign invariant.
6201 bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
6202
6203 auto op0_expr = to_unpacked_expression(op0);
6204 auto op1_expr = to_unpacked_expression(op1);
6205 auto op2_expr = to_unpacked_expression(op2);
6206 auto op3_expr = to_unpacked_expression(op3);
6207
6208 SPIRType target_type;
6209 target_type.vecsize = 1;
6210 target_type.basetype = offset_count_type;
6211
6212 if (expression_type(op2).basetype != offset_count_type)
6213 {
6214 // Value-cast here. Input might be 16-bit. GLSL requires int.
6215 op2_expr = join(type_to_glsl_constructor(target_type), "(", op2_expr, ")");
6216 }
6217
6218 if (expression_type(op3).basetype != offset_count_type)
6219 {
6220 // Value-cast here. Input might be 16-bit. GLSL requires int.
6221 op3_expr = join(type_to_glsl_constructor(target_type), "(", op3_expr, ")");
6222 }
6223
6224 emit_op(result_type, result_id, join(op, "(", op0_expr, ", ", op1_expr, ", ", op2_expr, ", ", op3_expr, ")"),
6225 forward);
6226
6227 inherit_expression_dependencies(result_id, op0);
6228 inherit_expression_dependencies(result_id, op1);
6229 inherit_expression_dependencies(result_id, op2);
6230 inherit_expression_dependencies(result_id, op3);
6231}
6232
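// Resolves the legacy name for a texture builtin, e.g. "texture" -> texture2D and
// "textureLod" -> texture2DLodEXT on legacy ES, pulling in any extensions that are required.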
6233string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t tex)
6234{
6235 const char *type;
6236 switch (imgtype.image.dim)
6237 {
6238 case spv::Dim1D:
6239 type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D";
6240 break;
6241 case spv::Dim2D:
6242 type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
6243 break;
6244 case spv::Dim3D:
6245 type = "3D";
6246 break;
6247 case spv::DimCube:
6248 type = "Cube";
6249 break;
6250 case spv::DimRect:
6251 type = "2DRect";
6252 break;
6253 case spv::DimBuffer:
6254 type = "Buffer";
6255 break;
6256 case spv::DimSubpassData:
6257 type = "2D";
6258 break;
6259 default:
6260 type = "";
6261 break;
6262 }
6263
6264 // In legacy GLSL, an extension is required for textureLod in the fragment
6265 // shader or textureGrad anywhere.
6266 bool legacy_lod_ext = false;
6267 auto &execution = get_entry_point();
6268 if (op == "textureGrad" || op == "textureProjGrad" ||
6269 ((op == "textureLod" || op == "textureProjLod") && execution.model != ExecutionModelVertex))
6270 {
6271 if (is_legacy_es())
6272 {
6273 legacy_lod_ext = true;
6274 require_extension_internal("GL_EXT_shader_texture_lod");
6275 }
6276 else if (is_legacy_desktop())
6277 require_extension_internal("GL_ARB_shader_texture_lod");
6278 }
6279
6280 if (op == "textureLodOffset" || op == "textureProjLodOffset")
6281 {
6282 if (is_legacy_es())
6283 SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES"));
6284
6285 require_extension_internal("GL_EXT_gpu_shader4");
6286 }
6287
6288 // GLES has very limited support for shadow samplers.
	// Basically, shadow2D and shadow2DProj work through EXT_shadow_samplers;
	// everything else just throws.
6291 bool is_comparison = is_depth_image(imgtype, tex);
6292 if (is_comparison && is_legacy_es())
6293 {
6294 if (op == "texture" || op == "textureProj")
6295 require_extension_internal("GL_EXT_shadow_samplers");
6296 else
6297 SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES"));
6298 }
6299
6300 if (op == "textureSize")
6301 {
6302 if (is_legacy_es())
6303 SPIRV_CROSS_THROW("textureSize not supported in legacy ES");
6304 if (is_comparison)
6305 SPIRV_CROSS_THROW("textureSize not supported on shadow sampler in legacy GLSL");
6306 require_extension_internal("GL_EXT_gpu_shader4");
6307 }
6308
6309 if (op == "texelFetch" && is_legacy_es())
6310 SPIRV_CROSS_THROW("texelFetch not supported in legacy ES");
6311
6312 bool is_es_and_depth = is_legacy_es() && is_comparison;
6313 std::string type_prefix = is_comparison ? "shadow" : "texture";
6314
6315 if (op == "texture")
6316 return is_es_and_depth ? join(type_prefix, type, "EXT") : join(type_prefix, type);
6317 else if (op == "textureLod")
6318 return join(type_prefix, type, legacy_lod_ext ? "LodEXT" : "Lod");
6319 else if (op == "textureProj")
6320 return join(type_prefix, type, is_es_and_depth ? "ProjEXT" : "Proj");
6321 else if (op == "textureGrad")
6322 return join(type_prefix, type, is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad");
6323 else if (op == "textureProjLod")
6324 return join(type_prefix, type, legacy_lod_ext ? "ProjLodEXT" : "ProjLod");
6325 else if (op == "textureLodOffset")
6326 return join(type_prefix, type, "LodOffset");
6327 else if (op == "textureProjGrad")
6328 return join(type_prefix, type,
6329 is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad");
6330 else if (op == "textureProjLodOffset")
6331 return join(type_prefix, type, "ProjLodOffset");
6332 else if (op == "textureSize")
6333 return join("textureSize", type);
6334 else if (op == "texelFetch")
6335 return join("texelFetch", type);
6336 else
6337 {
6338 SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op));
6339 }
6340}
6341
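// Checks whether an OpSelect between the constants 0 (false result) and 1 (true result) can be emitted
// as a plain constructor cast of the boolean, e.g. uint(cond). If so, 'op' is set to that constructor.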
6342bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp)
6343{
6344 auto *cleft = maybe_get<SPIRConstant>(left);
6345 auto *cright = maybe_get<SPIRConstant>(right);
6346 auto &lerptype = expression_type(lerp);
6347
6348 // If our targets aren't constants, we cannot use construction.
6349 if (!cleft || !cright)
6350 return false;
6351
6352 // If our targets are spec constants, we cannot use construction.
6353 if (cleft->specialization || cright->specialization)
6354 return false;
6355
6356 auto &value_type = get<SPIRType>(cleft->constant_type);
6357
6358 if (lerptype.basetype != SPIRType::Boolean)
6359 return false;
6360 if (value_type.basetype == SPIRType::Struct || is_array(value_type))
6361 return false;
6362 if (!backend.use_constructor_splatting && value_type.vecsize != lerptype.vecsize)
6363 return false;
6364
6365 // The only valid way to use matrices with OpSelect in SPIR-V 1.4 is a scalar select.
6366 // A matrix(scalar) constructor only fills in the diagonal, so this gets messy very quickly.
6367 // Just avoid this case.
6368 if (value_type.columns > 1)
6369 return false;
6370
6371 // If our bool selects between 0 and 1, we can cast from bool instead, turning the select into a trivial constructor cast.
6372 bool ret = true;
6373 for (uint32_t row = 0; ret && row < value_type.vecsize; row++)
6374 {
6375 switch (type.basetype)
6376 {
6377 case SPIRType::Short:
6378 case SPIRType::UShort:
6379 ret = cleft->scalar_u16(0, row) == 0 && cright->scalar_u16(0, row) == 1;
6380 break;
6381
6382 case SPIRType::Int:
6383 case SPIRType::UInt:
6384 ret = cleft->scalar(0, row) == 0 && cright->scalar(0, row) == 1;
6385 break;
6386
6387 case SPIRType::Half:
6388 ret = cleft->scalar_f16(0, row) == 0.0f && cright->scalar_f16(0, row) == 1.0f;
6389 break;
6390
6391 case SPIRType::Float:
6392 ret = cleft->scalar_f32(0, row) == 0.0f && cright->scalar_f32(0, row) == 1.0f;
6393 break;
6394
6395 case SPIRType::Double:
6396 ret = cleft->scalar_f64(0, row) == 0.0 && cright->scalar_f64(0, row) == 1.0;
6397 break;
6398
6399 case SPIRType::Int64:
6400 case SPIRType::UInt64:
6401 ret = cleft->scalar_u64(0, row) == 0 && cright->scalar_u64(0, row) == 1;
6402 break;
6403
6404 default:
6405 ret = false;
6406 break;
6407 }
6408 }
6409
6410 if (ret)
6411 op = type_to_glsl_constructor(type);
6412 return ret;
6413}
6414
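// Builds a plain component-wise ternary expression as a fallback for OpSelect.
// For vector selects this expands roughly as (illustrative only):
//   vec2(s.x ? a.x : b.x, s.y ? a.y : b.y)
// which is used when a boolean mix() overload is not available.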
6415string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value,
6416 uint32_t false_value)
6417{
6418 string expr;
6419 auto &lerptype = expression_type(select);
6420
6421 if (lerptype.vecsize == 1)
6422 expr = join(to_enclosed_expression(select), " ? ", to_enclosed_pointer_expression(true_value), " : ",
6423 to_enclosed_pointer_expression(false_value));
6424 else
6425 {
6426 auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(expression, i); };
6427
6428 expr = type_to_glsl_constructor(restype);
6429 expr += "(";
6430 for (uint32_t i = 0; i < restype.vecsize; i++)
6431 {
6432 expr += swiz(select, i);
6433 expr += " ? ";
6434 expr += swiz(true_value, i);
6435 expr += " : ";
6436 expr += swiz(false_value, i);
6437 if (i + 1 < restype.vecsize)
6438 expr += ", ";
6439 }
6440 expr += ")";
6441 }
6442
6443 return expr;
6444}
6445
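// Emits OpSelect / GLSLstd450 FMix-style mixes. Depending on target capabilities the result is either
// a trivial constructor cast (to_trivial_mix_op), a component-wise ternary (to_ternary_expression),
// a boolean mix() overload, or a plain mix() call.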
6446void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp)
6447{
6448 auto &lerptype = expression_type(lerp);
6449 auto &restype = get<SPIRType>(result_type);
6450
6451 // If this results in a variable pointer, assume it may be written through.
6452 if (restype.pointer)
6453 {
6454 register_write(left);
6455 register_write(right);
6456 }
6457
6458 string mix_op;
6459 bool has_boolean_mix = *backend.boolean_mix_function &&
6460 ((options.es && options.version >= 310) || (!options.es && options.version >= 450));
6461 bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp);
6462
6463 // Cannot use boolean mix when the lerp argument is just a single boolean;
6464 // fall back to regular ternary expressions.
6465 if (lerptype.vecsize == 1)
6466 has_boolean_mix = false;
6467
6468 // If we can reduce the mix to a simple cast, do so.
6469 // This helps for cases like int(bool) and uint(bool), which are implemented with
6470 // OpSelect between constants 1 and 0.
6471 if (trivial_mix)
6472 {
6473 emit_unary_func_op(result_type, id, lerp, mix_op.c_str());
6474 }
6475 else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean)
6476 {
6477 // Boolean mix not supported on desktop without extension.
6478 // Was added in OpenGL 4.5 with ES 3.1 compat.
6479 //
6480 // Could use GL_EXT_shader_integer_mix on desktop at least,
6481 // but Apple doesn't support it. :(
6482 // Just implement it as ternary expressions.
6483 auto expr = to_ternary_expression(get<SPIRType>(result_type), lerp, right, left);
6484 emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp));
6485 inherit_expression_dependencies(id, left);
6486 inherit_expression_dependencies(id, right);
6487 inherit_expression_dependencies(id, lerp);
6488 }
6489 else if (lerptype.basetype == SPIRType::Boolean)
6490 emit_trinary_func_op(result_type, id, left, right, lerp, backend.boolean_mix_function);
6491 else
6492 emit_trinary_func_op(result_type, id, left, right, lerp, "mix");
6493}
6494
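// Resolves the expression for a (texture, sampler) pair that has been remapped to a combined sampler.
// This only works if the API user ran build_combined_image_samplers() before compile(); the name of the
// emitted combined uniform is whatever was assigned to the remapped ID (e.g. through set_name()).
// Hypothetical example of the remap being resolved here: texture2D uTex + sampler uSamp -> sampler2D uCombined.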
6495string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id)
6496{
6497 // Keep track of the array indices we have used to load the image.
6498 // We'll need to use the same array index into the combined image sampler array.
6499 auto image_expr = to_non_uniform_aware_expression(image_id);
6500 string array_expr;
6501 auto array_index = image_expr.find_first_of('[');
6502 if (array_index != string::npos)
6503 array_expr = image_expr.substr(array_index, string::npos);
6504
6505 auto &args = current_function->arguments;
6506
6507 // For GLSL and ESSL targets, we must enumerate all possible combinations of sampler2D(texture2D, sampler)
6508 // and redirect each combination to a new combined sampler2D uniform.
6509 auto *image = maybe_get_backing_variable(image_id);
6510 auto *samp = maybe_get_backing_variable(samp_id);
6511 if (image)
6512 image_id = image->self;
6513 if (samp)
6514 samp_id = samp->self;
6515
6516 auto image_itr = find_if(begin(args), end(args),
6517 [image_id](const SPIRFunction::Parameter &param) { return image_id == param.id; });
6518
6519 auto sampler_itr = find_if(begin(args), end(args),
6520 [samp_id](const SPIRFunction::Parameter &param) { return samp_id == param.id; });
6521
6522 if (image_itr != end(args) || sampler_itr != end(args))
6523 {
6524 // If the image or sampler originates from a function parameter, we will find it in our argument list.
6525 bool global_image = image_itr == end(args);
6526 bool global_sampler = sampler_itr == end(args);
6527 VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(args)));
6528 VariableID sid = global_sampler ? samp_id : VariableID(uint32_t(sampler_itr - begin(args)));
6529
6530 auto &combined = current_function->combined_parameters;
6531 auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) {
6532 return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid &&
6533 p.sampler_id == sid;
6534 });
6535
6536 if (itr != end(combined))
6537 return to_expression(itr->id) + array_expr;
6538 else
6539 {
6540 SPIRV_CROSS_THROW("Cannot find mapping for combined sampler parameter, was "
6541 "build_combined_image_samplers() used "
6542 "before compile() was called?");
6543 }
6544 }
6545 else
6546 {
6547 // For global sampler2D, look directly at the global remapping table.
6548 auto &mapping = combined_image_samplers;
6549 auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) {
6550 return combined.image_id == image_id && combined.sampler_id == samp_id;
6551 });
6552
6553 if (itr != end(combined_image_samplers))
6554 return to_expression(itr->combined_id) + array_expr;
6555 else
6556 {
6557 SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used "
6558 "before compile() was called?");
6559 }
6560 }
6561}
6562
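// Subset of subgroup opcodes that can also be emitted for plain OpenGL targets, where they are
// lowered through ShaderSubgroupSupportHelper and the relevant GL extensions instead of assuming
// full Vulkan-style GL_KHR_shader_subgroup support.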
6563bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op)
6564{
6565 switch (op)
6566 {
6567 case OpGroupNonUniformElect:
6568 case OpGroupNonUniformBallot:
6569 case OpGroupNonUniformBallotFindLSB:
6570 case OpGroupNonUniformBallotFindMSB:
6571 case OpGroupNonUniformBroadcast:
6572 case OpGroupNonUniformBroadcastFirst:
6573 case OpGroupNonUniformAll:
6574 case OpGroupNonUniformAny:
6575 case OpGroupNonUniformAllEqual:
6576 case OpControlBarrier:
6577 case OpMemoryBarrier:
6578 case OpGroupNonUniformBallotBitCount:
6579 case OpGroupNonUniformBallotBitExtract:
6580 case OpGroupNonUniformInverseBallot:
6581 return true;
6582 default:
6583 return false;
6584 }
6585}
6586
6587void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
6588{
6589 if (options.vulkan_semantics && combined_image_samplers.empty())
6590 {
6591 emit_binary_func_op(result_type, result_id, image_id, samp_id,
6592 type_to_glsl(get<SPIRType>(result_type), result_id).c_str());
6593 }
6594 else
6595 {
6596 // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
6597 emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true);
6598 }
6599
6600 // Make sure to suppress usage tracking and any expression invalidation.
6601 // It is illegal to create temporaries of opaque types.
6602 forwarded_temporaries.erase(result_id);
6603}
6604
6605static inline bool image_opcode_is_sample_no_dref(Op op)
6606{
6607 switch (op)
6608 {
6609 case OpImageSampleExplicitLod:
6610 case OpImageSampleImplicitLod:
6611 case OpImageSampleProjExplicitLod:
6612 case OpImageSampleProjImplicitLod:
6613 case OpImageFetch:
6614 case OpImageRead:
6615 case OpImageSparseSampleExplicitLod:
6616 case OpImageSparseSampleImplicitLod:
6617 case OpImageSparseSampleProjExplicitLod:
6618 case OpImageSparseSampleProjImplicitLod:
6619 case OpImageSparseFetch:
6620 case OpImageSparseRead:
6621 return true;
6622
6623 default:
6624 return false;
6625 }
6626}
6627
6628void CompilerGLSL::emit_sparse_feedback_temporaries(uint32_t result_type_id, uint32_t id, uint32_t &feedback_id,
6629 uint32_t &texel_id)
6630{
6631 // Need to allocate two temporaries.
6632 if (options.es)
6633 SPIRV_CROSS_THROW("Sparse texture feedback is not supported on ESSL.");
6634 require_extension_internal("GL_ARB_sparse_texture2");
6635
6636 auto &temps = extra_sub_expressions[id];
6637 if (temps == 0)
6638 temps = ir.increase_bound_by(2);
6639
6640 feedback_id = temps + 0;
6641 texel_id = temps + 1;
6642
6643 auto &return_type = get<SPIRType>(result_type_id);
6644 if (return_type.basetype != SPIRType::Struct || return_type.member_types.size() != 2)
6645 SPIRV_CROSS_THROW("Invalid return type for sparse feedback.");
6646 emit_uninitialized_temporary(return_type.member_types[0], feedback_id);
6647 emit_uninitialized_temporary(return_type.member_types[1], texel_id);
6648}
6649
6650uint32_t CompilerGLSL::get_sparse_feedback_texel_id(uint32_t id) const
6651{
6652 auto itr = extra_sub_expressions.find(id);
6653 if (itr == extra_sub_expressions.end())
6654 return 0;
6655 else
6656 return itr->second + 1;
6657}
6658
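// Common entry point for the texture sampling and fetch opcodes.
// Sparse variants are lowered through two temporaries, conceptually (identifier names are hypothetical):
//   int _code = sparseTextureARB(uTex, uv, _texel);
//   _result = ResultStruct(_code, _texel);
// so the SPIR-V (residency code, texel) struct result can be reconstructed.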
6659void CompilerGLSL::emit_texture_op(const Instruction &i, bool sparse)
6660{
6661 auto *ops = stream(i);
6662 auto op = static_cast<Op>(i.op);
6663
6664 SmallVector<uint32_t> inherited_expressions;
6665
6666 uint32_t result_type_id = ops[0];
6667 uint32_t id = ops[1];
6668 auto &return_type = get<SPIRType>(result_type_id);
6669
6670 uint32_t sparse_code_id = 0;
6671 uint32_t sparse_texel_id = 0;
6672 if (sparse)
6673 emit_sparse_feedback_temporaries(result_type_id, id, sparse_code_id, sparse_texel_id);
6674
6675 bool forward = false;
6676 string expr = to_texture_op(i, sparse, &forward, inherited_expressions);
6677
6678 if (sparse)
6679 {
6680 statement(to_expression(sparse_code_id), " = ", expr, ";");
6681 expr = join(type_to_glsl(return_type), "(", to_expression(sparse_code_id), ", ", to_expression(sparse_texel_id),
6682 ")");
6683 forward = true;
6684 inherited_expressions.clear();
6685 }
6686
6687 emit_op(result_type_id, id, expr, forward);
6688 for (auto &inherit : inherited_expressions)
6689 inherit_expression_dependencies(id, inherit);
6690
6691 // Do not register sparse ops as control dependent as they are always lowered to a temporary.
6692 switch (op)
6693 {
6694 case OpImageSampleDrefImplicitLod:
6695 case OpImageSampleImplicitLod:
6696 case OpImageSampleProjImplicitLod:
6697 case OpImageSampleProjDrefImplicitLod:
6698 register_control_dependent_expression(id);
6699 break;
6700
6701 default:
6702 break;
6703 }
6704}
6705
6706std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool *forward,
6707 SmallVector<uint32_t> &inherited_expressions)
6708{
6709 auto *ops = stream(i);
6710 auto op = static_cast<Op>(i.op);
6711 uint32_t length = i.length;
6712
6713 uint32_t result_type_id = ops[0];
6714 VariableID img = ops[2];
6715 uint32_t coord = ops[3];
6716 uint32_t dref = 0;
6717 uint32_t comp = 0;
6718 bool gather = false;
6719 bool proj = false;
6720 bool fetch = false;
6721 bool nonuniform_expression = false;
6722 const uint32_t *opt = nullptr;
6723
6724 auto &result_type = get<SPIRType>(result_type_id);
6725
6726 inherited_expressions.push_back(coord);
6727 if (has_decoration(img, DecorationNonUniform) && !maybe_get_backing_variable(img))
6728 nonuniform_expression = true;
6729
6730 switch (op)
6731 {
6732 case OpImageSampleDrefImplicitLod:
6733 case OpImageSampleDrefExplicitLod:
6734 case OpImageSparseSampleDrefImplicitLod:
6735 case OpImageSparseSampleDrefExplicitLod:
6736 dref = ops[4];
6737 opt = &ops[5];
6738 length -= 5;
6739 break;
6740
6741 case OpImageSampleProjDrefImplicitLod:
6742 case OpImageSampleProjDrefExplicitLod:
6743 case OpImageSparseSampleProjDrefImplicitLod:
6744 case OpImageSparseSampleProjDrefExplicitLod:
6745 dref = ops[4];
6746 opt = &ops[5];
6747 length -= 5;
6748 proj = true;
6749 break;
6750
6751 case OpImageDrefGather:
6752 case OpImageSparseDrefGather:
6753 dref = ops[4];
6754 opt = &ops[5];
6755 length -= 5;
6756 gather = true;
6757 if (options.es && options.version < 310)
6758 SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
6759 else if (!options.es && options.version < 400)
6760 SPIRV_CROSS_THROW("textureGather with depth compare requires GLSL 400.");
6761 break;
6762
6763 case OpImageGather:
6764 case OpImageSparseGather:
6765 comp = ops[4];
6766 opt = &ops[5];
6767 length -= 5;
6768 gather = true;
6769 if (options.es && options.version < 310)
6770 SPIRV_CROSS_THROW("textureGather requires ESSL 310.");
6771 else if (!options.es && options.version < 400)
6772 {
6773 if (!expression_is_constant_null(comp))
6774 SPIRV_CROSS_THROW("textureGather with component requires GLSL 400.");
6775 require_extension_internal("GL_ARB_texture_gather");
6776 }
6777 break;
6778
6779 case OpImageFetch:
6780 case OpImageSparseFetch:
6781 case OpImageRead: // Reads == fetches in Metal (other langs will not get here)
6782 opt = &ops[4];
6783 length -= 4;
6784 fetch = true;
6785 break;
6786
6787 case OpImageSampleProjImplicitLod:
6788 case OpImageSampleProjExplicitLod:
6789 case OpImageSparseSampleProjImplicitLod:
6790 case OpImageSparseSampleProjExplicitLod:
6791 opt = &ops[4];
6792 length -= 4;
6793 proj = true;
6794 break;
6795
6796 default:
6797 opt = &ops[4];
6798 length -= 4;
6799 break;
6800 }
6801
6802 // Bypass pointers because we need the real image struct
6803 auto &type = expression_type(img);
6804 auto &imgtype = get<SPIRType>(type.self);
6805
6806 uint32_t coord_components = 0;
6807 switch (imgtype.image.dim)
6808 {
6809 case spv::Dim1D:
6810 coord_components = 1;
6811 break;
6812 case spv::Dim2D:
6813 coord_components = 2;
6814 break;
6815 case spv::Dim3D:
6816 coord_components = 3;
6817 break;
6818 case spv::DimCube:
6819 coord_components = 3;
6820 break;
6821 case spv::DimBuffer:
6822 coord_components = 1;
6823 break;
6824 default:
6825 coord_components = 2;
6826 break;
6827 }
6828
6829 if (dref)
6830 inherited_expressions.push_back(dref);
6831
6832 if (proj)
6833 coord_components++;
6834 if (imgtype.image.arrayed)
6835 coord_components++;
6836
6837 uint32_t bias = 0;
6838 uint32_t lod = 0;
6839 uint32_t grad_x = 0;
6840 uint32_t grad_y = 0;
6841 uint32_t coffset = 0;
6842 uint32_t offset = 0;
6843 uint32_t coffsets = 0;
6844 uint32_t sample = 0;
6845 uint32_t minlod = 0;
6846 uint32_t flags = 0;
6847
6848 if (length)
6849 {
6850 flags = *opt++;
6851 length--;
6852 }
6853
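	// Optional image operands are consumed in the bit order of the ImageOperands mask,
	// i.e. Bias, Lod, Grad, ConstOffset, Offset, ConstOffsets, Sample, MinLod,
	// matching the order the SPIR-V specification requires them to appear in.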
6854 auto test = [&](uint32_t &v, uint32_t flag) {
6855 if (length && (flags & flag))
6856 {
6857 v = *opt++;
6858 inherited_expressions.push_back(v);
6859 length--;
6860 }
6861 };
6862
6863 test(bias, ImageOperandsBiasMask);
6864 test(lod, ImageOperandsLodMask);
6865 test(grad_x, ImageOperandsGradMask);
6866 test(grad_y, ImageOperandsGradMask);
6867 test(coffset, ImageOperandsConstOffsetMask);
6868 test(offset, ImageOperandsOffsetMask);
6869 test(coffsets, ImageOperandsConstOffsetsMask);
6870 test(sample, ImageOperandsSampleMask);
6871 test(minlod, ImageOperandsMinLodMask);
6872
6873 TextureFunctionBaseArguments base_args = {};
6874 base_args.img = img;
6875 base_args.imgtype = &imgtype;
6876 base_args.is_fetch = fetch != 0;
6877 base_args.is_gather = gather != 0;
6878 base_args.is_proj = proj != 0;
6879
6880 string expr;
6881 TextureFunctionNameArguments name_args = {};
6882
6883 name_args.base = base_args;
6884 name_args.has_array_offsets = coffsets != 0;
6885 name_args.has_offset = coffset != 0 || offset != 0;
6886 name_args.has_grad = grad_x != 0 || grad_y != 0;
6887 name_args.has_dref = dref != 0;
6888 name_args.is_sparse_feedback = sparse;
6889 name_args.has_min_lod = minlod != 0;
6890 name_args.lod = lod;
6891 expr += to_function_name(name_args);
6892 expr += "(";
6893
6894 uint32_t sparse_texel_id = 0;
6895 if (sparse)
6896 sparse_texel_id = get_sparse_feedback_texel_id(ops[1]);
6897
6898 TextureFunctionArguments args = {};
6899 args.base = base_args;
6900 args.coord = coord;
6901 args.coord_components = coord_components;
6902 args.dref = dref;
6903 args.grad_x = grad_x;
6904 args.grad_y = grad_y;
6905 args.lod = lod;
6906 args.coffset = coffset;
6907 args.offset = offset;
6908 args.bias = bias;
6909 args.component = comp;
6910 args.sample = sample;
6911 args.sparse_texel = sparse_texel_id;
6912 args.min_lod = minlod;
6913 args.nonuniform_expression = nonuniform_expression;
6914 expr += to_function_args(args, forward);
6915 expr += ")";
6916
6917 // texture(samplerXShadow) returns float. shadowX() returns vec4. Swizzle here.
6918 if (is_legacy() && is_depth_image(imgtype, img))
6919 expr += ".r";
6920
6921 // Sampling from a texture which was deduced to be a depth image might actually return 1 component here.
6922 // Remap back to 4 components as the sampling opcodes expect.
6923 if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op))
6924 {
6925 bool image_is_depth = false;
6926 const auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
6927 VariableID image_id = combined ? combined->image : img;
6928
6929 if (combined && is_depth_image(imgtype, combined->image))
6930 image_is_depth = true;
6931 else if (is_depth_image(imgtype, img))
6932 image_is_depth = true;
6933
6934 // We must also check the backing variable for the image.
6935 // We might have loaded an OpImage, and used that handle for two different purposes.
6936 // Once with comparison, once without.
6937 auto *image_variable = maybe_get_backing_variable(image_id);
6938 if (image_variable && is_depth_image(get<SPIRType>(image_variable->basetype), image_variable->self))
6939 image_is_depth = true;
6940
6941 if (image_is_depth)
6942 expr = remap_swizzle(result_type, 1, expr);
6943 }
6944
6945 if (!sparse && !backend.support_small_type_sampling_result && result_type.width < 32)
6946 {
6947 // Just value cast (narrowing) to the expected type since we cannot rely on narrowing to work automatically.
6948 // Hopefully the compiler picks this up and converts the texturing instruction to the appropriate precision.
6949 expr = join(type_to_glsl_constructor(result_type), "(", expr, ")");
6950 }
6951
6952 // Deals with reads from MSL. We might need to downconvert to fewer components.
6953 if (op == OpImageRead)
6954 expr = remap_swizzle(result_type, 4, expr);
6955
6956 return expr;
6957}
6958
6959bool CompilerGLSL::expression_is_constant_null(uint32_t id) const
6960{
6961 auto *c = maybe_get<SPIRConstant>(id);
6962 if (!c)
6963 return false;
6964 return c->constant_is_null();
6965}
6966
6967bool CompilerGLSL::expression_is_non_value_type_array(uint32_t ptr)
6968{
6969 auto &type = expression_type(ptr);
6970 if (type.array.empty())
6971 return false;
6972
6973 if (!backend.array_is_value_type)
6974 return true;
6975
6976 auto *var = maybe_get_backing_variable(ptr);
6977 if (!var)
6978 return false;
6979
6980 auto &backed_type = get<SPIRType>(var->basetype);
6981 return !backend.array_is_value_type_in_buffer_blocks && backed_type.basetype == SPIRType::Struct &&
6982 has_member_decoration(backed_type.self, 0, DecorationOffset);
6983}
6984
6985// Returns the function name for a texture sampling function for the specified image and sampling characteristics.
6986// For some subclasses, the function is a method on the specified image.
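// As a rough illustration of how the pieces concatenate:
//   sparse + Texture + Grad + Offset + Clamp + ARB -> "sparseTextureGradOffsetClampARB"
// with the result remapped through legacy_tex_op() on legacy targets.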
6987string CompilerGLSL::to_function_name(const TextureFunctionNameArguments &args)
6988{
6989 if (args.has_min_lod)
6990 {
6991 if (options.es)
6992 SPIRV_CROSS_THROW("Sparse residency is not supported in ESSL.");
6993 require_extension_internal("GL_ARB_sparse_texture_clamp");
6994 }
6995
6996 string fname;
6997 auto &imgtype = *args.base.imgtype;
6998 VariableID tex = args.base.img;
6999
7000 // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
7001 // To emulate this, we will have to use textureGrad with a constant gradient of 0.
7002 // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
7003 // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
7004 bool workaround_lod_array_shadow_as_grad = false;
7005 if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
7006 is_depth_image(imgtype, tex) && args.lod)
7007 {
7008 if (!expression_is_constant_null(args.lod))
7009 {
7010 SPIRV_CROSS_THROW("textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be "
7011 "expressed in GLSL.");
7012 }
7013 workaround_lod_array_shadow_as_grad = true;
7014 }
7015
7016 if (args.is_sparse_feedback)
7017 fname += "sparse";
7018
7019 if (args.base.is_fetch)
7020 fname += args.is_sparse_feedback ? "TexelFetch" : "texelFetch";
7021 else
7022 {
7023 fname += args.is_sparse_feedback ? "Texture" : "texture";
7024
7025 if (args.base.is_gather)
7026 fname += "Gather";
7027 if (args.has_array_offsets)
7028 fname += "Offsets";
7029 if (args.base.is_proj)
7030 fname += "Proj";
7031 if (args.has_grad || workaround_lod_array_shadow_as_grad)
7032 fname += "Grad";
7033 if (args.lod != 0 && !workaround_lod_array_shadow_as_grad)
7034 fname += "Lod";
7035 }
7036
7037 if (args.has_offset)
7038 fname += "Offset";
7039
7040 if (args.has_min_lod)
7041 fname += "Clamp";
7042
7043 if (args.is_sparse_feedback || args.has_min_lod)
7044 fname += "ARB";
7045
7046 return (is_legacy() && !args.base.is_gather) ? legacy_tex_op(fname, imgtype, tex) : fname;
7047}
7048
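// Builds the expression used when fetching from a separate (non-combined) image.
// With Vulkan semantics this either constructs a sampled image with the internal dummy sampler, roughly
//   texture2D uTex; sampler _dummy;  ->  sampler2D(uTex, _dummy)   (names illustrative),
// or relies on GL_EXT_samplerless_texture_functions. For plain GLSL targets the dummy sampler must have
// been created through build_dummy_sampler_for_combined_images() so the pair can be remapped.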
7049std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id)
7050{
7051 auto *var = maybe_get_backing_variable(id);
7052
7053 // If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL.
7054 // In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions.
7055 if (var)
7056 {
7057 auto &type = get<SPIRType>(var->basetype);
7058 if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
7059 {
7060 if (options.vulkan_semantics)
7061 {
7062 if (dummy_sampler_id)
7063 {
7064 // Don't need to consider Shadow state since the dummy sampler is always non-shadow.
7065 auto sampled_type = type;
7066 sampled_type.basetype = SPIRType::SampledImage;
7067 return join(type_to_glsl(sampled_type), "(", to_non_uniform_aware_expression(id), ", ",
7068 to_expression(dummy_sampler_id), ")");
7069 }
7070 else
7071 {
7072 // Newer glslang supports this extension to deal with texture2D as argument to texture functions.
7073 require_extension_internal("GL_EXT_samplerless_texture_functions");
7074 }
7075 }
7076 else
7077 {
7078 if (!dummy_sampler_id)
7079 SPIRV_CROSS_THROW("Cannot find dummy sampler ID. Was "
7080 "build_dummy_sampler_for_combined_images() called?");
7081
7082 return to_combined_image_sampler(id, dummy_sampler_id);
7083 }
7084 }
7085 }
7086
7087 return to_non_uniform_aware_expression(id);
7088}
7089
7090// Returns the function args for a texture sampling function for the specified image and sampling characteristics.
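// Arguments are assembled in a fixed order: image/sampler expression, coordinate (with dref folded into
// a merged vector where GLSL expects it), gradients, LOD, offset, sample index, min LOD, sparse texel
// out-parameter, then bias/component. Sketch of a typical shadow-compare call (names illustrative):
//   texture(uShadowSampler, vec3(uv, dref))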
7091string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward)
7092{
7093 VariableID img = args.base.img;
7094 auto &imgtype = *args.base.imgtype;
7095
7096 string farg_str;
7097 if (args.base.is_fetch)
7098 farg_str = convert_separate_image_to_expression(img);
7099 else
7100 farg_str = to_non_uniform_aware_expression(img);
7101
7102 if (args.nonuniform_expression && farg_str.find_first_of('[') != string::npos)
7103 {
7104 // Only emit nonuniformEXT() wrapper if the underlying expression is arrayed in some way.
7105 farg_str = join(backend.nonuniform_qualifier, "(", farg_str, ")");
7106 }
7107
7108 bool swizz_func = backend.swizzle_is_function;
7109 auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * {
7110 if (comps == in_comps)
7111 return "";
7112
7113 switch (comps)
7114 {
7115 case 1:
7116 return ".x";
7117 case 2:
7118 return swizz_func ? ".xy()" : ".xy";
7119 case 3:
7120 return swizz_func ? ".xyz()" : ".xyz";
7121 default:
7122 return "";
7123 }
7124 };
7125
7126 bool forward = should_forward(args.coord);
7127
7128 // The IR can give us more components than we need, so chop them off as needed.
7129 auto swizzle_expr = swizzle(args.coord_components, expression_type(args.coord).vecsize);
7130 // Only enclose the UV expression if needed.
7131 auto coord_expr =
7132 (*swizzle_expr == '\0') ? to_expression(args.coord) : (to_enclosed_expression(args.coord) + swizzle_expr);
7133
7134 // texelFetch only takes int, not uint.
7135 auto &coord_type = expression_type(args.coord);
7136 if (coord_type.basetype == SPIRType::UInt)
7137 {
7138 auto expected_type = coord_type;
7139 expected_type.vecsize = args.coord_components;
7140 expected_type.basetype = SPIRType::Int;
7141 coord_expr = bitcast_expression(expected_type, coord_type.basetype, coord_expr);
7142 }
7143
7144 // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
7145 // To emulate this, we will have to use textureGrad with a constant gradient of 0.
7146 // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
7147 // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
7148 bool workaround_lod_array_shadow_as_grad =
7149 ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
7150 is_depth_image(imgtype, img) && args.lod != 0;
7151
7152 if (args.dref)
7153 {
7154 forward = forward && should_forward(args.dref);
7155
7156 // SPIR-V splits dref and coordinate.
7157 if (args.base.is_gather ||
7158 args.coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather.
7159 {
7160 farg_str += ", ";
7161 farg_str += to_expression(args.coord);
7162 farg_str += ", ";
7163 farg_str += to_expression(args.dref);
7164 }
7165 else if (args.base.is_proj)
7166 {
7167 // Have to reshuffle so we get vec4(coord, dref, proj); this is a special case.
7168 // Other shading languages split up the coordinate and compare value arguments like SPIR-V does.
7169 // The coordinate type for textureProj shadow is always vec4, even for sampler1DShadow.
7170 farg_str += ", vec4(";
7171
7172 if (imgtype.image.dim == Dim1D)
7173 {
7174 // Could reuse coord_expr, but that would mess up the temporary usage checking.
7175 farg_str += to_enclosed_expression(args.coord) + ".x";
7176 farg_str += ", ";
7177 farg_str += "0.0, ";
7178 farg_str += to_expression(args.dref);
7179 farg_str += ", ";
7180 farg_str += to_enclosed_expression(args.coord) + ".y)";
7181 }
7182 else if (imgtype.image.dim == Dim2D)
7183 {
7184 // Could reuse coord_expr, but that would mess up the temporary usage checking.
7185 farg_str += to_enclosed_expression(args.coord) + (swizz_func ? ".xy()" : ".xy");
7186 farg_str += ", ";
7187 farg_str += to_expression(args.dref);
7188 farg_str += ", ";
7189 farg_str += to_enclosed_expression(args.coord) + ".z)";
7190 }
7191 else
7192 SPIRV_CROSS_THROW("Invalid type for textureProj with shadow.");
7193 }
7194 else
7195 {
7196 // Create a composite which merges coord/dref into a single vector.
7197 auto type = expression_type(args.coord);
7198 type.vecsize = args.coord_components + 1;
7199 farg_str += ", ";
7200 farg_str += type_to_glsl_constructor(type);
7201 farg_str += "(";
7202 farg_str += coord_expr;
7203 farg_str += ", ";
7204 farg_str += to_expression(args.dref);
7205 farg_str += ")";
7206 }
7207 }
7208 else
7209 {
7210 farg_str += ", ";
7211 farg_str += coord_expr;
7212 }
7213
7214 if (args.grad_x || args.grad_y)
7215 {
7216 forward = forward && should_forward(args.grad_x);
7217 forward = forward && should_forward(args.grad_y);
7218 farg_str += ", ";
7219 farg_str += to_expression(args.grad_x);
7220 farg_str += ", ";
7221 farg_str += to_expression(args.grad_y);
7222 }
7223
7224 if (args.lod)
7225 {
7226 if (workaround_lod_array_shadow_as_grad)
7227 {
7228 // Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0.
7229 // Implementing this as plain texture() is not safe on some implementations.
7230 if (imgtype.image.dim == Dim2D)
7231 farg_str += ", vec2(0.0), vec2(0.0)";
7232 else if (imgtype.image.dim == DimCube)
7233 farg_str += ", vec3(0.0), vec3(0.0)";
7234 }
7235 else
7236 {
7237 forward = forward && should_forward(args.lod);
7238 farg_str += ", ";
7239
7240 auto &lod_expr_type = expression_type(args.lod);
7241
7242 // Lod expression for TexelFetch in GLSL must be int, and only int.
7243 if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms &&
7244 lod_expr_type.basetype != SPIRType::Int)
7245 {
7246 farg_str += join("int(", to_expression(args.lod), ")");
7247 }
7248 else
7249 {
7250 farg_str += to_expression(args.lod);
7251 }
7252 }
7253 }
7254 else if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
7255 {
7256 // The Lod argument is optional in OpImageFetch, but texelFetch() requires an LOD value, so pick 0 as the default.
7257 farg_str += ", 0";
7258 }
7259
7260 if (args.coffset)
7261 {
7262 forward = forward && should_forward(args.coffset);
7263 farg_str += ", ";
7264 farg_str += to_expression(args.coffset);
7265 }
7266 else if (args.offset)
7267 {
7268 forward = forward && should_forward(args.offset);
7269 farg_str += ", ";
7270 farg_str += to_expression(args.offset);
7271 }
7272
7273 if (args.sample)
7274 {
7275 farg_str += ", ";
7276 farg_str += to_expression(args.sample);
7277 }
7278
7279 if (args.min_lod)
7280 {
7281 farg_str += ", ";
7282 farg_str += to_expression(args.min_lod);
7283 }
7284
7285 if (args.sparse_texel)
7286 {
7287 // The sparse texel output parameter comes after everything else, except that it goes before the optional component/bias arguments.
7288 farg_str += ", ";
7289 farg_str += to_expression(args.sparse_texel);
7290 }
7291
7292 if (args.bias)
7293 {
7294 forward = forward && should_forward(args.bias);
7295 farg_str += ", ";
7296 farg_str += to_expression(args.bias);
7297 }
7298
7299 if (args.component && !expression_is_constant_null(args.component))
7300 {
7301 forward = forward && should_forward(args.component);
7302 farg_str += ", ";
7303 auto &component_type = expression_type(args.component);
7304 if (component_type.basetype == SPIRType::Int)
7305 farg_str += to_expression(args.component);
7306 else
7307 farg_str += join("int(", to_expression(args.component), ")");
7308 }
7309
7310 *p_forward = forward;
7311
7312 return farg_str;
7313}
7314
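// Dispatches GLSL.std.450 extended instructions. Most map 1:1 to a GLSL builtin; the interesting cases
// are legacy fallbacks and implicit sign casts, e.g. (illustrative):
//   GLSLstd450Round on legacy targets -> floor(x + vec4(0.5))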
7315void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length)
7316{
7317 auto op = static_cast<GLSLstd450>(eop);
7318
7319 if (is_legacy() && is_unsigned_glsl_opcode(op))
7320 SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets.");
7321
7322 // If we need to do implicit bitcasts, make sure we do it with the correct type.
7323 uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, length);
7324 auto int_type = to_signed_basetype(integer_width);
7325 auto uint_type = to_unsigned_basetype(integer_width);
7326
7327 switch (op)
7328 {
7329 // FP fiddling
7330 case GLSLstd450Round:
7331 if (!is_legacy())
7332 emit_unary_func_op(result_type, id, args[0], "round");
7333 else
7334 {
7335 auto op0 = to_enclosed_expression(args[0]);
7336 auto &op0_type = expression_type(args[0]);
7337 auto expr = join("floor(", op0, " + ", type_to_glsl_constructor(op0_type), "(0.5))");
7338 bool forward = should_forward(args[0]);
7339 emit_op(result_type, id, expr, forward);
7340 inherit_expression_dependencies(id, args[0]);
7341 }
7342 break;
7343
7344 case GLSLstd450RoundEven:
7345 if (!is_legacy())
7346 emit_unary_func_op(result_type, id, args[0], "roundEven");
7347 else if (!options.es)
7348 {
7349 // This extension provides round() with round-to-even semantics.
7350 require_extension_internal("GL_EXT_gpu_shader4");
7351 emit_unary_func_op(result_type, id, args[0], "round");
7352 }
7353 else
7354 SPIRV_CROSS_THROW("roundEven supported only in ESSL 300.");
7355 break;
7356
7357 case GLSLstd450Trunc:
7358 emit_unary_func_op(result_type, id, args[0], "trunc");
7359 break;
7360 case GLSLstd450SAbs:
7361 emit_unary_func_op_cast(result_type, id, args[0], "abs", int_type, int_type);
7362 break;
7363 case GLSLstd450FAbs:
7364 emit_unary_func_op(result_type, id, args[0], "abs");
7365 break;
7366 case GLSLstd450SSign:
7367 emit_unary_func_op_cast(result_type, id, args[0], "sign", int_type, int_type);
7368 break;
7369 case GLSLstd450FSign:
7370 emit_unary_func_op(result_type, id, args[0], "sign");
7371 break;
7372 case GLSLstd450Floor:
7373 emit_unary_func_op(result_type, id, args[0], "floor");
7374 break;
7375 case GLSLstd450Ceil:
7376 emit_unary_func_op(result_type, id, args[0], "ceil");
7377 break;
7378 case GLSLstd450Fract:
7379 emit_unary_func_op(result_type, id, args[0], "fract");
7380 break;
7381 case GLSLstd450Radians:
7382 emit_unary_func_op(result_type, id, args[0], "radians");
7383 break;
7384 case GLSLstd450Degrees:
7385 emit_unary_func_op(result_type, id, args[0], "degrees");
7386 break;
7387 case GLSLstd450Fma:
7388 if ((!options.es && options.version < 400) || (options.es && options.version < 320))
7389 {
7390 auto expr = join(to_enclosed_expression(args[0]), " * ", to_enclosed_expression(args[1]), " + ",
7391 to_enclosed_expression(args[2]));
7392
7393 emit_op(result_type, id, expr,
7394 should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2]));
7395 for (uint32_t i = 0; i < 3; i++)
7396 inherit_expression_dependencies(id, args[i]);
7397 }
7398 else
7399 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma");
7400 break;
7401 case GLSLstd450Modf:
7402 register_call_out_argument(args[1]);
7403 forced_temporaries.insert(id);
7404 emit_binary_func_op(result_type, id, args[0], args[1], "modf");
7405 break;
7406
7407 case GLSLstd450ModfStruct:
7408 {
7409 auto &type = get<SPIRType>(result_type);
7410 emit_uninitialized_temporary_expression(result_type, id);
7411 statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ",
7412 to_expression(id), ".", to_member_name(type, 1), ");");
7413 break;
7414 }
7415
7416 // Minmax
7417 case GLSLstd450UMin:
7418 emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", uint_type, false);
7419 break;
7420
7421 case GLSLstd450SMin:
7422 emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", int_type, false);
7423 break;
7424
7425 case GLSLstd450FMin:
7426 emit_binary_func_op(result_type, id, args[0], args[1], "min");
7427 break;
7428
7429 case GLSLstd450FMax:
7430 emit_binary_func_op(result_type, id, args[0], args[1], "max");
7431 break;
7432
7433 case GLSLstd450UMax:
7434 emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", uint_type, false);
7435 break;
7436
7437 case GLSLstd450SMax:
7438 emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", int_type, false);
7439 break;
7440
7441 case GLSLstd450FClamp:
7442 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
7443 break;
7444
7445 case GLSLstd450UClamp:
7446 emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", uint_type);
7447 break;
7448
7449 case GLSLstd450SClamp:
7450 emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", int_type);
7451 break;
7452
7453 // Trig
7454 case GLSLstd450Sin:
7455 emit_unary_func_op(result_type, id, args[0], "sin");
7456 break;
7457 case GLSLstd450Cos:
7458 emit_unary_func_op(result_type, id, args[0], "cos");
7459 break;
7460 case GLSLstd450Tan:
7461 emit_unary_func_op(result_type, id, args[0], "tan");
7462 break;
7463 case GLSLstd450Asin:
7464 emit_unary_func_op(result_type, id, args[0], "asin");
7465 break;
7466 case GLSLstd450Acos:
7467 emit_unary_func_op(result_type, id, args[0], "acos");
7468 break;
7469 case GLSLstd450Atan:
7470 emit_unary_func_op(result_type, id, args[0], "atan");
7471 break;
7472 case GLSLstd450Sinh:
7473 emit_unary_func_op(result_type, id, args[0], "sinh");
7474 break;
7475 case GLSLstd450Cosh:
7476 emit_unary_func_op(result_type, id, args[0], "cosh");
7477 break;
7478 case GLSLstd450Tanh:
7479 emit_unary_func_op(result_type, id, args[0], "tanh");
7480 break;
7481 case GLSLstd450Asinh:
7482 emit_unary_func_op(result_type, id, args[0], "asinh");
7483 break;
7484 case GLSLstd450Acosh:
7485 emit_unary_func_op(result_type, id, args[0], "acosh");
7486 break;
7487 case GLSLstd450Atanh:
7488 emit_unary_func_op(result_type, id, args[0], "atanh");
7489 break;
7490 case GLSLstd450Atan2:
7491 emit_binary_func_op(result_type, id, args[0], args[1], "atan");
7492 break;
7493
7494 // Exponentials
7495 case GLSLstd450Pow:
7496 emit_binary_func_op(result_type, id, args[0], args[1], "pow");
7497 break;
7498 case GLSLstd450Exp:
7499 emit_unary_func_op(result_type, id, args[0], "exp");
7500 break;
7501 case GLSLstd450Log:
7502 emit_unary_func_op(result_type, id, args[0], "log");
7503 break;
7504 case GLSLstd450Exp2:
7505 emit_unary_func_op(result_type, id, args[0], "exp2");
7506 break;
7507 case GLSLstd450Log2:
7508 emit_unary_func_op(result_type, id, args[0], "log2");
7509 break;
7510 case GLSLstd450Sqrt:
7511 emit_unary_func_op(result_type, id, args[0], "sqrt");
7512 break;
7513 case GLSLstd450InverseSqrt:
7514 emit_unary_func_op(result_type, id, args[0], "inversesqrt");
7515 break;
7516
7517 // Matrix math
7518 case GLSLstd450Determinant:
7519 emit_unary_func_op(result_type, id, args[0], "determinant");
7520 break;
7521 case GLSLstd450MatrixInverse:
7522 emit_unary_func_op(result_type, id, args[0], "inverse");
7523 break;
7524
7525 // Lerping
7526 case GLSLstd450FMix:
7527 case GLSLstd450IMix:
7528 {
7529 emit_mix_op(result_type, id, args[0], args[1], args[2]);
7530 break;
7531 }
7532 case GLSLstd450Step:
7533 emit_binary_func_op(result_type, id, args[0], args[1], "step");
7534 break;
7535 case GLSLstd450SmoothStep:
7536 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep");
7537 break;
7538
7539 // Packing
7540 case GLSLstd450Frexp:
7541 register_call_out_argument(args[1]);
7542 forced_temporaries.insert(id);
7543 emit_binary_func_op(result_type, id, args[0], args[1], "frexp");
7544 break;
7545
7546 case GLSLstd450FrexpStruct:
7547 {
7548 auto &type = get<SPIRType>(result_type);
7549 emit_uninitialized_temporary_expression(result_type, id);
7550 statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ",
7551 to_expression(id), ".", to_member_name(type, 1), ");");
7552 break;
7553 }
7554
7555 case GLSLstd450Ldexp:
7556 {
7557 bool forward = should_forward(args[0]) && should_forward(args[1]);
7558
7559 auto op0 = to_unpacked_expression(args[0]);
7560 auto op1 = to_unpacked_expression(args[1]);
7561 auto &op1_type = expression_type(args[1]);
7562 if (op1_type.basetype != SPIRType::Int)
7563 {
7564 // Need a value cast here.
7565 auto target_type = op1_type;
7566 target_type.basetype = SPIRType::Int;
7567 op1 = join(type_to_glsl_constructor(target_type), "(", op1, ")");
7568 }
7569
7570 auto expr = join("ldexp(", op0, ", ", op1, ")");
7571
7572 emit_op(result_type, id, expr, forward);
7573 inherit_expression_dependencies(id, args[0]);
7574 inherit_expression_dependencies(id, args[1]);
7575 break;
7576 }
7577
7578 case GLSLstd450PackSnorm4x8:
7579 emit_unary_func_op(result_type, id, args[0], "packSnorm4x8");
7580 break;
7581 case GLSLstd450PackUnorm4x8:
7582 emit_unary_func_op(result_type, id, args[0], "packUnorm4x8");
7583 break;
7584 case GLSLstd450PackSnorm2x16:
7585 emit_unary_func_op(result_type, id, args[0], "packSnorm2x16");
7586 break;
7587 case GLSLstd450PackUnorm2x16:
7588 emit_unary_func_op(result_type, id, args[0], "packUnorm2x16");
7589 break;
7590 case GLSLstd450PackHalf2x16:
7591 emit_unary_func_op(result_type, id, args[0], "packHalf2x16");
7592 break;
7593 case GLSLstd450UnpackSnorm4x8:
7594 emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8");
7595 break;
7596 case GLSLstd450UnpackUnorm4x8:
7597 emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8");
7598 break;
7599 case GLSLstd450UnpackSnorm2x16:
7600 emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16");
7601 break;
7602 case GLSLstd450UnpackUnorm2x16:
7603 emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16");
7604 break;
7605 case GLSLstd450UnpackHalf2x16:
7606 emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16");
7607 break;
7608
7609 case GLSLstd450PackDouble2x32:
7610 emit_unary_func_op(result_type, id, args[0], "packDouble2x32");
7611 break;
7612 case GLSLstd450UnpackDouble2x32:
7613 emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32");
7614 break;
7615
7616 // Vector math
7617 case GLSLstd450Length:
7618 emit_unary_func_op(result_type, id, args[0], "length");
7619 break;
7620 case GLSLstd450Distance:
7621 emit_binary_func_op(result_type, id, args[0], args[1], "distance");
7622 break;
7623 case GLSLstd450Cross:
7624 emit_binary_func_op(result_type, id, args[0], args[1], "cross");
7625 break;
7626 case GLSLstd450Normalize:
7627 emit_unary_func_op(result_type, id, args[0], "normalize");
7628 break;
7629 case GLSLstd450FaceForward:
7630 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward");
7631 break;
7632 case GLSLstd450Reflect:
7633 emit_binary_func_op(result_type, id, args[0], args[1], "reflect");
7634 break;
7635 case GLSLstd450Refract:
7636 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract");
7637 break;
7638
7639 // Bit-fiddling
7640 case GLSLstd450FindILsb:
7641 // findLSB always returns int.
7642 emit_unary_func_op_cast(result_type, id, args[0], "findLSB", expression_type(args[0]).basetype, int_type);
7643 break;
7644
7645 case GLSLstd450FindSMsb:
7646 emit_unary_func_op_cast(result_type, id, args[0], "findMSB", int_type, int_type);
7647 break;
7648
7649 case GLSLstd450FindUMsb:
7650 emit_unary_func_op_cast(result_type, id, args[0], "findMSB", uint_type,
7651 int_type); // findMSB always returns int.
7652 break;
7653
7654 // Multisampled varying
7655 case GLSLstd450InterpolateAtCentroid:
7656 emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid");
7657 break;
7658 case GLSLstd450InterpolateAtSample:
7659 emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample");
7660 break;
7661 case GLSLstd450InterpolateAtOffset:
7662 emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset");
7663 break;
7664
7665 case GLSLstd450NMin:
7666 case GLSLstd450NMax:
7667 {
7668 emit_nminmax_op(result_type, id, args[0], args[1], op);
7669 break;
7670 }
7671
7672 case GLSLstd450NClamp:
7673 {
7674 // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op.
7675 // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags.
7676 uint32_t &max_id = extra_sub_expressions[id | EXTRA_SUB_EXPRESSION_TYPE_AUX];
7677 if (!max_id)
7678 max_id = ir.increase_bound_by(1);
7679
7680 // Inherit precision qualifiers.
7681 ir.meta[max_id] = ir.meta[id];
7682
7683 emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax);
7684 emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin);
7685 break;
7686 }
7687
7688 default:
7689 statement("// unimplemented GLSL op ", eop);
7690 break;
7691 }
7692}
7693
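// Emulates NMin/NMax, which must return the non-NaN operand when the other operand is NaN,
// something plain GLSL min()/max() leave undefined. Conceptually (illustrative temporaries):
//   tmp    = min(a, b);             // or max() for NMax
//   tmp2   = isnan(a) ? b : tmp;
//   result = isnan(b) ? a : tmp2;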
7694void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op)
7695{
7696 // Need to emulate this call.
7697 uint32_t &ids = extra_sub_expressions[id];
7698 if (!ids)
7699 {
7700 ids = ir.increase_bound_by(5);
7701 auto btype = get<SPIRType>(result_type);
7702 btype.basetype = SPIRType::Boolean;
7703 set<SPIRType>(ids, btype);
7704 }
7705
7706 uint32_t btype_id = ids + 0;
7707 uint32_t left_nan_id = ids + 1;
7708 uint32_t right_nan_id = ids + 2;
7709 uint32_t tmp_id = ids + 3;
7710 uint32_t mixed_first_id = ids + 4;
7711
7712 // Inherit precision qualifiers.
7713 ir.meta[tmp_id] = ir.meta[id];
7714 ir.meta[mixed_first_id] = ir.meta[id];
7715
7716 emit_unary_func_op(btype_id, left_nan_id, op0, "isnan");
7717 emit_unary_func_op(btype_id, right_nan_id, op1, "isnan");
7718 emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max");
7719 emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id);
7720 emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id);
7721}
7722
7723void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
7724 uint32_t)
7725{
7726 require_extension_internal("GL_AMD_shader_ballot");
7727
7728 enum AMDShaderBallot
7729 {
7730 SwizzleInvocationsAMD = 1,
7731 SwizzleInvocationsMaskedAMD = 2,
7732 WriteInvocationAMD = 3,
7733 MbcntAMD = 4
7734 };
7735
7736 auto op = static_cast<AMDShaderBallot>(eop);
7737
7738 switch (op)
7739 {
7740 case SwizzleInvocationsAMD:
7741 emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD");
7742 register_control_dependent_expression(id);
7743 break;
7744
7745 case SwizzleInvocationsMaskedAMD:
7746 emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD");
7747 register_control_dependent_expression(id);
7748 break;
7749
7750 case WriteInvocationAMD:
7751 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD");
7752 register_control_dependent_expression(id);
7753 break;
7754
7755 case MbcntAMD:
7756 emit_unary_func_op(result_type, id, args[0], "mbcntAMD");
7757 register_control_dependent_expression(id);
7758 break;
7759
7760 default:
7761 statement("// unimplemented SPV AMD shader ballot op ", eop);
7762 break;
7763 }
7764}
7765
7766void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop,
7767 const uint32_t *args, uint32_t)
7768{
7769 require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");
7770
7771 enum AMDShaderExplicitVertexParameter
7772 {
7773 InterpolateAtVertexAMD = 1
7774 };
7775
7776 auto op = static_cast<AMDShaderExplicitVertexParameter>(eop);
7777
7778 switch (op)
7779 {
7780 case InterpolateAtVertexAMD:
7781 emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD");
7782 break;
7783
7784 default:
7785 statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop);
7786 break;
7787 }
7788}
7789
7790void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop,
7791 const uint32_t *args, uint32_t)
7792{
7793 require_extension_internal("GL_AMD_shader_trinary_minmax");
7794
7795 enum AMDShaderTrinaryMinMax
7796 {
7797 FMin3AMD = 1,
7798 UMin3AMD = 2,
7799 SMin3AMD = 3,
7800 FMax3AMD = 4,
7801 UMax3AMD = 5,
7802 SMax3AMD = 6,
7803 FMid3AMD = 7,
7804 UMid3AMD = 8,
7805 SMid3AMD = 9
7806 };
7807
7808 auto op = static_cast<AMDShaderTrinaryMinMax>(eop);
7809
7810 switch (op)
7811 {
7812 case FMin3AMD:
7813 case UMin3AMD:
7814 case SMin3AMD:
7815 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3");
7816 break;
7817
7818 case FMax3AMD:
7819 case UMax3AMD:
7820 case SMax3AMD:
7821 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3");
7822 break;
7823
7824 case FMid3AMD:
7825 case UMid3AMD:
7826 case SMid3AMD:
7827 emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3");
7828 break;
7829
7830 default:
7831 statement("// unimplemented SPV AMD shader trinary minmax op ", eop);
7832 break;
7833 }
7834}
7835
7836void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
7837 uint32_t)
7838{
7839 require_extension_internal("GL_AMD_gcn_shader");
7840
7841 enum AMDGCNShader
7842 {
7843 CubeFaceIndexAMD = 1,
7844 CubeFaceCoordAMD = 2,
7845 TimeAMD = 3
7846 };
7847
7848 auto op = static_cast<AMDGCNShader>(eop);
7849
7850 switch (op)
7851 {
7852 case CubeFaceIndexAMD:
7853 emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD");
7854 break;
7855 case CubeFaceCoordAMD:
7856 emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD");
7857 break;
7858 case TimeAMD:
7859 {
7860 string expr = "timeAMD()";
7861 emit_op(result_type, id, expr, true);
7862 register_control_dependent_expression(id);
7863 break;
7864 }
7865
7866 default:
7867 statement("// unimplemented SPV AMD gcn shader op ", eop);
7868 break;
7869 }
7870}
7871
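// Emits OpGroupNonUniform* instructions. With Vulkan semantics these map to GL_KHR_shader_subgroup_*
// builtins such as subgroupBroadcastFirst(value); for plain OpenGL only the subset accepted by
// is_supported_subgroup_op_in_opengl() is allowed, and support is requested via request_subgroup_feature().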
7872void CompilerGLSL::emit_subgroup_op(const Instruction &i)
7873{
7874 const uint32_t *ops = stream(i);
7875 auto op = static_cast<Op>(i.op);
7876
7877 if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op))
7878 SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics.");
7879
7880 // If we need to do implicit bitcasts, make sure we do it with the correct type.
7881 uint32_t integer_width = get_integer_width_for_instruction(i);
7882 auto int_type = to_signed_basetype(integer_width);
7883 auto uint_type = to_unsigned_basetype(integer_width);
7884
7885 switch (op)
7886 {
7887 case OpGroupNonUniformElect:
7888 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupElect);
7889 break;
7890
7891 case OpGroupNonUniformBallotBitCount:
7892 {
7893 const GroupOperation operation = static_cast<GroupOperation>(ops[3]);
7894 if (operation == GroupOperationReduce)
7895 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitCount);
7896 else if (operation == GroupOperationInclusiveScan || operation == GroupOperationExclusiveScan)
7897 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
7898 }
7899 break;
7900
7901 case OpGroupNonUniformBallotBitExtract:
7902 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitExtract);
7903 break;
7904
7905 case OpGroupNonUniformInverseBallot:
7906 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout);
7907 break;
7908
7909 case OpGroupNonUniformBallot:
7910 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallot);
7911 break;
7912
7913 case OpGroupNonUniformBallotFindLSB:
7914 case OpGroupNonUniformBallotFindMSB:
7915 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotFindLSB_MSB);
7916 break;
7917
7918 case OpGroupNonUniformBroadcast:
7919 case OpGroupNonUniformBroadcastFirst:
7920 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBroadcast_First);
7921 break;
7922
7923 case OpGroupNonUniformShuffle:
7924 case OpGroupNonUniformShuffleXor:
7925 require_extension_internal("GL_KHR_shader_subgroup_shuffle");
7926 break;
7927
7928 case OpGroupNonUniformShuffleUp:
7929 case OpGroupNonUniformShuffleDown:
7930 require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative");
7931 break;
7932
7933 case OpGroupNonUniformAll:
7934 case OpGroupNonUniformAny:
7935 case OpGroupNonUniformAllEqual:
7936 {
7937 const SPIRType &type = expression_type(ops[3]);
7938 if (type.basetype == SPIRType::BaseType::Boolean && type.vecsize == 1u)
7939 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAll_Any_AllEqualBool);
7940 else
7941 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAllEqualT);
7942 }
7943 break;
7944
7945 case OpGroupNonUniformFAdd:
7946 case OpGroupNonUniformFMul:
7947 case OpGroupNonUniformFMin:
7948 case OpGroupNonUniformFMax:
7949 case OpGroupNonUniformIAdd:
7950 case OpGroupNonUniformIMul:
7951 case OpGroupNonUniformSMin:
7952 case OpGroupNonUniformSMax:
7953 case OpGroupNonUniformUMin:
7954 case OpGroupNonUniformUMax:
7955 case OpGroupNonUniformBitwiseAnd:
7956 case OpGroupNonUniformBitwiseOr:
7957 case OpGroupNonUniformBitwiseXor:
7958 case OpGroupNonUniformLogicalAnd:
7959 case OpGroupNonUniformLogicalOr:
7960 case OpGroupNonUniformLogicalXor:
7961 {
7962 auto operation = static_cast<GroupOperation>(ops[3]);
7963 if (operation == GroupOperationClusteredReduce)
7964 {
7965 require_extension_internal("GL_KHR_shader_subgroup_clustered");
7966 }
7967 else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan ||
7968 operation == GroupOperationReduce)
7969 {
7970 require_extension_internal("GL_KHR_shader_subgroup_arithmetic");
7971 }
7972 else
7973 SPIRV_CROSS_THROW("Invalid group operation.");
7974 break;
7975 }
7976
7977 case OpGroupNonUniformQuadSwap:
7978 case OpGroupNonUniformQuadBroadcast:
7979 require_extension_internal("GL_KHR_shader_subgroup_quad");
7980 break;
7981
7982 default:
7983 SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
7984 }
7985
7986 uint32_t result_type = ops[0];
7987 uint32_t id = ops[1];
7988
7989 auto scope = static_cast<Scope>(evaluate_constant_u32(ops[2]));
7990 if (scope != ScopeSubgroup)
7991 SPIRV_CROSS_THROW("Only subgroup scope is supported.");
7992
7993 switch (op)
7994 {
7995 case OpGroupNonUniformElect:
7996 emit_op(result_type, id, "subgroupElect()", true);
7997 break;
7998
7999 case OpGroupNonUniformBroadcast:
8000 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast");
8001 break;
8002
8003 case OpGroupNonUniformBroadcastFirst:
8004 emit_unary_func_op(result_type, id, ops[3], "subgroupBroadcastFirst");
8005 break;
8006
8007 case OpGroupNonUniformBallot:
8008 emit_unary_func_op(result_type, id, ops[3], "subgroupBallot");
8009 break;
8010
8011 case OpGroupNonUniformInverseBallot:
8012 emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot");
8013 break;
8014
8015 case OpGroupNonUniformBallotBitExtract:
8016 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract");
8017 break;
8018
8019 case OpGroupNonUniformBallotFindLSB:
8020 emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB");
8021 break;
8022
8023 case OpGroupNonUniformBallotFindMSB:
8024 emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB");
8025 break;
8026
8027 case OpGroupNonUniformBallotBitCount:
8028 {
8029 auto operation = static_cast<GroupOperation>(ops[3]);
8030 if (operation == GroupOperationReduce)
8031 emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount");
8032 else if (operation == GroupOperationInclusiveScan)
8033 emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount");
8034 else if (operation == GroupOperationExclusiveScan)
8035 emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount");
8036 else
8037 SPIRV_CROSS_THROW("Invalid BitCount operation.");
8038 break;
8039 }
8040
8041 case OpGroupNonUniformShuffle:
8042 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle");
8043 break;
8044
8045 case OpGroupNonUniformShuffleXor:
8046 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor");
8047 break;
8048
8049 case OpGroupNonUniformShuffleUp:
8050 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp");
8051 break;
8052
8053 case OpGroupNonUniformShuffleDown:
8054 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown");
8055 break;
8056
8057 case OpGroupNonUniformAll:
8058 emit_unary_func_op(result_type, id, ops[3], "subgroupAll");
8059 break;
8060
8061 case OpGroupNonUniformAny:
8062 emit_unary_func_op(result_type, id, ops[3], "subgroupAny");
8063 break;
8064
8065 case OpGroupNonUniformAllEqual:
8066 emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual");
8067 break;
8068
8069 // clang-format off
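// The GLSL_GROUP_OP macros below expand each OpGroupNonUniform* arithmetic/bitwise/logical opcode
// into the matching subgroup*, subgroupInclusive*, subgroupExclusive* or subgroupClustered* call,
// selected by the GroupOperation operand (e.g. OpGroupNonUniformFAdd with InclusiveScan becomes
// subgroupInclusiveAdd). The _CAST variant additionally forces operand signedness for S/U min/max.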
8070#define GLSL_GROUP_OP(op, glsl_op) \
8071case OpGroupNonUniform##op: \
8072 { \
8073 auto operation = static_cast<GroupOperation>(ops[3]); \
8074 if (operation == GroupOperationReduce) \
8075 emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \
8076 else if (operation == GroupOperationInclusiveScan) \
8077 emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \
8078 else if (operation == GroupOperationExclusiveScan) \
8079 emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \
8080 else if (operation == GroupOperationClusteredReduce) \
8081 emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \
8082 else \
8083 SPIRV_CROSS_THROW("Invalid group operation."); \
8084 break; \
8085 }
8086
8087#define GLSL_GROUP_OP_CAST(op, glsl_op, type) \
8088case OpGroupNonUniform##op: \
8089 { \
8090 auto operation = static_cast<GroupOperation>(ops[3]); \
8091 if (operation == GroupOperationReduce) \
8092 emit_unary_func_op_cast(result_type, id, ops[4], "subgroup" #glsl_op, type, type); \
8093 else if (operation == GroupOperationInclusiveScan) \
8094 emit_unary_func_op_cast(result_type, id, ops[4], "subgroupInclusive" #glsl_op, type, type); \
8095 else if (operation == GroupOperationExclusiveScan) \
8096 emit_unary_func_op_cast(result_type, id, ops[4], "subgroupExclusive" #glsl_op, type, type); \
8097 else if (operation == GroupOperationClusteredReduce) \
8098 emit_binary_func_op_cast_clustered(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op, type); \
8099 else \
8100 SPIRV_CROSS_THROW("Invalid group operation."); \
8101 break; \
8102 }
8103
8104 GLSL_GROUP_OP(FAdd, Add)
8105 GLSL_GROUP_OP(FMul, Mul)
8106 GLSL_GROUP_OP(FMin, Min)
8107 GLSL_GROUP_OP(FMax, Max)
8108 GLSL_GROUP_OP(IAdd, Add)
8109 GLSL_GROUP_OP(IMul, Mul)
8110 GLSL_GROUP_OP_CAST(SMin, Min, int_type)
8111 GLSL_GROUP_OP_CAST(SMax, Max, int_type)
8112 GLSL_GROUP_OP_CAST(UMin, Min, uint_type)
8113 GLSL_GROUP_OP_CAST(UMax, Max, uint_type)
8114 GLSL_GROUP_OP(BitwiseAnd, And)
8115 GLSL_GROUP_OP(BitwiseOr, Or)
8116 GLSL_GROUP_OP(BitwiseXor, Xor)
8117 GLSL_GROUP_OP(LogicalAnd, And)
8118 GLSL_GROUP_OP(LogicalOr, Or)
8119 GLSL_GROUP_OP(LogicalXor, Xor)
8120#undef GLSL_GROUP_OP
8121#undef GLSL_GROUP_OP_CAST
8122 // clang-format on
8123
8124 case OpGroupNonUniformQuadSwap:
8125 {
8126 uint32_t direction = evaluate_constant_u32(ops[4]);
8127 if (direction == 0)
8128 emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapHorizontal");
8129 else if (direction == 1)
8130 emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapVertical");
8131 else if (direction == 2)
8132 emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapDiagonal");
8133 else
8134 SPIRV_CROSS_THROW("Invalid quad swap direction.");
8135 break;
8136 }
8137
8138 case OpGroupNonUniformQuadBroadcast:
8139 {
8140 emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupQuadBroadcast");
8141 break;
8142 }
8143
8144 default:
8145 SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
8146 }
8147
8148 register_control_dependent_expression(id);
8149}
8150
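// Returns the GLSL helper used to reinterpret the bits of in_type as out_type
// (e.g. floatBitsToUint, packUint2x32), a plain type cast for pointer types,
// or an empty string when no conversion is required.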
8151string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
8152{
8153 // OpBitcast can deal with pointers.
8154 if (out_type.pointer || in_type.pointer)
8155 {
8156 if (out_type.vecsize == 2 || in_type.vecsize == 2)
8157 require_extension_internal("GL_EXT_buffer_reference_uvec2");
8158 return type_to_glsl(out_type);
8159 }
8160
8161 if (out_type.basetype == in_type.basetype)
8162 return "";
8163
8164 assert(out_type.basetype != SPIRType::Boolean);
8165 assert(in_type.basetype != SPIRType::Boolean);
8166
8167 bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type);
8168 bool same_size_cast = out_type.width == in_type.width;
8169
8170 // Trivial bitcast case, casts between integers.
8171 if (integral_cast && same_size_cast)
8172 return type_to_glsl(out_type);
8173
8174 // Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types).
8175 if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1)
8176 return "unpack8";
8177 else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1)
8178 return "pack16";
8179 else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1)
8180 return "pack32";
8181
8182 // Floating <-> Integer special casts. Just have to enumerate all cases. :(
8183 // 16-bit, 32-bit and 64-bit floats.
8184 if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
8185 {
8186 if (is_legacy_es())
8187 SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL.");
8188 else if (!options.es && options.version < 330)
8189 require_extension_internal("GL_ARB_shader_bit_encoding");
8190 return "floatBitsToUint";
8191 }
8192 else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
8193 {
8194 if (is_legacy_es())
8195 SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL.");
8196 else if (!options.es && options.version < 330)
8197 require_extension_internal("GL_ARB_shader_bit_encoding");
8198 return "floatBitsToInt";
8199 }
8200 else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
8201 {
8202 if (is_legacy_es())
8203 SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL.");
8204 else if (!options.es && options.version < 330)
8205 require_extension_internal("GL_ARB_shader_bit_encoding");
8206 return "uintBitsToFloat";
8207 }
8208 else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
8209 {
8210 if (is_legacy_es())
8211 SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL.");
8212 else if (!options.es && options.version < 330)
8213 require_extension_internal("GL_ARB_shader_bit_encoding");
8214 return "intBitsToFloat";
8215 }
8216
8217 else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double)
8218 return "doubleBitsToInt64";
8219 else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double)
8220 return "doubleBitsToUint64";
8221 else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64)
8222 return "int64BitsToDouble";
8223 else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
8224 return "uint64BitsToDouble";
8225 else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half)
8226 return "float16BitsToInt16";
8227 else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half)
8228 return "float16BitsToUint16";
8229 else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short)
8230 return "int16BitsToFloat16";
8231 else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort)
8232 return "uint16BitsToFloat16";
8233
8234 // And finally, some even more special purpose casts.
8235 if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2)
8236 return "packUint2x32";
8237 else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UInt64 && out_type.vecsize == 2)
8238 return "unpackUint2x32";
8239 else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
8240 return "unpackFloat2x16";
8241 else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
8242 return "packFloat2x16";
8243 else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2)
8244 return "packInt2x16";
8245 else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1)
8246 return "unpackInt2x16";
8247 else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2)
8248 return "packUint2x16";
8249 else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
8250 return "unpackUint2x16";
8251 else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4)
8252 return "packInt4x16";
8253 else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1)
8254 return "unpackInt4x16";
8255 else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4)
8256 return "packUint4x16";
8257 else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1)
8258 return "unpackUint4x16";
8259
8260 return "";
8261}
8262
8263string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument)
8264{
8265 auto op = bitcast_glsl_op(result_type, expression_type(argument));
8266 if (op.empty())
8267 return to_enclosed_unpacked_expression(argument);
8268 else
8269 return join(op, "(", to_unpacked_expression(argument), ")");
8270}
8271
8272std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg)
8273{
8274 auto expr = to_expression(arg);
8275 auto &src_type = expression_type(arg);
8276 if (src_type.basetype != target_type)
8277 {
8278 auto target = src_type;
8279 target.basetype = target_type;
8280 expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")");
8281 }
8282
8283 return expr;
8284}
8285
8286std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type,
8287 const std::string &expr)
8288{
8289 if (target_type.basetype == expr_type)
8290 return expr;
8291
8292 auto src_type = target_type;
8293 src_type.basetype = expr_type;
8294 return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")");
8295}
8296
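// Translates a SPIR-V BuiltIn (together with its storage class) into the corresponding GLSL
// builtin name, e.g. BuiltInFragCoord -> gl_FragCoord, enabling extensions or throwing when
// the current target profile cannot express the builtin.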
8297string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
8298{
8299 switch (builtin)
8300 {
8301 case BuiltInPosition:
8302 return "gl_Position";
8303 case BuiltInPointSize:
8304 return "gl_PointSize";
8305 case BuiltInClipDistance:
8306 return "gl_ClipDistance";
8307 case BuiltInCullDistance:
8308 return "gl_CullDistance";
8309 case BuiltInVertexId:
8310 if (options.vulkan_semantics)
8311 SPIRV_CROSS_THROW("Cannot implement gl_VertexID in Vulkan GLSL. This shader was created "
8312 "with GL semantics.");
8313 return "gl_VertexID";
8314 case BuiltInInstanceId:
8315 if (options.vulkan_semantics)
8316 {
8317 auto model = get_entry_point().model;
8318 switch (model)
8319 {
8320 case spv::ExecutionModelIntersectionKHR:
8321 case spv::ExecutionModelAnyHitKHR:
8322 case spv::ExecutionModelClosestHitKHR:
8323 // gl_InstanceID is allowed in these shaders.
8324 break;
8325
8326 default:
8327 SPIRV_CROSS_THROW("Cannot implement gl_InstanceID in Vulkan GLSL. This shader was "
8328 "created with GL semantics.");
8329 }
8330 }
8331 if (!options.es && options.version < 140)
8332 {
8333 require_extension_internal("GL_ARB_draw_instanced");
8334 }
8335 return "gl_InstanceID";
8336 case BuiltInVertexIndex:
8337 if (options.vulkan_semantics)
8338 return "gl_VertexIndex";
8339 else
8340 return "gl_VertexID"; // gl_VertexID already has the base offset applied.
8341 case BuiltInInstanceIndex:
8342 if (options.vulkan_semantics)
8343 return "gl_InstanceIndex";
8344
8345 if (!options.es && options.version < 140)
8346 {
8347 require_extension_internal("GL_ARB_draw_instanced");
8348 }
8349
8350 if (options.vertex.support_nonzero_base_instance)
8351 {
8352 if (!options.vulkan_semantics)
8353 {
8354 // This is a soft-enable. We will opt-in to using gl_BaseInstanceARB if supported.
8355 require_extension_internal("GL_ARB_shader_draw_parameters");
8356 }
8357 return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID.
8358 }
8359 else
8360 return "gl_InstanceID";
8361 case BuiltInPrimitiveId:
8362 if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry)
8363 return "gl_PrimitiveIDIn";
8364 else
8365 return "gl_PrimitiveID";
8366 case BuiltInInvocationId:
8367 return "gl_InvocationID";
8368 case BuiltInLayer:
8369 return "gl_Layer";
8370 case BuiltInViewportIndex:
8371 return "gl_ViewportIndex";
8372 case BuiltInTessLevelOuter:
8373 return "gl_TessLevelOuter";
8374 case BuiltInTessLevelInner:
8375 return "gl_TessLevelInner";
8376 case BuiltInTessCoord:
8377 return "gl_TessCoord";
8378 case BuiltInFragCoord:
8379 return "gl_FragCoord";
8380 case BuiltInPointCoord:
8381 return "gl_PointCoord";
8382 case BuiltInFrontFacing:
8383 return "gl_FrontFacing";
8384 case BuiltInFragDepth:
8385 return "gl_FragDepth";
8386 case BuiltInNumWorkgroups:
8387 return "gl_NumWorkGroups";
8388 case BuiltInWorkgroupSize:
8389 return "gl_WorkGroupSize";
8390 case BuiltInWorkgroupId:
8391 return "gl_WorkGroupID";
8392 case BuiltInLocalInvocationId:
8393 return "gl_LocalInvocationID";
8394 case BuiltInGlobalInvocationId:
8395 return "gl_GlobalInvocationID";
8396 case BuiltInLocalInvocationIndex:
8397 return "gl_LocalInvocationIndex";
8398 case BuiltInHelperInvocation:
8399 return "gl_HelperInvocation";
8400
8401 case BuiltInBaseVertex:
8402 if (options.es)
8403 SPIRV_CROSS_THROW("BaseVertex not supported in ES profile.");
8404
8405 if (options.vulkan_semantics)
8406 {
8407 if (options.version < 460)
8408 {
8409 require_extension_internal("GL_ARB_shader_draw_parameters");
8410 return "gl_BaseVertexARB";
8411 }
8412 return "gl_BaseVertex";
8413 }
8414 // On regular GL, this is soft-enabled and we emit ifdefs in code.
8415 require_extension_internal("GL_ARB_shader_draw_parameters");
8416 return "SPIRV_Cross_BaseVertex";
8417
8418 case BuiltInBaseInstance:
8419 if (options.es)
8420 SPIRV_CROSS_THROW("BaseInstance not supported in ES profile.");
8421
8422 if (options.vulkan_semantics)
8423 {
8424 if (options.version < 460)
8425 {
8426 require_extension_internal("GL_ARB_shader_draw_parameters");
8427 return "gl_BaseInstanceARB";
8428 }
8429 return "gl_BaseInstance";
8430 }
8431 // On regular GL, this is soft-enabled and we emit ifdefs in code.
8432 require_extension_internal("GL_ARB_shader_draw_parameters");
8433 return "SPIRV_Cross_BaseInstance";
8434
8435 case BuiltInDrawIndex:
8436 if (options.es)
8437 SPIRV_CROSS_THROW("DrawIndex not supported in ES profile.");
8438
8439 if (options.vulkan_semantics)
8440 {
8441 if (options.version < 460)
8442 {
8443 require_extension_internal("GL_ARB_shader_draw_parameters");
8444 return "gl_DrawIDARB";
8445 }
8446 return "gl_DrawID";
8447 }
8448 // On regular GL, this is soft-enabled and we emit ifdefs in code.
8449 require_extension_internal("GL_ARB_shader_draw_parameters");
8450 return "gl_DrawIDARB";
8451
8452 case BuiltInSampleId:
8453 if (options.es && options.version < 320)
8454 require_extension_internal("GL_OES_sample_variables");
8455 if (!options.es && options.version < 400)
8456 SPIRV_CROSS_THROW("gl_SampleID not supported before GLSL 400.");
8457 return "gl_SampleID";
8458
8459 case BuiltInSampleMask:
8460 if (options.es && options.version < 320)
8461 require_extension_internal("GL_OES_sample_variables");
8462 if (!options.es && options.version < 400)
8463 SPIRV_CROSS_THROW("gl_SampleMask/gl_SampleMaskIn not supported before GLSL 400.");
8464
8465 if (storage == StorageClassInput)
8466 return "gl_SampleMaskIn";
8467 else
8468 return "gl_SampleMask";
8469
8470 case BuiltInSamplePosition:
8471 if (options.es && options.version < 320)
8472 require_extension_internal("GL_OES_sample_variables");
8473 if (!options.es && options.version < 400)
8474 SPIRV_CROSS_THROW("gl_SamplePosition not supported before GLSL 400.");
8475 return "gl_SamplePosition";
8476
8477 case BuiltInViewIndex:
8478 if (options.vulkan_semantics)
8479 return "gl_ViewIndex";
8480 else
8481 return "gl_ViewID_OVR";
8482
8483 case BuiltInNumSubgroups:
8484 request_subgroup_feature(ShaderSubgroupSupportHelper::NumSubgroups);
8485 return "gl_NumSubgroups";
8486
8487 case BuiltInSubgroupId:
8488 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupID);
8489 return "gl_SubgroupID";
8490
8491 case BuiltInSubgroupSize:
8492 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupSize);
8493 return "gl_SubgroupSize";
8494
8495 case BuiltInSubgroupLocalInvocationId:
8496 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInvocationID);
8497 return "gl_SubgroupInvocationID";
8498
8499 case BuiltInSubgroupEqMask:
8500 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8501 return "gl_SubgroupEqMask";
8502
8503 case BuiltInSubgroupGeMask:
8504 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8505 return "gl_SubgroupGeMask";
8506
8507 case BuiltInSubgroupGtMask:
8508 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8509 return "gl_SubgroupGtMask";
8510
8511 case BuiltInSubgroupLeMask:
8512 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8513 return "gl_SubgroupLeMask";
8514
8515 case BuiltInSubgroupLtMask:
8516 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask);
8517 return "gl_SubgroupLtMask";
8518
8519 case BuiltInLaunchIdKHR:
8520 return ray_tracing_is_khr ? "gl_LaunchIDEXT" : "gl_LaunchIDNV";
8521 case BuiltInLaunchSizeKHR:
8522 return ray_tracing_is_khr ? "gl_LaunchSizeEXT" : "gl_LaunchSizeNV";
8523 case BuiltInWorldRayOriginKHR:
8524 return ray_tracing_is_khr ? "gl_WorldRayOriginEXT" : "gl_WorldRayOriginNV";
8525 case BuiltInWorldRayDirectionKHR:
8526 return ray_tracing_is_khr ? "gl_WorldRayDirectionEXT" : "gl_WorldRayDirectionNV";
8527 case BuiltInObjectRayOriginKHR:
8528 return ray_tracing_is_khr ? "gl_ObjectRayOriginEXT" : "gl_ObjectRayOriginNV";
8529 case BuiltInObjectRayDirectionKHR:
8530 return ray_tracing_is_khr ? "gl_ObjectRayDirectionEXT" : "gl_ObjectRayDirectionNV";
8531 case BuiltInRayTminKHR:
8532 return ray_tracing_is_khr ? "gl_RayTminEXT" : "gl_RayTminNV";
8533 case BuiltInRayTmaxKHR:
8534 return ray_tracing_is_khr ? "gl_RayTmaxEXT" : "gl_RayTmaxNV";
8535 case BuiltInInstanceCustomIndexKHR:
8536 return ray_tracing_is_khr ? "gl_InstanceCustomIndexEXT" : "gl_InstanceCustomIndexNV";
8537 case BuiltInObjectToWorldKHR:
8538 return ray_tracing_is_khr ? "gl_ObjectToWorldEXT" : "gl_ObjectToWorldNV";
8539 case BuiltInWorldToObjectKHR:
8540 return ray_tracing_is_khr ? "gl_WorldToObjectEXT" : "gl_WorldToObjectNV";
8541 case BuiltInHitTNV:
8542 // gl_HitTEXT is an alias of RayTMax in KHR.
8543 return "gl_HitTNV";
8544 case BuiltInHitKindKHR:
8545 return ray_tracing_is_khr ? "gl_HitKindEXT" : "gl_HitKindNV";
8546 case BuiltInIncomingRayFlagsKHR:
8547 return ray_tracing_is_khr ? "gl_IncomingRayFlagsEXT" : "gl_IncomingRayFlagsNV";
8548
8549 case BuiltInBaryCoordNV:
8550 {
8551 if (options.es && options.version < 320)
8552 SPIRV_CROSS_THROW("gl_BaryCoordNV requires ESSL 320.");
8553 else if (!options.es && options.version < 450)
8554 SPIRV_CROSS_THROW("gl_BaryCoordNV requires GLSL 450.");
8555 require_extension_internal("GL_NV_fragment_shader_barycentric");
8556 return "gl_BaryCoordNV";
8557 }
8558
8559 case BuiltInBaryCoordNoPerspNV:
8560 {
8561 if (options.es && options.version < 320)
8562 SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires ESSL 320.");
8563 else if (!options.es && options.version < 450)
8564 SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires GLSL 450.");
8565 require_extension_internal("GL_NV_fragment_shader_barycentric");
8566 return "gl_BaryCoordNoPerspNV";
8567 }
8568
8569 case BuiltInFragStencilRefEXT:
8570 {
8571 if (!options.es)
8572 {
8573 require_extension_internal("GL_ARB_shader_stencil_export");
8574 return "gl_FragStencilRefARB";
8575 }
8576 else
8577 SPIRV_CROSS_THROW("Stencil export not supported in GLES.");
8578 }
8579
8580 case BuiltInPrimitiveShadingRateKHR:
8581 {
8582 if (!options.vulkan_semantics)
8583 SPIRV_CROSS_THROW("Can only use PrimitiveShadingRateKHR in Vulkan GLSL.");
8584 require_extension_internal("GL_EXT_fragment_shading_rate");
8585 return "gl_PrimitiveShadingRateEXT";
8586 }
8587
8588 case BuiltInShadingRateKHR:
8589 {
8590 if (!options.vulkan_semantics)
8591 SPIRV_CROSS_THROW("Can only use ShadingRateKHR in Vulkan GLSL.");
8592 require_extension_internal("GL_EXT_fragment_shading_rate");
8593 return "gl_ShadingRateEXT";
8594 }
8595
8596 case BuiltInDeviceIndex:
8597 if (!options.vulkan_semantics)
8598 SPIRV_CROSS_THROW("Need Vulkan semantics for device group support.");
8599 require_extension_internal("GL_EXT_device_group");
8600 return "gl_DeviceIndex";
8601
8602 case BuiltInFullyCoveredEXT:
8603 if (!options.es)
8604 require_extension_internal("GL_NV_conservative_raster_underestimation");
8605 else
8606 SPIRV_CROSS_THROW("Need desktop GL to use GL_NV_conservative_raster_underestimation.");
8607 return "gl_FragFullyCoveredNV";
8608
8609 default:
8610 return join("gl_BuiltIn_", convert_to_string(builtin));
8611 }
8612}
8613
8614const char *CompilerGLSL::index_to_swizzle(uint32_t index)
8615{
8616 switch (index)
8617 {
8618 case 0:
8619 return "x";
8620 case 1:
8621 return "y";
8622 case 2:
8623 return "z";
8624 case 3:
8625 return "w";
8626 default:
8627		return "x"; // Don't crash, but engage the "undefined behavior" described for out-of-bounds logical addressing in the spec.
8628 }
8629}
8630
8631void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType * /*type*/,
8632 AccessChainFlags flags, bool & /*access_chain_is_arrayed*/,
8633 uint32_t index)
8634{
8635 bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
8636 bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
8637
8638 expr += "[";
8639
8640 if (index_is_literal)
8641 expr += convert_to_string(index);
8642 else
8643 expr += to_unpacked_expression(index, register_expression_read);
8644
8645 expr += "]";
8646}
8647
8648bool CompilerGLSL::access_chain_needs_stage_io_builtin_translation(uint32_t)
8649{
8650 return true;
8651}
8652
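// Builds the textual access chain for 'base' by walking 'indices' through the type hierarchy,
// handling pointer chains, arrays, struct members, matrix columns and vector components in turn.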
8653string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
8654 AccessChainFlags flags, AccessChainMeta *meta)
8655{
8656 string expr;
8657
8658 bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
8659 bool msb_is_id = (flags & ACCESS_CHAIN_LITERAL_MSB_FORCE_ID) != 0;
8660 bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0;
8661 bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
8662 bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
8663 bool flatten_member_reference = (flags & ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT) != 0;
8664
8665 if (!chain_only)
8666 {
8667 // We handle transpose explicitly, so don't resolve that here.
8668 auto *e = maybe_get<SPIRExpression>(base);
8669 bool old_transpose = e && e->need_transpose;
8670 if (e)
8671 e->need_transpose = false;
8672 expr = to_enclosed_expression(base, register_expression_read);
8673 if (e)
8674 e->need_transpose = old_transpose;
8675 }
8676
8677 // Start traversing type hierarchy at the proper non-pointer types,
8678 // but keep type_id referencing the original pointer for use below.
8679 uint32_t type_id = expression_type_id(base);
8680
8681 if (!backend.native_pointers)
8682 {
8683 if (ptr_chain)
8684 SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain.");
8685
8686 // Wrapped buffer reference pointer types will need to poke into the internal "value" member before
8687 // continuing the access chain.
8688 if (should_dereference(base))
8689 {
8690 auto &type = get<SPIRType>(type_id);
8691 expr = dereference_expression(type, expr);
8692 }
8693 }
8694
8695 const auto *type = &get_pointee_type(type_id);
8696
8697 bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos;
8698 bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
8699 bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPhysicalTypePacked);
8700 uint32_t physical_type = get_extended_decoration(base, SPIRVCrossDecorationPhysicalTypeID);
8701 bool is_invariant = has_decoration(base, DecorationInvariant);
8702 bool pending_array_enclose = false;
8703 bool dimension_flatten = false;
8704
8705 const auto append_index = [&](uint32_t index, bool is_literal) {
8706 AccessChainFlags mod_flags = flags;
8707 if (!is_literal)
8708 mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT;
8709 access_chain_internal_append_index(expr, base, type, mod_flags, access_chain_is_arrayed, index);
8710 };
8711
8712 for (uint32_t i = 0; i < count; i++)
8713 {
8714 uint32_t index = indices[i];
8715
8716 bool is_literal = index_is_literal;
8717 if (is_literal && msb_is_id && (index >> 31u) != 0u)
8718 {
8719 is_literal = false;
8720 index &= 0x7fffffffu;
8721 }
8722
8723 // Pointer chains
8724 if (ptr_chain && i == 0)
8725 {
8726 // If we are flattening multidimensional arrays, only create opening bracket on first
8727 // array index.
8728 if (options.flatten_multidimensional_arrays)
8729 {
8730 dimension_flatten = type->array.size() >= 1;
8731 pending_array_enclose = dimension_flatten;
8732 if (pending_array_enclose)
8733 expr += "[";
8734 }
8735
8736 if (options.flatten_multidimensional_arrays && dimension_flatten)
8737 {
8738 // If we are flattening multidimensional arrays, do manual stride computation.
8739 if (is_literal)
8740 expr += convert_to_string(index);
8741 else
8742 expr += to_enclosed_expression(index, register_expression_read);
8743
8744 for (auto j = uint32_t(type->array.size()); j; j--)
8745 {
8746 expr += " * ";
8747 expr += enclose_expression(to_array_size(*type, j - 1));
8748 }
8749
8750 if (type->array.empty())
8751 pending_array_enclose = false;
8752 else
8753 expr += " + ";
8754
8755 if (!pending_array_enclose)
8756 expr += "]";
8757 }
8758 else
8759 {
8760 append_index(index, is_literal);
8761 }
8762
8763 if (type->basetype == SPIRType::ControlPointArray)
8764 {
8765 type_id = type->parent_type;
8766 type = &get<SPIRType>(type_id);
8767 }
8768
8769 access_chain_is_arrayed = true;
8770 }
8771 // Arrays
8772 else if (!type->array.empty())
8773 {
8774 // If we are flattening multidimensional arrays, only create opening bracket on first
8775 // array index.
8776 if (options.flatten_multidimensional_arrays && !pending_array_enclose)
8777 {
8778 dimension_flatten = type->array.size() > 1;
8779 pending_array_enclose = dimension_flatten;
8780 if (pending_array_enclose)
8781 expr += "[";
8782 }
8783
8784 assert(type->parent_type);
8785
8786 auto *var = maybe_get<SPIRVariable>(base);
8787 if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) &&
8788 !has_decoration(type->self, DecorationBlock))
8789 {
8790 // This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared.
8791 // Normally, these variables live in blocks when compiled from GLSL,
8792 // but HLSL seems to just emit straight arrays here.
8793 // We must pretend this access goes through gl_in/gl_out arrays
8794 // to be able to access certain builtins as arrays.
8795 auto builtin = ir.meta[base].decoration.builtin_type;
8796 switch (builtin)
8797 {
8798 // case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom.
8799 // case BuiltInClipDistance:
8800 case BuiltInPosition:
8801 case BuiltInPointSize:
8802 if (var->storage == StorageClassInput)
8803 expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr);
8804 else if (var->storage == StorageClassOutput)
8805 expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr);
8806 else
8807 append_index(index, is_literal);
8808 break;
8809
8810 default:
8811 append_index(index, is_literal);
8812 break;
8813 }
8814 }
8815 else if (options.flatten_multidimensional_arrays && dimension_flatten)
8816 {
8817 // If we are flattening multidimensional arrays, do manual stride computation.
8818 auto &parent_type = get<SPIRType>(type->parent_type);
8819
8820 if (is_literal)
8821 expr += convert_to_string(index);
8822 else
8823 expr += to_enclosed_expression(index, register_expression_read);
8824
8825 for (auto j = uint32_t(parent_type.array.size()); j; j--)
8826 {
8827 expr += " * ";
8828 expr += enclose_expression(to_array_size(parent_type, j - 1));
8829 }
8830
8831 if (parent_type.array.empty())
8832 pending_array_enclose = false;
8833 else
8834 expr += " + ";
8835
8836 if (!pending_array_enclose)
8837 expr += "]";
8838 }
8839 // Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal.
8840 // By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask.
8841 else if (!builtin_translates_to_nonarray(BuiltIn(get_decoration(base, DecorationBuiltIn))))
8842 {
8843 append_index(index, is_literal);
8844 }
8845
8846 type_id = type->parent_type;
8847 type = &get<SPIRType>(type_id);
8848
8849 access_chain_is_arrayed = true;
8850 }
8851 // For structs, the index refers to a constant, which indexes into the members, possibly through a redirection mapping.
8852 // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
8853 else if (type->basetype == SPIRType::Struct)
8854 {
8855 if (!is_literal)
8856 index = evaluate_constant_u32(index);
8857
8858 if (index < uint32_t(type->member_type_index_redirection.size()))
8859 index = type->member_type_index_redirection[index];
8860
8861 if (index >= type->member_types.size())
8862 SPIRV_CROSS_THROW("Member index is out of bounds!");
8863
8864 BuiltIn builtin;
8865 if (is_member_builtin(*type, index, &builtin) && access_chain_needs_stage_io_builtin_translation(base))
8866 {
8867 if (access_chain_is_arrayed)
8868 {
8869 expr += ".";
8870 expr += builtin_to_glsl(builtin, type->storage);
8871 }
8872 else
8873 expr = builtin_to_glsl(builtin, type->storage);
8874 }
8875 else
8876 {
8877 // If the member has a qualified name, use it as the entire chain
8878 string qual_mbr_name = get_member_qualified_name(type_id, index);
8879 if (!qual_mbr_name.empty())
8880 expr = qual_mbr_name;
8881 else if (flatten_member_reference)
8882 expr += join("_", to_member_name(*type, index));
8883 else
8884 expr += to_member_reference(base, *type, index, ptr_chain);
8885 }
8886
8887 if (has_member_decoration(type->self, index, DecorationInvariant))
8888 is_invariant = true;
8889
8890 is_packed = member_is_packed_physical_type(*type, index);
8891 if (member_is_remapped_physical_type(*type, index))
8892 physical_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPhysicalTypeID);
8893 else
8894 physical_type = 0;
8895
8896 row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index);
8897 type = &get<SPIRType>(type->member_types[index]);
8898 }
8899 // Matrix -> Vector
8900 else if (type->columns > 1)
8901 {
8902 // If we have a row-major matrix here, we need to defer any transpose in case this access chain
8903 // is used to store a column. We can resolve it right here and now if we access a scalar directly,
8904 // by flipping indexing order of the matrix.
8905
8906 expr += "[";
8907 if (is_literal)
8908 expr += convert_to_string(index);
8909 else
8910 expr += to_unpacked_expression(index, register_expression_read);
8911 expr += "]";
8912
8913 type_id = type->parent_type;
8914 type = &get<SPIRType>(type_id);
8915 }
8916 // Vector -> Scalar
8917 else if (type->vecsize > 1)
8918 {
8919 string deferred_index;
8920 if (row_major_matrix_needs_conversion)
8921 {
8922 // Flip indexing order.
8923 auto column_index = expr.find_last_of('[');
8924 if (column_index != string::npos)
8925 {
8926 deferred_index = expr.substr(column_index);
8927 expr.resize(column_index);
8928 }
8929 }
8930
8931 // Internally, access chain implementation can also be used on composites,
8932 // ignore scalar access workarounds in this case.
8933 StorageClass effective_storage = StorageClassGeneric;
8934 bool ignore_potential_sliced_writes = false;
8935 if ((flags & ACCESS_CHAIN_FORCE_COMPOSITE_BIT) == 0)
8936 {
8937 if (expression_type(base).pointer)
8938 effective_storage = get_expression_effective_storage_class(base);
8939
8940 // Special consideration for control points.
8941 // Control points can only be written by InvocationID, so there is no need
8942 // to consider scalar access chains here.
8943 // Cleans up some cases where it's very painful to determine the accurate storage class
8944 // since blocks can be partially masked ...
8945 auto *var = maybe_get_backing_variable(base);
8946 if (var && var->storage == StorageClassOutput &&
8947 get_execution_model() == ExecutionModelTessellationControl &&
8948 !has_decoration(var->self, DecorationPatch))
8949 {
8950 ignore_potential_sliced_writes = true;
8951 }
8952 }
8953 else
8954 ignore_potential_sliced_writes = true;
8955
8956 if (!row_major_matrix_needs_conversion && !ignore_potential_sliced_writes)
8957 {
8958 // On some backends, we might not be able to safely access individual scalars in a vector.
8959 // To work around this, we might have to cast the access chain reference to something which can,
8960 // like a pointer to scalar, which we can then index into.
8961 prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
8962 is_packed);
8963 }
8964
8965 if (is_literal)
8966 {
8967 bool out_of_bounds = (index >= type->vecsize);
8968
8969 if (!is_packed && !row_major_matrix_needs_conversion)
8970 {
8971 expr += ".";
8972 expr += index_to_swizzle(out_of_bounds ? 0 : index);
8973 }
8974 else
8975 {
8976 // For packed vectors, we can only access them as an array, not by swizzle.
8977 expr += join("[", out_of_bounds ? 0 : index, "]");
8978 }
8979 }
8980 else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion)
8981 {
8982 auto &c = get<SPIRConstant>(index);
8983 bool out_of_bounds = (c.scalar() >= type->vecsize);
8984
8985 if (c.specialization)
8986 {
8987 // If the index is a spec constant, we cannot turn extract into a swizzle.
8988 expr += join("[", out_of_bounds ? "0" : to_expression(index), "]");
8989 }
8990 else
8991 {
8992 expr += ".";
8993 expr += index_to_swizzle(out_of_bounds ? 0 : c.scalar());
8994 }
8995 }
8996 else
8997 {
8998 expr += "[";
8999 expr += to_unpacked_expression(index, register_expression_read);
9000 expr += "]";
9001 }
9002
9003 if (row_major_matrix_needs_conversion && !ignore_potential_sliced_writes)
9004 {
9005 prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage,
9006 is_packed);
9007 }
9008
9009 expr += deferred_index;
9010 row_major_matrix_needs_conversion = false;
9011
9012 is_packed = false;
9013 physical_type = 0;
9014 type_id = type->parent_type;
9015 type = &get<SPIRType>(type_id);
9016 }
9017 else if (!backend.allow_truncated_access_chain)
9018 SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
9019 }
9020
9021 if (pending_array_enclose)
9022 {
9023		SPIRV_CROSS_THROW("Flattening of multidimensional arrays was enabled, "
9024 "but the access chain was terminated in the middle of a multidimensional array. "
9025 "This is not supported.");
9026 }
9027
9028 if (meta)
9029 {
9030 meta->need_transpose = row_major_matrix_needs_conversion;
9031 meta->storage_is_packed = is_packed;
9032 meta->storage_is_invariant = is_invariant;
9033 meta->storage_physical_type = physical_type;
9034 }
9035
9036 return expr;
9037}
9038
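// No-op by default; backends which cannot address individual scalars inside a vector directly
// (see the comment in access_chain_internal) override this hook to rewrite the expression.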
9039void CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &)
9040{
9041}
9042
9043string CompilerGLSL::to_flattened_struct_member(const string &basename, const SPIRType &type, uint32_t index)
9044{
9045 auto ret = join(basename, "_", to_member_name(type, index));
9046 ParsedIR::sanitize_underscores(ret);
9047 return ret;
9048}
9049
9050string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
9051 AccessChainMeta *meta, bool ptr_chain)
9052{
9053 if (flattened_buffer_blocks.count(base))
9054 {
9055 uint32_t matrix_stride = 0;
9056 uint32_t array_stride = 0;
9057 bool need_transpose = false;
9058 flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride,
9059 &array_stride, ptr_chain);
9060
9061 if (meta)
9062 {
9063 meta->need_transpose = target_type.columns > 1 && need_transpose;
9064 meta->storage_is_packed = false;
9065 }
9066
9067 return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, array_stride,
9068 need_transpose);
9069 }
9070 else if (flattened_structs.count(base) && count > 0)
9071 {
9072 AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
9073 if (ptr_chain)
9074 flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
9075
9076 if (flattened_structs[base])
9077 {
9078 flags |= ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT;
9079 if (meta)
9080 meta->flattened_struct = target_type.basetype == SPIRType::Struct;
9081 }
9082
9083 auto chain = access_chain_internal(base, indices, count, flags, nullptr).substr(1);
9084 if (meta)
9085 {
9086 meta->need_transpose = false;
9087 meta->storage_is_packed = false;
9088 }
9089
9090 auto basename = to_flattened_access_chain_expression(base);
9091 auto ret = join(basename, "_", chain);
9092 ParsedIR::sanitize_underscores(ret);
9093 return ret;
9094 }
9095 else
9096 {
9097 AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT;
9098 if (ptr_chain)
9099 flags |= ACCESS_CHAIN_PTR_CHAIN_BIT;
9100 return access_chain_internal(base, indices, count, flags, meta);
9101 }
9102}
9103
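// Reconstructs a struct value from its flattened members by emitting a constructor expression,
// recursing into nested struct members.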
9104string CompilerGLSL::load_flattened_struct(const string &basename, const SPIRType &type)
9105{
9106 auto expr = type_to_glsl_constructor(type);
9107 expr += '(';
9108
9109 for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
9110 {
9111 if (i)
9112 expr += ", ";
9113
9114 auto &member_type = get<SPIRType>(type.member_types[i]);
9115 if (member_type.basetype == SPIRType::Struct)
9116 expr += load_flattened_struct(to_flattened_struct_member(basename, type, i), member_type);
9117 else
9118 expr += to_flattened_struct_member(basename, type, i);
9119 }
9120 expr += ')';
9121 return expr;
9122}
9123
9124std::string CompilerGLSL::to_flattened_access_chain_expression(uint32_t id)
9125{
9126 // Do not use to_expression as that will unflatten access chains.
9127 string basename;
9128 if (const auto *var = maybe_get<SPIRVariable>(id))
9129 basename = to_name(var->self);
9130 else if (const auto *expr = maybe_get<SPIRExpression>(id))
9131 basename = expr->expression;
9132 else
9133 basename = to_expression(id);
9134
9135 return basename;
9136}
9137
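// Emits one assignment per non-struct member, writing the corresponding member of rhs_id into
// the flattened "<basename>_<member>" variable; nested structs are handled recursively.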
9138void CompilerGLSL::store_flattened_struct(const string &basename, uint32_t rhs_id, const SPIRType &type,
9139 const SmallVector<uint32_t> &indices)
9140{
9141 SmallVector<uint32_t> sub_indices = indices;
9142 sub_indices.push_back(0);
9143
9144 auto *member_type = &type;
9145 for (auto &index : indices)
9146 member_type = &get<SPIRType>(member_type->member_types[index]);
9147
9148 for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++)
9149 {
9150 sub_indices.back() = i;
9151 auto lhs = join(basename, "_", to_member_name(*member_type, i));
9152 ParsedIR::sanitize_underscores(lhs);
9153
9154 if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct)
9155 {
9156 store_flattened_struct(lhs, rhs_id, type, sub_indices);
9157 }
9158 else
9159 {
9160 auto rhs = to_expression(rhs_id) + to_multi_member_reference(type, sub_indices);
9161 statement(lhs, " = ", rhs, ";");
9162 }
9163 }
9164}
9165
9166void CompilerGLSL::store_flattened_struct(uint32_t lhs_id, uint32_t value)
9167{
9168 auto &type = expression_type(lhs_id);
9169 auto basename = to_flattened_access_chain_expression(lhs_id);
9170 store_flattened_struct(basename, value, type, {});
9171}
9172
9173std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
9174 const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
9175 uint32_t /* array_stride */, bool need_transpose)
9176{
9177 if (!target_type.array.empty())
9178 SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened");
9179 else if (target_type.basetype == SPIRType::Struct)
9180 return flattened_access_chain_struct(base, indices, count, target_type, offset);
9181 else if (target_type.columns > 1)
9182 return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose);
9183 else
9184 return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose);
9185}
9186
9187std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
9188 const SPIRType &target_type, uint32_t offset)
9189{
9190 std::string expr;
9191
9192 expr += type_to_glsl_constructor(target_type);
9193 expr += "(";
9194
9195 for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i)
9196 {
9197 if (i != 0)
9198 expr += ", ";
9199
9200 const SPIRType &member_type = get<SPIRType>(target_type.member_types[i]);
9201 uint32_t member_offset = type_struct_member_offset(target_type, i);
9202
9203 // The access chain terminates at the struct, so we need to find matrix strides and row-major information
9204 // ahead of time.
9205 bool need_transpose = false;
9206 uint32_t matrix_stride = 0;
9207 if (member_type.columns > 1)
9208 {
9209 need_transpose = combined_decoration_for_member(target_type, i).get(DecorationRowMajor);
9210 matrix_stride = type_struct_member_matrix_stride(target_type, i);
9211 }
9212
9213 auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride,
9214 0 /* array_stride */, need_transpose);
9215
9216 // Cannot forward transpositions, so resolve them here.
9217 if (need_transpose)
9218 expr += convert_row_major_matrix(tmp, member_type, 0, false);
9219 else
9220 expr += tmp;
9221 }
9222
9223 expr += ")";
9224
9225 return expr;
9226}
9227
9228std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
9229 const SPIRType &target_type, uint32_t offset,
9230 uint32_t matrix_stride, bool need_transpose)
9231{
9232 assert(matrix_stride);
9233 SPIRType tmp_type = target_type;
9234 if (need_transpose)
9235 swap(tmp_type.vecsize, tmp_type.columns);
9236
9237 std::string expr;
9238
9239 expr += type_to_glsl_constructor(tmp_type);
9240 expr += "(";
9241
9242 for (uint32_t i = 0; i < tmp_type.columns; i++)
9243 {
9244 if (i != 0)
9245 expr += ", ";
9246
9247 expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride,
9248 /* need_transpose= */ false);
9249 }
9250
9251 expr += ")";
9252
9253 return expr;
9254}
9255
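// The flattened buffer is declared as an array of 4-component vectors, so the byte offset is
// converted into a scalar index, then split into an element index (index / 4) and a component
// swizzle (index % 4); e.g. a 32-bit float at byte offset 20 ends up as buf[1].y.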
9256std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count,
9257 const SPIRType &target_type, uint32_t offset,
9258 uint32_t matrix_stride, bool need_transpose)
9259{
9260 auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16);
9261
9262 auto buffer_name = to_name(expression_type(base).self);
9263
9264 if (need_transpose)
9265 {
9266 std::string expr;
9267
9268 if (target_type.vecsize > 1)
9269 {
9270 expr += type_to_glsl_constructor(target_type);
9271 expr += "(";
9272 }
9273
9274 for (uint32_t i = 0; i < target_type.vecsize; ++i)
9275 {
9276 if (i != 0)
9277 expr += ", ";
9278
9279 uint32_t component_offset = result.second + i * matrix_stride;
9280
9281 assert(component_offset % (target_type.width / 8) == 0);
9282 uint32_t index = component_offset / (target_type.width / 8);
9283
9284 expr += buffer_name;
9285 expr += "[";
9286 expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
9287 expr += convert_to_string(index / 4);
9288 expr += "]";
9289
9290 expr += vector_swizzle(1, index % 4);
9291 }
9292
9293 if (target_type.vecsize > 1)
9294 {
9295 expr += ")";
9296 }
9297
9298 return expr;
9299 }
9300 else
9301 {
9302 assert(result.second % (target_type.width / 8) == 0);
9303 uint32_t index = result.second / (target_type.width / 8);
9304
9305 std::string expr;
9306
9307 expr += buffer_name;
9308 expr += "[";
9309 expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
9310 expr += convert_to_string(index / 4);
9311 expr += "]";
9312
9313 expr += vector_swizzle(target_type.vecsize, index % 4);
9314
9315 return expr;
9316 }
9317}
9318
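// Walks the access chain and returns a pair of (dynamic index expression, constant byte offset)
// for a buffer block that has been flattened into a plain vector array; the string part is a sum
// of dynamic stride terms which is either empty or ends with " + ".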
9319std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(
9320 const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride,
9321 bool *need_transpose, uint32_t *out_matrix_stride, uint32_t *out_array_stride, bool ptr_chain)
9322{
9323 // Start traversing type hierarchy at the proper non-pointer types.
9324 const auto *type = &get_pointee_type(basetype);
9325
9326 std::string expr;
9327
9328	// Inherit matrix information in case we are access chaining a vector which might have come from a row-major layout.
9329 bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false;
9330 uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0;
9331 uint32_t array_stride = out_array_stride ? *out_array_stride : 0;
9332
9333 for (uint32_t i = 0; i < count; i++)
9334 {
9335 uint32_t index = indices[i];
9336
9337 // Pointers
9338 if (ptr_chain && i == 0)
9339 {
9340 // Here, the pointer type will be decorated with an array stride.
9341 array_stride = get_decoration(basetype.self, DecorationArrayStride);
9342 if (!array_stride)
9343 SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");
9344
9345 auto *constant = maybe_get<SPIRConstant>(index);
9346 if (constant)
9347 {
9348 // Constant array access.
9349 offset += constant->scalar() * array_stride;
9350 }
9351 else
9352 {
9353 // Dynamic array access.
9354 if (array_stride % word_stride)
9355 {
9356 SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
9357 "of a 4-component vector. "
9358 "Likely culprit here is a float or vec2 array inside a push "
9359 "constant block which is std430. "
9360 "This cannot be flattened. Try using std140 layout instead.");
9361 }
9362
9363 expr += to_enclosed_expression(index);
9364 expr += " * ";
9365 expr += convert_to_string(array_stride / word_stride);
9366 expr += " + ";
9367 }
9368 }
9369 // Arrays
9370 else if (!type->array.empty())
9371 {
9372 auto *constant = maybe_get<SPIRConstant>(index);
9373 if (constant)
9374 {
9375 // Constant array access.
9376 offset += constant->scalar() * array_stride;
9377 }
9378 else
9379 {
9380 // Dynamic array access.
9381 if (array_stride % word_stride)
9382 {
9383 SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size "
9384 "of a 4-component vector. "
9385 "Likely culprit here is a float or vec2 array inside a push "
9386 "constant block which is std430. "
9387 "This cannot be flattened. Try using std140 layout instead.");
9388 }
9389
9390 expr += to_enclosed_expression(index, false);
9391 expr += " * ";
9392 expr += convert_to_string(array_stride / word_stride);
9393 expr += " + ";
9394 }
9395
9396 uint32_t parent_type = type->parent_type;
9397 type = &get<SPIRType>(parent_type);
9398
9399 if (!type->array.empty())
9400 array_stride = get_decoration(parent_type, DecorationArrayStride);
9401 }
9402 // For structs, the index refers to a constant, which indexes into the members.
9403 // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
9404 else if (type->basetype == SPIRType::Struct)
9405 {
9406 index = evaluate_constant_u32(index);
9407
9408 if (index >= type->member_types.size())
9409 SPIRV_CROSS_THROW("Member index is out of bounds!");
9410
9411 offset += type_struct_member_offset(*type, index);
9412
9413 auto &struct_type = *type;
9414 type = &get<SPIRType>(type->member_types[index]);
9415
9416 if (type->columns > 1)
9417 {
9418 matrix_stride = type_struct_member_matrix_stride(struct_type, index);
9419 row_major_matrix_needs_conversion =
9420 combined_decoration_for_member(struct_type, index).get(DecorationRowMajor);
9421 }
9422 else
9423 row_major_matrix_needs_conversion = false;
9424
9425 if (!type->array.empty())
9426 array_stride = type_struct_member_array_stride(struct_type, index);
9427 }
9428 // Matrix -> Vector
9429 else if (type->columns > 1)
9430 {
9431 auto *constant = maybe_get<SPIRConstant>(index);
9432 if (constant)
9433 {
9434 index = evaluate_constant_u32(index);
9435 offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride);
9436 }
9437 else
9438 {
9439 uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride;
9440 // Dynamic array access.
9441 if (indexing_stride % word_stride)
9442 {
9443 SPIRV_CROSS_THROW("Matrix stride for dynamic indexing must be divisible by the size of a "
9444 "4-component vector. "
9445 "Likely culprit here is a row-major matrix being accessed dynamically. "
9446 "This cannot be flattened. Try using std140 layout instead.");
9447 }
9448
9449 expr += to_enclosed_expression(index, false);
9450 expr += " * ";
9451 expr += convert_to_string(indexing_stride / word_stride);
9452 expr += " + ";
9453 }
9454
9455 type = &get<SPIRType>(type->parent_type);
9456 }
9457 // Vector -> Scalar
9458 else if (type->vecsize > 1)
9459 {
9460 auto *constant = maybe_get<SPIRConstant>(index);
9461 if (constant)
9462 {
9463 index = evaluate_constant_u32(index);
9464 offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8));
9465 }
9466 else
9467 {
9468 uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8);
9469
9470 // Dynamic array access.
9471 if (indexing_stride % word_stride)
9472 {
9473 SPIRV_CROSS_THROW("Stride for dynamic vector indexing must be divisible by the "
9474 "size of a 4-component vector. "
9475 "This cannot be flattened in legacy targets.");
9476 }
9477
9478 expr += to_enclosed_expression(index, false);
9479 expr += " * ";
9480 expr += convert_to_string(indexing_stride / word_stride);
9481 expr += " + ";
9482 }
9483
9484 type = &get<SPIRType>(type->parent_type);
9485 }
9486 else
9487 SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
9488 }
9489
9490 if (need_transpose)
9491 *need_transpose = row_major_matrix_needs_conversion;
9492 if (out_matrix_stride)
9493 *out_matrix_stride = matrix_stride;
9494 if (out_array_stride)
9495 *out_array_stride = array_stride;
9496
9497 return std::make_pair(expr, offset);
9498}
9499
9500bool CompilerGLSL::should_dereference(uint32_t id)
9501{
9502 const auto &type = expression_type(id);
9503 // Non-pointer expressions don't need to be dereferenced.
9504 if (!type.pointer)
9505 return false;
9506
9507 // Handles shouldn't be dereferenced either.
9508 if (!expression_is_lvalue(id))
9509 return false;
9510
9511 // If id is a variable but not a phi variable, we should not dereference it.
9512 if (auto *var = maybe_get<SPIRVariable>(id))
9513 return var->phi_variable;
9514
9515 // If id is an access chain, we should not dereference it.
9516 if (auto *expr = maybe_get<SPIRExpression>(id))
9517 return !expr->access_chain;
9518
9519 // Otherwise, we should dereference this pointer expression.
9520 return true;
9521}
9522
9523bool CompilerGLSL::should_forward(uint32_t id) const
9524{
9525	// If id is a variable, we will try to forward it regardless of the force_temporary check below.
9526	// This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL.
9527
9528 auto *var = maybe_get<SPIRVariable>(id);
9529 if (var && var->forwardable)
9530 return true;
9531
9532 // For debugging emit temporary variables for all expressions
9533 if (options.force_temporary)
9534 return false;
9535
9536 // If an expression carries enough dependencies we need to stop forwarding at some point,
9537 // or we explode compilers. There are usually limits to how much we can nest expressions.
9538 auto *expr = maybe_get<SPIRExpression>(id);
9539 const uint32_t max_expression_dependencies = 64;
9540 if (expr && expr->expression_dependencies.size() >= max_expression_dependencies)
9541 return false;
9542
9543 // Immutable expression can always be forwarded.
9544 if (is_immutable(id))
9545 return true;
9546
9547 return false;
9548}
9549
9550bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const
9551{
9552	// Used only by opcodes which don't do any real "work"; they just swizzle data in some fashion.
9553 return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id);
9554}
9555
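// Counts reads of forwarded expressions. If an expression ends up being read more than once
// (or is read inside a loop after being created outside it), it is forced into a temporary
// and a recompile is triggered so it is not forwarded on the next pass.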
9556void CompilerGLSL::track_expression_read(uint32_t id)
9557{
9558 switch (ir.ids[id].get_type())
9559 {
9560 case TypeExpression:
9561 {
9562 auto &e = get<SPIRExpression>(id);
9563 for (auto implied_read : e.implied_read_expressions)
9564 track_expression_read(implied_read);
9565 break;
9566 }
9567
9568 case TypeAccessChain:
9569 {
9570 auto &e = get<SPIRAccessChain>(id);
9571 for (auto implied_read : e.implied_read_expressions)
9572 track_expression_read(implied_read);
9573 break;
9574 }
9575
9576 default:
9577 break;
9578 }
9579
9580 // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice.
9581 // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice.
9582 if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id))
9583 {
9584 auto &v = expression_usage_counts[id];
9585 v++;
9586
9587 // If we create an expression outside a loop,
9588 // but access it inside a loop, we're implicitly reading it multiple times.
9589 // If the expression in question is expensive, we should hoist it out to avoid relying on loop-invariant code motion
9590 // working inside the backend compiler.
9591 if (expression_read_implies_multiple_reads(id))
9592 v++;
9593
9594 if (v >= 2)
9595 {
9596 //if (v == 2)
9597 // fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id);
9598
9599 forced_temporaries.insert(id);
9600 // Force a recompile after this pass to avoid forwarding this variable.
9601 force_recompile();
9602 }
9603 }
9604}
9605
9606bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure)
9607{
9608 if (forced_temporaries.find(id) != end(forced_temporaries))
9609 return false;
9610
9611 for (uint32_t i = 0; i < num_args; i++)
9612 if (!should_forward(args[i]))
9613 return false;
9614
9615 // We need to forward globals as well.
9616 if (!pure)
9617 {
9618 for (auto global : global_variables)
9619 if (!should_forward(global))
9620 return false;
9621 for (auto aliased : aliased_variables)
9622 if (!should_forward(aliased))
9623 return false;
9624 }
9625
9626 return true;
9627}
9628
9629void CompilerGLSL::register_impure_function_call()
9630{
9631 // Impure functions can modify globals and aliased variables, so invalidate them as well.
9632 for (auto global : global_variables)
9633 flush_dependees(get<SPIRVariable>(global));
9634 for (auto aliased : aliased_variables)
9635 flush_dependees(get<SPIRVariable>(aliased));
9636}
9637
9638void CompilerGLSL::register_call_out_argument(uint32_t id)
9639{
9640 register_write(id);
9641
9642 auto *var = maybe_get<SPIRVariable>(id);
9643 if (var)
9644 flush_variable_declaration(var->self);
9645}
9646
9647string CompilerGLSL::variable_decl_function_local(SPIRVariable &var)
9648{
9649 // These variables are always function local,
9650 // so make sure we emit the variable without storage qualifiers.
9651 // Some backends will inject custom variables locally in a function
9652 // with a storage qualifier which is not function-local.
9653 auto old_storage = var.storage;
9654 var.storage = StorageClassFunction;
9655 auto expr = variable_decl(var);
9656 var.storage = old_storage;
9657 return expr;
9658}
9659
9660void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var)
9661{
9662 // Ensure that we declare phi-variable copies even if the original declaration isn't deferred
9663 if (var.allocate_temporary_copy && !flushed_phi_variables.count(var.self))
9664 {
9665 auto &type = get<SPIRType>(var.basetype);
9666 auto &flags = get_decoration_bitset(var.self);
9667 statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";");
9668 flushed_phi_variables.insert(var.self);
9669 }
9670}
9671
9672void CompilerGLSL::flush_variable_declaration(uint32_t id)
9673{
9674	// Emit any deferred declaration for this variable, and make sure phi-variable copies are declared as well.
9675 auto *var = maybe_get<SPIRVariable>(id);
9676 if (var && var->deferred_declaration)
9677 {
9678 string initializer;
9679 if (options.force_zero_initialized_variables &&
9680 (var->storage == StorageClassFunction || var->storage == StorageClassGeneric ||
9681 var->storage == StorageClassPrivate) &&
9682 !var->initializer && type_can_zero_initialize(get_variable_data_type(*var)))
9683 {
9684 initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(*var)));
9685 }
9686
9687 statement(variable_decl_function_local(*var), initializer, ";");
9688 var->deferred_declaration = false;
9689 }
9690 if (var)
9691 {
9692 emit_variable_temporary_copies(*var);
9693 }
9694}
9695
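// Collapses chained swizzles where the trailing swizzle is an identity prefix (.x, .xy, .xyz, .xyzw),
// e.g. (roughly) "foo.yxz.xy" folds into "foo.yx". Returns true if the trailing swizzle is such a
// prefix applied on top of another swizzle, even when nothing ends up being erased.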
9696bool CompilerGLSL::remove_duplicate_swizzle(string &op)
9697{
9698 auto pos = op.find_last_of('.');
9699 if (pos == string::npos || pos == 0)
9700 return false;
9701
9702 string final_swiz = op.substr(pos + 1, string::npos);
9703
9704 if (backend.swizzle_is_function)
9705 {
9706 if (final_swiz.size() < 2)
9707 return false;
9708
9709 if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
9710 final_swiz.erase(final_swiz.size() - 2, string::npos);
9711 else
9712 return false;
9713 }
9714
9715 // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
9716 // If so, and previous swizzle is of same length,
9717 // we can drop the final swizzle altogether.
9718 for (uint32_t i = 0; i < final_swiz.size(); i++)
9719 {
9720 static const char expected[] = { 'x', 'y', 'z', 'w' };
9721 if (i >= 4 || final_swiz[i] != expected[i])
9722 return false;
9723 }
9724
9725 auto prevpos = op.find_last_of('.', pos - 1);
9726 if (prevpos == string::npos)
9727 return false;
9728
9729 prevpos++;
9730
9731 // Make sure there are only swizzles here ...
9732 for (auto i = prevpos; i < pos; i++)
9733 {
9734 if (op[i] < 'w' || op[i] > 'z')
9735 {
9736 // If swizzles are foo.xyz() like in C++ backend for example, check for that.
9737 if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')')
9738 break;
9739 return false;
9740 }
9741 }
9742
9743 // If original swizzle is large enough, just carve out the components we need.
9744 // E.g. foobar.wyx.xy will turn into foobar.wy.
9745 if (pos - prevpos >= final_swiz.size())
9746 {
9747 op.erase(prevpos + final_swiz.size(), string::npos);
9748
9749 // Add back the function call ...
9750 if (backend.swizzle_is_function)
9751 op += "()";
9752 }
9753 return true;
9754}
9755
9756// Optimizes away vector swizzles where we have something like
9757// vec3 foo;
9758// foo.xyz <-- swizzle expression does nothing.
9759// This is a very common pattern after OpCompositeConstruct.
9760bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op)
9761{
9762 auto pos = op.find_last_of('.');
9763 if (pos == string::npos || pos == 0)
9764 return false;
9765
9766 string final_swiz = op.substr(pos + 1, string::npos);
9767
9768 if (backend.swizzle_is_function)
9769 {
9770 if (final_swiz.size() < 2)
9771 return false;
9772
9773 if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
9774 final_swiz.erase(final_swiz.size() - 2, string::npos);
9775 else
9776 return false;
9777 }
9778
9779 // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
9780 // If so, and previous swizzle is of same length,
9781 // we can drop the final swizzle altogether.
9782 for (uint32_t i = 0; i < final_swiz.size(); i++)
9783 {
9784 static const char expected[] = { 'x', 'y', 'z', 'w' };
9785 if (i >= 4 || final_swiz[i] != expected[i])
9786 return false;
9787 }
9788
9789 auto &type = expression_type(base);
9790
9791 // Sanity checking ...
9792 assert(type.columns == 1 && type.array.empty());
9793
9794 if (type.vecsize == final_swiz.size())
9795 op.erase(pos, string::npos);
9796 return true;
9797}
9798
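// Builds the comma-separated argument list for a composite constructor, merging per-component
// extracts from the same base vector back into a single swizzle where possible. As a rough example,
// constructing a vec4 from foo.x, foo.y, foo.z and 1.0 should ideally come out as
// "foo.xyz, 1.0" rather than "foo.x, foo.y, foo.z, 1.0".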
9799string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length)
9800{
9801 ID base = 0;
9802 string op;
9803 string subop;
9804
9805 // Can only merge swizzles for vectors.
9806 auto &type = get<SPIRType>(return_type);
9807 bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1;
9808 bool swizzle_optimization = false;
9809
9810 for (uint32_t i = 0; i < length; i++)
9811 {
9812 auto *e = maybe_get<SPIRExpression>(elems[i]);
9813
9814 // If we're merging another scalar which belongs to the same base
9815		// object, just merge the swizzles so we trigger as few extra expression reads as possible.
9816 if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base)
9817 {
9818 // Only supposed to be used for vector swizzle -> scalar.
9819 assert(!e->expression.empty() && e->expression.front() == '.');
9820 subop += e->expression.substr(1, string::npos);
9821 swizzle_optimization = true;
9822 }
9823 else
9824 {
9825 // We'll likely end up with duplicated swizzles, e.g.
9826 // foobar.xyz.xyz from patterns like
9827 // OpVectorShuffle
9828 // OpCompositeExtract x 3
9829 // OpCompositeConstruct 3x + other scalar.
9830 // Just modify op in-place.
9831 if (swizzle_optimization)
9832 {
9833 if (backend.swizzle_is_function)
9834 subop += "()";
9835
9836 // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles.
9837 // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on.
9838 // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize.
9839 // Essentially, we can only remove one set of swizzles, since that's what we have control over ...
9840 // Case 1:
9841 // foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done.
9842 // foo.yxz was the result of OpVectorShuffle and we don't know the type of foo.
9843 // Case 2:
9844 // foo.xyz: Duplicate swizzle won't kick in.
9845 // If foo is vec3, we can remove xyz, giving just foo.
9846 if (!remove_duplicate_swizzle(subop))
9847 remove_unity_swizzle(base, subop);
9848
9849 // Strips away redundant parens if we created them during component extraction.
9850 strip_enclosed_expression(subop);
9851 swizzle_optimization = false;
9852 op += subop;
9853 }
9854 else
9855 op += subop;
9856
9857 if (i)
9858 op += ", ";
9859
9860 bool uses_buffer_offset =
9861 type.basetype == SPIRType::Struct && has_member_decoration(type.self, i, DecorationOffset);
9862 subop = to_composite_constructor_expression(elems[i], uses_buffer_offset);
9863 }
9864
9865 base = e ? e->base_expression : ID(0);
9866 }
9867
9868 if (swizzle_optimization)
9869 {
9870 if (backend.swizzle_is_function)
9871 subop += "()";
9872
9873 if (!remove_duplicate_swizzle(subop))
9874 remove_unity_swizzle(base, subop);
9875 // Strips away redundant parens if we created them during component extraction.
9876 strip_enclosed_expression(subop);
9877 }
9878
9879 op += subop;
9880 return op;
9881}
9882
9883bool CompilerGLSL::skip_argument(uint32_t id) const
9884{
9885 if (!combined_image_samplers.empty() || !options.vulkan_semantics)
9886 {
9887 auto &type = expression_type(id);
9888 if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1))
9889 return true;
9890 }
9891 return false;
9892}
9893
9894bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs)
9895{
9896 // Do this with strings because we have a very clear pattern we can check for and it avoids
9897 // adding lots of special cases to the code emission.
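	// Illustrative sketch of the pattern (hypothetical strings):
	//   lhs == "a.x", rhs == "a.x + b"  ->  emits "a.x += b;"
	//   lhs == "a.x", rhs == "a.x + 1"  ->  emits "a.x++;"
	// Anything that does not match this "<lhs> <op> <expr>" shape falls through and returns false.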
9898 if (rhs.size() < lhs.size() + 3)
9899 return false;
9900
9901 // Do not optimize matrices. They are a bit awkward to reason about in general
9902	// (in which order do the operations happen?), and it does not work in MSL anyway.
9903 if (type.vecsize > 1 && type.columns > 1)
9904 return false;
9905
9906 auto index = rhs.find(lhs);
9907 if (index != 0)
9908 return false;
9909
9910 // TODO: Shift operators, but it's not important for now.
9911 auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1);
9912 if (op != lhs.size() + 1)
9913 return false;
9914
9915 // Check that the op is followed by space. This excludes && and ||.
9916 if (rhs[op + 1] != ' ')
9917 return false;
9918
9919 char bop = rhs[op];
9920 auto expr = rhs.substr(lhs.size() + 3);
9921	// Try to find increments and decrements. It looks neater, since += 1 / -= 1 is fairly rare to see in real code.
9922 // Find some common patterns which are equivalent.
9923 if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)"))
9924 statement(lhs, bop, bop, ";");
9925 else
9926 statement(lhs, " ", bop, "= ", expr, ";");
9927 return true;
9928}
9929
9930void CompilerGLSL::register_control_dependent_expression(uint32_t expr)
9931{
9932 if (forwarded_temporaries.find(expr) == end(forwarded_temporaries))
9933 return;
9934
9935 assert(current_emitting_block);
9936 current_emitting_block->invalidate_expressions.push_back(expr);
9937}
9938
9939void CompilerGLSL::emit_block_instructions(SPIRBlock &block)
9940{
9941 current_emitting_block = &block;
9942 for (auto &op : block.ops)
9943 emit_instruction(op);
9944 current_emitting_block = nullptr;
9945}
9946
9947void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr)
9948{
9949 // Allow trivially forwarded expressions like OpLoad or trivial shuffles,
9950 // these will be marked as having suppressed usage tracking.
9951 // Our only concern is to make sure arithmetic operations are done in similar ways.
9952 if (expression_is_forwarded(expr.self) && !expression_suppresses_usage_tracking(expr.self) &&
9953 forced_invariant_temporaries.count(expr.self) == 0)
9954 {
9955 forced_temporaries.insert(expr.self);
9956 forced_invariant_temporaries.insert(expr.self);
9957 force_recompile();
9958
9959 for (auto &dependent : expr.expression_dependencies)
9960 disallow_forwarding_in_expression_chain(get<SPIRExpression>(dependent));
9961 }
9962}
9963
9964void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id)
9965{
9966 // Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to
9967 // this variable is consistent. The failure case for SPIRV-Cross is when an expression is forced to a temporary
9968 // in one translation unit, but not another, e.g. due to multiple use of an expression.
9969 // This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent
9970 // expressions to be temporaries.
9971 // It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough
9972 // for all reasonable uses of invariant.
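	// Hypothetical example of the hazard: two entry points both compute an invariant gl_Position
	// from the same "a * b + c" subexpression; if one keeps it as a forwarded expression while the
	// other is forced to a temporary, the backend compiler may schedule the math differently and
	// break the invariance guarantee.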
9973 if (!has_decoration(store_id, DecorationInvariant))
9974 return;
9975
9976 auto *expr = maybe_get<SPIRExpression>(value_id);
9977 if (!expr)
9978 return;
9979
9980 disallow_forwarding_in_expression_chain(*expr);
9981}
9982
9983void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression)
9984{
9985 auto rhs = to_pointer_expression(rhs_expression);
9986
9987 // Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null.
9988 if (!rhs.empty())
9989 {
9990 handle_store_to_invariant_variable(lhs_expression, rhs_expression);
9991
9992 if (!unroll_array_to_complex_store(lhs_expression, rhs_expression))
9993 {
9994 auto lhs = to_dereferenced_expression(lhs_expression);
9995 if (has_decoration(lhs_expression, DecorationNonUniform))
9996 convert_non_uniform_expression(lhs, lhs_expression);
9997
9998 // We might need to cast in order to store to a builtin.
9999 cast_to_variable_store(lhs_expression, rhs, expression_type(rhs_expression));
10000
10001 // Tries to optimize assignments like "<lhs> = <lhs> op expr".
10002 // While this is purely cosmetic, this is important for legacy ESSL where loop
10003 // variable increments must be in either i++ or i += const-expr.
10004 // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
10005 if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
10006 statement(lhs, " = ", rhs, ";");
10007 }
10008 register_write(lhs_expression);
10009 }
10010}
10011
10012uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const
10013{
10014 if (instr.length < 3)
10015 return 32;
10016
10017 auto *ops = stream(instr);
10018
10019 switch (instr.op)
10020 {
10021 case OpSConvert:
10022 case OpConvertSToF:
10023 case OpUConvert:
10024 case OpConvertUToF:
10025 case OpIEqual:
10026 case OpINotEqual:
10027 case OpSLessThan:
10028 case OpSLessThanEqual:
10029 case OpSGreaterThan:
10030 case OpSGreaterThanEqual:
10031 case OpULessThan:
10032 case OpULessThanEqual:
10033 case OpUGreaterThan:
10034 case OpUGreaterThanEqual:
10035 return expression_type(ops[2]).width;
10036
10037 default:
10038 {
10039		// We can look at the result type instead, which is more robust.
10040 auto *type = maybe_get<SPIRType>(ops[0]);
10041 if (type && type_is_integral(*type))
10042 return type->width;
10043 else
10044 return 32;
10045 }
10046 }
10047}
10048
10049uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const
10050{
10051 if (length < 1)
10052 return 32;
10053
10054 switch (op)
10055 {
10056 case GLSLstd450SAbs:
10057 case GLSLstd450SSign:
10058 case GLSLstd450UMin:
10059 case GLSLstd450SMin:
10060 case GLSLstd450UMax:
10061 case GLSLstd450SMax:
10062 case GLSLstd450UClamp:
10063 case GLSLstd450SClamp:
10064 case GLSLstd450FindSMsb:
10065 case GLSLstd450FindUMsb:
10066 return expression_type(ops[0]).width;
10067
10068 default:
10069 {
10070 // We don't need to care about other opcodes, just return 32.
10071 return 32;
10072 }
10073 }
10074}
10075
10076void CompilerGLSL::emit_instruction(const Instruction &instruction)
10077{
10078 auto ops = stream(instruction);
10079 auto opcode = static_cast<Op>(instruction.op);
10080 uint32_t length = instruction.length;
10081
10082#define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
10083#define GLSL_BOP_CAST(op, type) \
10084 emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
10085#define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
10086#define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
10087#define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
10088#define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
10089#define GLSL_BFOP_CAST(op, type) \
10090 emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
10092#define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
10093
10094 // If we need to do implicit bitcasts, make sure we do it with the correct type.
10095 uint32_t integer_width = get_integer_width_for_instruction(instruction);
10096 auto int_type = to_signed_basetype(integer_width);
10097 auto uint_type = to_unsigned_basetype(integer_width);
10098
10099 switch (opcode)
10100 {
10101 // Dealing with memory
10102 case OpLoad:
10103 {
10104 uint32_t result_type = ops[0];
10105 uint32_t id = ops[1];
10106 uint32_t ptr = ops[2];
10107
10108 flush_variable_declaration(ptr);
10109
10110 // If we're loading from memory that cannot be changed by the shader,
10111 // just forward the expression directly to avoid needless temporaries.
10112 // If an expression is mutable and forwardable, we speculate that it is immutable.
10113 bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);
10114
10115 // If loading a non-native row-major matrix, mark the expression as need_transpose.
10116 bool need_transpose = false;
10117 bool old_need_transpose = false;
10118
10119 auto *ptr_expression = maybe_get<SPIRExpression>(ptr);
10120
10121 if (forward)
10122 {
10123 // If we're forwarding the load, we're also going to forward transpose state, so don't transpose while
10124 // taking the expression.
10125 if (ptr_expression && ptr_expression->need_transpose)
10126 {
10127 old_need_transpose = true;
10128 ptr_expression->need_transpose = false;
10129 need_transpose = true;
10130 }
10131 else if (is_non_native_row_major_matrix(ptr))
10132 need_transpose = true;
10133 }
10134
10135 // If we are forwarding this load,
10136 // don't register the read to access chain here, defer that to when we actually use the expression,
10137 // using the add_implied_read_expression mechanism.
10138 string expr;
10139
10140 bool is_packed = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked);
10141 bool is_remapped = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID);
10142 if (forward || (!is_packed && !is_remapped))
10143 {
10144 // For the simple case, we do not need to deal with repacking.
10145 expr = to_dereferenced_expression(ptr, false);
10146 }
10147 else
10148 {
10149 // If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before
10150 // storing the expression to a temporary.
10151 expr = to_unpacked_expression(ptr);
10152 }
10153
10154 auto &type = get<SPIRType>(result_type);
10155 auto &expr_type = expression_type(ptr);
10156
10157 // If the expression has more vector components than the result type, insert
10158 // a swizzle. This shouldn't happen normally on valid SPIR-V, but it might
10159 // happen with e.g. the MSL backend replacing the type of an input variable.
10160 if (expr_type.vecsize > type.vecsize)
10161 expr = enclose_expression(expr + vector_swizzle(type.vecsize, 0));
10162
10163 // We might need to cast in order to load from a builtin.
10164 cast_from_variable_load(ptr, expr, type);
10165
10166 // We might be trying to load a gl_Position[N], where we should be
10167 // doing float4[](gl_in[i].gl_Position, ...) instead.
10168 // Similar workarounds are required for input arrays in tessellation.
10169 // Also, loading from gl_SampleMask array needs special unroll.
10170 unroll_array_from_complex_load(id, ptr, expr);
10171
10172 if (!type_is_opaque_value(type) && has_decoration(ptr, DecorationNonUniform))
10173 {
10174 // If we're loading something non-opaque, we need to handle non-uniform descriptor access.
10175 convert_non_uniform_expression(expr, ptr);
10176 }
10177
10178 if (forward && ptr_expression)
10179 ptr_expression->need_transpose = old_need_transpose;
10180
10181 bool flattened = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0;
10182
10183 if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(ptr) && !flattened)
10184 rewrite_load_for_wrapped_row_major(expr, result_type, ptr);
10185
10186 // By default, suppress usage tracking since using same expression multiple times does not imply any extra work.
10187 // However, if we try to load a complex, composite object from a flattened buffer,
10188 // we should avoid emitting the same code over and over and lower the result to a temporary.
10189 bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1));
10190
10191 SPIRExpression *e = nullptr;
10192 if (!forward && expression_is_non_value_type_array(ptr))
10193 {
10194 // Complicated load case where we need to make a copy of ptr, but we cannot, because
10195 // it is an array, and our backend does not support arrays as value types.
10196 // Emit the temporary, and copy it explicitly.
10197 e = &emit_uninitialized_temporary_expression(result_type, id);
10198 emit_array_copy(to_expression(id), id, ptr, StorageClassFunction, get_expression_effective_storage_class(ptr));
10199 }
10200 else
10201 e = &emit_op(result_type, id, expr, forward, !usage_tracking);
10202
10203 e->need_transpose = need_transpose;
10204 register_read(id, ptr, forward);
10205
10206 if (forward)
10207 {
10208 // Pass through whether the result is of a packed type and the physical type ID.
10209 if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked))
10210 set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
10211 if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID))
10212 {
10213 set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID,
10214 get_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID));
10215 }
10216 }
10217 else
10218 {
10219 // This might have been set on an earlier compilation iteration, force it to be unset.
10220 unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
10221 unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
10222 }
10223
10224 inherit_expression_dependencies(id, ptr);
10225 if (forward)
10226 add_implied_read_expression(*e, ptr);
10227 break;
10228 }
10229
10230 case OpInBoundsAccessChain:
10231 case OpAccessChain:
10232 case OpPtrAccessChain:
10233 {
10234 auto *var = maybe_get<SPIRVariable>(ops[2]);
10235 if (var)
10236 flush_variable_declaration(var->self);
10237
10238 // If the base is immutable, the access chain pointer must also be.
10239 // If an expression is mutable and forwardable, we speculate that it is immutable.
10240 AccessChainMeta meta;
10241 bool ptr_chain = opcode == OpPtrAccessChain;
10242 auto e = access_chain(ops[2], &ops[3], length - 3, get<SPIRType>(ops[0]), &meta, ptr_chain);
10243
10244 auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2]));
10245
10246 auto *backing_variable = maybe_get_backing_variable(ops[2]);
10247 expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]);
10248 expr.need_transpose = meta.need_transpose;
10249 expr.access_chain = true;
10250
10251		// Mark the result as being packed. Some platforms handle packed vectors differently from non-packed ones.
10252 if (meta.storage_is_packed)
10253 set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked);
10254 if (meta.storage_physical_type != 0)
10255 set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
10256 if (meta.storage_is_invariant)
10257 set_decoration(ops[1], DecorationInvariant);
10258 if (meta.flattened_struct)
10259 flattened_structs[ops[1]] = true;
10260
10261 // If we have some expression dependencies in our access chain, this access chain is technically a forwarded
10262 // temporary which could be subject to invalidation.
10263		// Need to assume we're forwarded while calling inherit_expression_dependencies.
10264 forwarded_temporaries.insert(ops[1]);
10265 // The access chain itself is never forced to a temporary, but its dependencies might.
10266 suppressed_usage_tracking.insert(ops[1]);
10267
10268 for (uint32_t i = 2; i < length; i++)
10269 {
10270 inherit_expression_dependencies(ops[1], ops[i]);
10271 add_implied_read_expression(expr, ops[i]);
10272 }
10273
10274		// If it turns out we have no dependencies, i.e., all indices in the access chain are immutable temporaries,
10275 // we're not forwarded after all.
10276 if (expr.expression_dependencies.empty())
10277 forwarded_temporaries.erase(ops[1]);
10278
10279 break;
10280 }
10281
10282 case OpStore:
10283 {
10284 auto *var = maybe_get<SPIRVariable>(ops[0]);
10285
10286 if (var && var->statically_assigned)
10287 var->static_expression = ops[1];
10288 else if (var && var->loop_variable && !var->loop_variable_enable)
10289 var->static_expression = ops[1];
10290 else if (var && var->remapped_variable && var->static_expression)
10291 {
10292 // Skip the write.
10293 }
10294 else if (flattened_structs.count(ops[0]))
10295 {
10296 store_flattened_struct(ops[0], ops[1]);
10297 register_write(ops[0]);
10298 }
10299 else
10300 {
10301 emit_store_statement(ops[0], ops[1]);
10302 }
10303
10304 // Storing a pointer results in a variable pointer, so we must conservatively assume
10305 // we can write through it.
10306 if (expression_type(ops[1]).pointer)
10307 register_write(ops[1]);
10308 break;
10309 }
10310
10311 case OpArrayLength:
10312 {
10313 uint32_t result_type = ops[0];
10314 uint32_t id = ops[1];
10315 auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
10316 if (has_decoration(ops[2], DecorationNonUniform))
10317 convert_non_uniform_expression(e, ops[2]);
10318 set<SPIRExpression>(id, join(type_to_glsl(get<SPIRType>(result_type)), "(", e, ".length())"), result_type,
10319 true);
10320 break;
10321 }
10322
10323 // Function calls
10324 case OpFunctionCall:
10325 {
10326 uint32_t result_type = ops[0];
10327 uint32_t id = ops[1];
10328 uint32_t func = ops[2];
10329 const auto *arg = &ops[3];
10330 length -= 3;
10331
10332 auto &callee = get<SPIRFunction>(func);
10333 auto &return_type = get<SPIRType>(callee.return_type);
10334 bool pure = function_is_pure(callee);
10335
10336 bool callee_has_out_variables = false;
10337 bool emit_return_value_as_argument = false;
10338
10339 // Invalidate out variables passed to functions since they can be OpStore'd to.
10340 for (uint32_t i = 0; i < length; i++)
10341 {
10342 if (callee.arguments[i].write_count)
10343 {
10344 register_call_out_argument(arg[i]);
10345 callee_has_out_variables = true;
10346 }
10347
10348 flush_variable_declaration(arg[i]);
10349 }
10350
10351 if (!return_type.array.empty() && !backend.can_return_array)
10352 {
10353 callee_has_out_variables = true;
10354 emit_return_value_as_argument = true;
10355 }
10356
10357 if (!pure)
10358 register_impure_function_call();
10359
10360 string funexpr;
10361 SmallVector<string> arglist;
10362 funexpr += to_name(func) + "(";
10363
10364 if (emit_return_value_as_argument)
10365 {
10366 statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type), ";");
10367 arglist.push_back(to_name(id));
10368 }
10369
10370 for (uint32_t i = 0; i < length; i++)
10371 {
10372 // Do not pass in separate images or samplers if we're remapping
10373 // to combined image samplers.
10374 if (skip_argument(arg[i]))
10375 continue;
10376
10377 arglist.push_back(to_func_call_arg(callee.arguments[i], arg[i]));
10378 }
10379
10380 for (auto &combined : callee.combined_parameters)
10381 {
10382 auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]);
10383 auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]);
10384 arglist.push_back(to_combined_image_sampler(image_id, sampler_id));
10385 }
10386
10387 append_global_func_args(callee, length, arglist);
10388
10389 funexpr += merge(arglist);
10390 funexpr += ")";
10391
10392 // Check for function call constraints.
10393 check_function_call_constraints(arg, length);
10394
10395 if (return_type.basetype != SPIRType::Void)
10396 {
10397 // If the function actually writes to an out variable,
10398 // take the conservative route and do not forward.
10399			// The problem is that we might not read the function
10400			// result (and thus emit the call) before an out variable
10401			// is read (a common case when the return value is ignored!).
10402			// To avoid having to start tracking invalidated variables,
10403			// just sidestep the forwarding problem altogether.
10404 bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure &&
10405 (forced_temporaries.find(id) == end(forced_temporaries));
10406
10407 if (emit_return_value_as_argument)
10408 {
10409 statement(funexpr, ";");
10410 set<SPIRExpression>(id, to_name(id), result_type, true);
10411 }
10412 else
10413 emit_op(result_type, id, funexpr, forward);
10414
10415 // Function calls are implicit loads from all variables in question.
10416 // Set dependencies for them.
10417 for (uint32_t i = 0; i < length; i++)
10418 register_read(id, arg[i], forward);
10419
10420 // If we're going to forward the temporary result,
10421 // put dependencies on every variable that must not change.
10422 if (forward)
10423 register_global_read_dependencies(callee, id);
10424 }
10425 else
10426 statement(funexpr, ";");
10427
10428 break;
10429 }
10430
10431 // Composite munging
10432 case OpCompositeConstruct:
10433 {
10434 uint32_t result_type = ops[0];
10435 uint32_t id = ops[1];
10436 const auto *const elems = &ops[2];
10437 length -= 2;
10438
10439 bool forward = true;
10440 for (uint32_t i = 0; i < length; i++)
10441 forward = forward && should_forward(elems[i]);
10442
10443 auto &out_type = get<SPIRType>(result_type);
10444 auto *in_type = length > 0 ? &expression_type(elems[0]) : nullptr;
10445
10446 // Only splat if we have vector constructors.
10447 // Arrays and structs must be initialized properly in full.
10448 bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct;
10449
10450 bool splat = false;
10451 bool swizzle_splat = false;
10452
10453 if (in_type)
10454 {
10455 splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting;
10456 swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar;
10457
10458 if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(*in_type))
10459 {
10460				// As a special case, we cannot swizzle literal integers.
10461 swizzle_splat = false;
10462 }
10463 }
10464
10465 if (splat || swizzle_splat)
10466 {
10467 uint32_t input = elems[0];
10468 for (uint32_t i = 0; i < length; i++)
10469 {
10470 if (input != elems[i])
10471 {
10472 splat = false;
10473 swizzle_splat = false;
10474 }
10475 }
10476 }
10477
10478 if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
10479 forward = false;
10480 if (!out_type.array.empty() && !backend.can_declare_arrays_inline)
10481 forward = false;
10482 if (type_is_empty(out_type) && !backend.supports_empty_struct)
10483 forward = false;
10484
10485 string constructor_op;
10486 if (backend.use_initializer_list && composite)
10487 {
10488			bool needs_trailing_bracket = false;
10489 // Only use this path if we are building composites.
10490 // This path cannot be used for arithmetic.
10491 if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty())
10492 constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type));
10493 else if (backend.use_typed_initializer_list && backend.array_is_value_type && !out_type.array.empty())
10494 {
10495 // MSL path. Array constructor is baked into type here, do not use _constructor variant.
10496 constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
10497				needs_trailing_bracket = true;
10498 }
10499 constructor_op += "{ ";
10500
10501 if (type_is_empty(out_type) && !backend.supports_empty_struct)
10502 constructor_op += "0";
10503 else if (splat)
10504 constructor_op += to_unpacked_expression(elems[0]);
10505 else
10506 constructor_op += build_composite_combiner(result_type, elems, length);
10507 constructor_op += " }";
10508			if (needs_trailing_bracket)
10509 constructor_op += ")";
10510 }
10511 else if (swizzle_splat && !composite)
10512 {
10513 constructor_op = remap_swizzle(get<SPIRType>(result_type), 1, to_unpacked_expression(elems[0]));
10514 }
10515 else
10516 {
10517 constructor_op = type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
10518 if (type_is_empty(out_type) && !backend.supports_empty_struct)
10519 constructor_op += "0";
10520 else if (splat)
10521 constructor_op += to_unpacked_expression(elems[0]);
10522 else
10523 constructor_op += build_composite_combiner(result_type, elems, length);
10524 constructor_op += ")";
10525 }
10526
10527 if (!constructor_op.empty())
10528 {
10529 emit_op(result_type, id, constructor_op, forward);
10530 for (uint32_t i = 0; i < length; i++)
10531 inherit_expression_dependencies(id, elems[i]);
10532 }
10533 break;
10534 }
10535
10536 case OpVectorInsertDynamic:
10537 {
10538 uint32_t result_type = ops[0];
10539 uint32_t id = ops[1];
10540 uint32_t vec = ops[2];
10541 uint32_t comp = ops[3];
10542 uint32_t index = ops[4];
10543
10544 flush_variable_declaration(vec);
10545
10546 // Make a copy, then use access chain to store the variable.
10547 statement(declare_temporary(result_type, id), to_expression(vec), ";");
10548 set<SPIRExpression>(id, to_name(id), result_type, true);
10549 auto chain = access_chain_internal(id, &index, 1, 0, nullptr);
10550 statement(chain, " = ", to_unpacked_expression(comp), ";");
10551 break;
10552 }
10553
10554 case OpVectorExtractDynamic:
10555 {
10556 uint32_t result_type = ops[0];
10557 uint32_t id = ops[1];
10558
10559 auto expr = access_chain_internal(ops[2], &ops[3], 1, 0, nullptr);
10560 emit_op(result_type, id, expr, should_forward(ops[2]));
10561 inherit_expression_dependencies(id, ops[2]);
10562 inherit_expression_dependencies(id, ops[3]);
10563 break;
10564 }
10565
10566 case OpCompositeExtract:
10567 {
10568 uint32_t result_type = ops[0];
10569 uint32_t id = ops[1];
10570 length -= 3;
10571
10572 auto &type = get<SPIRType>(result_type);
10573
10574 // We can only split the expression here if our expression is forwarded as a temporary.
10575 bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries);
10576
10577 // Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case.
10578 auto &composite_type = expression_type(ops[2]);
10579 bool composite_type_is_complex = composite_type.basetype == SPIRType::Struct || !composite_type.array.empty();
10580 if (composite_type_is_complex)
10581 allow_base_expression = false;
10582
10583 // Packed expressions or physical ID mapped expressions cannot be split up.
10584 if (has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypePacked) ||
10585 has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypeID))
10586 allow_base_expression = false;
10587
10588 // Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern
10589 // into the base expression.
10590 if (is_non_native_row_major_matrix(ops[2]))
10591 allow_base_expression = false;
10592
10593 AccessChainMeta meta;
10594 SPIRExpression *e = nullptr;
10595 auto *c = maybe_get<SPIRConstant>(ops[2]);
10596
10597 if (c && !c->specialization && !composite_type_is_complex)
10598 {
10599 auto expr = to_extract_constant_composite_expression(result_type, *c, ops + 3, length);
10600 e = &emit_op(result_type, id, expr, true, true);
10601 }
10602 else if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1)
10603 {
10604 // Only apply this optimization if result is scalar.
10605
10606 // We want to split the access chain from the base.
10607 // This is so we can later combine different CompositeExtract results
10608 // with CompositeConstruct without emitting code like
10609 //
10610 // vec3 temp = texture(...).xyz
10611 // vec4(temp.x, temp.y, temp.z, 1.0).
10612 //
10613 // when we actually wanted to emit this
10614 // vec4(texture(...).xyz, 1.0).
10615 //
10616 // Including the base will prevent this and would trigger multiple reads
10617 // from expression causing it to be forced to an actual temporary in GLSL.
10618 auto expr = access_chain_internal(ops[2], &ops[3], length,
10619 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT |
10620 ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta);
10621 e = &emit_op(result_type, id, expr, true, should_suppress_usage_tracking(ops[2]));
10622 inherit_expression_dependencies(id, ops[2]);
10623 e->base_expression = ops[2];
10624 }
10625 else
10626 {
10627 auto expr = access_chain_internal(ops[2], &ops[3], length,
10628 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta);
10629 e = &emit_op(result_type, id, expr, should_forward(ops[2]), should_suppress_usage_tracking(ops[2]));
10630 inherit_expression_dependencies(id, ops[2]);
10631 }
10632
10633 // Pass through some meta information to the loaded expression.
10634 // We can still end up loading a buffer type to a variable, then CompositeExtract from it
10635 // instead of loading everything through an access chain.
10636 e->need_transpose = meta.need_transpose;
10637 if (meta.storage_is_packed)
10638 set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
10639 if (meta.storage_physical_type != 0)
10640 set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type);
10641 if (meta.storage_is_invariant)
10642 set_decoration(id, DecorationInvariant);
10643
10644 break;
10645 }
10646
10647 case OpCompositeInsert:
10648 {
10649 uint32_t result_type = ops[0];
10650 uint32_t id = ops[1];
10651 uint32_t obj = ops[2];
10652 uint32_t composite = ops[3];
10653 const auto *elems = &ops[4];
10654 length -= 4;
10655
10656 flush_variable_declaration(composite);
10657
10658 // Make a copy, then use access chain to store the variable.
10659 statement(declare_temporary(result_type, id), to_expression(composite), ";");
10660 set<SPIRExpression>(id, to_name(id), result_type, true);
10661 auto chain = access_chain_internal(id, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr);
10662 statement(chain, " = ", to_unpacked_expression(obj), ";");
10663
10664 break;
10665 }
10666
10667 case OpCopyMemory:
10668 {
10669 uint32_t lhs = ops[0];
10670 uint32_t rhs = ops[1];
10671 if (lhs != rhs)
10672 {
10673 uint32_t &tmp_id = extra_sub_expressions[instruction.offset | EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET];
10674 if (!tmp_id)
10675 tmp_id = ir.increase_bound_by(1);
10676 uint32_t tmp_type_id = expression_type(rhs).parent_type;
10677
10678 EmbeddedInstruction fake_load, fake_store;
10679 fake_load.op = OpLoad;
10680 fake_load.length = 3;
10681 fake_load.ops.push_back(tmp_type_id);
10682 fake_load.ops.push_back(tmp_id);
10683 fake_load.ops.push_back(rhs);
10684
10685 fake_store.op = OpStore;
10686 fake_store.length = 2;
10687 fake_store.ops.push_back(lhs);
10688 fake_store.ops.push_back(tmp_id);
10689
10690 // Load and Store do a *lot* of workarounds, and we'd like to reuse them as much as possible.
10691 // Synthesize a fake Load and Store pair for CopyMemory.
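			// Conceptually, "OpCopyMemory dst, src" is handled here as if it were:
			//   %tmp = OpLoad  <type> src
			//          OpStore dst, %tmp
			// so the temporary picks up all the existing load/store special cases.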
10692 emit_instruction(fake_load);
10693 emit_instruction(fake_store);
10694 }
10695 break;
10696 }
10697
10698 case OpCopyLogical:
10699 {
10700		// This is used for copying objects of different types, arrays, and structs.
10701 // We need to unroll the copy, element-by-element.
10702 uint32_t result_type = ops[0];
10703 uint32_t id = ops[1];
10704 uint32_t rhs = ops[2];
10705
10706 emit_uninitialized_temporary_expression(result_type, id);
10707 emit_copy_logical_type(id, result_type, rhs, expression_type_id(rhs), {});
10708 break;
10709 }
10710
10711 case OpCopyObject:
10712 {
10713 uint32_t result_type = ops[0];
10714 uint32_t id = ops[1];
10715 uint32_t rhs = ops[2];
10716 bool pointer = get<SPIRType>(result_type).pointer;
10717
10718 auto *chain = maybe_get<SPIRAccessChain>(rhs);
10719 auto *imgsamp = maybe_get<SPIRCombinedImageSampler>(rhs);
10720 if (chain)
10721 {
10722 // Cannot lower to a SPIRExpression, just copy the object.
10723 auto &e = set<SPIRAccessChain>(id, *chain);
10724 e.self = id;
10725 }
10726 else if (imgsamp)
10727 {
10728 // Cannot lower to a SPIRExpression, just copy the object.
10729 // GLSL does not currently use this type and will never get here, but MSL does.
10730 // Handled here instead of CompilerMSL for better integration and general handling,
10731 // and in case GLSL or other subclasses require it in the future.
10732 auto &e = set<SPIRCombinedImageSampler>(id, *imgsamp);
10733 e.self = id;
10734 }
10735 else if (expression_is_lvalue(rhs) && !pointer)
10736 {
10737 // Need a copy.
10738 // For pointer types, we copy the pointer itself.
10739 statement(declare_temporary(result_type, id), to_unpacked_expression(rhs), ";");
10740 set<SPIRExpression>(id, to_name(id), result_type, true);
10741 }
10742 else
10743 {
10744 // RHS expression is immutable, so just forward it.
10745			// Copying these things really makes no sense, but
10746			// it seems to be allowed anyway.
10747 auto &e = set<SPIRExpression>(id, to_expression(rhs), result_type, true);
10748 if (pointer)
10749 {
10750 auto *var = maybe_get_backing_variable(rhs);
10751 e.loaded_from = var ? var->self : ID(0);
10752 }
10753
10754 // If we're copying an access chain, need to inherit the read expressions.
10755 auto *rhs_expr = maybe_get<SPIRExpression>(rhs);
10756 if (rhs_expr)
10757 {
10758 e.implied_read_expressions = rhs_expr->implied_read_expressions;
10759 e.expression_dependencies = rhs_expr->expression_dependencies;
10760 }
10761 }
10762 break;
10763 }
10764
10765 case OpVectorShuffle:
10766 {
10767 uint32_t result_type = ops[0];
10768 uint32_t id = ops[1];
10769 uint32_t vec0 = ops[2];
10770 uint32_t vec1 = ops[3];
10771 const auto *elems = &ops[4];
10772 length -= 4;
10773
10774 auto &type0 = expression_type(vec0);
10775
10776 // If we have the undefined swizzle index -1, we need to swizzle in undefined data,
10777 // or in our case, T(0).
10778 bool shuffle = false;
10779 for (uint32_t i = 0; i < length; i++)
10780 if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu)
10781 shuffle = true;
10782
10783 // Cannot use swizzles with packed expressions, force shuffle path.
10784 if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPhysicalTypePacked))
10785 shuffle = true;
10786
10787 string expr;
10788 bool should_fwd, trivial_forward;
10789
10790 if (shuffle)
10791 {
10792 should_fwd = should_forward(vec0) && should_forward(vec1);
10793 trivial_forward = should_suppress_usage_tracking(vec0) && should_suppress_usage_tracking(vec1);
10794
10795 // Constructor style and shuffling from two different vectors.
10796 SmallVector<string> args;
10797 for (uint32_t i = 0; i < length; i++)
10798 {
10799 if (elems[i] == 0xffffffffu)
10800 {
10801 // Use a constant 0 here.
10802 // We could use the first component or similar, but then we risk propagating
10803					// a value we might not need, and bogging down codegen.
10804 SPIRConstant c;
10805 c.constant_type = type0.parent_type;
10806 assert(type0.parent_type != ID(0));
10807 args.push_back(constant_expression(c));
10808 }
10809 else if (elems[i] >= type0.vecsize)
10810 args.push_back(to_extract_component_expression(vec1, elems[i] - type0.vecsize));
10811 else
10812 args.push_back(to_extract_component_expression(vec0, elems[i]));
10813 }
10814 expr += join(type_to_glsl_constructor(get<SPIRType>(result_type)), "(", merge(args), ")");
10815 }
10816 else
10817 {
10818 should_fwd = should_forward(vec0);
10819 trivial_forward = should_suppress_usage_tracking(vec0);
10820
10821 // We only source from first vector, so can use swizzle.
10822 // If the vector is packed, unpack it before applying a swizzle (needed for MSL)
10823 expr += to_enclosed_unpacked_expression(vec0);
10824 expr += ".";
10825 for (uint32_t i = 0; i < length; i++)
10826 {
10827 assert(elems[i] != 0xffffffffu);
10828 expr += index_to_swizzle(elems[i]);
10829 }
10830
10831 if (backend.swizzle_is_function && length > 1)
10832 expr += "()";
10833 }
10834
10835 // A shuffle is trivial in that it doesn't actually *do* anything.
10836 // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.
10837
10838 emit_op(result_type, id, expr, should_fwd, trivial_forward);
10839
10840 inherit_expression_dependencies(id, vec0);
10841 if (vec0 != vec1)
10842 inherit_expression_dependencies(id, vec1);
10843 break;
10844 }
10845
10846 // ALU
10847 case OpIsNan:
10848 GLSL_UFOP(isnan);
10849 break;
10850
10851 case OpIsInf:
10852 GLSL_UFOP(isinf);
10853 break;
10854
10855 case OpSNegate:
10856 case OpFNegate:
10857 GLSL_UOP(-);
10858 break;
10859
10860 case OpIAdd:
10861 {
10862 // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts.
10863 auto type = get<SPIRType>(ops[0]).basetype;
10864 GLSL_BOP_CAST(+, type);
10865 break;
10866 }
10867
10868 case OpFAdd:
10869 GLSL_BOP(+);
10870 break;
10871
10872 case OpISub:
10873 {
10874 auto type = get<SPIRType>(ops[0]).basetype;
10875 GLSL_BOP_CAST(-, type);
10876 break;
10877 }
10878
10879 case OpFSub:
10880 GLSL_BOP(-);
10881 break;
10882
10883 case OpIMul:
10884 {
10885 auto type = get<SPIRType>(ops[0]).basetype;
10886 GLSL_BOP_CAST(*, type);
10887 break;
10888 }
10889
10890 case OpVectorTimesMatrix:
10891 case OpMatrixTimesVector:
10892 {
10893 // If the matrix needs transpose, just flip the multiply order.
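		// This relies on the GLSL identities M * v == v * transpose(M) and v * M == transpose(M) * v,
		// so a matrix tagged as needing a transpose can simply be multiplied from the other side.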
10894 auto *e = maybe_get<SPIRExpression>(ops[opcode == OpMatrixTimesVector ? 2 : 3]);
10895 if (e && e->need_transpose)
10896 {
10897 e->need_transpose = false;
10898 string expr;
10899
10900 if (opcode == OpMatrixTimesVector)
10901 expr = join(to_enclosed_unpacked_expression(ops[3]), " * ",
10902 enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
10903 else
10904 expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
10905 to_enclosed_unpacked_expression(ops[2]));
10906
10907 bool forward = should_forward(ops[2]) && should_forward(ops[3]);
10908 emit_op(ops[0], ops[1], expr, forward);
10909 e->need_transpose = true;
10910 inherit_expression_dependencies(ops[1], ops[2]);
10911 inherit_expression_dependencies(ops[1], ops[3]);
10912 }
10913 else
10914 GLSL_BOP(*);
10915 break;
10916 }
10917
10918 case OpMatrixTimesMatrix:
10919 {
10920 auto *a = maybe_get<SPIRExpression>(ops[2]);
10921 auto *b = maybe_get<SPIRExpression>(ops[3]);
10922
10923 // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed.
10924 // a^T * b^T = (b * a)^T.
10925 if (a && b && a->need_transpose && b->need_transpose)
10926 {
10927 a->need_transpose = false;
10928 b->need_transpose = false;
10929 auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ",
10930 enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])));
10931 bool forward = should_forward(ops[2]) && should_forward(ops[3]);
10932 auto &e = emit_op(ops[0], ops[1], expr, forward);
10933 e.need_transpose = true;
10934 a->need_transpose = true;
10935 b->need_transpose = true;
10936 inherit_expression_dependencies(ops[1], ops[2]);
10937 inherit_expression_dependencies(ops[1], ops[3]);
10938 }
10939 else
10940 GLSL_BOP(*);
10941
10942 break;
10943 }
10944
10945 case OpFMul:
10946 case OpMatrixTimesScalar:
10947 case OpVectorTimesScalar:
10948 GLSL_BOP(*);
10949 break;
10950
10951 case OpOuterProduct:
10952 GLSL_BFOP(outerProduct);
10953 break;
10954
10955 case OpDot:
10956 GLSL_BFOP(dot);
10957 break;
10958
10959 case OpTranspose:
10960 if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00
10961 {
10962 // transpose() is not available, so instead, flip need_transpose,
10963 // which can later be turned into an emulated transpose op by
10964 // convert_row_major_matrix(), if necessary.
10965 uint32_t result_type = ops[0];
10966 uint32_t result_id = ops[1];
10967 uint32_t input = ops[2];
10968
10969 // Force need_transpose to false temporarily to prevent
10970 // to_expression() from doing the transpose.
10971 bool need_transpose = false;
10972 auto *input_e = maybe_get<SPIRExpression>(input);
10973 if (input_e)
10974 swap(need_transpose, input_e->need_transpose);
10975
10976 bool forward = should_forward(input);
10977 auto &e = emit_op(result_type, result_id, to_expression(input), forward);
10978 e.need_transpose = !need_transpose;
10979
10980 // Restore the old need_transpose flag.
10981 if (input_e)
10982 input_e->need_transpose = need_transpose;
10983 }
10984 else
10985 GLSL_UFOP(transpose);
10986 break;
10987
10988 case OpSRem:
10989 {
10990 uint32_t result_type = ops[0];
10991 uint32_t result_id = ops[1];
10992 uint32_t op0 = ops[2];
10993 uint32_t op1 = ops[3];
10994
10995 // Needs special handling.
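		// Rough rationale: OpSRem takes the sign of the dividend (truncated division), which
		// "a - b * (a / b)" reproduces with integer division, whereas GLSL's "%" is not guaranteed
		// to have those semantics for negative operands.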
10996 bool forward = should_forward(op0) && should_forward(op1);
10997 auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(",
10998 to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
10999
11000 emit_op(result_type, result_id, expr, forward);
11001 inherit_expression_dependencies(result_id, op0);
11002 inherit_expression_dependencies(result_id, op1);
11003 break;
11004 }
11005
11006 case OpSDiv:
11007 GLSL_BOP_CAST(/, int_type);
11008 break;
11009
11010 case OpUDiv:
11011 GLSL_BOP_CAST(/, uint_type);
11012 break;
11013
11014 case OpIAddCarry:
11015 case OpISubBorrow:
11016 {
11017 if (options.es && options.version < 310)
11018 SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
11019 else if (!options.es && options.version < 400)
11020 SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
11021
11022 uint32_t result_type = ops[0];
11023 uint32_t result_id = ops[1];
11024 uint32_t op0 = ops[2];
11025 uint32_t op1 = ops[3];
11026 auto &type = get<SPIRType>(result_type);
11027 emit_uninitialized_temporary_expression(result_type, result_id);
11028 const char *op = opcode == OpIAddCarry ? "uaddCarry" : "usubBorrow";
11029
11030 statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", op, "(", to_expression(op0), ", ",
11031 to_expression(op1), ", ", to_expression(result_id), ".", to_member_name(type, 1), ");");
11032 break;
11033 }
11034
11035 case OpUMulExtended:
11036 case OpSMulExtended:
11037 {
11038 if (options.es && options.version < 310)
11039 SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310.");
11040 else if (!options.es && options.version < 400)
11041			SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400.");
11042
11043 uint32_t result_type = ops[0];
11044 uint32_t result_id = ops[1];
11045 uint32_t op0 = ops[2];
11046 uint32_t op1 = ops[3];
11047 auto &type = get<SPIRType>(result_type);
11048 emit_uninitialized_temporary_expression(result_type, result_id);
11049 const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended";
11050
11051 statement(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(result_id), ".",
11052 to_member_name(type, 1), ", ", to_expression(result_id), ".", to_member_name(type, 0), ");");
11053 break;
11054 }
11055
11056 case OpFDiv:
11057 GLSL_BOP(/);
11058 break;
11059
11060 case OpShiftRightLogical:
11061 GLSL_BOP_CAST(>>, uint_type);
11062 break;
11063
11064 case OpShiftRightArithmetic:
11065 GLSL_BOP_CAST(>>, int_type);
11066 break;
11067
11068 case OpShiftLeftLogical:
11069 {
11070 auto type = get<SPIRType>(ops[0]).basetype;
11071 GLSL_BOP_CAST(<<, type);
11072 break;
11073 }
11074
11075 case OpBitwiseOr:
11076 {
11077 auto type = get<SPIRType>(ops[0]).basetype;
11078 GLSL_BOP_CAST(|, type);
11079 break;
11080 }
11081
11082 case OpBitwiseXor:
11083 {
11084 auto type = get<SPIRType>(ops[0]).basetype;
11085 GLSL_BOP_CAST(^, type);
11086 break;
11087 }
11088
11089 case OpBitwiseAnd:
11090 {
11091 auto type = get<SPIRType>(ops[0]).basetype;
11092 GLSL_BOP_CAST(&, type);
11093 break;
11094 }
11095
11096 case OpNot:
11097 GLSL_UOP(~);
11098 break;
11099
11100 case OpUMod:
11101 GLSL_BOP_CAST(%, uint_type);
11102 break;
11103
11104 case OpSMod:
11105 GLSL_BOP_CAST(%, int_type);
11106 break;
11107
11108 case OpFMod:
11109 GLSL_BFOP(mod);
11110 break;
11111
11112 case OpFRem:
11113 {
11114 if (is_legacy())
11115 SPIRV_CROSS_THROW("OpFRem requires trunc() and is only supported on non-legacy targets. A workaround is "
11116 "needed for legacy.");
11117
11118 uint32_t result_type = ops[0];
11119 uint32_t result_id = ops[1];
11120 uint32_t op0 = ops[2];
11121 uint32_t op1 = ops[3];
11122
11123 // Needs special handling.
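		// Rough rationale: GLSL's mod(x, y) == x - y * floor(x / y) follows the sign of the divisor
		// (matching OpFMod), while OpFRem follows the sign of the dividend, i.e. x - y * trunc(x / y),
		// hence the explicit trunc() here.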
11124 bool forward = should_forward(op0) && should_forward(op1);
11125 auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "trunc(",
11126 to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
11127
11128 emit_op(result_type, result_id, expr, forward);
11129 inherit_expression_dependencies(result_id, op0);
11130 inherit_expression_dependencies(result_id, op1);
11131 break;
11132 }
11133
11134 // Relational
11135 case OpAny:
11136 GLSL_UFOP(any);
11137 break;
11138
11139 case OpAll:
11140 GLSL_UFOP(all);
11141 break;
11142
11143 case OpSelect:
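		// Note the argument order: SPIR-V OpSelect takes (condition, true_value, false_value),
		// while the operands here are passed as (false_value, true_value, condition) for mix()-style selection.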
11144 emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]);
11145 break;
11146
11147 case OpLogicalOr:
11148 {
11149 // No vector variant in GLSL for logical OR.
11150 auto result_type = ops[0];
11151 auto id = ops[1];
11152 auto &type = get<SPIRType>(result_type);
11153
11154 if (type.vecsize > 1)
11155 emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false, SPIRType::Unknown);
11156 else
11157 GLSL_BOP(||);
11158 break;
11159 }
11160
11161 case OpLogicalAnd:
11162 {
11163 // No vector variant in GLSL for logical AND.
11164 auto result_type = ops[0];
11165 auto id = ops[1];
11166 auto &type = get<SPIRType>(result_type);
11167
11168 if (type.vecsize > 1)
11169 emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false, SPIRType::Unknown);
11170 else
11171 GLSL_BOP(&&);
11172 break;
11173 }
11174
11175 case OpLogicalNot:
11176 {
11177 auto &type = get<SPIRType>(ops[0]);
11178 if (type.vecsize > 1)
11179 GLSL_UFOP(not );
11180 else
11181 GLSL_UOP(!);
11182 break;
11183 }
11184
11185 case OpIEqual:
11186 {
11187 if (expression_type(ops[2]).vecsize > 1)
11188 GLSL_BFOP_CAST(equal, int_type);
11189 else
11190 GLSL_BOP_CAST(==, int_type);
11191 break;
11192 }
11193
11194 case OpLogicalEqual:
11195 case OpFOrdEqual:
11196 {
11197 if (expression_type(ops[2]).vecsize > 1)
11198 GLSL_BFOP(equal);
11199 else
11200 GLSL_BOP(==);
11201 break;
11202 }
11203
11204 case OpINotEqual:
11205 {
11206 if (expression_type(ops[2]).vecsize > 1)
11207 GLSL_BFOP_CAST(notEqual, int_type);
11208 else
11209 GLSL_BOP_CAST(!=, int_type);
11210 break;
11211 }
11212
11213 case OpLogicalNotEqual:
11214 case OpFOrdNotEqual:
11215 {
11216 if (expression_type(ops[2]).vecsize > 1)
11217 GLSL_BFOP(notEqual);
11218 else
11219 GLSL_BOP(!=);
11220 break;
11221 }
11222
11223 case OpUGreaterThan:
11224 case OpSGreaterThan:
11225 {
11226 auto type = opcode == OpUGreaterThan ? uint_type : int_type;
11227 if (expression_type(ops[2]).vecsize > 1)
11228 GLSL_BFOP_CAST(greaterThan, type);
11229 else
11230 GLSL_BOP_CAST(>, type);
11231 break;
11232 }
11233
11234 case OpFOrdGreaterThan:
11235 {
11236 if (expression_type(ops[2]).vecsize > 1)
11237 GLSL_BFOP(greaterThan);
11238 else
11239 GLSL_BOP(>);
11240 break;
11241 }
11242
11243 case OpUGreaterThanEqual:
11244 case OpSGreaterThanEqual:
11245 {
11246 auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type;
11247 if (expression_type(ops[2]).vecsize > 1)
11248 GLSL_BFOP_CAST(greaterThanEqual, type);
11249 else
11250 GLSL_BOP_CAST(>=, type);
11251 break;
11252 }
11253
11254 case OpFOrdGreaterThanEqual:
11255 {
11256 if (expression_type(ops[2]).vecsize > 1)
11257 GLSL_BFOP(greaterThanEqual);
11258 else
11259 GLSL_BOP(>=);
11260 break;
11261 }
11262
11263 case OpULessThan:
11264 case OpSLessThan:
11265 {
11266 auto type = opcode == OpULessThan ? uint_type : int_type;
11267 if (expression_type(ops[2]).vecsize > 1)
11268 GLSL_BFOP_CAST(lessThan, type);
11269 else
11270 GLSL_BOP_CAST(<, type);
11271 break;
11272 }
11273
11274 case OpFOrdLessThan:
11275 {
11276 if (expression_type(ops[2]).vecsize > 1)
11277 GLSL_BFOP(lessThan);
11278 else
11279 GLSL_BOP(<);
11280 break;
11281 }
11282
11283 case OpULessThanEqual:
11284 case OpSLessThanEqual:
11285 {
11286 auto type = opcode == OpULessThanEqual ? uint_type : int_type;
11287 if (expression_type(ops[2]).vecsize > 1)
11288 GLSL_BFOP_CAST(lessThanEqual, type);
11289 else
11290 GLSL_BOP_CAST(<=, type);
11291 break;
11292 }
11293
11294 case OpFOrdLessThanEqual:
11295 {
11296 if (expression_type(ops[2]).vecsize > 1)
11297 GLSL_BFOP(lessThanEqual);
11298 else
11299 GLSL_BOP(<=);
11300 break;
11301 }
11302
11303 // Conversion
11304 case OpSConvert:
11305 case OpConvertSToF:
11306 case OpUConvert:
11307 case OpConvertUToF:
11308 {
11309 auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? int_type : uint_type;
11310 uint32_t result_type = ops[0];
11311 uint32_t id = ops[1];
11312
11313 auto &type = get<SPIRType>(result_type);
11314 auto &arg_type = expression_type(ops[2]);
11315 auto func = type_to_glsl_constructor(type);
11316
11317 if (arg_type.width < type.width || type_is_floating_point(type))
11318 emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), input_type, type.basetype);
11319 else
11320 emit_unary_func_op(result_type, id, ops[2], func.c_str());
11321 break;
11322 }
11323
11324 case OpConvertFToU:
11325 case OpConvertFToS:
11326 {
11327 // Cast to expected arithmetic type, then potentially bitcast away to desired signedness.
11328 uint32_t result_type = ops[0];
11329 uint32_t id = ops[1];
11330 auto &type = get<SPIRType>(result_type);
11331 auto expected_type = type;
11332 auto &float_type = expression_type(ops[2]);
11333 expected_type.basetype =
11334 opcode == OpConvertFToS ? to_signed_basetype(type.width) : to_unsigned_basetype(type.width);
11335
11336 auto func = type_to_glsl_constructor(expected_type);
11337 emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), float_type.basetype, expected_type.basetype);
11338 break;
11339 }
11340
11341 case OpFConvert:
11342 {
11343 uint32_t result_type = ops[0];
11344 uint32_t id = ops[1];
11345
11346 auto func = type_to_glsl_constructor(get<SPIRType>(result_type));
11347 emit_unary_func_op(result_type, id, ops[2], func.c_str());
11348 break;
11349 }
11350
11351 case OpBitcast:
11352 {
11353 uint32_t result_type = ops[0];
11354 uint32_t id = ops[1];
11355 uint32_t arg = ops[2];
11356
11357 if (!emit_complex_bitcast(result_type, id, arg))
11358 {
11359 auto op = bitcast_glsl_op(get<SPIRType>(result_type), expression_type(arg));
11360 emit_unary_func_op(result_type, id, arg, op.c_str());
11361 }
11362 break;
11363 }
11364
11365 case OpQuantizeToF16:
11366 {
11367 uint32_t result_type = ops[0];
11368 uint32_t id = ops[1];
11369 uint32_t arg = ops[2];
11370
11371 string op;
11372 auto &type = get<SPIRType>(result_type);
11373
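		// There is no direct quantize operation in GLSL, so round-trip through 16-bit packing:
		// e.g. a scalar argument v is emitted roughly as unpackHalf2x16(packHalf2x16(vec2(v))).x,
		// and wider vectors quantize two components per pack/unpack pair as built below.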
11374 switch (type.vecsize)
11375 {
11376 case 1:
11377 op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x");
11378 break;
11379 case 2:
11380 op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))");
11381 break;
11382 case 3:
11383 {
11384 auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
11385 auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x");
11386 op = join("vec3(", op0, ", ", op1, ")");
11387 break;
11388 }
11389 case 4:
11390 {
11391 auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
11392 auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))");
11393 op = join("vec4(", op0, ", ", op1, ")");
11394 break;
11395 }
11396 default:
11397 SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16.");
11398 }
11399
11400 emit_op(result_type, id, op, should_forward(arg));
11401 inherit_expression_dependencies(id, arg);
11402 break;
11403 }
11404
11405 // Derivatives
11406 case OpDPdx:
11407 GLSL_UFOP(dFdx);
11408 if (is_legacy_es())
11409 require_extension_internal("GL_OES_standard_derivatives");
11410 register_control_dependent_expression(ops[1]);
11411 break;
11412
11413 case OpDPdy:
11414 GLSL_UFOP(dFdy);
11415 if (is_legacy_es())
11416 require_extension_internal("GL_OES_standard_derivatives");
11417 register_control_dependent_expression(ops[1]);
11418 break;
11419
11420 case OpDPdxFine:
11421 GLSL_UFOP(dFdxFine);
11422 if (options.es)
11423 {
11424 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11425 }
11426 if (options.version < 450)
11427 require_extension_internal("GL_ARB_derivative_control");
11428 register_control_dependent_expression(ops[1]);
11429 break;
11430
11431 case OpDPdyFine:
11432 GLSL_UFOP(dFdyFine);
11433 if (options.es)
11434 {
11435 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11436 }
11437 if (options.version < 450)
11438 require_extension_internal("GL_ARB_derivative_control");
11439 register_control_dependent_expression(ops[1]);
11440 break;
11441
11442 case OpDPdxCoarse:
11443 if (options.es)
11444 {
11445 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11446 }
11447 GLSL_UFOP(dFdxCoarse);
11448 if (options.version < 450)
11449 require_extension_internal("GL_ARB_derivative_control");
11450 register_control_dependent_expression(ops[1]);
11451 break;
11452
11453 case OpDPdyCoarse:
11454 GLSL_UFOP(dFdyCoarse);
11455 if (options.es)
11456 {
11457 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11458 }
11459 if (options.version < 450)
11460 require_extension_internal("GL_ARB_derivative_control");
11461 register_control_dependent_expression(ops[1]);
11462 break;
11463
11464 case OpFwidth:
11465 GLSL_UFOP(fwidth);
11466 if (is_legacy_es())
11467 require_extension_internal("GL_OES_standard_derivatives");
11468 register_control_dependent_expression(ops[1]);
11469 break;
11470
11471 case OpFwidthCoarse:
11472 GLSL_UFOP(fwidthCoarse);
11473 if (options.es)
11474 {
11475 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11476 }
11477 if (options.version < 450)
11478 require_extension_internal("GL_ARB_derivative_control");
11479 register_control_dependent_expression(ops[1]);
11480 break;
11481
11482 case OpFwidthFine:
11483 GLSL_UFOP(fwidthFine);
11484 if (options.es)
11485 {
11486 SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
11487 }
11488 if (options.version < 450)
11489 require_extension_internal("GL_ARB_derivative_control");
11490 register_control_dependent_expression(ops[1]);
11491 break;
11492
11493 // Bitfield
11494 case OpBitFieldInsert:
11495 {
11496 emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "bitfieldInsert", SPIRType::Int);
11497 break;
11498 }
11499
11500 case OpBitFieldSExtract:
11501 {
11502 emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", int_type, int_type,
11503 SPIRType::Int, SPIRType::Int);
11504 break;
11505 }
11506
11507 case OpBitFieldUExtract:
11508 {
11509 emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", uint_type, uint_type,
11510 SPIRType::Int, SPIRType::Int);
11511 break;
11512 }
11513
11514 case OpBitReverse:
11515 // BitReverse does not have issues with sign since result type must match input type.
11516 GLSL_UFOP(bitfieldReverse);
11517 break;
11518
11519 case OpBitCount:
11520 {
11521 auto basetype = expression_type(ops[2]).basetype;
11522 emit_unary_func_op_cast(ops[0], ops[1], ops[2], "bitCount", basetype, int_type);
11523 break;
11524 }
11525
11526 // Atomics
11527 case OpAtomicExchange:
11528 {
11529 uint32_t result_type = ops[0];
11530 uint32_t id = ops[1];
11531 uint32_t ptr = ops[2];
11532 // Ignore semantics for now, probably only relevant to CL.
11533 uint32_t val = ops[5];
11534 const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
11535
11536 emit_atomic_func_op(result_type, id, ptr, val, op);
11537 break;
11538 }
11539
11540 case OpAtomicCompareExchange:
11541 {
11542 uint32_t result_type = ops[0];
11543 uint32_t id = ops[1];
11544 uint32_t ptr = ops[2];
11545 uint32_t val = ops[6];
11546 uint32_t comp = ops[7];
11547 const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap";
11548
11549 emit_atomic_func_op(result_type, id, ptr, comp, val, op);
11550 break;
11551 }
11552
11553 case OpAtomicLoad:
11554 {
11555		// In plain GLSL, we have no atomic loads, so emulate this by atomically adding 0 and hoping the compiler figures it out.
11556 // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
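		// For example, an atomic load from an unsigned SSBO member u is emitted roughly as atomicAdd(u, 0u),
		// with the result forced into a temporary.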
11557 auto &type = expression_type(ops[2]);
11558 forced_temporaries.insert(ops[1]);
11559 bool atomic_image = check_atomic_image(ops[2]);
11560 bool unsigned_type = (type.basetype == SPIRType::UInt) ||
11561 (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
11562 const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
11563 const char *increment = unsigned_type ? "0u" : "0";
11564 emit_op(ops[0], ops[1],
11565 join(op, "(",
11566 to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false);
11567 flush_all_atomic_capable_variables();
11568 break;
11569 }
11570
11571 case OpAtomicStore:
11572 {
11573 // In plain GLSL, we have no atomic stores, so emulate this with an atomic exchange where we don't consume the result.
11574 // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL.
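		// For example, an atomic store of value v through pointer p is emitted as a statement
		// like atomicExchange(p, v); with the return value simply ignored.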
11575 uint32_t ptr = ops[0];
11576 // Ignore semantics for now, probably only relevant to CL.
11577 uint32_t val = ops[3];
11578 const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
11579 statement(op, "(", to_non_uniform_aware_expression(ptr), ", ", to_expression(val), ");");
11580 flush_all_atomic_capable_variables();
11581 break;
11582 }
11583
11584 case OpAtomicIIncrement:
11585 case OpAtomicIDecrement:
11586 {
11587 forced_temporaries.insert(ops[1]);
11588 auto &type = expression_type(ops[2]);
11589 if (type.storage == StorageClassAtomicCounter)
11590 {
11591			// Legacy GLSL atomic counter (atomic_uint) path; unclear whether supporting this is still relevant.
11592 if (opcode == OpAtomicIIncrement)
11593 GLSL_UFOP(atomicCounterIncrement);
11594 else
11595 GLSL_UFOP(atomicCounterDecrement);
11596 }
11597 else
11598 {
11599 bool atomic_image = check_atomic_image(ops[2]);
11600 bool unsigned_type = (type.basetype == SPIRType::UInt) ||
11601 (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt);
11602 const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd";
11603
11604 const char *increment = nullptr;
11605 if (opcode == OpAtomicIIncrement && unsigned_type)
11606 increment = "1u";
11607 else if (opcode == OpAtomicIIncrement)
11608 increment = "1";
11609 else if (unsigned_type)
11610 increment = "uint(-1)";
11611 else
11612 increment = "-1";
11613
11614 emit_op(ops[0], ops[1],
11615 join(op, "(", to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false);
11616 }
11617
11618 flush_all_atomic_capable_variables();
11619 break;
11620 }
11621
11622 case OpAtomicIAdd:
11623 {
11624 const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
11625 emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
11626 break;
11627 }
11628
11629 case OpAtomicISub:
11630 {
11631 const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
11632 forced_temporaries.insert(ops[1]);
11633 auto expr = join(op, "(", to_non_uniform_aware_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")");
11634 emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5]));
11635 flush_all_atomic_capable_variables();
11636 break;
11637 }
11638
11639 case OpAtomicSMin:
11640 case OpAtomicUMin:
11641 {
11642 const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin";
11643 emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
11644 break;
11645 }
11646
11647 case OpAtomicSMax:
11648 case OpAtomicUMax:
11649 {
11650 const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax";
11651 emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
11652 break;
11653 }
11654
11655 case OpAtomicAnd:
11656 {
11657 const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd";
11658 emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
11659 break;
11660 }
11661
11662 case OpAtomicOr:
11663 {
11664 const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr";
11665 emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
11666 break;
11667 }
11668
11669 case OpAtomicXor:
11670 {
11671 const char *op = check_atomic_image(ops[2]) ? "imageAtomicXor" : "atomicXor";
11672 emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op);
11673 break;
11674 }
11675
11676 // Geometry shaders
11677 case OpEmitVertex:
11678 statement("EmitVertex();");
11679 break;
11680
11681 case OpEndPrimitive:
11682 statement("EndPrimitive();");
11683 break;
11684
11685 case OpEmitStreamVertex:
11686 {
11687 if (options.es)
11688 SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
11689 else if (!options.es && options.version < 400)
11690 SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
11691
11692 auto stream_expr = to_expression(ops[0]);
11693 if (expression_type(ops[0]).basetype != SPIRType::Int)
11694 stream_expr = join("int(", stream_expr, ")");
11695 statement("EmitStreamVertex(", stream_expr, ");");
11696 break;
11697 }
11698
11699 case OpEndStreamPrimitive:
11700 {
11701 if (options.es)
11702 SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES.");
11703 else if (!options.es && options.version < 400)
11704 SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400.");
11705
11706 auto stream_expr = to_expression(ops[0]);
11707 if (expression_type(ops[0]).basetype != SPIRType::Int)
11708 stream_expr = join("int(", stream_expr, ")");
11709 statement("EndStreamPrimitive(", stream_expr, ");");
11710 break;
11711 }
11712
11713 // Textures
11714 case OpImageSampleExplicitLod:
11715 case OpImageSampleProjExplicitLod:
11716 case OpImageSampleDrefExplicitLod:
11717 case OpImageSampleProjDrefExplicitLod:
11718 case OpImageSampleImplicitLod:
11719 case OpImageSampleProjImplicitLod:
11720 case OpImageSampleDrefImplicitLod:
11721 case OpImageSampleProjDrefImplicitLod:
11722 case OpImageFetch:
11723 case OpImageGather:
11724 case OpImageDrefGather:
11725		// Gets a bit hairy, so handle this in a separate function.
11726 emit_texture_op(instruction, false);
11727 break;
11728
11729 case OpImageSparseSampleExplicitLod:
11730 case OpImageSparseSampleProjExplicitLod:
11731 case OpImageSparseSampleDrefExplicitLod:
11732 case OpImageSparseSampleProjDrefExplicitLod:
11733 case OpImageSparseSampleImplicitLod:
11734 case OpImageSparseSampleProjImplicitLod:
11735 case OpImageSparseSampleDrefImplicitLod:
11736 case OpImageSparseSampleProjDrefImplicitLod:
11737 case OpImageSparseFetch:
11738 case OpImageSparseGather:
11739 case OpImageSparseDrefGather:
11740		// Gets a bit hairy, so handle this in a separate function.
11741 emit_texture_op(instruction, true);
11742 break;
11743
11744 case OpImageSparseTexelsResident:
11745 if (options.es)
11746 SPIRV_CROSS_THROW("Sparse feedback is not supported in GLSL.");
11747 require_extension_internal("GL_ARB_sparse_texture2");
11748 emit_unary_func_op_cast(ops[0], ops[1], ops[2], "sparseTexelsResidentARB", int_type, SPIRType::Boolean);
11749 break;
11750
11751 case OpImage:
11752 {
11753 uint32_t result_type = ops[0];
11754 uint32_t id = ops[1];
11755
11756 // Suppress usage tracking.
11757 auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true);
11758
11759 // When using the image, we need to know which variable it is actually loaded from.
11760 auto *var = maybe_get_backing_variable(ops[2]);
11761 e.loaded_from = var ? var->self : ID(0);
11762 break;
11763 }
11764
11765 case OpImageQueryLod:
11766 {
11767 const char *op = nullptr;
11768 if (!options.es && options.version < 400)
11769 {
11770 require_extension_internal("GL_ARB_texture_query_lod");
11771 // For some reason, the ARB spec is all-caps.
11772 op = "textureQueryLOD";
11773 }
11774 else if (options.es)
11775 SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile.");
11776 else
11777 op = "textureQueryLod";
11778
11779 auto sampler_expr = to_expression(ops[2]);
11780 if (has_decoration(ops[2], DecorationNonUniform))
11781 {
11782 if (maybe_get_backing_variable(ops[2]))
11783 convert_non_uniform_expression(sampler_expr, ops[2]);
11784 else if (*backend.nonuniform_qualifier != '\0')
11785 sampler_expr = join(backend.nonuniform_qualifier, "(", sampler_expr, ")");
11786 }
11787
11788 bool forward = should_forward(ops[3]);
11789 emit_op(ops[0], ops[1],
11790 join(op, "(", sampler_expr, ", ", to_unpacked_expression(ops[3]), ")"),
11791 forward);
11792 inherit_expression_dependencies(ops[1], ops[2]);
11793 inherit_expression_dependencies(ops[1], ops[3]);
11794 register_control_dependent_expression(ops[1]);
11795 break;
11796 }
11797
11798 case OpImageQueryLevels:
11799 {
11800 uint32_t result_type = ops[0];
11801 uint32_t id = ops[1];
11802
11803 if (!options.es && options.version < 430)
11804 require_extension_internal("GL_ARB_texture_query_levels");
11805 if (options.es)
11806 SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile.");
11807
11808 auto expr = join("textureQueryLevels(", convert_separate_image_to_expression(ops[2]), ")");
11809 auto &restype = get<SPIRType>(ops[0]);
11810 expr = bitcast_expression(restype, SPIRType::Int, expr);
11811 emit_op(result_type, id, expr, true);
11812 break;
11813 }
11814
11815 case OpImageQuerySamples:
11816 {
11817 auto &type = expression_type(ops[2]);
11818 uint32_t result_type = ops[0];
11819 uint32_t id = ops[1];
11820
11821 string expr;
11822 if (type.image.sampled == 2)
11823 expr = join("imageSamples(", to_non_uniform_aware_expression(ops[2]), ")");
11824 else
11825 expr = join("textureSamples(", convert_separate_image_to_expression(ops[2]), ")");
11826
11827 auto &restype = get<SPIRType>(ops[0]);
11828 expr = bitcast_expression(restype, SPIRType::Int, expr);
11829 emit_op(result_type, id, expr, true);
11830 break;
11831 }
11832
11833 case OpSampledImage:
11834 {
11835 uint32_t result_type = ops[0];
11836 uint32_t id = ops[1];
11837 emit_sampled_image_op(result_type, id, ops[2], ops[3]);
11838 inherit_expression_dependencies(id, ops[2]);
11839 inherit_expression_dependencies(id, ops[3]);
11840 break;
11841 }
11842
11843 case OpImageQuerySizeLod:
11844 {
11845 uint32_t result_type = ops[0];
11846 uint32_t id = ops[1];
11847 uint32_t img = ops[2];
11848
11849 std::string fname = "textureSize";
11850 if (is_legacy_desktop())
11851 {
11852 auto &type = expression_type(img);
11853 auto &imgtype = get<SPIRType>(type.self);
11854 fname = legacy_tex_op(fname, imgtype, img);
11855 }
11856 else if (is_legacy_es())
11857 SPIRV_CROSS_THROW("textureSize is not supported in ESSL 100.");
11858
11859 auto expr = join(fname, "(", convert_separate_image_to_expression(img), ", ",
11860 bitcast_expression(SPIRType::Int, ops[3]), ")");
11861 auto &restype = get<SPIRType>(ops[0]);
11862 expr = bitcast_expression(restype, SPIRType::Int, expr);
11863 emit_op(result_type, id, expr, true);
11864 break;
11865 }
11866
11867 // Image load/store
11868 case OpImageRead:
11869 case OpImageSparseRead:
11870 {
11871 // We added Nonreadable speculatively to the OpImage variable due to glslangValidator
11872 // not adding the proper qualifiers.
11873 // If it turns out we need to read the image after all, remove the qualifier and recompile.
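		// In practice, clearing NonReadable below drops the speculative "writeonly" qualifier from the
		// image declaration on the recompile pass, so the read emitted further down becomes legal GLSL.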
11874 auto *var = maybe_get_backing_variable(ops[2]);
11875 if (var)
11876 {
11877 auto &flags = ir.meta[var->self].decoration.decoration_flags;
11878 if (flags.get(DecorationNonReadable))
11879 {
11880 flags.clear(DecorationNonReadable);
11881 force_recompile();
11882 }
11883 }
11884
11885 uint32_t result_type = ops[0];
11886 uint32_t id = ops[1];
11887
11888 bool pure;
11889 string imgexpr;
11890 auto &type = expression_type(ops[2]);
11891
11892 if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code
11893 {
11894 if (type.image.ms)
11895 SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible.");
11896
11897 auto itr =
11898 find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; });
11899
11900 if (itr == end(pls_inputs))
11901 {
11902 // For non-PLS inputs, we rely on subpass type remapping information to get it right
11903 // since ImageRead always returns 4-component vectors and the backing type is opaque.
11904 if (!var->remapped_components)
11905 SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly.");
11906 imgexpr = remap_swizzle(get<SPIRType>(result_type), var->remapped_components, to_expression(ops[2]));
11907 }
11908 else
11909 {
11910				// A PLS input could have a different number of components than what the SPIR-V expects, so swizzle to
11911				// the appropriate vector size.
11912 uint32_t components = pls_format_to_components(itr->format);
11913 imgexpr = remap_swizzle(get<SPIRType>(result_type), components, to_expression(ops[2]));
11914 }
11915 pure = true;
11916 }
11917 else if (type.image.dim == DimSubpassData)
11918 {
11919 if (var && subpass_input_is_framebuffer_fetch(var->self))
11920 {
11921 imgexpr = to_expression(var->self);
11922 }
11923 else if (options.vulkan_semantics)
11924 {
11925 // With Vulkan semantics, use the proper Vulkan GLSL construct.
11926 if (type.image.ms)
11927 {
11928 uint32_t operands = ops[4];
11929 if (operands != ImageOperandsSampleMask || length != 6)
11930 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
11931 "operand mask was used.");
11932
11933 uint32_t samples = ops[5];
11934 imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ", ", to_expression(samples), ")");
11935 }
11936 else
11937 imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ")");
11938 }
11939 else
11940 {
11941 if (type.image.ms)
11942 {
11943 uint32_t operands = ops[4];
11944 if (operands != ImageOperandsSampleMask || length != 6)
11945 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
11946 "operand mask was used.");
11947
11948 uint32_t samples = ops[5];
11949 imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ",
11950 to_expression(samples), ")");
11951 }
11952 else
11953 {
11954 // Implement subpass loads via texture barrier style sampling.
11955 imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)");
11956 }
11957 }
11958 imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
11959 pure = true;
11960 }
11961 else
11962 {
11963 bool sparse = opcode == OpImageSparseRead;
11964 uint32_t sparse_code_id = 0;
11965 uint32_t sparse_texel_id = 0;
11966 if (sparse)
11967 emit_sparse_feedback_temporaries(ops[0], ops[1], sparse_code_id, sparse_texel_id);
11968
11969 // imageLoad only accepts int coords, not uint.
11970 auto coord_expr = to_expression(ops[3]);
11971 auto target_coord_type = expression_type(ops[3]);
11972 target_coord_type.basetype = SPIRType::Int;
11973 coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
11974
11975 // Plain image load/store.
11976 if (sparse)
11977 {
11978 if (type.image.ms)
11979 {
11980 uint32_t operands = ops[4];
11981 if (operands != ImageOperandsSampleMask || length != 6)
11982 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
11983 "operand mask was used.");
11984
11985 uint32_t samples = ops[5];
11986 statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ",
11987 coord_expr, ", ", to_expression(samples), ", ", to_expression(sparse_texel_id), ");");
11988 }
11989 else
11990 {
11991 statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ",
11992 coord_expr, ", ", to_expression(sparse_texel_id), ");");
11993 }
11994 imgexpr = join(type_to_glsl(get<SPIRType>(result_type)), "(", to_expression(sparse_code_id), ", ",
11995 to_expression(sparse_texel_id), ")");
11996 }
11997 else
11998 {
11999 if (type.image.ms)
12000 {
12001 uint32_t operands = ops[4];
12002 if (operands != ImageOperandsSampleMask || length != 6)
12003 SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected "
12004 "operand mask was used.");
12005
12006 uint32_t samples = ops[5];
12007 imgexpr =
12008 join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")");
12009 }
12010 else
12011 imgexpr = join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ")");
12012 }
12013
12014 if (!sparse)
12015 imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
12016 pure = false;
12017 }
12018
12019 if (var && var->forwardable)
12020 {
12021 bool forward = forced_temporaries.find(id) == end(forced_temporaries);
12022 auto &e = emit_op(result_type, id, imgexpr, forward);
12023
12024 // We only need to track dependencies if we're reading from image load/store.
12025 if (!pure)
12026 {
12027 e.loaded_from = var->self;
12028 if (forward)
12029 var->dependees.push_back(id);
12030 }
12031 }
12032 else
12033 emit_op(result_type, id, imgexpr, false);
12034
12035 inherit_expression_dependencies(id, ops[2]);
12036 if (type.image.ms)
12037 inherit_expression_dependencies(id, ops[5]);
12038 break;
12039 }
12040
12041 case OpImageTexelPointer:
12042 {
12043 uint32_t result_type = ops[0];
12044 uint32_t id = ops[1];
12045
12046 auto coord_expr = to_expression(ops[3]);
12047 auto target_coord_type = expression_type(ops[3]);
12048 target_coord_type.basetype = SPIRType::Int;
12049 coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
12050
12051 auto expr = join(to_expression(ops[2]), ", ", coord_expr);
12052 auto &e = set<SPIRExpression>(id, expr, result_type, true);
12053
12054 // When using the pointer, we need to know which variable it is actually loaded from.
12055 auto *var = maybe_get_backing_variable(ops[2]);
12056 e.loaded_from = var ? var->self : ID(0);
12057 inherit_expression_dependencies(id, ops[3]);
12058 break;
12059 }
12060
12061 case OpImageWrite:
12062 {
12063 // We added Nonwritable speculatively to the OpImage variable due to glslangValidator
12064 // not adding the proper qualifiers.
12065 // If it turns out we need to write to the image after all, remove the qualifier and recompile.
12066 auto *var = maybe_get_backing_variable(ops[0]);
12067 if (var)
12068 {
12069 auto &flags = ir.meta[var->self].decoration.decoration_flags;
12070 if (flags.get(DecorationNonWritable))
12071 {
12072 flags.clear(DecorationNonWritable);
12073 force_recompile();
12074 }
12075 }
12076
12077 auto &type = expression_type(ops[0]);
12078 auto &value_type = expression_type(ops[2]);
12079 auto store_type = value_type;
12080 store_type.vecsize = 4;
12081
12082 // imageStore only accepts int coords, not uint.
12083 auto coord_expr = to_expression(ops[1]);
12084 auto target_coord_type = expression_type(ops[1]);
12085 target_coord_type.basetype = SPIRType::Int;
12086 coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr);
12087
12088 if (type.image.ms)
12089 {
12090 uint32_t operands = ops[3];
12091 if (operands != ImageOperandsSampleMask || length != 5)
12092 SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used.");
12093 uint32_t samples = ops[4];
12094 statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ",
12095 remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
12096 }
12097 else
12098 statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ",
12099 remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
12100
12101 if (var && variable_storage_is_aliased(*var))
12102 flush_all_aliased_variables();
12103 break;
12104 }
12105
12106 case OpImageQuerySize:
12107 {
12108 auto &type = expression_type(ops[2]);
12109 uint32_t result_type = ops[0];
12110 uint32_t id = ops[1];
12111
12112 if (type.basetype == SPIRType::Image)
12113 {
12114 string expr;
12115 if (type.image.sampled == 2)
12116 {
12117 if (!options.es && options.version < 430)
12118 require_extension_internal("GL_ARB_shader_image_size");
12119 else if (options.es && options.version < 310)
12120 SPIRV_CROSS_THROW("At least ESSL 3.10 required for imageSize.");
12121
12122 // The size of an image is always constant.
12123 expr = join("imageSize(", to_non_uniform_aware_expression(ops[2]), ")");
12124 }
12125 else
12126 {
12127 // This path is hit for samplerBuffers and multisampled images which do not have LOD.
12128 std::string fname = "textureSize";
12129 if (is_legacy())
12130 {
12131 auto &imgtype = get<SPIRType>(type.self);
12132 fname = legacy_tex_op(fname, imgtype, ops[2]);
12133 }
12134 expr = join(fname, "(", convert_separate_image_to_expression(ops[2]), ")");
12135 }
12136
12137 auto &restype = get<SPIRType>(ops[0]);
12138 expr = bitcast_expression(restype, SPIRType::Int, expr);
12139 emit_op(result_type, id, expr, true);
12140 }
12141 else
12142 SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize.");
12143 break;
12144 }
12145
12146 // Compute
12147 case OpControlBarrier:
12148 case OpMemoryBarrier:
12149 {
12150 uint32_t execution_scope = 0;
12151 uint32_t memory;
12152 uint32_t semantics;
12153
12154 if (opcode == OpMemoryBarrier)
12155 {
12156 memory = evaluate_constant_u32(ops[0]);
12157 semantics = evaluate_constant_u32(ops[1]);
12158 }
12159 else
12160 {
12161 execution_scope = evaluate_constant_u32(ops[0]);
12162 memory = evaluate_constant_u32(ops[1]);
12163 semantics = evaluate_constant_u32(ops[2]);
12164 }
12165
12166 if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup)
12167 {
12168 // OpControlBarrier with ScopeSubgroup is subgroupBarrier()
12169 if (opcode != OpControlBarrier)
12170 {
12171 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMemBarrier);
12172 }
12173 else
12174 {
12175 request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBarrier);
12176 }
12177 }
12178
12179 if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl)
12180 {
12181			// Tessellation control shaders only have barrier(), and it implies memory barriers.
12182 if (opcode == OpControlBarrier)
12183 statement("barrier();");
12184 break;
12185 }
12186
12187		// We only care about these flags; acquire/release and friends are not relevant to GLSL.
12188 semantics = mask_relevant_memory_semantics(semantics);
12189
12190 if (opcode == OpMemoryBarrier)
12191 {
12192 // If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier
12193 // does what we need, so we avoid redundant barriers.
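			// For example, a workgroup-scope OpMemoryBarrier directly followed by an OpControlBarrier with
			// matching semantics would otherwise emit a redundant memoryBarrierShared() right before
			// barrier(), so we skip the memory barrier here and let the control barrier cover it.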
12194 const Instruction *next = get_next_instruction_in_block(instruction);
12195 if (next && next->op == OpControlBarrier)
12196 {
12197 auto *next_ops = stream(*next);
12198 uint32_t next_memory = evaluate_constant_u32(next_ops[1]);
12199 uint32_t next_semantics = evaluate_constant_u32(next_ops[2]);
12200 next_semantics = mask_relevant_memory_semantics(next_semantics);
12201
12202 bool memory_scope_covered = false;
12203 if (next_memory == memory)
12204 memory_scope_covered = true;
12205 else if (next_semantics == MemorySemanticsWorkgroupMemoryMask)
12206 {
12207 // If we only care about workgroup memory, either Device or Workgroup scope is fine,
12208 // scope does not have to match.
12209 if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) &&
12210 (memory == ScopeDevice || memory == ScopeWorkgroup))
12211 {
12212 memory_scope_covered = true;
12213 }
12214 }
12215 else if (memory == ScopeWorkgroup && next_memory == ScopeDevice)
12216 {
12217 // The control barrier has device scope, but the memory barrier just has workgroup scope.
12218 memory_scope_covered = true;
12219 }
12220
12221 // If we have the same memory scope, and all memory types are covered, we're good.
12222 if (memory_scope_covered && (semantics & next_semantics) == semantics)
12223 break;
12224 }
12225 }
12226
12227 // We are synchronizing some memory or syncing execution,
12228 // so we cannot forward any loads beyond the memory barrier.
12229 if (semantics || opcode == OpControlBarrier)
12230 {
12231 assert(current_emitting_block);
12232 flush_control_dependent_expressions(current_emitting_block->self);
12233 flush_all_active_variables();
12234 }
12235
12236 if (memory == ScopeWorkgroup) // Only need to consider memory within a group
12237 {
12238 if (semantics == MemorySemanticsWorkgroupMemoryMask)
12239 {
12240 // OpControlBarrier implies a memory barrier for shared memory as well.
12241 bool implies_shared_barrier = opcode == OpControlBarrier && execution_scope == ScopeWorkgroup;
12242 if (!implies_shared_barrier)
12243 statement("memoryBarrierShared();");
12244 }
12245 else if (semantics != 0)
12246 statement("groupMemoryBarrier();");
12247 }
12248 else if (memory == ScopeSubgroup)
12249 {
12250 const uint32_t all_barriers =
12251 MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
12252
12253 if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
12254 {
12255				// These are not relevant for GLSL, but assume they mean subgroupMemoryBarrier().
12256				// subgroupMemoryBarrier() covers everything at subgroup scope, so no need to test anything else.
12257 statement("subgroupMemoryBarrier();");
12258 }
12259 else if ((semantics & all_barriers) == all_barriers)
12260 {
12261 // Short-hand instead of emitting 3 barriers.
12262 statement("subgroupMemoryBarrier();");
12263 }
12264 else
12265 {
12266 // Pick out individual barriers.
12267 if (semantics & MemorySemanticsWorkgroupMemoryMask)
12268 statement("subgroupMemoryBarrierShared();");
12269 if (semantics & MemorySemanticsUniformMemoryMask)
12270 statement("subgroupMemoryBarrierBuffer();");
12271 if (semantics & MemorySemanticsImageMemoryMask)
12272 statement("subgroupMemoryBarrierImage();");
12273 }
12274 }
12275 else
12276 {
12277 const uint32_t all_barriers =
12278 MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
12279
12280 if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
12281 {
12282 // These are not relevant for GLSL, but assume it means memoryBarrier().
12283 // memoryBarrier() does everything, so no need to test anything else.
12284 statement("memoryBarrier();");
12285 }
12286 else if ((semantics & all_barriers) == all_barriers)
12287 {
12288				// Short-hand instead of emitting 3 barriers.
12289 statement("memoryBarrier();");
12290 }
12291 else
12292 {
12293 // Pick out individual barriers.
12294 if (semantics & MemorySemanticsWorkgroupMemoryMask)
12295 statement("memoryBarrierShared();");
12296 if (semantics & MemorySemanticsUniformMemoryMask)
12297 statement("memoryBarrierBuffer();");
12298 if (semantics & MemorySemanticsImageMemoryMask)
12299 statement("memoryBarrierImage();");
12300 }
12301 }
12302
12303 if (opcode == OpControlBarrier)
12304 {
12305 if (execution_scope == ScopeSubgroup)
12306 statement("subgroupBarrier();");
12307 else
12308 statement("barrier();");
12309 }
12310 break;
12311 }
12312
12313 case OpExtInst:
12314 {
12315 uint32_t extension_set = ops[2];
12316
12317 if (get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
12318 {
12319 emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
12320 }
12321 else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_ballot)
12322 {
12323 emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
12324 }
12325 else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter)
12326 {
12327 emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
12328 }
12329 else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_trinary_minmax)
12330 {
12331 emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
12332 }
12333 else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_gcn_shader)
12334 {
12335 emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
12336 }
12337 else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_debug_info)
12338 {
12339 break; // Ignore SPIR-V debug information extended instructions.
12340 }
12341 else
12342 {
12343 statement("// unimplemented ext op ", instruction.op);
12344 break;
12345 }
12346
12347 break;
12348 }
12349
12350 // Legacy sub-group stuff ...
12351 case OpSubgroupBallotKHR:
12352 {
12353 uint32_t result_type = ops[0];
12354 uint32_t id = ops[1];
12355 string expr;
12356		expr = join("uvec4(unpackUint2x32(ballotARB(", to_expression(ops[2]), ")), 0u, 0u)");
12357 emit_op(result_type, id, expr, should_forward(ops[2]));
12358
12359 require_extension_internal("GL_ARB_shader_ballot");
12360 inherit_expression_dependencies(id, ops[2]);
12361 register_control_dependent_expression(ops[1]);
12362 break;
12363 }
12364
12365 case OpSubgroupFirstInvocationKHR:
12366 {
12367 uint32_t result_type = ops[0];
12368 uint32_t id = ops[1];
12369 emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB");
12370
12371 require_extension_internal("GL_ARB_shader_ballot");
12372 register_control_dependent_expression(ops[1]);
12373 break;
12374 }
12375
12376 case OpSubgroupReadInvocationKHR:
12377 {
12378 uint32_t result_type = ops[0];
12379 uint32_t id = ops[1];
12380 emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB");
12381
12382 require_extension_internal("GL_ARB_shader_ballot");
12383 register_control_dependent_expression(ops[1]);
12384 break;
12385 }
12386
12387 case OpSubgroupAllKHR:
12388 {
12389 uint32_t result_type = ops[0];
12390 uint32_t id = ops[1];
12391 emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB");
12392
12393 require_extension_internal("GL_ARB_shader_group_vote");
12394 register_control_dependent_expression(ops[1]);
12395 break;
12396 }
12397
12398 case OpSubgroupAnyKHR:
12399 {
12400 uint32_t result_type = ops[0];
12401 uint32_t id = ops[1];
12402 emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB");
12403
12404 require_extension_internal("GL_ARB_shader_group_vote");
12405 register_control_dependent_expression(ops[1]);
12406 break;
12407 }
12408
12409 case OpSubgroupAllEqualKHR:
12410 {
12411 uint32_t result_type = ops[0];
12412 uint32_t id = ops[1];
12413 emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB");
12414
12415 require_extension_internal("GL_ARB_shader_group_vote");
12416 register_control_dependent_expression(ops[1]);
12417 break;
12418 }
12419
12420 case OpGroupIAddNonUniformAMD:
12421 case OpGroupFAddNonUniformAMD:
12422 {
12423 uint32_t result_type = ops[0];
12424 uint32_t id = ops[1];
12425 emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD");
12426
12427 require_extension_internal("GL_AMD_shader_ballot");
12428 register_control_dependent_expression(ops[1]);
12429 break;
12430 }
12431
12432 case OpGroupFMinNonUniformAMD:
12433 case OpGroupUMinNonUniformAMD:
12434 case OpGroupSMinNonUniformAMD:
12435 {
12436 uint32_t result_type = ops[0];
12437 uint32_t id = ops[1];
12438 emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD");
12439
12440 require_extension_internal("GL_AMD_shader_ballot");
12441 register_control_dependent_expression(ops[1]);
12442 break;
12443 }
12444
12445 case OpGroupFMaxNonUniformAMD:
12446 case OpGroupUMaxNonUniformAMD:
12447 case OpGroupSMaxNonUniformAMD:
12448 {
12449 uint32_t result_type = ops[0];
12450 uint32_t id = ops[1];
12451 emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD");
12452
12453 require_extension_internal("GL_AMD_shader_ballot");
12454 register_control_dependent_expression(ops[1]);
12455 break;
12456 }
12457
12458 case OpFragmentMaskFetchAMD:
12459 {
12460 auto &type = expression_type(ops[2]);
12461 uint32_t result_type = ops[0];
12462 uint32_t id = ops[1];
12463
12464 if (type.image.dim == spv::DimSubpassData)
12465 {
12466 emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD");
12467 }
12468 else
12469 {
12470 emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD");
12471 }
12472
12473 require_extension_internal("GL_AMD_shader_fragment_mask");
12474 break;
12475 }
12476
12477 case OpFragmentFetchAMD:
12478 {
12479 auto &type = expression_type(ops[2]);
12480 uint32_t result_type = ops[0];
12481 uint32_t id = ops[1];
12482
12483 if (type.image.dim == spv::DimSubpassData)
12484 {
12485 emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD");
12486 }
12487 else
12488 {
12489 emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD");
12490 }
12491
12492 require_extension_internal("GL_AMD_shader_fragment_mask");
12493 break;
12494 }
12495
12496 // Vulkan 1.1 sub-group stuff ...
12497 case OpGroupNonUniformElect:
12498 case OpGroupNonUniformBroadcast:
12499 case OpGroupNonUniformBroadcastFirst:
12500 case OpGroupNonUniformBallot:
12501 case OpGroupNonUniformInverseBallot:
12502 case OpGroupNonUniformBallotBitExtract:
12503 case OpGroupNonUniformBallotBitCount:
12504 case OpGroupNonUniformBallotFindLSB:
12505 case OpGroupNonUniformBallotFindMSB:
12506 case OpGroupNonUniformShuffle:
12507 case OpGroupNonUniformShuffleXor:
12508 case OpGroupNonUniformShuffleUp:
12509 case OpGroupNonUniformShuffleDown:
12510 case OpGroupNonUniformAll:
12511 case OpGroupNonUniformAny:
12512 case OpGroupNonUniformAllEqual:
12513 case OpGroupNonUniformFAdd:
12514 case OpGroupNonUniformIAdd:
12515 case OpGroupNonUniformFMul:
12516 case OpGroupNonUniformIMul:
12517 case OpGroupNonUniformFMin:
12518 case OpGroupNonUniformFMax:
12519 case OpGroupNonUniformSMin:
12520 case OpGroupNonUniformSMax:
12521 case OpGroupNonUniformUMin:
12522 case OpGroupNonUniformUMax:
12523 case OpGroupNonUniformBitwiseAnd:
12524 case OpGroupNonUniformBitwiseOr:
12525 case OpGroupNonUniformBitwiseXor:
12526 case OpGroupNonUniformLogicalAnd:
12527 case OpGroupNonUniformLogicalOr:
12528 case OpGroupNonUniformLogicalXor:
12529 case OpGroupNonUniformQuadSwap:
12530 case OpGroupNonUniformQuadBroadcast:
12531 emit_subgroup_op(instruction);
12532 break;
12533
12534 case OpFUnordEqual:
12535 case OpFUnordNotEqual:
12536 case OpFUnordLessThan:
12537 case OpFUnordGreaterThan:
12538 case OpFUnordLessThanEqual:
12539 case OpFUnordGreaterThanEqual:
12540 {
12541 // GLSL doesn't specify if floating point comparisons are ordered or unordered,
12542 // but glslang always emits ordered floating point compares for GLSL.
12543 // To get unordered compares, we can test the opposite thing and invert the result.
12544		// This way, the result is forced to true whenever a NaN is present.
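		// For example, OpFUnordLessThan(a, b) is emitted as !(a >= b) for scalars
		// (not(greaterThanEqual(a, b)) for vectors), which evaluates to true whenever either operand is NaN.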
12545 uint32_t op0 = ops[2];
12546 uint32_t op1 = ops[3];
12547
12548 string expr;
12549 if (expression_type(op0).vecsize > 1)
12550 {
12551 const char *comp_op = nullptr;
12552 switch (opcode)
12553 {
12554 case OpFUnordEqual:
12555 comp_op = "notEqual";
12556 break;
12557
12558 case OpFUnordNotEqual:
12559 comp_op = "equal";
12560 break;
12561
12562 case OpFUnordLessThan:
12563 comp_op = "greaterThanEqual";
12564 break;
12565
12566 case OpFUnordLessThanEqual:
12567 comp_op = "greaterThan";
12568 break;
12569
12570 case OpFUnordGreaterThan:
12571 comp_op = "lessThanEqual";
12572 break;
12573
12574 case OpFUnordGreaterThanEqual:
12575 comp_op = "lessThan";
12576 break;
12577
12578 default:
12579 assert(0);
12580 break;
12581 }
12582
12583 expr = join("not(", comp_op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "))");
12584 }
12585 else
12586 {
12587 const char *comp_op = nullptr;
12588 switch (opcode)
12589 {
12590 case OpFUnordEqual:
12591 comp_op = " != ";
12592 break;
12593
12594 case OpFUnordNotEqual:
12595 comp_op = " == ";
12596 break;
12597
12598 case OpFUnordLessThan:
12599 comp_op = " >= ";
12600 break;
12601
12602 case OpFUnordLessThanEqual:
12603 comp_op = " > ";
12604 break;
12605
12606 case OpFUnordGreaterThan:
12607 comp_op = " <= ";
12608 break;
12609
12610 case OpFUnordGreaterThanEqual:
12611 comp_op = " < ";
12612 break;
12613
12614 default:
12615 assert(0);
12616 break;
12617 }
12618
12619 expr = join("!(", to_enclosed_unpacked_expression(op0), comp_op, to_enclosed_unpacked_expression(op1), ")");
12620 }
12621
12622 emit_op(ops[0], ops[1], expr, should_forward(op0) && should_forward(op1));
12623 inherit_expression_dependencies(ops[1], op0);
12624 inherit_expression_dependencies(ops[1], op1);
12625 break;
12626 }
12627
12628 case OpReportIntersectionKHR:
12629 // NV is same opcode.
12630 forced_temporaries.insert(ops[1]);
12631 if (ray_tracing_is_khr)
12632 GLSL_BFOP(reportIntersectionEXT);
12633 else
12634 GLSL_BFOP(reportIntersectionNV);
12635 flush_control_dependent_expressions(current_emitting_block->self);
12636 break;
12637 case OpIgnoreIntersectionNV:
12638 // KHR variant is a terminator.
12639 statement("ignoreIntersectionNV();");
12640 flush_control_dependent_expressions(current_emitting_block->self);
12641 break;
12642 case OpTerminateRayNV:
12643 // KHR variant is a terminator.
12644 statement("terminateRayNV();");
12645 flush_control_dependent_expressions(current_emitting_block->self);
12646 break;
12647 case OpTraceNV:
12648 statement("traceNV(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
12649 to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
12650 to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
12651 to_expression(ops[9]), ", ", to_expression(ops[10]), ");");
12652 flush_control_dependent_expressions(current_emitting_block->self);
12653 break;
12654 case OpTraceRayKHR:
12655 if (!has_decoration(ops[10], DecorationLocation))
12656 SPIRV_CROSS_THROW("A memory declaration object must be used in TraceRayKHR.");
12657 statement("traceRayEXT(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ",
12658 to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
12659 to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ",
12660 to_expression(ops[9]), ", ", get_decoration(ops[10], DecorationLocation), ");");
12661 flush_control_dependent_expressions(current_emitting_block->self);
12662 break;
12663 case OpExecuteCallableNV:
12664 statement("executeCallableNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
12665 flush_control_dependent_expressions(current_emitting_block->self);
12666 break;
12667 case OpExecuteCallableKHR:
12668 if (!has_decoration(ops[1], DecorationLocation))
12669 SPIRV_CROSS_THROW("A memory declaration object must be used in ExecuteCallableKHR.");
12670 statement("executeCallableEXT(", to_expression(ops[0]), ", ", get_decoration(ops[1], DecorationLocation), ");");
12671 flush_control_dependent_expressions(current_emitting_block->self);
12672 break;
12673
12674 // Don't bother forwarding temporaries. Avoids having to test expression invalidation with ray query objects.
12675 case OpRayQueryInitializeKHR:
12676 flush_variable_declaration(ops[0]);
12677 statement("rayQueryInitializeEXT(",
12678 to_expression(ops[0]), ", ", to_expression(ops[1]), ", ",
12679 to_expression(ops[2]), ", ", to_expression(ops[3]), ", ",
12680 to_expression(ops[4]), ", ", to_expression(ops[5]), ", ",
12681 to_expression(ops[6]), ", ", to_expression(ops[7]), ");");
12682 break;
12683 case OpRayQueryProceedKHR:
12684 flush_variable_declaration(ops[0]);
12685 emit_op(ops[0], ops[1], join("rayQueryProceedEXT(", to_expression(ops[2]), ")"), false);
12686 break;
12687 case OpRayQueryTerminateKHR:
12688 flush_variable_declaration(ops[0]);
12689 statement("rayQueryTerminateEXT(", to_expression(ops[0]), ");");
12690 break;
12691 case OpRayQueryGenerateIntersectionKHR:
12692 flush_variable_declaration(ops[0]);
12693 statement("rayQueryGenerateIntersectionEXT(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");");
12694 break;
12695 case OpRayQueryConfirmIntersectionKHR:
12696 flush_variable_declaration(ops[0]);
12697 statement("rayQueryConfirmIntersectionEXT(", to_expression(ops[0]), ");");
12698 break;
12699#define GLSL_RAY_QUERY_GET_OP(op) \
12700 case OpRayQueryGet##op##KHR: \
12701 flush_variable_declaration(ops[2]); \
12702 emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ")"), false); \
12703 break
12704#define GLSL_RAY_QUERY_GET_OP2(op) \
12705 case OpRayQueryGet##op##KHR: \
12706 flush_variable_declaration(ops[2]); \
12707 emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ", ", "bool(", to_expression(ops[3]), "))"), false); \
12708 break
12709 GLSL_RAY_QUERY_GET_OP(RayTMin);
12710 GLSL_RAY_QUERY_GET_OP(RayFlags);
12711 GLSL_RAY_QUERY_GET_OP(WorldRayOrigin);
12712 GLSL_RAY_QUERY_GET_OP(WorldRayDirection);
12713 GLSL_RAY_QUERY_GET_OP(IntersectionCandidateAABBOpaque);
12714 GLSL_RAY_QUERY_GET_OP2(IntersectionType);
12715 GLSL_RAY_QUERY_GET_OP2(IntersectionT);
12716 GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceCustomIndex);
12717 GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceId);
12718 GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceShaderBindingTableRecordOffset);
12719 GLSL_RAY_QUERY_GET_OP2(IntersectionGeometryIndex);
12720 GLSL_RAY_QUERY_GET_OP2(IntersectionPrimitiveIndex);
12721 GLSL_RAY_QUERY_GET_OP2(IntersectionBarycentrics);
12722 GLSL_RAY_QUERY_GET_OP2(IntersectionFrontFace);
12723 GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayDirection);
12724 GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayOrigin);
12725 GLSL_RAY_QUERY_GET_OP2(IntersectionObjectToWorld);
12726 GLSL_RAY_QUERY_GET_OP2(IntersectionWorldToObject);
12727#undef GLSL_RAY_QUERY_GET_OP
12728#undef GLSL_RAY_QUERY_GET_OP2
12729
12730 case OpConvertUToAccelerationStructureKHR:
12731 require_extension_internal("GL_EXT_ray_tracing");
12732 GLSL_UFOP(accelerationStructureEXT);
12733 break;
12734
12735 case OpConvertUToPtr:
12736 {
12737 auto &type = get<SPIRType>(ops[0]);
12738 if (type.storage != StorageClassPhysicalStorageBufferEXT)
12739 SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr.");
12740
12741 auto &in_type = expression_type(ops[2]);
12742 if (in_type.vecsize == 2)
12743 require_extension_internal("GL_EXT_buffer_reference_uvec2");
12744
12745 auto op = type_to_glsl(type);
12746 emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
12747 break;
12748 }
12749
12750 case OpConvertPtrToU:
12751 {
12752 auto &type = get<SPIRType>(ops[0]);
12753 auto &ptr_type = expression_type(ops[2]);
12754 if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT)
12755 SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU.");
12756
12757 if (type.vecsize == 2)
12758 require_extension_internal("GL_EXT_buffer_reference_uvec2");
12759
12760 auto op = type_to_glsl(type);
12761 emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str());
12762 break;
12763 }
12764
12765 case OpUndef:
12766 // Undefined value has been declared.
12767 break;
12768
12769 case OpLine:
12770 {
12771 emit_line_directive(ops[0], ops[1]);
12772 break;
12773 }
12774
12775 case OpNoLine:
12776 break;
12777
12778 case OpDemoteToHelperInvocationEXT:
12779 if (!options.vulkan_semantics)
12780 SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
12781 require_extension_internal("GL_EXT_demote_to_helper_invocation");
12782 statement(backend.demote_literal, ";");
12783 break;
12784
12785 case OpIsHelperInvocationEXT:
12786 if (!options.vulkan_semantics)
12787 SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL.");
12788 require_extension_internal("GL_EXT_demote_to_helper_invocation");
12789 emit_op(ops[0], ops[1], "helperInvocationEXT()", false);
12790 break;
12791
12792 case OpBeginInvocationInterlockEXT:
12793 // If the interlock is complex, we emit this elsewhere.
12794 if (!interlocked_is_complex)
12795 {
12796 statement("SPIRV_Cross_beginInvocationInterlock();");
12797 flush_all_active_variables();
12798 // Make sure forwarding doesn't propagate outside interlock region.
12799 }
12800 break;
12801
12802 case OpEndInvocationInterlockEXT:
12803 // If the interlock is complex, we emit this elsewhere.
12804 if (!interlocked_is_complex)
12805 {
12806 statement("SPIRV_Cross_endInvocationInterlock();");
12807 flush_all_active_variables();
12808 // Make sure forwarding doesn't propagate outside interlock region.
12809 }
12810 break;
12811
12812 default:
12813 statement("// unimplemented op ", instruction.op);
12814 break;
12815 }
12816}
12817
12818// Appends function arguments, mapped from global variables, beyond the specified arg index.
12819// This is used when a function call uses fewer arguments than the function defines.
12820// This situation may occur if the function signature has been dynamically modified to
12821// extract global variables referenced from within the function, and convert them to
12822// function arguments. This is necessary for shader languages that do not support global
12823// access to shader input content from within a function (e.g. Metal). Each additional
12824// function arg uses the name of the global variable. Function nesting will modify the
12825// functions and function calls all the way up the nesting chain.
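// For example (names purely illustrative), a call that was parsed as foo(x) may be emitted as
// foo(x, shared_data) when the global shared_data had to be hoisted into foo's parameter list.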
12826void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<string> &arglist)
12827{
12828 auto &args = func.arguments;
12829 uint32_t arg_cnt = uint32_t(args.size());
12830 for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++)
12831 {
12832 auto &arg = args[arg_idx];
12833 assert(arg.alias_global_variable);
12834
12835 // If the underlying variable needs to be declared
12836		// (i.e. a local variable with deferred declaration), do so now.
12837 uint32_t var_id = get<SPIRVariable>(arg.id).basevariable;
12838 if (var_id)
12839 flush_variable_declaration(var_id);
12840
12841 arglist.push_back(to_func_call_arg(arg, arg.id));
12842 }
12843}
12844
12845string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index)
12846{
12847 if (type.type_alias != TypeID(0) &&
12848 !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
12849 {
12850 return to_member_name(get<SPIRType>(type.type_alias), index);
12851 }
12852
12853 auto &memb = ir.meta[type.self].members;
12854 if (index < memb.size() && !memb[index].alias.empty())
12855 return memb[index].alias;
12856 else
12857 return join("_m", index);
12858}
12859
12860string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool)
12861{
12862 return join(".", to_member_name(type, index));
12863}
12864
12865string CompilerGLSL::to_multi_member_reference(const SPIRType &type, const SmallVector<uint32_t> &indices)
12866{
12867 string ret;
12868 auto *member_type = &type;
12869 for (auto &index : indices)
12870 {
12871 ret += join(".", to_member_name(*member_type, index));
12872 member_type = &get<SPIRType>(member_type->member_types[index]);
12873 }
12874 return ret;
12875}
12876
12877void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index)
12878{
12879 auto &memb = ir.meta[type.self].members;
12880 if (index < memb.size() && !memb[index].alias.empty())
12881 {
12882 auto &name = memb[index].alias;
12883 if (name.empty())
12884 return;
12885
12886 ParsedIR::sanitize_identifier(name, true, true);
12887 update_name_cache(type.member_name_cache, name);
12888 }
12889}
12890
12891// Checks whether the ID is a row_major matrix that requires conversion before use
12892bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id)
12893{
12894 // Natively supported row-major matrices do not need to be converted.
12895 // Legacy targets do not support row major.
12896 if (backend.native_row_major_matrix && !is_legacy())
12897 return false;
12898
12899 auto *e = maybe_get<SPIRExpression>(id);
12900 if (e)
12901 return e->need_transpose;
12902 else
12903 return has_decoration(id, DecorationRowMajor);
12904}
12905
12906// Checks whether the member is a row_major matrix that requires conversion before use
12907bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
12908{
12909 // Natively supported row-major matrices do not need to be converted.
12910 if (backend.native_row_major_matrix && !is_legacy())
12911 return false;
12912
12913 // Non-matrix or column-major matrix types do not need to be converted.
12914 if (!has_member_decoration(type.self, index, DecorationRowMajor))
12915 return false;
12916
12917 // Only square row-major matrices can be converted at this time.
12918	// Converting non-square matrices will require defining a custom GLSL function that
12919 // swaps matrix elements while retaining the original dimensional form of the matrix.
12920 const auto mbr_type = get<SPIRType>(type.member_types[index]);
12921 if (mbr_type.columns != mbr_type.vecsize)
12922 SPIRV_CROSS_THROW("Row-major matrices must be square on this platform.");
12923
12924 return true;
12925}
12926
12927// Checks if we need to remap physical type IDs when declaring the type in a buffer.
12928bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const
12929{
12930 return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID);
12931}
12932
12933// Checks whether the member is a packed data type that might need to be unpacked.
12934bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const
12935{
12936 return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypePacked);
12937}
12938
12939// Wraps the expression string in a function call that converts the
12940// row_major matrix result of the expression to a column_major matrix.
12941// Base implementation uses the standard library transpose() function.
12942// Subclasses may override to use a different function.
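// As an illustrative sketch (member names here are hypothetical): a load of a whole row-major
// mat3 member "m" is emitted as "transpose(m)", while loading a single column such as m[1]
// from a row-major matrix is unrolled into a constructor along the lines of
//   vec3(m[0][1], m[1][1], m[2][1])
// and on legacy targets without transpose(), an emitted spvTranspose() helper is used instead.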
12943string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */,
12944 bool /*is_packed*/)
12945{
12946 strip_enclosed_expression(exp_str);
12947 if (!is_matrix(exp_type))
12948 {
12949 auto column_index = exp_str.find_last_of('[');
12950 if (column_index == string::npos)
12951 return exp_str;
12952
12953 auto column_expr = exp_str.substr(column_index);
12954 exp_str.resize(column_index);
12955
12956 auto transposed_expr = type_to_glsl_constructor(exp_type) + "(";
12957
12958 // Loading a column from a row-major matrix. Unroll the load.
12959 for (uint32_t c = 0; c < exp_type.vecsize; c++)
12960 {
12961 transposed_expr += join(exp_str, '[', c, ']', column_expr);
12962 if (c + 1 < exp_type.vecsize)
12963 transposed_expr += ", ";
12964 }
12965
12966 transposed_expr += ")";
12967 return transposed_expr;
12968 }
12969 else if (options.version < 120)
12970 {
12971 // GLSL 110, ES 100 do not have transpose(), so emulate it. Note that
12972 // these GLSL versions do not support non-square matrices.
12973 if (exp_type.vecsize == 2 && exp_type.columns == 2)
12974 {
12975 if (!requires_transpose_2x2)
12976 {
12977 requires_transpose_2x2 = true;
12978 force_recompile();
12979 }
12980 }
12981 else if (exp_type.vecsize == 3 && exp_type.columns == 3)
12982 {
12983 if (!requires_transpose_3x3)
12984 {
12985 requires_transpose_3x3 = true;
12986 force_recompile();
12987 }
12988 }
12989 else if (exp_type.vecsize == 4 && exp_type.columns == 4)
12990 {
12991 if (!requires_transpose_4x4)
12992 {
12993 requires_transpose_4x4 = true;
12994 force_recompile();
12995 }
12996 }
12997 else
12998 SPIRV_CROSS_THROW("Non-square matrices are not supported in legacy GLSL, cannot transpose.");
12999 return join("spvTranspose(", exp_str, ")");
13000 }
13001 else
13002 return join("transpose(", exp_str, ")");
13003}
13004
13005string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id)
13006{
13007 string type_name = type_to_glsl(type, id);
13008 remap_variable_type_name(type, name, type_name);
13009 return join(type_name, " ", name, type_to_array_glsl(type));
13010}
13011
13012bool CompilerGLSL::variable_decl_is_remapped_storage(const SPIRVariable &var, StorageClass storage) const
13013{
13014 return var.storage == storage;
13015}
13016
13017// Emit a structure member. Subclasses may override to modify output,
13018// or to dynamically add a padding member if needed.
13019void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
13020 const string &qualifier, uint32_t)
13021{
13022 auto &membertype = get<SPIRType>(member_type_id);
13023
13024 Bitset memberflags;
13025 auto &memb = ir.meta[type.self].members;
13026 if (index < memb.size())
13027 memberflags = memb[index].decoration_flags;
13028
13029 string qualifiers;
13030 bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
13031 ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
13032
13033 if (is_block)
13034 qualifiers = to_interpolation_qualifiers(memberflags);
13035
13036 statement(layout_for_member(type, index), qualifiers, qualifier, flags_to_qualifiers_glsl(membertype, memberflags),
13037 variable_decl(membertype, to_member_name(type, index)), ";");
13038}
13039
13040void CompilerGLSL::emit_struct_padding_target(const SPIRType &)
13041{
13042}
13043
13044string CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags)
13045{
13046 // GL_EXT_buffer_reference variables can be marked as restrict.
13047 if (flags.get(DecorationRestrictPointerEXT))
13048 return "restrict ";
13049
13050 string qual;
13051
13052 if (type_is_floating_point(type) && flags.get(DecorationNoContraction) && backend.support_precise_qualifier)
13053 qual = "precise ";
13054
	// Structs do not have precision qualifiers, and neither do doubles (desktop-only anyway, so no mediump/highp).
13056 bool type_supports_precision =
13057 type.basetype == SPIRType::Float || type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt ||
13058 type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage ||
13059 type.basetype == SPIRType::Sampler;
13060
13061 if (!type_supports_precision)
13062 return qual;
13063
13064 if (options.es)
13065 {
13066 auto &execution = get_entry_point();
13067
13068 if (flags.get(DecorationRelaxedPrecision))
13069 {
13070 bool implied_fmediump = type.basetype == SPIRType::Float &&
13071 options.fragment.default_float_precision == Options::Mediump &&
13072 execution.model == ExecutionModelFragment;
13073
13074 bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
13075 options.fragment.default_int_precision == Options::Mediump &&
13076 execution.model == ExecutionModelFragment;
13077
13078 qual += (implied_fmediump || implied_imediump) ? "" : "mediump ";
13079 }
13080 else
13081 {
13082 bool implied_fhighp =
13083 type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp &&
13084 execution.model == ExecutionModelFragment) ||
13085 (execution.model != ExecutionModelFragment));
13086
13087 bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
13088 ((options.fragment.default_int_precision == Options::Highp &&
13089 execution.model == ExecutionModelFragment) ||
13090 (execution.model != ExecutionModelFragment));
13091
13092 qual += (implied_fhighp || implied_ihighp) ? "" : "highp ";
13093 }
13094 }
13095 else if (backend.allow_precision_qualifiers)
13096 {
		// Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient.
		// The default is highp, however, so only emit mediump in the rare case that a shader uses RelaxedPrecision.
13099 if (flags.get(DecorationRelaxedPrecision))
13100 qual += "mediump ";
13101 }
13102
13103 return qual;
13104}
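// Rough sketch of the qualifiers produced above (hypothetical declarations, assuming an ESSL
// fragment shader whose default float precision is mediump):
//   RelaxedPrecision float -> no qualifier needed, it matches the default:  "float x;"
//   full-precision float   -> "highp float x;"
//   NoContraction float    -> "precise highp float x;" (when the backend supports precise)
// On desktop Vulkan GLSL (allow_precision_qualifiers), only "mediump " is emitted for
// RelaxedPrecision, since highp is already the default there.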
13105
13106string CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
13107{
13108 auto &type = expression_type(id);
13109 bool use_precision_qualifiers = backend.allow_precision_qualifiers;
13110 if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
13111 {
13112 // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.
13113 auto &result_type = get<SPIRType>(type.image.type);
13114 if (result_type.width < 32)
13115 return "mediump ";
13116 }
13117 return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags);
13118}
13119
13120void CompilerGLSL::fixup_io_block_patch_qualifiers(const SPIRVariable &var)
13121{
	// Works around weird behavior in glslangValidator where
	// a patch out block is translated such that only the block members receive the Patch decoration.
	// To make glslang not complain when we compile again, we have to transform this back to a case where
	// the variable itself has the Patch decoration, and not the members.
13126 auto &type = get<SPIRType>(var.basetype);
13127 if (has_decoration(type.self, DecorationBlock))
13128 {
13129 uint32_t member_count = uint32_t(type.member_types.size());
13130 for (uint32_t i = 0; i < member_count; i++)
13131 {
13132 if (has_member_decoration(type.self, i, DecorationPatch))
13133 {
13134 set_decoration(var.self, DecorationPatch);
13135 break;
13136 }
13137 }
13138
13139 if (has_decoration(var.self, DecorationPatch))
13140 for (uint32_t i = 0; i < member_count; i++)
13141 unset_member_decoration(type.self, i, DecorationPatch);
13142 }
13143}
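// As a hedged sketch of the transform above (block and member names hypothetical), a block like
//   out TessBlock { patch vec4 v; } blk;   // Patch decoration only on the member
// is effectively rewritten so the whole variable carries the qualifier:
//   patch out TessBlock { vec4 v; } blk;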
13144
13145string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
13146{
13147 auto &flags = ir.meta[id].decoration.decoration_flags;
13148 string res;
13149
13150 auto *var = maybe_get<SPIRVariable>(id);
13151
13152 if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied)
13153 res += "shared ";
13154
13155 res += to_interpolation_qualifiers(flags);
13156 if (var)
13157 res += to_storage_qualifiers_glsl(*var);
13158
13159 auto &type = expression_type(id);
13160 if (type.image.dim != DimSubpassData && type.image.sampled == 2)
13161 {
13162 if (flags.get(DecorationCoherent))
13163 res += "coherent ";
13164 if (flags.get(DecorationRestrict))
13165 res += "restrict ";
13166
13167 if (flags.get(DecorationNonWritable))
13168 res += "readonly ";
13169
13170 bool formatted_load = type.image.format == ImageFormatUnknown;
13171 if (flags.get(DecorationNonReadable))
13172 {
13173 res += "writeonly ";
13174 formatted_load = false;
13175 }
13176
13177 if (formatted_load)
13178 {
13179 if (!options.es)
13180 require_extension_internal("GL_EXT_shader_image_load_formatted");
13181 else
13182 SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_image_load_formatted in ESSL.");
13183 }
13184 }
13185
13186 res += to_precision_qualifiers_glsl(id);
13187
13188 return res;
13189}
13190
13191string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg)
13192{
	// glslangValidator seems to make all arguments pointers no matter what, which is rather bizarre ...
13194 auto &type = expression_type(arg.id);
13195 const char *direction = "";
13196
13197 if (type.pointer)
13198 {
13199 if (arg.write_count && arg.read_count)
13200 direction = "inout ";
13201 else if (arg.write_count)
13202 direction = "out ";
13203 }
13204
13205 return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id));
13206}
13207
13208string CompilerGLSL::to_initializer_expression(const SPIRVariable &var)
13209{
13210 return to_unpacked_expression(var.initializer);
13211}
13212
13213string CompilerGLSL::to_zero_initialized_expression(uint32_t type_id)
13214{
13215#ifndef NDEBUG
13216 auto &type = get<SPIRType>(type_id);
13217 assert(type.storage == StorageClassPrivate || type.storage == StorageClassFunction ||
13218 type.storage == StorageClassGeneric);
13219#endif
13220 uint32_t id = ir.increase_bound_by(1);
13221 ir.make_constant_null(id, type_id, false);
13222 return constant_expression(get<SPIRConstant>(id));
13223}
13224
13225bool CompilerGLSL::type_can_zero_initialize(const SPIRType &type) const
13226{
13227 if (type.pointer)
13228 return false;
13229
13230 if (!type.array.empty() && options.flatten_multidimensional_arrays)
13231 return false;
13232
13233 for (auto &literal : type.array_size_literal)
13234 if (!literal)
13235 return false;
13236
13237 for (auto &memb : type.member_types)
13238 if (!type_can_zero_initialize(get<SPIRType>(memb)))
13239 return false;
13240
13241 return true;
13242}
13243
13244string CompilerGLSL::variable_decl(const SPIRVariable &variable)
13245{
13246 // Ignore the pointer type since GLSL doesn't have pointers.
13247 auto &type = get_variable_data_type(variable);
13248
13249 if (type.pointer_depth > 1 && !backend.support_pointer_to_pointer)
13250 SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types.");
13251
13252 auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self), variable.self));
13253
13254 if (variable.loop_variable && variable.static_expression)
13255 {
13256 uint32_t expr = variable.static_expression;
13257 if (ir.ids[expr].get_type() != TypeUndef)
13258 res += join(" = ", to_unpacked_expression(variable.static_expression));
13259 else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
13260 res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
13261 }
13262 else if (variable.initializer && !variable_decl_is_remapped_storage(variable, StorageClassWorkgroup))
13263 {
13264 uint32_t expr = variable.initializer;
13265 if (ir.ids[expr].get_type() != TypeUndef)
13266 res += join(" = ", to_initializer_expression(variable));
13267 else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
13268 res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
13269 }
13270
13271 return res;
13272}
13273
13274const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable)
13275{
13276 auto &flags = ir.meta[variable.self].decoration.decoration_flags;
13277 if (flags.get(DecorationRelaxedPrecision))
13278 return "mediump ";
13279 else
13280 return "highp ";
13281}
13282
13283string CompilerGLSL::pls_decl(const PlsRemap &var)
13284{
13285 auto &variable = get<SPIRVariable>(var.id);
13286
13287 SPIRType type;
13288 type.vecsize = pls_format_to_components(var.format);
13289 type.basetype = pls_format_to_basetype(var.format);
13290
13291 return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ",
13292 to_name(variable.self));
13293}
13294
13295uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const
13296{
13297 return to_array_size_literal(type, uint32_t(type.array.size() - 1));
13298}
13299
13300uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const
13301{
13302 assert(type.array.size() == type.array_size_literal.size());
13303
13304 if (type.array_size_literal[index])
13305 {
13306 return type.array[index];
13307 }
13308 else
13309 {
13310 // Use the default spec constant value.
13311 // This is the best we can do.
13312 return evaluate_constant_u32(type.array[index]);
13313 }
13314}
13315
13316string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index)
13317{
13318 assert(type.array.size() == type.array_size_literal.size());
13319
13320 auto &size = type.array[index];
13321 if (!type.array_size_literal[index])
13322 return to_expression(size);
13323 else if (size)
13324 return convert_to_string(size);
13325 else if (!backend.unsized_array_supported)
13326 {
13327 // For runtime-sized arrays, we can work around
13328 // lack of standard support for this by simply having
13329 // a single element array.
13330 //
13331 // Runtime length arrays must always be the last element
13332 // in an interface block.
13333 return "1";
13334 }
13335 else
13336 return "";
13337}
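// Note on the fallback above: on targets where unsized arrays are not supported, a runtime-sized
// array is emitted with a dummy size of 1, so a hypothetical SSBO member "float data[];"
// would be declared as "float data[1];" instead.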
13338
13339string CompilerGLSL::type_to_array_glsl(const SPIRType &type)
13340{
13341 if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
13342 {
13343 // We are using a wrapped pointer type, and we should not emit any array declarations here.
13344 return "";
13345 }
13346
13347 if (type.array.empty())
13348 return "";
13349
13350 if (options.flatten_multidimensional_arrays)
13351 {
13352 string res;
13353 res += "[";
13354 for (auto i = uint32_t(type.array.size()); i; i--)
13355 {
13356 res += enclose_expression(to_array_size(type, i - 1));
13357 if (i > 1)
13358 res += " * ";
13359 }
13360 res += "]";
13361 return res;
13362 }
13363 else
13364 {
13365 if (type.array.size() > 1)
13366 {
13367 if (!options.es && options.version < 430)
13368 require_extension_internal("GL_ARB_arrays_of_arrays");
13369 else if (options.es && options.version < 310)
13370 SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. "
13371 "Try using --flatten-multidimensional-arrays or set "
13372 "options.flatten_multidimensional_arrays to true.");
13373 }
13374
13375 string res;
13376 for (auto i = uint32_t(type.array.size()); i; i--)
13377 {
13378 res += "[";
13379 res += to_array_size(type, i - 1);
13380 res += "]";
13381 }
13382 return res;
13383 }
13384}
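// Illustrative sketch (hypothetical sizes): a two-dimensional array declared as "float v[3][4]"
// in plain GLSL is emitted as "float v[3 * 4]" when options.flatten_multidimensional_arrays is
// enabled; the corresponding index calculations are flattened elsewhere in the compiler to match.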
13385
13386string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id)
13387{
13388 auto &imagetype = get<SPIRType>(type.image.type);
13389 string res;
13390
13391 switch (imagetype.basetype)
13392 {
13393 case SPIRType::Int:
13394 case SPIRType::Short:
13395 case SPIRType::SByte:
13396 res = "i";
13397 break;
13398 case SPIRType::UInt:
13399 case SPIRType::UShort:
13400 case SPIRType::UByte:
13401 res = "u";
13402 break;
13403 default:
13404 break;
13405 }
13406
	// For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation.
	// We cannot express a true half texture type in GLSL, nor short integer formats for that matter.
13409
13410 if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
13411 return res + "subpassInput" + (type.image.ms ? "MS" : "");
13412 else if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData &&
13413 subpass_input_is_framebuffer_fetch(id))
13414 {
13415 SPIRType sampled_type = get<SPIRType>(type.image.type);
13416 sampled_type.vecsize = 4;
13417 return type_to_glsl(sampled_type);
13418 }
13419
13420 // If we're emulating subpassInput with samplers, force sampler2D
13421 // so we don't have to specify format.
13422 if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData)
13423 {
13424 // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V.
13425 if (type.image.dim == DimBuffer && type.image.sampled == 1)
13426 res += "sampler";
13427 else
13428 res += type.image.sampled == 2 ? "image" : "texture";
13429 }
13430 else
13431 res += "sampler";
13432
13433 switch (type.image.dim)
13434 {
13435 case Dim1D:
13436 res += "1D";
13437 break;
13438 case Dim2D:
13439 res += "2D";
13440 break;
13441 case Dim3D:
13442 res += "3D";
13443 break;
13444 case DimCube:
13445 res += "Cube";
13446 break;
13447 case DimRect:
13448 if (options.es)
13449 SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES.");
13450
13451 if (is_legacy_desktop())
13452 require_extension_internal("GL_ARB_texture_rectangle");
13453
13454 res += "2DRect";
13455 break;
13456
13457 case DimBuffer:
13458 if (options.es && options.version < 320)
13459 require_extension_internal("GL_EXT_texture_buffer");
13460 else if (!options.es && options.version < 300)
13461 require_extension_internal("GL_EXT_texture_buffer_object");
13462 res += "Buffer";
13463 break;
13464
13465 case DimSubpassData:
13466 res += "2D";
13467 break;
13468 default:
13469 SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported.");
13470 }
13471
13472 if (type.image.ms)
13473 res += "MS";
13474 if (type.image.arrayed)
13475 {
13476 if (is_legacy_desktop())
13477 require_extension_internal("GL_EXT_texture_array");
13478 res += "Array";
13479 }
13480
13481 // "Shadow" state in GLSL only exists for samplers and combined image samplers.
13482 if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) &&
13483 is_depth_image(type, id))
13484 {
13485 res += "Shadow";
13486 }
13487
13488 return res;
13489}
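// A few hypothetical examples of the mapping above:
//   combined 2D image + sampler of float      -> "sampler2D"
//   separate sampled 2D image of int          -> "itexture2D"
//   storage 2D image of uint (sampled == 2)   -> "uimage2D"
//   combined cube array with depth comparison -> "samplerCubeArrayShadow"
// The exact result also depends on Vulkan semantics, ES/desktop version and extensions in use.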
13490
13491string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type)
13492{
13493 if (backend.use_array_constructor && type.array.size() > 1)
13494 {
13495 if (options.flatten_multidimensional_arrays)
13496 SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, "
13497 "e.g. float[][]().");
13498 else if (!options.es && options.version < 430)
13499 require_extension_internal("GL_ARB_arrays_of_arrays");
13500 else if (options.es && options.version < 310)
13501 SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310.");
13502 }
13503
13504 auto e = type_to_glsl(type);
13505 if (backend.use_array_constructor)
13506 {
13507 for (uint32_t i = 0; i < type.array.size(); i++)
13508 e += "[]";
13509 }
13510 return e;
13511}
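// Sketch of the constructor names produced above (hypothetical element types): a one-dimensional
// "float[4]" yields "float[]" and a two-dimensional array yields "float[][]", so an emitted
// constructor call ends up looking roughly like "float[][](float[](...), float[](...))"
// on targets that use array constructor syntax.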
13512
13513// The optional id parameter indicates the object whose type we are trying
13514// to find the description for. It is optional. Most type descriptions do not
13515// depend on a specific object's use of that type.
13516string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
13517{
13518 if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
13519 {
13520 // Need to create a magic type name which compacts the entire type information.
13521 string name = type_to_glsl(get_pointee_type(type));
13522 for (size_t i = 0; i < type.array.size(); i++)
13523 {
13524 if (type.array_size_literal[i])
13525 name += join(type.array[i], "_");
13526 else
13527 name += join("id", type.array[i], "_");
13528 }
13529 name += "Pointer";
13530 return name;
13531 }
13532
13533 switch (type.basetype)
13534 {
13535 case SPIRType::Struct:
13536 // Need OpName lookup here to get a "sensible" name for a struct.
13537 if (backend.explicit_struct_type)
13538 return join("struct ", to_name(type.self));
13539 else
13540 return to_name(type.self);
13541
13542 case SPIRType::Image:
13543 case SPIRType::SampledImage:
13544 return image_type_glsl(type, id);
13545
13546 case SPIRType::Sampler:
13547 // The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing
13548 // this distinction into the type system.
13549 return comparison_ids.count(id) ? "samplerShadow" : "sampler";
13550
13551 case SPIRType::AccelerationStructure:
13552 return ray_tracing_is_khr ? "accelerationStructureEXT" : "accelerationStructureNV";
13553
13554 case SPIRType::RayQuery:
13555 return "rayQueryEXT";
13556
13557 case SPIRType::Void:
13558 return "void";
13559
13560 default:
13561 break;
13562 }
13563
13564 if (type.basetype == SPIRType::UInt && is_legacy())
13565 SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
13566
13567 if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
13568 {
13569 switch (type.basetype)
13570 {
13571 case SPIRType::Boolean:
13572 return "bool";
13573 case SPIRType::SByte:
13574 return backend.basic_int8_type;
13575 case SPIRType::UByte:
13576 return backend.basic_uint8_type;
13577 case SPIRType::Short:
13578 return backend.basic_int16_type;
13579 case SPIRType::UShort:
13580 return backend.basic_uint16_type;
13581 case SPIRType::Int:
13582 return backend.basic_int_type;
13583 case SPIRType::UInt:
13584 return backend.basic_uint_type;
13585 case SPIRType::AtomicCounter:
13586 return "atomic_uint";
13587 case SPIRType::Half:
13588 return "float16_t";
13589 case SPIRType::Float:
13590 return "float";
13591 case SPIRType::Double:
13592 return "double";
13593 case SPIRType::Int64:
13594 return "int64_t";
13595 case SPIRType::UInt64:
13596 return "uint64_t";
13597 default:
13598 return "???";
13599 }
13600 }
13601 else if (type.vecsize > 1 && type.columns == 1) // Vector builtin
13602 {
13603 switch (type.basetype)
13604 {
13605 case SPIRType::Boolean:
13606 return join("bvec", type.vecsize);
13607 case SPIRType::SByte:
13608 return join("i8vec", type.vecsize);
13609 case SPIRType::UByte:
13610 return join("u8vec", type.vecsize);
13611 case SPIRType::Short:
13612 return join("i16vec", type.vecsize);
13613 case SPIRType::UShort:
13614 return join("u16vec", type.vecsize);
13615 case SPIRType::Int:
13616 return join("ivec", type.vecsize);
13617 case SPIRType::UInt:
13618 return join("uvec", type.vecsize);
13619 case SPIRType::Half:
13620 return join("f16vec", type.vecsize);
13621 case SPIRType::Float:
13622 return join("vec", type.vecsize);
13623 case SPIRType::Double:
13624 return join("dvec", type.vecsize);
13625 case SPIRType::Int64:
13626 return join("i64vec", type.vecsize);
13627 case SPIRType::UInt64:
13628 return join("u64vec", type.vecsize);
13629 default:
13630 return "???";
13631 }
13632 }
13633 else if (type.vecsize == type.columns) // Simple Matrix builtin
13634 {
13635 switch (type.basetype)
13636 {
13637 case SPIRType::Boolean:
13638 return join("bmat", type.vecsize);
13639 case SPIRType::Int:
13640 return join("imat", type.vecsize);
13641 case SPIRType::UInt:
13642 return join("umat", type.vecsize);
13643 case SPIRType::Half:
13644 return join("f16mat", type.vecsize);
13645 case SPIRType::Float:
13646 return join("mat", type.vecsize);
13647 case SPIRType::Double:
13648 return join("dmat", type.vecsize);
13649 // Matrix types not supported for int64/uint64.
13650 default:
13651 return "???";
13652 }
13653 }
13654 else
13655 {
13656 switch (type.basetype)
13657 {
13658 case SPIRType::Boolean:
13659 return join("bmat", type.columns, "x", type.vecsize);
13660 case SPIRType::Int:
13661 return join("imat", type.columns, "x", type.vecsize);
13662 case SPIRType::UInt:
13663 return join("umat", type.columns, "x", type.vecsize);
13664 case SPIRType::Half:
13665 return join("f16mat", type.columns, "x", type.vecsize);
13666 case SPIRType::Float:
13667 return join("mat", type.columns, "x", type.vecsize);
13668 case SPIRType::Double:
13669 return join("dmat", type.columns, "x", type.vecsize);
13670 // Matrix types not supported for int64/uint64.
13671 default:
13672 return "???";
13673 }
13674 }
13675}
13676
13677void CompilerGLSL::add_variable(unordered_set<string> &variables_primary,
13678 const unordered_set<string> &variables_secondary, string &name)
13679{
13680 if (name.empty())
13681 return;
13682
13683 ParsedIR::sanitize_underscores(name);
13684 if (ParsedIR::is_globally_reserved_identifier(name, true))
13685 {
13686 name.clear();
13687 return;
13688 }
13689
13690 update_name_cache(variables_primary, variables_secondary, name);
13691}
13692
13693void CompilerGLSL::add_local_variable_name(uint32_t id)
13694{
13695 add_variable(local_variable_names, block_names, ir.meta[id].decoration.alias);
13696}
13697
13698void CompilerGLSL::add_resource_name(uint32_t id)
13699{
13700 add_variable(resource_names, block_names, ir.meta[id].decoration.alias);
13701}
13702
13703void CompilerGLSL::add_header_line(const std::string &line)
13704{
13705 header_lines.push_back(line);
13706}
13707
13708bool CompilerGLSL::has_extension(const std::string &ext) const
13709{
13710 auto itr = find(begin(forced_extensions), end(forced_extensions), ext);
13711 return itr != end(forced_extensions);
13712}
13713
13714void CompilerGLSL::require_extension(const std::string &ext)
13715{
13716 if (!has_extension(ext))
13717 forced_extensions.push_back(ext);
13718}
13719
13720void CompilerGLSL::require_extension_internal(const string &ext)
13721{
13722 if (backend.supports_extensions && !has_extension(ext))
13723 {
13724 forced_extensions.push_back(ext);
13725 force_recompile();
13726 }
13727}
13728
13729void CompilerGLSL::flatten_buffer_block(VariableID id)
13730{
13731 auto &var = get<SPIRVariable>(id);
13732 auto &type = get<SPIRType>(var.basetype);
13733 auto name = to_name(type.self, false);
13734 auto &flags = ir.meta[type.self].decoration.decoration_flags;
13735
13736 if (!type.array.empty())
13737 SPIRV_CROSS_THROW(name + " is an array of UBOs.");
13738 if (type.basetype != SPIRType::Struct)
13739 SPIRV_CROSS_THROW(name + " is not a struct.");
13740 if (!flags.get(DecorationBlock))
13741 SPIRV_CROSS_THROW(name + " is not a block.");
13742 if (type.member_types.empty())
13743 SPIRV_CROSS_THROW(name + " is an empty struct.");
13744
13745 flattened_buffer_blocks.insert(id);
13746}
13747
13748bool CompilerGLSL::builtin_translates_to_nonarray(spv::BuiltIn /*builtin*/) const
13749{
13750 return false; // GLSL itself does not need to translate array builtin types to non-array builtin types
13751}
13752
13753bool CompilerGLSL::check_atomic_image(uint32_t id)
13754{
13755 auto &type = expression_type(id);
13756 if (type.storage == StorageClassImage)
13757 {
13758 if (options.es && options.version < 320)
13759 require_extension_internal("GL_OES_shader_image_atomic");
13760
13761 auto *var = maybe_get_backing_variable(id);
13762 if (var)
13763 {
13764 auto &flags = ir.meta[var->self].decoration.decoration_flags;
13765 if (flags.get(DecorationNonWritable) || flags.get(DecorationNonReadable))
13766 {
13767 flags.clear(DecorationNonWritable);
13768 flags.clear(DecorationNonReadable);
13769 force_recompile();
13770 }
13771 }
13772 return true;
13773 }
13774 else
13775 return false;
13776}
13777
13778void CompilerGLSL::add_function_overload(const SPIRFunction &func)
13779{
13780 Hasher hasher;
13781 for (auto &arg : func.arguments)
13782 {
		// Parameters may or may not be pointer types,
		// but that does not change the signature in GLSL/HLSL,
		// so strip the pointer type before hashing.
13786 uint32_t type_id = get_pointee_type_id(arg.type);
13787 auto &type = get<SPIRType>(type_id);
13788
13789 if (!combined_image_samplers.empty())
13790 {
13791 // If we have combined image samplers, we cannot really trust the image and sampler arguments
13792 // we pass down to callees, because they may be shuffled around.
13793 // Ignore these arguments, to make sure that functions need to differ in some other way
13794 // to be considered different overloads.
13795 if (type.basetype == SPIRType::SampledImage ||
13796 (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler)
13797 {
13798 continue;
13799 }
13800 }
13801
13802 hasher.u32(type_id);
13803 }
13804 uint64_t types_hash = hasher.get();
13805
13806 auto function_name = to_name(func.self);
13807 auto itr = function_overloads.find(function_name);
13808 if (itr != end(function_overloads))
13809 {
13810 // There exists a function with this name already.
13811 auto &overloads = itr->second;
13812 if (overloads.count(types_hash) != 0)
13813 {
13814 // Overload conflict, assign a new name.
13815 add_resource_name(func.self);
13816 function_overloads[to_name(func.self)].insert(types_hash);
13817 }
13818 else
13819 {
13820 // Can reuse the name.
13821 overloads.insert(types_hash);
13822 }
13823 }
13824 else
13825 {
13826 // First time we see this function name.
13827 add_resource_name(func.self);
13828 function_overloads[to_name(func.self)].insert(types_hash);
13829 }
13830}
13831
13832void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags)
13833{
13834 if (func.self != ir.default_entry_point)
13835 add_function_overload(func);
13836
13837 // Avoid shadow declarations.
13838 local_variable_names = resource_names;
13839
13840 string decl;
13841
13842 auto &type = get<SPIRType>(func.return_type);
13843 decl += flags_to_qualifiers_glsl(type, return_flags);
13844 decl += type_to_glsl(type);
13845 decl += type_to_array_glsl(type);
13846 decl += " ";
13847
13848 if (func.self == ir.default_entry_point)
13849 {
13850 // If we need complex fallback in GLSL, we just wrap main() in a function
13851 // and interlock the entire shader ...
13852 if (interlocked_is_complex)
13853 decl += "spvMainInterlockedBody";
13854 else
13855 decl += "main";
13856
13857 processing_entry_point = true;
13858 }
13859 else
13860 decl += to_name(func.self);
13861
13862 decl += "(";
13863 SmallVector<string> arglist;
13864 for (auto &arg : func.arguments)
13865 {
13866 // Do not pass in separate images or samplers if we're remapping
13867 // to combined image samplers.
13868 if (skip_argument(arg.id))
13869 continue;
13870
13871 // Might change the variable name if it already exists in this function.
		// SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
		// to use the same name for different variables.
13874 // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
13875 add_local_variable_name(arg.id);
13876
13877 arglist.push_back(argument_decl(arg));
13878
13879 // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
13880 auto *var = maybe_get<SPIRVariable>(arg.id);
13881 if (var)
13882 var->parameter = &arg;
13883 }
13884
13885 for (auto &arg : func.shadow_arguments)
13886 {
13887 // Might change the variable name if it already exists in this function.
		// SPIR-V OpName doesn't have any semantic effect, so it's valid for an implementation
		// to use the same name for different variables.
13890 // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
13891 add_local_variable_name(arg.id);
13892
13893 arglist.push_back(argument_decl(arg));
13894
13895 // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
13896 auto *var = maybe_get<SPIRVariable>(arg.id);
13897 if (var)
13898 var->parameter = &arg;
13899 }
13900
13901 decl += merge(arglist);
13902 decl += ")";
13903 statement(decl);
13904}
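// As a rough sketch, a SPIR-V function taking a pointer parameter that is both read and written
// ends up with a prototype along the lines of (names hypothetical):
//   void update_color(inout mediump vec4 color, int index)
// while the entry point itself is always emitted as "void main()" (or "spvMainInterlockedBody"
// when the complex interlock fallback is required).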
13905
13906void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
13907{
13908 // Avoid potential cycles.
13909 if (func.active)
13910 return;
13911 func.active = true;
13912
13913 // If we depend on a function, emit that function before we emit our own function.
13914 for (auto block : func.blocks)
13915 {
13916 auto &b = get<SPIRBlock>(block);
13917 for (auto &i : b.ops)
13918 {
13919 auto ops = stream(i);
13920 auto op = static_cast<Op>(i.op);
13921
13922 if (op == OpFunctionCall)
13923 {
13924 // Recursively emit functions which are called.
13925 uint32_t id = ops[2];
13926 emit_function(get<SPIRFunction>(id), ir.meta[ops[1]].decoration.decoration_flags);
13927 }
13928 }
13929 }
13930
13931 if (func.entry_line.file_id != 0)
13932 emit_line_directive(func.entry_line.file_id, func.entry_line.line_literal);
13933 emit_function_prototype(func, return_flags);
13934 begin_scope();
13935
13936 if (func.self == ir.default_entry_point)
13937 emit_entry_point_declarations();
13938
13939 current_function = &func;
13940 auto &entry_block = get<SPIRBlock>(func.entry_block);
13941
13942 sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack));
13943 for (auto &array : func.constant_arrays_needed_on_stack)
13944 {
13945 auto &c = get<SPIRConstant>(array);
13946 auto &type = get<SPIRType>(c.constant_type);
13947 statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";");
13948 }
13949
13950 for (auto &v : func.local_variables)
13951 {
13952 auto &var = get<SPIRVariable>(v);
13953 var.deferred_declaration = false;
13954
13955 if (variable_decl_is_remapped_storage(var, StorageClassWorkgroup))
13956 {
13957 // Special variable type which cannot have initializer,
13958 // need to be declared as standalone variables.
13959 // Comes from MSL which can push global variables as local variables in main function.
13960 add_local_variable_name(var.self);
13961 statement(variable_decl(var), ";");
13962 var.deferred_declaration = false;
13963 }
13964 else if (var.storage == StorageClassPrivate)
13965 {
13966 // These variables will not have had their CFG usage analyzed, so move it to the entry block.
13967 // Comes from MSL which can push global variables as local variables in main function.
13968 // We could just declare them right now, but we would miss out on an important initialization case which is
13969 // LUT declaration in MSL.
13970 // If we don't declare the variable when it is assigned we're forced to go through a helper function
13971 // which copies elements one by one.
13972 add_local_variable_name(var.self);
13973
13974 if (var.initializer)
13975 {
13976 statement(variable_decl(var), ";");
13977 var.deferred_declaration = false;
13978 }
13979 else
13980 {
13981 auto &dominated = entry_block.dominated_variables;
13982 if (find(begin(dominated), end(dominated), var.self) == end(dominated))
13983 entry_block.dominated_variables.push_back(var.self);
13984 var.deferred_declaration = true;
13985 }
13986 }
13987 else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression)
13988 {
13989 // No need to declare this variable, it has a static expression.
13990 var.deferred_declaration = false;
13991 }
13992 else if (expression_is_lvalue(v))
13993 {
13994 add_local_variable_name(var.self);
13995
13996 // Loop variables should never be declared early, they are explicitly emitted in a loop.
13997 if (var.initializer && !var.loop_variable)
13998 statement(variable_decl_function_local(var), ";");
13999 else
14000 {
14001 // Don't declare variable until first use to declutter the GLSL output quite a lot.
14002 // If we don't touch the variable before first branch,
14003 // declare it then since we need variable declaration to be in top scope.
14004 var.deferred_declaration = true;
14005 }
14006 }
14007 else
14008 {
14009 // HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this.
14010 // For these types (non-lvalue), we enforce forwarding through a shadowed variable.
14011 // This means that when we OpStore to these variables, we just write in the expression ID directly.
14012 // This breaks any kind of branching, since the variable must be statically assigned.
14013 // Branching on samplers and images would be pretty much impossible to fake in GLSL.
14014 var.statically_assigned = true;
14015 }
14016
14017 var.loop_variable_enable = false;
14018
14019 // Loop variables are never declared outside their for-loop, so block any implicit declaration.
14020 if (var.loop_variable)
14021 var.deferred_declaration = false;
14022 }
14023
14024 // Enforce declaration order for regression testing purposes.
14025 for (auto &block_id : func.blocks)
14026 {
14027 auto &block = get<SPIRBlock>(block_id);
14028 sort(begin(block.dominated_variables), end(block.dominated_variables));
14029 }
14030
14031 for (auto &line : current_function->fixup_hooks_in)
14032 line();
14033
14034 emit_block_chain(entry_block);
14035
14036 end_scope();
14037 processing_entry_point = false;
14038 statement("");
14039
14040 // Make sure deferred declaration state for local variables is cleared when we are done with function.
14041 // We risk declaring Private/Workgroup variables in places we are not supposed to otherwise.
14042 for (auto &v : func.local_variables)
14043 {
14044 auto &var = get<SPIRVariable>(v);
14045 var.deferred_declaration = false;
14046 }
14047}
14048
14049void CompilerGLSL::emit_fixup()
14050{
14051 if (is_vertex_like_shader())
14052 {
14053 if (options.vertex.fixup_clipspace)
14054 {
14055 const char *suffix = backend.float_literal_suffix ? "f" : "";
14056 statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;");
14057 }
14058
14059 if (options.vertex.flip_vert_y)
14060 statement("gl_Position.y = -gl_Position.y;");
14061 }
14062}
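// Note on fixup_clipspace above: with z' = 2.0 * z - w, a clip-space depth in [0, w]
// (Vulkan/D3D convention) is remapped to [-w, w] (default OpenGL convention), since z = 0
// maps to -w and z = w maps to +w. flip_vert_y simply mirrors the Y axis.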
14063
14064void CompilerGLSL::flush_phi(BlockID from, BlockID to)
14065{
14066 auto &child = get<SPIRBlock>(to);
14067 if (child.ignore_phi_from_block == from)
14068 return;
14069
14070 unordered_set<uint32_t> temporary_phi_variables;
14071
14072 for (auto itr = begin(child.phi_variables); itr != end(child.phi_variables); ++itr)
14073 {
14074 auto &phi = *itr;
14075
14076 if (phi.parent == from)
14077 {
14078 auto &var = get<SPIRVariable>(phi.function_variable);
14079
14080 // A Phi variable might be a loop variable, so flush to static expression.
14081 if (var.loop_variable && !var.loop_variable_enable)
14082 var.static_expression = phi.local_variable;
14083 else
14084 {
14085 flush_variable_declaration(phi.function_variable);
14086
14087 // Check if we are going to write to a Phi variable that another statement will read from
14088 // as part of another Phi node in our target block.
14089 // For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads.
14090 // This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm.
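				// Hypothetical example: if this block has the phis "x = phi([y, from])" and
				// "y = phi([x, from])", writing x first would corrupt the value read for y,
				// so x is first saved into a "_<id>_copy" temporary and y reads the copy instead.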
14091 bool need_saved_temporary =
14092 find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool {
14093 return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from;
14094 }) != end(child.phi_variables);
14095
14096 if (need_saved_temporary)
14097 {
14098 // Need to make sure we declare the phi variable with a copy at the right scope.
14099 // We cannot safely declare a temporary here since we might be inside a continue block.
14100 if (!var.allocate_temporary_copy)
14101 {
14102 var.allocate_temporary_copy = true;
14103 force_recompile();
14104 }
14105 statement("_", phi.function_variable, "_copy", " = ", to_name(phi.function_variable), ";");
14106 temporary_phi_variables.insert(phi.function_variable);
14107 }
14108
14109 // This might be called in continue block, so make sure we
14110 // use this to emit ESSL 1.0 compliant increments/decrements.
14111 auto lhs = to_expression(phi.function_variable);
14112
14113 string rhs;
14114 if (temporary_phi_variables.count(phi.local_variable))
14115 rhs = join("_", phi.local_variable, "_copy");
14116 else
14117 rhs = to_pointer_expression(phi.local_variable);
14118
14119 if (!optimize_read_modify_write(get<SPIRType>(var.basetype), lhs, rhs))
14120 statement(lhs, " = ", rhs, ";");
14121 }
14122
14123 register_write(phi.function_variable);
14124 }
14125 }
14126}
14127
14128void CompilerGLSL::branch_to_continue(BlockID from, BlockID to)
14129{
14130 auto &to_block = get<SPIRBlock>(to);
14131 if (from == to)
14132 return;
14133
14134 assert(is_continue(to));
14135 if (to_block.complex_continue)
14136 {
14137 // Just emit the whole block chain as is.
14138 auto usage_counts = expression_usage_counts;
14139
14140 emit_block_chain(to_block);
14141
14142 // Expression usage counts are moot after returning from the continue block.
14143 expression_usage_counts = usage_counts;
14144 }
14145 else
14146 {
14147 auto &from_block = get<SPIRBlock>(from);
14148 bool outside_control_flow = false;
14149 uint32_t loop_dominator = 0;
14150
14151 // FIXME: Refactor this to not use the old loop_dominator tracking.
14152 if (from_block.merge_block)
14153 {
14154 // If we are a loop header, we don't set the loop dominator,
14155 // so just use "self" here.
14156 loop_dominator = from;
14157 }
14158 else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator))
14159 {
14160 loop_dominator = from_block.loop_dominator;
14161 }
14162
14163 if (loop_dominator != 0)
14164 {
14165 auto &cfg = get_cfg_for_current_function();
14166
14167 // For non-complex continue blocks, we implicitly branch to the continue block
14168 // by having the continue block be part of the loop header in for (; ; continue-block).
14169 outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(loop_dominator, from);
14170 }
14171
14172 // Some simplification for for-loops. We always end up with a useless continue;
14173 // statement since we branch to a loop block.
14174 // Walk the CFG, if we unconditionally execute the block calling continue assuming we're in the loop block,
14175 // we can avoid writing out an explicit continue statement.
14176 // Similar optimization to return statements if we know we're outside flow control.
14177 if (!outside_control_flow)
14178 statement("continue;");
14179 }
14180}
14181
14182void CompilerGLSL::branch(BlockID from, BlockID to)
14183{
14184 flush_phi(from, to);
14185 flush_control_dependent_expressions(from);
14186
14187 bool to_is_continue = is_continue(to);
14188
14189 // This is only a continue if we branch to our loop dominator.
14190 if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get<SPIRBlock>(from).loop_dominator == to)
14191 {
14192 // This can happen if we had a complex continue block which was emitted.
14193 // Once the continue block tries to branch to the loop header, just emit continue;
14194 // and end the chain here.
14195 statement("continue;");
14196 }
14197 else if (from != to && is_break(to))
14198 {
14199 // We cannot break to ourselves, so check explicitly for from != to.
14200 // This case can trigger if a loop header is all three of these things:
14201 // - Continue block
14202 // - Loop header
14203 // - Break merge target all at once ...
14204
14205 // Very dirty workaround.
14206 // Switch constructs are able to break, but they cannot break out of a loop at the same time.
		// The only sensible solution is to make a ladder variable, which we declare at the top of the switch block,
14208 // write to the ladder here, and defer the break.
14209 // The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case.
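		// A hedged sketch of the resulting GLSL (the identifier is derived from the switch block ID,
		// shown here with a made-up ID):
		//   bool _42_ladder_break = false;
		//   switch (...) { case ...: _42_ladder_break = true; break; ... }
		//   if (_42_ladder_break) break;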
14210 if (current_emitting_switch && is_loop_break(to) &&
14211 current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) &&
14212 get<SPIRBlock>(current_emitting_switch->loop_dominator).merge_block == to)
14213 {
14214 if (!current_emitting_switch->need_ladder_break)
14215 {
14216 force_recompile();
14217 current_emitting_switch->need_ladder_break = true;
14218 }
14219
14220 statement("_", current_emitting_switch->self, "_ladder_break = true;");
14221 }
14222 statement("break;");
14223 }
14224 else if (to_is_continue || from == to)
14225 {
		// The from == to case can happen for a do-while loop which branches into itself.
14227 // We don't mark these cases as continue blocks, but the only possible way to branch into
14228 // ourselves is through means of continue blocks.
14229
14230 // If we are merging to a continue block, there is no need to emit the block chain for continue here.
14231 // We can branch to the continue block after we merge execution.
14232
14233 // Here we make use of structured control flow rules from spec:
14234 // 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block
14235 // - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG
14236 // If we are branching to a merge block, we must be inside a construct which dominates the merge block.
14237 auto &block_meta = ir.block_meta[to];
14238 bool branching_to_merge =
14239 (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT |
14240 ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0;
14241 if (!to_is_continue || !branching_to_merge)
14242 branch_to_continue(from, to);
14243 }
14244 else if (!is_conditional(to))
14245 emit_block_chain(get<SPIRBlock>(to));
14246
14247 // It is important that we check for break before continue.
14248 // A block might serve two purposes, a break block for the inner scope, and
14249 // a continue block in the outer scope.
14250 // Inner scope always takes precedence.
14251}
14252
14253void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block)
14254{
14255 auto &from_block = get<SPIRBlock>(from);
14256 BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0);
14257
14258 // If we branch directly to our selection merge target, we don't need a code path.
14259 bool true_block_needs_code = true_block != merge_block || flush_phi_required(from, true_block);
14260 bool false_block_needs_code = false_block != merge_block || flush_phi_required(from, false_block);
14261
14262 if (!true_block_needs_code && !false_block_needs_code)
14263 return;
14264
14265 // We might have a loop merge here. Only consider selection flattening constructs.
14266 // Loop hints are handled explicitly elsewhere.
14267 if (from_block.hint == SPIRBlock::HintFlatten || from_block.hint == SPIRBlock::HintDontFlatten)
14268 emit_block_hints(from_block);
14269
14270 if (true_block_needs_code)
14271 {
14272 statement("if (", to_expression(cond), ")");
14273 begin_scope();
14274 branch(from, true_block);
14275 end_scope();
14276
14277 if (false_block_needs_code)
14278 {
14279 statement("else");
14280 begin_scope();
14281 branch(from, false_block);
14282 end_scope();
14283 }
14284 }
14285 else if (false_block_needs_code)
14286 {
14287 // Only need false path, use negative conditional.
14288 statement("if (!", to_enclosed_expression(cond), ")");
14289 begin_scope();
14290 branch(from, false_block);
14291 end_scope();
14292 }
14293}
14294
14295// FIXME: This currently cannot handle complex continue blocks
14296// as in do-while.
14297// This should be seen as a "trivial" continue block.
14298string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block)
14299{
14300 auto *block = &get<SPIRBlock>(continue_block);
14301
14302 // While emitting the continue block, declare_temporary will check this
14303 // if we have to emit temporaries.
14304 current_continue_block = block;
14305
14306 SmallVector<string> statements;
14307
14308 // Capture all statements into our list.
14309 auto *old = redirect_statement;
14310 redirect_statement = &statements;
14311
14312 // Stamp out all blocks one after each other.
14313 while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0)
14314 {
14315 // Write out all instructions we have in this block.
14316 emit_block_instructions(*block);
14317
14318 // For plain branchless for/while continue blocks.
14319 if (block->next_block)
14320 {
14321 flush_phi(continue_block, block->next_block);
14322 block = &get<SPIRBlock>(block->next_block);
14323 }
		// For do-while blocks, the last block will be a selection block.
14325 else if (block->true_block && follow_true_block)
14326 {
14327 flush_phi(continue_block, block->true_block);
14328 block = &get<SPIRBlock>(block->true_block);
14329 }
14330 else if (block->false_block && follow_false_block)
14331 {
14332 flush_phi(continue_block, block->false_block);
14333 block = &get<SPIRBlock>(block->false_block);
14334 }
14335 else
14336 {
14337 SPIRV_CROSS_THROW("Invalid continue block detected!");
14338 }
14339 }
14340
14341 // Restore old pointer.
14342 redirect_statement = old;
14343
14344 // Somewhat ugly, strip off the last ';' since we use ',' instead.
14345 // Ideally, we should select this behavior in statement().
14346 for (auto &s : statements)
14347 {
14348 if (!s.empty() && s.back() == ';')
14349 s.erase(s.size() - 1, 1);
14350 }
14351
14352 current_continue_block = nullptr;
14353 return merge(statements);
14354}
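// Sketch of how the result is used (hypothetical loop): the captured statements have their
// trailing ';' stripped and are merged with ", ", so a continue block doing "i++;" and "j += 2;"
// becomes the third clause of the loop header:
//   for (int i = 0, j = 0; i < n; i++, j += 2) { ... }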
14355
14356void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block)
14357{
14358 // While loops do not take initializers, so declare all of them outside.
14359 for (auto &loop_var : block.loop_variables)
14360 {
14361 auto &var = get<SPIRVariable>(loop_var);
14362 statement(variable_decl(var), ";");
14363 }
14364}
14365
14366string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block)
14367{
14368 if (block.loop_variables.empty())
14369 return "";
14370
14371 bool same_types = for_loop_initializers_are_same_type(block);
14372 // We can only declare for loop initializers if all variables are of same type.
14373 // If we cannot do this, declare individual variables before the loop header.
14374
14375 // We might have a loop variable candidate which was not assigned to for some reason.
14376 uint32_t missing_initializers = 0;
14377 for (auto &variable : block.loop_variables)
14378 {
14379 uint32_t expr = get<SPIRVariable>(variable).static_expression;
14380
		// Sometimes loop variables are initialized with OpUndef, but we can just declare
		// a plain variable without an initializer in this case.
14383 if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
14384 missing_initializers++;
14385 }
14386
14387 if (block.loop_variables.size() == 1 && missing_initializers == 0)
14388 {
14389 return variable_decl(get<SPIRVariable>(block.loop_variables.front()));
14390 }
14391 else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size()))
14392 {
14393 for (auto &loop_var : block.loop_variables)
14394 statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
14395 return "";
14396 }
14397 else
14398 {
14399 // We have a mix of loop variables, either ones with a clear initializer, or ones without.
14400 // Separate the two streams.
14401 string expr;
14402
14403 for (auto &loop_var : block.loop_variables)
14404 {
14405 uint32_t static_expr = get<SPIRVariable>(loop_var).static_expression;
14406 if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef)
14407 {
14408 statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
14409 }
14410 else
14411 {
14412 auto &var = get<SPIRVariable>(loop_var);
14413 auto &type = get_variable_data_type(var);
14414 if (expr.empty())
14415 {
14416 // For loop initializers are of the form <type id = value, id = value, id = value, etc ...
14417 expr = join(to_qualifiers_glsl(var.self), type_to_glsl(type), " ");
14418 }
14419 else
14420 {
14421 expr += ", ";
14422 // In MSL, being based on C++, the asterisk marking a pointer
14423 // binds to the identifier, not the type.
14424 if (type.pointer)
14425 expr += "* ";
14426 }
14427
14428 expr += join(to_name(loop_var), " = ", to_pointer_expression(var.static_expression));
14429 }
14430 }
14431 return expr;
14432 }
14433}
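// Illustrative outcomes of the logic above (hypothetical variables):
//   a single initialized loop variable     -> "int i = 0" goes straight into the for-header
//   several variables of the same type     -> merged into "int i = 0, j = 10"
//   differing types, or none initialized   -> declared with statement() before the loop instead
//   a mix of initialized and uninitialized -> uninitialized ones are declared before the loop,
//                                             the rest are merged into the header expression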
14434
14435bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block)
14436{
14437 if (block.loop_variables.size() <= 1)
14438 return true;
14439
14440 uint32_t expected = 0;
14441 Bitset expected_flags;
14442 for (auto &var : block.loop_variables)
14443 {
14444 // Don't care about uninitialized variables as they will not be part of the initializers.
14445 uint32_t expr = get<SPIRVariable>(var).static_expression;
14446 if (expr == 0 || ir.ids[expr].get_type() == TypeUndef)
14447 continue;
14448
14449 if (expected == 0)
14450 {
14451 expected = get<SPIRVariable>(var).basetype;
14452 expected_flags = get_decoration_bitset(var);
14453 }
14454 else if (expected != get<SPIRVariable>(var).basetype)
14455 return false;
14456
14457 // Precision flags and things like that must also match.
14458 if (expected_flags != get_decoration_bitset(var))
14459 return false;
14460 }
14461
14462 return true;
14463}
14464
14465bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method)
14466{
14467 SPIRBlock::ContinueBlockType continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
14468
14469 if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
14470 {
14471 uint32_t current_count = statement_count;
14472 // If we're trying to create a true for loop,
14473 // we need to make sure that all opcodes before branch statement do not actually emit any code.
14474 // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
14475 emit_block_instructions(block);
14476
14477 bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries);
14478
14479 // This can work! We only did trivial things which could be forwarded in block body!
14480 if (current_count == statement_count && condition_is_temporary)
14481 {
14482 switch (continue_type)
14483 {
14484 case SPIRBlock::ForLoop:
14485 {
14486 // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
14487 flush_undeclared_variables(block);
14488
14489 // Important that we do this in this order because
14490 // emitting the continue block can invalidate the condition expression.
14491 auto initializer = emit_for_loop_initializers(block);
14492 auto condition = to_expression(block.condition);
14493
14494 // Condition might have to be inverted.
14495 if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
14496 condition = join("!", enclose_expression(condition));
14497
14498 emit_block_hints(block);
14499 if (method != SPIRBlock::MergeToSelectContinueForLoop)
14500 {
14501 auto continue_block = emit_continue_block(block.continue_block, false, false);
14502 statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
14503 }
14504 else
14505 statement("for (", initializer, "; ", condition, "; )");
14506 break;
14507 }
14508
14509 case SPIRBlock::WhileLoop:
14510 {
14511 // This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header.
14512 flush_undeclared_variables(block);
14513 emit_while_loop_initializers(block);
14514 emit_block_hints(block);
14515
14516 auto condition = to_expression(block.condition);
14517 // Condition might have to be inverted.
14518 if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
14519 condition = join("!", enclose_expression(condition));
14520
14521 statement("while (", condition, ")");
14522 break;
14523 }
14524
14525 default:
14526 block.disable_block_optimization = true;
14527 force_recompile();
14528 begin_scope(); // We'll see an end_scope() later.
14529 return false;
14530 }
14531
14532 begin_scope();
14533 return true;
14534 }
14535 else
14536 {
14537 block.disable_block_optimization = true;
14538 force_recompile();
14539 begin_scope(); // We'll see an end_scope() later.
14540 return false;
14541 }
14542 }
14543 else if (method == SPIRBlock::MergeToDirectForLoop)
14544 {
14545 auto &child = get<SPIRBlock>(block.next_block);
14546
14547 // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
14548 flush_undeclared_variables(child);
14549
14550 uint32_t current_count = statement_count;
14551
	// If we're trying to create a true for loop,
	// we need to make sure that all opcodes before the branch statement do not actually emit any code.
	// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
14555 emit_block_instructions(child);
14556
14557 bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries);
14558
14559 if (current_count == statement_count && condition_is_temporary)
14560 {
14561 uint32_t target_block = child.true_block;
14562
14563 switch (continue_type)
14564 {
14565 case SPIRBlock::ForLoop:
14566 {
14567 // Important that we do this in this order because
14568 // emitting the continue block can invalidate the condition expression.
14569 auto initializer = emit_for_loop_initializers(block);
14570 auto condition = to_expression(child.condition);
14571
14572 // Condition might have to be inverted.
14573 if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
14574 {
14575 condition = join("!", enclose_expression(condition));
14576 target_block = child.false_block;
14577 }
14578
14579 auto continue_block = emit_continue_block(block.continue_block, false, false);
14580 emit_block_hints(block);
14581 statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
14582 break;
14583 }
14584
14585 case SPIRBlock::WhileLoop:
14586 {
14587 emit_while_loop_initializers(block);
14588 emit_block_hints(block);
14589
14590 auto condition = to_expression(child.condition);
14591 // Condition might have to be inverted.
14592 if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block)))
14593 {
14594 condition = join("!", enclose_expression(condition));
14595 target_block = child.false_block;
14596 }
14597
14598 statement("while (", condition, ")");
14599 break;
14600 }
14601
14602 default:
14603 block.disable_block_optimization = true;
14604 force_recompile();
14605 begin_scope(); // We'll see an end_scope() later.
14606 return false;
14607 }
14608
14609 begin_scope();
14610 branch(child.self, target_block);
14611 return true;
14612 }
14613 else
14614 {
14615 block.disable_block_optimization = true;
14616 force_recompile();
14617 begin_scope(); // We'll see an end_scope() later.
14618 return false;
14619 }
14620 }
14621 else
14622 return false;
14623}
14624
14625void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block)
14626{
14627 for (auto &v : block.dominated_variables)
14628 flush_variable_declaration(v);
14629}
14630
14631void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<TypeID, ID>> &temporaries)
14632{
14633 // If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header.
14634 // Need to sort these to ensure that reference output is stable.
14635 sort(begin(temporaries), end(temporaries),
14636 [](const pair<TypeID, ID> &a, const pair<TypeID, ID> &b) { return a.second < b.second; });
14637
14638 for (auto &tmp : temporaries)
14639 {
14640 add_local_variable_name(tmp.second);
14641 auto &flags = ir.meta[tmp.second].decoration.decoration_flags;
14642 auto &type = get<SPIRType>(tmp.first);
14643
14644 // Not all targets support pointer literals, so don't bother with that case.
14645 string initializer;
14646 if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
14647 initializer = join(" = ", to_zero_initialized_expression(tmp.first));
14648
14649 statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), initializer, ";");
14650
14651 hoisted_temporaries.insert(tmp.second);
14652 forced_temporaries.insert(tmp.second);
14653
14654 // The temporary might be read from before it's assigned, set up the expression now.
14655 set<SPIRExpression>(tmp.second, to_name(tmp.second), tmp.first, true);
14656 }
14657}
14658
14659void CompilerGLSL::emit_block_chain(SPIRBlock &block)
14660{
14661 bool select_branch_to_true_block = false;
14662 bool select_branch_to_false_block = false;
14663 bool skip_direct_branch = false;
14664 bool emitted_loop_header_variables = false;
14665 bool force_complex_continue_block = false;
14666 ValueSaver<uint32_t> loop_level_saver(current_loop_level);
14667
14668 if (block.merge == SPIRBlock::MergeLoop)
14669 add_loop_level();
14670
14671 emit_hoisted_temporaries(block.declare_temporary);
14672
14673 SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone;
14674 if (block.continue_block)
14675 {
14676 continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
14677 // If we know we cannot emit a loop, mark the block early as a complex loop so we don't force unnecessary recompiles.
14678 if (continue_type == SPIRBlock::ComplexLoop)
14679 block.complex_continue = true;
14680 }
14681
	// If we have loop variables, stop masking out access to those variables now.
14683 for (auto var_id : block.loop_variables)
14684 {
14685 auto &var = get<SPIRVariable>(var_id);
14686 var.loop_variable_enable = true;
14687 // We're not going to declare the variable directly, so emit a copy here.
14688 emit_variable_temporary_copies(var);
14689 }
14690
14691 // Remember deferred declaration state. We will restore it before returning.
14692 SmallVector<bool, 64> rearm_dominated_variables(block.dominated_variables.size());
14693 for (size_t i = 0; i < block.dominated_variables.size(); i++)
14694 {
14695 uint32_t var_id = block.dominated_variables[i];
14696 auto &var = get<SPIRVariable>(var_id);
14697 rearm_dominated_variables[i] = var.deferred_declaration;
14698 }
14699
14700 // This is the method often used by spirv-opt to implement loops.
14701 // The loop header goes straight into the continue block.
	// However, don't attempt this on ESSL 1.0: if a loop variable is used in the continue block,
	// ESSL 1.0's restricted loop syntax cannot express the resulting structure, so this loop method will not work.
14704 if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop))
14705 {
14706 flush_undeclared_variables(block);
14707 if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop))
14708 {
14709 if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
14710 select_branch_to_false_block = true;
14711 else
14712 select_branch_to_true_block = true;
14713
14714 emitted_loop_header_variables = true;
14715 force_complex_continue_block = true;
14716 }
14717 }
14718 // This is the older loop behavior in glslang which branches to loop body directly from the loop header.
14719 else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop))
14720 {
14721 flush_undeclared_variables(block);
14722 if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop))
14723 {
	// The body of the while loop is actually just the true (or false) block, so always branch there unconditionally.
14725 if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block)))
14726 select_branch_to_false_block = true;
14727 else
14728 select_branch_to_true_block = true;
14729
14730 emitted_loop_header_variables = true;
14731 }
14732 }
	// This is the newer loop behavior in glslang which branches from the loop header directly to
	// a new block, which in turn has an OpBranchConditional without a selection merge.
14735 else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop))
14736 {
14737 flush_undeclared_variables(block);
14738 if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop))
14739 {
14740 skip_direct_branch = true;
14741 emitted_loop_header_variables = true;
14742 }
14743 }
14744 else if (continue_type == SPIRBlock::DoWhileLoop)
14745 {
14746 flush_undeclared_variables(block);
14747 emit_while_loop_initializers(block);
14748 emitted_loop_header_variables = true;
14749 // We have some temporaries where the loop header is the dominator.
14750 // We risk a case where we have code like:
14751 // for (;;) { create-temporary; break; } consume-temporary;
14752 // so force-declare temporaries here.
14753 emit_hoisted_temporaries(block.potential_declare_temporary);
14754 statement("do");
14755 begin_scope();
14756
14757 emit_block_instructions(block);
14758 }
14759 else if (block.merge == SPIRBlock::MergeLoop)
14760 {
14761 flush_undeclared_variables(block);
14762 emit_while_loop_initializers(block);
14763 emitted_loop_header_variables = true;
14764
14765 // We have a generic loop without any distinguishable pattern like for, while or do while.
14766 get<SPIRBlock>(block.continue_block).complex_continue = true;
14767 continue_type = SPIRBlock::ComplexLoop;
14768
14769 // We have some temporaries where the loop header is the dominator.
14770 // We risk a case where we have code like:
14771 // for (;;) { create-temporary; break; } consume-temporary;
14772 // so force-declare temporaries here.
14773 emit_hoisted_temporaries(block.potential_declare_temporary);
14774 emit_block_hints(block);
14775 statement("for (;;)");
14776 begin_scope();
14777
14778 emit_block_instructions(block);
14779 }
14780 else
14781 {
14782 emit_block_instructions(block);
14783 }
14784
	// If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem:
	// writes to those loop variables might have been masked out, so we need a recompile.
14787 if (!emitted_loop_header_variables && !block.loop_variables.empty())
14788 {
14789 force_recompile_guarantee_forward_progress();
14790 for (auto var : block.loop_variables)
14791 get<SPIRVariable>(var).loop_variable = false;
14792 block.loop_variables.clear();
14793 }
14794
14795 flush_undeclared_variables(block);
14796 bool emit_next_block = true;
14797
14798 // Handle end of block.
14799 switch (block.terminator)
14800 {
14801 case SPIRBlock::Direct:
14802 // True when emitting complex continue block.
14803 if (block.loop_dominator == block.next_block)
14804 {
14805 branch(block.self, block.next_block);
14806 emit_next_block = false;
14807 }
14808 // True if MergeToDirectForLoop succeeded.
14809 else if (skip_direct_branch)
14810 emit_next_block = false;
14811 else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block))
14812 {
14813 branch(block.self, block.next_block);
14814 emit_next_block = false;
14815 }
14816 break;
14817
14818 case SPIRBlock::Select:
14819 // True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded.
14820 if (select_branch_to_true_block)
14821 {
14822 if (force_complex_continue_block)
14823 {
14824 assert(block.true_block == block.continue_block);
14825
14826 // We're going to emit a continue block directly here, so make sure it's marked as complex.
14827 auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
14828 bool old_complex = complex_continue;
14829 complex_continue = true;
14830 branch(block.self, block.true_block);
14831 complex_continue = old_complex;
14832 }
14833 else
14834 branch(block.self, block.true_block);
14835 }
14836 else if (select_branch_to_false_block)
14837 {
14838 if (force_complex_continue_block)
14839 {
14840 assert(block.false_block == block.continue_block);
14841
14842 // We're going to emit a continue block directly here, so make sure it's marked as complex.
14843 auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
14844 bool old_complex = complex_continue;
14845 complex_continue = true;
14846 branch(block.self, block.false_block);
14847 complex_continue = old_complex;
14848 }
14849 else
14850 branch(block.self, block.false_block);
14851 }
14852 else
14853 branch(block.self, block.condition, block.true_block, block.false_block);
14854 break;
14855
14856 case SPIRBlock::MultiSelect:
14857 {
14858 auto &type = expression_type(block.condition);
14859 bool unsigned_case = type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort ||
14860 type.basetype == SPIRType::UByte || type.basetype == SPIRType::UInt64;
14861
14862 if (block.merge == SPIRBlock::MergeNone)
14863 SPIRV_CROSS_THROW("Switch statement is not structured");
14864
14865 if (!backend.support_64bit_switch && (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64))
14866 {
14867 // SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages.
14868 SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors.");
14869 }
14870
14871 const char *label_suffix = "";
14872 if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix)
14873 label_suffix = "u";
14874 else if (type.basetype == SPIRType::Int64 && backend.support_64bit_switch)
14875 label_suffix = "l";
14876 else if (type.basetype == SPIRType::UInt64 && backend.support_64bit_switch)
14877 label_suffix = "ul";
14878 else if (type.basetype == SPIRType::UShort)
14879 label_suffix = backend.uint16_t_literal_suffix;
14880 else if (type.basetype == SPIRType::Short)
14881 label_suffix = backend.int16_t_literal_suffix;
14882
14883 SPIRBlock *old_emitting_switch = current_emitting_switch;
14884 current_emitting_switch = &block;
14885
14886 if (block.need_ladder_break)
14887 statement("bool _", block.self, "_ladder_break = false;");
14888
14889 // Find all unique case constructs.
14890 unordered_map<uint32_t, SmallVector<uint64_t>> case_constructs;
14891 SmallVector<uint32_t> block_declaration_order;
14892 SmallVector<uint64_t> literals_to_merge;
14893
14894 // If a switch case branches to the default block for some reason, we can just remove that literal from consideration
14895 // and let the default: block handle it.
	// Section 2.11 of the SPIR-V spec states that fall-through cases must follow a very strict declaration order, which we can take advantage of here.
	// We only need to consider possible fallthrough if order[i] branches to order[i + 1].
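	// Hypothetical example: an OpSwitch with literals 1 -> %A, 2 -> %A, 3 -> %B is grouped below as
	//   case_constructs[%A] = { 1, 2 }, case_constructs[%B] = { 3 }, block_declaration_order = { %A, %B },
	// assuming neither %A nor %B is the merge block or the default block.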
14898 auto &cases = get_case_list(block);
14899 for (auto &c : cases)
14900 {
14901 if (c.block != block.next_block && c.block != block.default_block)
14902 {
14903 if (!case_constructs.count(c.block))
14904 block_declaration_order.push_back(c.block);
14905 case_constructs[c.block].push_back(c.value);
14906 }
14907 else if (c.block == block.next_block && block.default_block != block.next_block)
14908 {
14909 // We might have to flush phi inside specific case labels.
14910 // If we can piggyback on default:, do so instead.
14911 literals_to_merge.push_back(c.value);
14912 }
14913 }
14914
14915 // Empty literal array -> default.
14916 if (block.default_block != block.next_block)
14917 {
14918 auto &default_block = get<SPIRBlock>(block.default_block);
14919
14920 // We need to slide in the default block somewhere in this chain
14921 // if there are fall-through scenarios since the default is declared separately in OpSwitch.
14922 // Only consider trivial fall-through cases here.
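	// Hypothetical example: if case block %A falls through into the default block, we must end up emitting
	//   case 1: { /* %A */ }  // fall through
	//   default: { /* default block */ }
	// so the default block is spliced into the declaration order right after %A.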
14923 size_t num_blocks = block_declaration_order.size();
14924 bool injected_block = false;
14925
14926 for (size_t i = 0; i < num_blocks; i++)
14927 {
14928 auto &case_block = get<SPIRBlock>(block_declaration_order[i]);
14929 if (execution_is_direct_branch(case_block, default_block))
14930 {
14931 // Fallthrough to default block, we must inject the default block here.
14932 block_declaration_order.insert(begin(block_declaration_order) + i + 1, block.default_block);
14933 injected_block = true;
14934 break;
14935 }
14936 else if (execution_is_direct_branch(default_block, case_block))
14937 {
14938 // Default case is falling through to another case label, we must inject the default block here.
14939 block_declaration_order.insert(begin(block_declaration_order) + i, block.default_block);
14940 injected_block = true;
14941 break;
14942 }
14943 }
14944
14945 // Order does not matter.
14946 if (!injected_block)
14947 block_declaration_order.push_back(block.default_block);
14948 else if (is_legacy_es())
14949 SPIRV_CROSS_THROW("Default case label fallthrough to other case label is not supported in ESSL 1.0.");
14950
14951 case_constructs[block.default_block] = {};
14952 }
14953
14954 size_t num_blocks = block_declaration_order.size();
14955
14956 const auto to_case_label = [](uint64_t literal, uint32_t width, bool is_unsigned_case) -> string
14957 {
14958 if (is_unsigned_case)
14959 return convert_to_string(literal);
14960
	// For smaller widths, the literals are compiled as 32-bit wide
	// literals, so we don't need to handle every size specifically.
14963 if (width <= 32)
14964 {
14965 return convert_to_string(int64_t(int32_t(literal)));
14966 }
14967
14968 return convert_to_string(int64_t(literal));
14969 };
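	// For example, with a signed 32-bit selector the literal 0xFFFFFFFF is emitted as "-1",
	// while the same literal with an unsigned selector is emitted as "4294967295".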
14970
14971 const auto to_legacy_case_label = [&](uint32_t condition, const SmallVector<uint64_t> &labels,
14972 const char *suffix) -> string {
14973 string ret;
14974 size_t count = labels.size();
14975 for (size_t i = 0; i < count; i++)
14976 {
14977 if (i)
14978 ret += " || ";
14979 ret += join(count > 1 ? "(" : "", to_enclosed_expression(condition), " == ", labels[i], suffix,
14980 count > 1 ? ")" : "");
14981 }
14982 return ret;
14983 };
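	// For example, labels { 4, 7 } on a simple condition expression "x" with an empty suffix yield
	//   "(x == 4) || (x == 7)"
	// which feeds the if/else-if chain emitted for legacy targets instead of a real switch.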
14984
14985 // We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture,
14986 // we need to flush phi nodes outside the switch block in a branch,
14987 // and skip any Phi handling inside the case label to make fall-through work as expected.
14988 // This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this
14989 // inside the case label if at all possible.
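	// A minimal sketch of the resulting codegen (names hypothetical): for a fall-through target with literals { 3, 4 },
	//   if (x == 3 || x == 4) { /* flush phi for that case */ }
	// is emitted before the switch itself, and the case label then skips its own phi flush via ignore_phi_from_block.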
14990 for (size_t i = 1; backend.support_case_fallthrough && i < num_blocks; i++)
14991 {
14992 if (flush_phi_required(block.self, block_declaration_order[i]) &&
14993 flush_phi_required(block_declaration_order[i - 1], block_declaration_order[i]))
14994 {
14995 uint32_t target_block = block_declaration_order[i];
14996
14997 // Make sure we flush Phi, it might have been marked to be ignored earlier.
14998 get<SPIRBlock>(target_block).ignore_phi_from_block = 0;
14999
15000 auto &literals = case_constructs[target_block];
15001
15002 if (literals.empty())
15003 {
15004 // Oh boy, gotta make a complete negative test instead! o.o
15005 // Find all possible literals that would *not* make us enter the default block.
15006 // If none of those literals match, we flush Phi ...
15007 SmallVector<string> conditions;
15008 for (size_t j = 0; j < num_blocks; j++)
15009 {
15010 auto &negative_literals = case_constructs[block_declaration_order[j]];
15011 for (auto &case_label : negative_literals)
15012 conditions.push_back(join(to_enclosed_expression(block.condition),
15013 " != ", to_case_label(case_label, type.width, unsigned_case)));
15014 }
15015
15016 statement("if (", merge(conditions, " && "), ")");
15017 begin_scope();
15018 flush_phi(block.self, target_block);
15019 end_scope();
15020 }
15021 else
15022 {
15023 SmallVector<string> conditions;
15024 conditions.reserve(literals.size());
15025 for (auto &case_label : literals)
15026 conditions.push_back(join(to_enclosed_expression(block.condition),
15027 " == ", to_case_label(case_label, type.width, unsigned_case)));
15028 statement("if (", merge(conditions, " || "), ")");
15029 begin_scope();
15030 flush_phi(block.self, target_block);
15031 end_scope();
15032 }
15033
15034 // Mark the block so that we don't flush Phi from header to case label.
15035 get<SPIRBlock>(target_block).ignore_phi_from_block = block.self;
15036 }
15037 }
15038
15039 // If there is only one default block, and no cases, this is a case where SPIRV-opt decided to emulate
15040 // non-structured exits with the help of a switch block.
15041 // This is buggy on FXC, so just emit the logical equivalent of a do { } while(false), which is more idiomatic.
15042 bool degenerate_switch = block.default_block != block.merge_block && cases.empty();
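	// Sketch of the fallback emitted for such a degenerate switch (counter value hypothetical):
	//   do { /* default block */ } while(false);                                      // non-legacy targets
	//   for (int spvDummy7 = 0; spvDummy7 < 1; spvDummy7++) { /* default block */ }   // ESSL 1.0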
15043
15044 if (degenerate_switch || is_legacy_es())
15045 {
15046 // ESSL 1.0 is not guaranteed to support do/while.
15047 if (is_legacy_es())
15048 {
15049 uint32_t counter = statement_count;
15050 statement("for (int spvDummy", counter, " = 0; spvDummy", counter,
15051 " < 1; spvDummy", counter, "++)");
15052 }
15053 else
15054 statement("do");
15055 }
15056 else
15057 {
15058 emit_block_hints(block);
15059 statement("switch (", to_unpacked_expression(block.condition), ")");
15060 }
15061 begin_scope();
15062
15063 for (size_t i = 0; i < num_blocks; i++)
15064 {
15065 uint32_t target_block = block_declaration_order[i];
15066 auto &literals = case_constructs[target_block];
15067
15068 if (literals.empty())
15069 {
15070 // Default case.
15071 if (!degenerate_switch)
15072 {
15073 if (is_legacy_es())
15074 statement("else");
15075 else
15076 statement("default:");
15077 }
15078 }
15079 else
15080 {
15081 if (is_legacy_es())
15082 {
15083 statement((i ? "else " : ""), "if (", to_legacy_case_label(block.condition, literals, label_suffix),
15084 ")");
15085 }
15086 else
15087 {
15088 for (auto &case_literal : literals)
15089 {
15090 // The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here.
15091 statement("case ", to_case_label(case_literal, type.width, unsigned_case), label_suffix, ":");
15092 }
15093 }
15094 }
15095
15096 auto &case_block = get<SPIRBlock>(target_block);
15097 if (backend.support_case_fallthrough && i + 1 < num_blocks &&
15098 execution_is_direct_branch(case_block, get<SPIRBlock>(block_declaration_order[i + 1])))
15099 {
15100 // We will fall through here, so just terminate the block chain early.
15101 // We still need to deal with Phi potentially.
	// No need for a stack-like thing here since we only do fall-through when there is a
	// single trivial branch to the fall-through target.
15104 current_emitting_switch_fallthrough = true;
15105 }
15106 else
15107 current_emitting_switch_fallthrough = false;
15108
15109 if (!degenerate_switch)
15110 begin_scope();
15111 branch(block.self, target_block);
15112 if (!degenerate_switch)
15113 end_scope();
15114
15115 current_emitting_switch_fallthrough = false;
15116 }
15117
15118 // Might still have to flush phi variables if we branch from loop header directly to merge target.
15119 // This is supposed to emit all cases where we branch from header to merge block directly.
	// There are two main scenarios where we cannot rely on default fallthrough.
15121 // - There is an explicit default: label already.
15122 // In this case, literals_to_merge need to form their own "default" case, so that we avoid executing that block.
15123 // - Header -> Merge requires flushing PHI. In this case, we need to collect all cases and flush PHI there.
15124 bool header_merge_requires_phi = flush_phi_required(block.self, block.next_block);
15125 bool need_fallthrough_block = block.default_block == block.next_block || !literals_to_merge.empty();
15126 if ((header_merge_requires_phi && need_fallthrough_block) || !literals_to_merge.empty())
15127 {
15128 for (auto &case_literal : literals_to_merge)
15129 statement("case ", to_case_label(case_literal, type.width, unsigned_case), label_suffix, ":");
15130
15131 if (block.default_block == block.next_block)
15132 {
15133 if (is_legacy_es())
15134 statement("else");
15135 else
15136 statement("default:");
15137 }
15138
15139 begin_scope();
15140 flush_phi(block.self, block.next_block);
15141 statement("break;");
15142 end_scope();
15143 }
15144
15145 if (degenerate_switch && !is_legacy_es())
15146 end_scope_decl("while(false)");
15147 else
15148 end_scope();
15149
15150 if (block.need_ladder_break)
15151 {
15152 statement("if (_", block.self, "_ladder_break)");
15153 begin_scope();
15154 statement("break;");
15155 end_scope();
15156 }
15157
15158 current_emitting_switch = old_emitting_switch;
15159 break;
15160 }
15161
15162 case SPIRBlock::Return:
15163 {
15164 for (auto &line : current_function->fixup_hooks_out)
15165 line();
15166
15167 if (processing_entry_point)
15168 emit_fixup();
15169
15170 auto &cfg = get_cfg_for_current_function();
15171
15172 if (block.return_value)
15173 {
15174 auto &type = expression_type(block.return_value);
15175 if (!type.array.empty() && !backend.can_return_array)
15176 {
15177 // If we cannot return arrays, we will have a special out argument we can write to instead.
	// The backend is responsible for setting this up and redirecting the return values as appropriate.
15179 if (ir.ids[block.return_value].get_type() != TypeUndef)
15180 {
15181 emit_array_copy("spvReturnValue", 0, block.return_value, StorageClassFunction,
15182 get_expression_effective_storage_class(block.return_value));
15183 }
15184
15185 if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
15186 block.loop_dominator != BlockID(SPIRBlock::NoDominator))
15187 {
15188 statement("return;");
15189 }
15190 }
15191 else
15192 {
15193 // OpReturnValue can return Undef, so don't emit anything for this case.
15194 if (ir.ids[block.return_value].get_type() != TypeUndef)
15195 statement("return ", to_unpacked_expression(block.return_value), ";");
15196 }
15197 }
15198 else if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) ||
15199 block.loop_dominator != BlockID(SPIRBlock::NoDominator))
15200 {
15201 // If this block is the very final block and not called from control flow,
15202 // we do not need an explicit return which looks out of place. Just end the function here.
15203 // In the very weird case of for(;;) { return; } executing return is unconditional,
15204 // but we actually need a return here ...
15205 statement("return;");
15206 }
15207 break;
15208 }
15209
15210 // If the Kill is terminating a block with a (probably synthetic) return value, emit a return value statement.
15211 case SPIRBlock::Kill:
15212 statement(backend.discard_literal, ";");
15213 if (block.return_value)
15214 statement("return ", to_unpacked_expression(block.return_value), ";");
15215 break;
15216
15217 case SPIRBlock::Unreachable:
15218 emit_next_block = false;
15219 break;
15220
15221 case SPIRBlock::IgnoreIntersection:
15222 statement("ignoreIntersectionEXT;");
15223 break;
15224
15225 case SPIRBlock::TerminateRay:
15226 statement("terminateRayEXT;");
15227 break;
15228
15229 default:
15230 SPIRV_CROSS_THROW("Unimplemented block terminator.");
15231 }
15232
15233 if (block.next_block && emit_next_block)
15234 {
15235 // If we hit this case, we're dealing with an unconditional branch, which means we will output
15236 // that block after this. If we had selection merge, we already flushed phi variables.
15237 if (block.merge != SPIRBlock::MergeSelection)
15238 {
15239 flush_phi(block.self, block.next_block);
15240 // For a direct branch, need to remember to invalidate expressions in the next linear block instead.
15241 get<SPIRBlock>(block.next_block).invalidate_expressions = block.invalidate_expressions;
15242 }
15243
15244 // For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi.
15245 if (!current_emitting_switch_fallthrough)
15246 {
15247 // For merge selects we might have ignored the fact that a merge target
15248 // could have been a break; or continue;
15249 // We will need to deal with it here.
15250 if (is_loop_break(block.next_block))
15251 {
15252 // Cannot check for just break, because switch statements will also use break.
15253 assert(block.merge == SPIRBlock::MergeSelection);
15254 statement("break;");
15255 }
15256 else if (is_continue(block.next_block))
15257 {
15258 assert(block.merge == SPIRBlock::MergeSelection);
15259 branch_to_continue(block.self, block.next_block);
15260 }
15261 else if (BlockID(block.self) != block.next_block)
15262 emit_block_chain(get<SPIRBlock>(block.next_block));
15263 }
15264 }
15265
15266 if (block.merge == SPIRBlock::MergeLoop)
15267 {
15268 if (continue_type == SPIRBlock::DoWhileLoop)
15269 {
15270 // Make sure that we run the continue block to get the expressions set, but this
15271 // should become an empty string.
15272 // We have no fallbacks if we cannot forward everything to temporaries ...
15273 const auto &continue_block = get<SPIRBlock>(block.continue_block);
15274 bool positive_test = execution_is_noop(get<SPIRBlock>(continue_block.true_block),
15275 get<SPIRBlock>(continue_block.loop_dominator));
15276
15277 uint32_t current_count = statement_count;
15278 auto statements = emit_continue_block(block.continue_block, positive_test, !positive_test);
15279 if (statement_count != current_count)
15280 {
15281 // The DoWhile block has side effects, force ComplexLoop pattern next pass.
15282 get<SPIRBlock>(block.continue_block).complex_continue = true;
15283 force_recompile();
15284 }
15285
15286 // Might have to invert the do-while test here.
15287 auto condition = to_expression(continue_block.condition);
15288 if (!positive_test)
15289 condition = join("!", enclose_expression(condition));
15290
15291 end_scope_decl(join("while (", condition, ")"));
15292 }
15293 else
15294 end_scope();
15295
15296 loop_level_saver.release();
15297
15298 // We cannot break out of two loops at once, so don't check for break; here.
15299 // Using block.self as the "from" block isn't quite right, but it has the same scope
15300 // and dominance structure, so it's fine.
15301 if (is_continue(block.merge_block))
15302 branch_to_continue(block.self, block.merge_block);
15303 else
15304 emit_block_chain(get<SPIRBlock>(block.merge_block));
15305 }
15306
15307 // Forget about control dependent expressions now.
15308 block.invalidate_expressions.clear();
15309
15310 // After we return, we must be out of scope, so if we somehow have to re-emit this function,
15311 // re-declare variables if necessary.
15312 assert(rearm_dominated_variables.size() == block.dominated_variables.size());
15313 for (size_t i = 0; i < block.dominated_variables.size(); i++)
15314 {
15315 uint32_t var = block.dominated_variables[i];
15316 get<SPIRVariable>(var).deferred_declaration = rearm_dominated_variables[i];
15317 }
15318
15319 // Just like for deferred declaration, we need to forget about loop variable enable
15320 // if our block chain is reinstantiated later.
15321 for (auto &var_id : block.loop_variables)
15322 get<SPIRVariable>(var_id).loop_variable_enable = false;
15323}
15324
15325void CompilerGLSL::begin_scope()
15326{
15327 statement("{");
15328 indent++;
15329}
15330
15331void CompilerGLSL::end_scope()
15332{
15333 if (!indent)
15334 SPIRV_CROSS_THROW("Popping empty indent stack.");
15335 indent--;
15336 statement("}");
15337}
15338
15339void CompilerGLSL::end_scope(const string &trailer)
15340{
15341 if (!indent)
15342 SPIRV_CROSS_THROW("Popping empty indent stack.");
15343 indent--;
15344 statement("}", trailer);
15345}
15346
15347void CompilerGLSL::end_scope_decl()
15348{
15349 if (!indent)
15350 SPIRV_CROSS_THROW("Popping empty indent stack.");
15351 indent--;
15352 statement("};");
15353}
15354
15355void CompilerGLSL::end_scope_decl(const string &decl)
15356{
15357 if (!indent)
15358 SPIRV_CROSS_THROW("Popping empty indent stack.");
15359 indent--;
15360 statement("} ", decl, ";");
15361}
15362
15363void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length)
15364{
15365 // If our variable is remapped, and we rely on type-remapping information as
15366 // well, then we cannot pass the variable as a function parameter.
15367 // Fixing this is non-trivial without stamping out variants of the same function,
15368 // so for now warn about this and suggest workarounds instead.
15369 for (uint32_t i = 0; i < length; i++)
15370 {
15371 auto *var = maybe_get<SPIRVariable>(args[i]);
15372 if (!var || !var->remapped_variable)
15373 continue;
15374
15375 auto &type = get<SPIRType>(var->basetype);
15376 if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
15377 {
15378 SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. "
15379 "This will not work correctly because type-remapping information is lost. "
15380 "To workaround, please consider not passing the subpass input as a function parameter, "
15381 "or use in/out variables instead which do not need type remapping information.");
15382 }
15383 }
15384}
15385
15386const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr)
15387{
15388 // FIXME: This is kind of hacky. There should be a cleaner way.
15389 auto offset = uint32_t(&instr - current_emitting_block->ops.data());
15390 if ((offset + 1) < current_emitting_block->ops.size())
15391 return &current_emitting_block->ops[offset + 1];
15392 else
15393 return nullptr;
15394}
15395
15396uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics)
15397{
15398 return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask |
15399 MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask |
15400 MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask);
15401}
15402
15403void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t, uint32_t rhs_id, StorageClass, StorageClass)
15404{
15405 statement(lhs, " = ", to_expression(rhs_id), ";");
15406}
15407
15408bool CompilerGLSL::unroll_array_to_complex_store(uint32_t target_id, uint32_t source_id)
15409{
15410 if (!backend.force_gl_in_out_block)
15411 return false;
15412 // This path is only relevant for GL backends.
15413
15414 auto *var = maybe_get<SPIRVariable>(target_id);
15415 if (!var || var->storage != StorageClassOutput)
15416 return false;
15417
15418 if (!is_builtin_variable(*var) || BuiltIn(get_decoration(var->self, DecorationBuiltIn)) != BuiltInSampleMask)
15419 return false;
15420
15421 auto &type = expression_type(source_id);
15422 string array_expr;
15423 if (type.array_size_literal.back())
15424 {
15425 array_expr = convert_to_string(type.array.back());
15426 if (type.array.back() == 0)
15427 SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
15428 }
15429 else
15430 array_expr = to_expression(type.array.back());
15431
15432 SPIRType target_type;
15433 target_type.basetype = SPIRType::Int;
15434
15435 statement("for (int i = 0; i < int(", array_expr, "); i++)");
15436 begin_scope();
15437 statement(to_expression(target_id), "[i] = ",
15438 bitcast_expression(target_type, type.basetype, join(to_expression(source_id), "[i]")),
15439 ";");
15440 end_scope();
15441
15442 return true;
15443}
15444
15445void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr)
15446{
15447 if (!backend.force_gl_in_out_block)
15448 return;
15449 // This path is only relevant for GL backends.
15450
15451 auto *var = maybe_get<SPIRVariable>(source_id);
15452 if (!var)
15453 return;
15454
15455 if (var->storage != StorageClassInput && var->storage != StorageClassOutput)
15456 return;
15457
15458 auto &type = get_variable_data_type(*var);
15459 if (type.array.empty())
15460 return;
15461
15462 auto builtin = BuiltIn(get_decoration(var->self, DecorationBuiltIn));
15463 bool is_builtin = is_builtin_variable(*var) &&
15464 (builtin == BuiltInPointSize ||
15465 builtin == BuiltInPosition ||
15466 builtin == BuiltInSampleMask);
15467 bool is_tess = is_tessellation_shader();
15468 bool is_patch = has_decoration(var->self, DecorationPatch);
15469 bool is_sample_mask = is_builtin && builtin == BuiltInSampleMask;
15470
	// Tessellation input arrays are special in that they are unsized, so we cannot directly copy from them.
15472 // We must unroll the array load.
15473 // For builtins, we couldn't catch this case normally,
15474 // because this is resolved in the OpAccessChain in most cases.
15475 // If we load the entire array, we have no choice but to unroll here.
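	// A rough sketch of the unrolled load emitted below (identifiers are illustrative):
	//   vec4 _74_unrolled[N];
	//   for (int i = 0; i < int(N); i++)
	//       _74_unrolled[i] = gl_in[i].gl_Position;   // builtin case; plain inputs copy expr[i] instead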
15476 if (!is_patch && (is_builtin || is_tess))
15477 {
15478 auto new_expr = join("_", target_id, "_unrolled");
15479 statement(variable_decl(type, new_expr, target_id), ";");
15480 string array_expr;
15481 if (type.array_size_literal.back())
15482 {
15483 array_expr = convert_to_string(type.array.back());
15484 if (type.array.back() == 0)
15485 SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array.");
15486 }
15487 else
15488 array_expr = to_expression(type.array.back());
15489
15490 // The array size might be a specialization constant, so use a for-loop instead.
15491 statement("for (int i = 0; i < int(", array_expr, "); i++)");
15492 begin_scope();
15493 if (is_builtin && !is_sample_mask)
15494 statement(new_expr, "[i] = gl_in[i].", expr, ";");
15495 else if (is_sample_mask)
15496 {
15497 SPIRType target_type;
15498 target_type.basetype = SPIRType::Int;
15499 statement(new_expr, "[i] = ", bitcast_expression(target_type, type.basetype, join(expr, "[i]")), ";");
15500 }
15501 else
15502 statement(new_expr, "[i] = ", expr, "[i];");
15503 end_scope();
15504
15505 expr = move(new_expr);
15506 }
15507}
15508
15509void CompilerGLSL::cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type)
15510{
15511 // We will handle array cases elsewhere.
15512 if (!expr_type.array.empty())
15513 return;
15514
15515 auto *var = maybe_get_backing_variable(source_id);
15516 if (var)
15517 source_id = var->self;
15518
15519 // Only interested in standalone builtin variables.
15520 if (!has_decoration(source_id, DecorationBuiltIn))
15521 return;
15522
15523 auto builtin = static_cast<BuiltIn>(get_decoration(source_id, DecorationBuiltIn));
15524 auto expected_type = expr_type.basetype;
15525
15526 // TODO: Fill in for more builtins.
15527 switch (builtin)
15528 {
15529 case BuiltInLayer:
15530 case BuiltInPrimitiveId:
15531 case BuiltInViewportIndex:
15532 case BuiltInInstanceId:
15533 case BuiltInInstanceIndex:
15534 case BuiltInVertexId:
15535 case BuiltInVertexIndex:
15536 case BuiltInSampleId:
15537 case BuiltInBaseVertex:
15538 case BuiltInBaseInstance:
15539 case BuiltInDrawIndex:
15540 case BuiltInFragStencilRefEXT:
15541 case BuiltInInstanceCustomIndexNV:
15542 case BuiltInSampleMask:
15543 case BuiltInPrimitiveShadingRateKHR:
15544 case BuiltInShadingRateKHR:
15545 expected_type = SPIRType::Int;
15546 break;
15547
15548 case BuiltInGlobalInvocationId:
15549 case BuiltInLocalInvocationId:
15550 case BuiltInWorkgroupId:
15551 case BuiltInLocalInvocationIndex:
15552 case BuiltInWorkgroupSize:
15553 case BuiltInNumWorkgroups:
15554 case BuiltInIncomingRayFlagsNV:
15555 case BuiltInLaunchIdNV:
15556 case BuiltInLaunchSizeNV:
15557 expected_type = SPIRType::UInt;
15558 break;
15559
15560 default:
15561 break;
15562 }
15563
15564 if (expected_type != expr_type.basetype)
15565 expr = bitcast_expression(expr_type, expected_type, expr);
15566}
15567
15568void CompilerGLSL::cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type)
15569{
15570 auto *var = maybe_get_backing_variable(target_id);
15571 if (var)
15572 target_id = var->self;
15573
15574 // Only interested in standalone builtin variables.
15575 if (!has_decoration(target_id, DecorationBuiltIn))
15576 return;
15577
15578 auto builtin = static_cast<BuiltIn>(get_decoration(target_id, DecorationBuiltIn));
15579 auto expected_type = expr_type.basetype;
15580
15581 // TODO: Fill in for more builtins.
15582 switch (builtin)
15583 {
15584 case BuiltInLayer:
15585 case BuiltInPrimitiveId:
15586 case BuiltInViewportIndex:
15587 case BuiltInFragStencilRefEXT:
15588 case BuiltInSampleMask:
15589 case BuiltInPrimitiveShadingRateKHR:
15590 case BuiltInShadingRateKHR:
15591 expected_type = SPIRType::Int;
15592 break;
15593
15594 default:
15595 break;
15596 }
15597
15598 if (expected_type != expr_type.basetype)
15599 {
15600 auto type = expr_type;
15601 type.basetype = expected_type;
15602 expr = bitcast_expression(type, expr_type.basetype, expr);
15603 }
15604}
15605
15606void CompilerGLSL::convert_non_uniform_expression(string &expr, uint32_t ptr_id)
15607{
15608 if (*backend.nonuniform_qualifier == '\0')
15609 return;
15610
15611 auto *var = maybe_get_backing_variable(ptr_id);
15612 if (!var)
15613 return;
15614
15615 if (var->storage != StorageClassUniformConstant &&
15616 var->storage != StorageClassStorageBuffer &&
15617 var->storage != StorageClassUniform)
15618 return;
15619
15620 auto &backing_type = get<SPIRType>(var->basetype);
15621 if (backing_type.array.empty())
15622 return;
15623
15624 // If we get here, we know we're accessing an arrayed resource which
15625 // might require nonuniform qualifier.
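	// For example (assuming the GLSL backend, where the qualifier is nonuniformEXT), an expression like
	//   uTextures[index]  ->  uTextures[nonuniformEXT(index)]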
15626
15627 auto start_array_index = expr.find_first_of('[');
15628
15629 if (start_array_index == string::npos)
15630 return;
15631
15632 // We've opened a bracket, track expressions until we can close the bracket.
15633 // This must be our resource index.
15634 size_t end_array_index = string::npos;
15635 unsigned bracket_count = 1;
15636 for (size_t index = start_array_index + 1; index < expr.size(); index++)
15637 {
15638 if (expr[index] == ']')
15639 {
15640 if (--bracket_count == 0)
15641 {
15642 end_array_index = index;
15643 break;
15644 }
15645 }
15646 else if (expr[index] == '[')
15647 bracket_count++;
15648 }
15649
15650 assert(bracket_count == 0);
15651
15652 // Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's
15653 // nothing we can do here to express that.
15654 if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index)
15655 return;
15656
15657 start_array_index++;
15658
15659 expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(",
15660 expr.substr(start_array_index, end_array_index - start_array_index), ")",
15661 expr.substr(end_array_index, string::npos));
15662}
15663
15664void CompilerGLSL::emit_block_hints(const SPIRBlock &block)
15665{
15666 if ((options.es && options.version < 310) || (!options.es && options.version < 140))
15667 return;
15668
15669 switch (block.hint)
15670 {
15671 case SPIRBlock::HintFlatten:
15672 require_extension_internal("GL_EXT_control_flow_attributes");
15673 statement("SPIRV_CROSS_FLATTEN");
15674 break;
15675 case SPIRBlock::HintDontFlatten:
15676 require_extension_internal("GL_EXT_control_flow_attributes");
15677 statement("SPIRV_CROSS_BRANCH");
15678 break;
15679 case SPIRBlock::HintUnroll:
15680 require_extension_internal("GL_EXT_control_flow_attributes");
15681 statement("SPIRV_CROSS_UNROLL");
15682 break;
15683 case SPIRBlock::HintDontUnroll:
15684 require_extension_internal("GL_EXT_control_flow_attributes");
15685 statement("SPIRV_CROSS_LOOP");
15686 break;
15687 default:
15688 break;
15689 }
15690}
15691
15692void CompilerGLSL::preserve_alias_on_reset(uint32_t id)
15693{
15694 preserved_aliases[id] = get_name(id);
15695}
15696
15697void CompilerGLSL::reset_name_caches()
15698{
15699 for (auto &preserved : preserved_aliases)
15700 set_name(preserved.first, preserved.second);
15701
15702 preserved_aliases.clear();
15703 resource_names.clear();
15704 block_input_names.clear();
15705 block_output_names.clear();
15706 block_ubo_names.clear();
15707 block_ssbo_names.clear();
15708 block_names.clear();
15709 function_overloads.clear();
15710}
15711
15712void CompilerGLSL::fixup_type_alias()
15713{
15714 // Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists.
15715 ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) {
15716 if (!type.type_alias)
15717 return;
15718
15719 if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))
15720 {
15721 // Top-level block types should never alias anything else.
15722 type.type_alias = 0;
15723 }
15724 else if (type_is_block_like(type) && type.self == ID(self))
15725 {
15726 // A block-like type is any type which contains Offset decoration, but not top-level blocks,
15727 // i.e. blocks which are placed inside buffers.
15728 // Become the master.
15729 ir.for_each_typed_id<SPIRType>([&](uint32_t other_id, SPIRType &other_type) {
15730 if (other_id == self)
15731 return;
15732
15733 if (other_type.type_alias == type.type_alias)
15734 other_type.type_alias = self;
15735 });
15736
15737 this->get<SPIRType>(type.type_alias).type_alias = self;
15738 type.type_alias = 0;
15739 }
15740 });
15741}
15742
15743void CompilerGLSL::reorder_type_alias()
15744{
15745 // Reorder declaration of types so that the master of the type alias is always emitted first.
	// We need this in case a type B depends on type A (A must come before it in the vector), but A is an alias of a type ABuffer, which
	// means the declaration of A doesn't happen (yet), and the order would be B, ABuffer and not ABuffer, B. Fix this up here.
15748 auto loop_lock = ir.create_loop_hard_lock();
15749
15750 auto &type_ids = ir.ids_for_type[TypeType];
15751 for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr)
15752 {
15753 auto &type = get<SPIRType>(*alias_itr);
15754 if (type.type_alias != TypeID(0) &&
15755 !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
15756 {
15757 // We will skip declaring this type, so make sure the type_alias type comes before.
15758 auto master_itr = find(begin(type_ids), end(type_ids), ID(type.type_alias));
15759 assert(master_itr != end(type_ids));
15760
15761 if (alias_itr < master_itr)
15762 {
15763 // Must also swap the type order for the constant-type joined array.
15764 auto &joined_types = ir.ids_for_constant_or_type;
15765 auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr);
15766 auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr);
15767 assert(alt_alias_itr != end(joined_types));
15768 assert(alt_master_itr != end(joined_types));
15769
15770 swap(*alias_itr, *master_itr);
15771 swap(*alt_alias_itr, *alt_master_itr);
15772 }
15773 }
15774 }
15775}
15776
15777void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal)
15778{
15779 // If we are redirecting statements, ignore the line directive.
15780 // Common case here is continue blocks.
15781 if (redirect_statement)
15782 return;
15783
15784 if (options.emit_line_directives)
15785 {
15786 require_extension_internal("GL_GOOGLE_cpp_style_line_directive");
15787 statement_no_indent("#line ", line_literal, " \"", get<SPIRString>(file_id).str, "\"");
15788 }
15789}
15790
15791void CompilerGLSL::emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id,
15792 SmallVector<uint32_t> chain)
15793{
15794 // Fully unroll all member/array indices one by one.
15795
15796 auto &lhs_type = get<SPIRType>(lhs_type_id);
15797 auto &rhs_type = get<SPIRType>(rhs_type_id);
15798
15799 if (!lhs_type.array.empty())
15800 {
	// Could use a loop here to support specialization constants, but it gets rather complicated with nested array types,
	// and this is a rather obscure opcode anyway, so keep it simple unless we are forced to.
15803 uint32_t array_size = to_array_size_literal(lhs_type);
15804 chain.push_back(0);
15805
15806 for (uint32_t i = 0; i < array_size; i++)
15807 {
15808 chain.back() = i;
15809 emit_copy_logical_type(lhs_id, lhs_type.parent_type, rhs_id, rhs_type.parent_type, chain);
15810 }
15811 }
15812 else if (lhs_type.basetype == SPIRType::Struct)
15813 {
15814 chain.push_back(0);
15815 uint32_t member_count = uint32_t(lhs_type.member_types.size());
15816 for (uint32_t i = 0; i < member_count; i++)
15817 {
15818 chain.back() = i;
15819 emit_copy_logical_type(lhs_id, lhs_type.member_types[i], rhs_id, rhs_type.member_types[i], chain);
15820 }
15821 }
15822 else
15823 {
15824 // Need to handle unpack/packing fixups since this can differ wildly between the logical types,
15825 // particularly in MSL.
15826 // To deal with this, we emit access chains and go through emit_store_statement
15827 // to deal with all the special cases we can encounter.
15828
15829 AccessChainMeta lhs_meta, rhs_meta;
15830 auto lhs = access_chain_internal(lhs_id, chain.data(), uint32_t(chain.size()),
15831 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &lhs_meta);
15832 auto rhs = access_chain_internal(rhs_id, chain.data(), uint32_t(chain.size()),
15833 ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &rhs_meta);
15834
15835 uint32_t id = ir.increase_bound_by(2);
15836 lhs_id = id;
15837 rhs_id = id + 1;
15838
15839 {
15840 auto &lhs_expr = set<SPIRExpression>(lhs_id, move(lhs), lhs_type_id, true);
15841 lhs_expr.need_transpose = lhs_meta.need_transpose;
15842
15843 if (lhs_meta.storage_is_packed)
15844 set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypePacked);
15845 if (lhs_meta.storage_physical_type != 0)
15846 set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypeID, lhs_meta.storage_physical_type);
15847
15848 forwarded_temporaries.insert(lhs_id);
15849 suppressed_usage_tracking.insert(lhs_id);
15850 }
15851
15852 {
15853 auto &rhs_expr = set<SPIRExpression>(rhs_id, move(rhs), rhs_type_id, true);
15854 rhs_expr.need_transpose = rhs_meta.need_transpose;
15855
15856 if (rhs_meta.storage_is_packed)
15857 set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypePacked);
15858 if (rhs_meta.storage_physical_type != 0)
15859 set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypeID, rhs_meta.storage_physical_type);
15860
15861 forwarded_temporaries.insert(rhs_id);
15862 suppressed_usage_tracking.insert(rhs_id);
15863 }
15864
15865 emit_store_statement(lhs_id, rhs_id);
15866 }
15867}
15868
15869bool CompilerGLSL::subpass_input_is_framebuffer_fetch(uint32_t id) const
15870{
15871 if (!has_decoration(id, DecorationInputAttachmentIndex))
15872 return false;
15873
15874 uint32_t input_attachment_index = get_decoration(id, DecorationInputAttachmentIndex);
15875 for (auto &remap : subpass_to_framebuffer_fetch_attachment)
15876 if (remap.first == input_attachment_index)
15877 return true;
15878
15879 return false;
15880}
15881
15882const SPIRVariable *CompilerGLSL::find_subpass_input_by_attachment_index(uint32_t index) const
15883{
15884 const SPIRVariable *ret = nullptr;
15885 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
15886 if (has_decoration(var.self, DecorationInputAttachmentIndex) &&
15887 get_decoration(var.self, DecorationInputAttachmentIndex) == index)
15888 {
15889 ret = &var;
15890 }
15891 });
15892 return ret;
15893}
15894
15895const SPIRVariable *CompilerGLSL::find_color_output_by_location(uint32_t location) const
15896{
15897 const SPIRVariable *ret = nullptr;
15898 ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
15899 if (var.storage == StorageClassOutput && get_decoration(var.self, DecorationLocation) == location)
15900 ret = &var;
15901 });
15902 return ret;
15903}
15904
15905void CompilerGLSL::emit_inout_fragment_outputs_copy_to_subpass_inputs()
15906{
15907 for (auto &remap : subpass_to_framebuffer_fetch_attachment)
15908 {
15909 auto *subpass_var = find_subpass_input_by_attachment_index(remap.first);
15910 auto *output_var = find_color_output_by_location(remap.second);
15911 if (!subpass_var)
15912 continue;
15913 if (!output_var)
15914 SPIRV_CROSS_THROW("Need to declare the corresponding fragment output variable to be able "
15915 "to read from it.");
15916 if (is_array(get<SPIRType>(output_var->basetype)))
15917 SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_framebuffer_fetch with arrays of color outputs.");
15918
15919 auto &func = get<SPIRFunction>(get_entry_point().self);
15920 func.fixup_hooks_in.push_back([=]() {
15921 if (is_legacy())
15922 {
15923 statement(to_expression(subpass_var->self), " = ", "gl_LastFragData[",
15924 get_decoration(output_var->self, DecorationLocation), "];");
15925 }
15926 else
15927 {
15928 uint32_t num_rt_components = this->get<SPIRType>(output_var->basetype).vecsize;
15929 statement(to_expression(subpass_var->self), vector_swizzle(num_rt_components, 0), " = ",
15930 to_expression(output_var->self), ";");
15931 }
15932 });
15933 }
15934}
15935
15936bool CompilerGLSL::variable_is_depth_or_compare(VariableID id) const
15937{
15938 return is_depth_image(get<SPIRType>(get<SPIRVariable>(id).basetype), id);
15939}
15940
15941const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c)
15942{
15943 static const char *const retval[CandidateCount] = { "GL_KHR_shader_subgroup_ballot",
15944 "GL_KHR_shader_subgroup_basic",
15945 "GL_KHR_shader_subgroup_vote",
15946 "GL_NV_gpu_shader_5",
15947 "GL_NV_shader_thread_group",
15948 "GL_NV_shader_thread_shuffle",
15949 "GL_ARB_shader_ballot",
15950 "GL_ARB_shader_group_vote",
15951 "GL_AMD_gcn_shader" };
15952 return retval[c];
15953}
15954
15955SmallVector<std::string> CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_names(Candidate c)
15956{
15957 switch (c)
15958 {
15959 case ARB_shader_ballot:
15960 return { "GL_ARB_shader_int64" };
15961 case AMD_gcn_shader:
15962 return { "GL_AMD_gpu_shader_int64", "GL_NV_gpu_shader5" };
15963 default:
15964 return {};
15965 }
15966}
15967
15968const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_predicate(Candidate c)
15969{
15970 switch (c)
15971 {
15972 case ARB_shader_ballot:
15973 return "defined(GL_ARB_shader_int64)";
15974 case AMD_gcn_shader:
15975 return "(defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))";
15976 default:
15977 return "";
15978 }
15979}
15980
15981CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSubgroupSupportHelper::
15982 get_feature_dependencies(Feature feature)
15983{
15984 switch (feature)
15985 {
15986 case SubgroupAllEqualT:
15987 return { SubgroupBroadcast_First, SubgroupAll_Any_AllEqualBool };
15988 case SubgroupElect:
15989 return { SubgroupBallotFindLSB_MSB, SubgroupBallot, SubgroupInvocationID };
15990 case SubgroupInverseBallot_InclBitCount_ExclBitCout:
15991 return { SubgroupMask };
15992 case SubgroupBallotBitCount:
15993 return { SubgroupBallot };
15994 default:
15995 return {};
15996 }
15997}
15998
15999CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::
16000 get_feature_dependency_mask(Feature feature)
16001{
16002 return build_mask(get_feature_dependencies(feature));
16003}
16004
16005bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature)
16006{
16007 static const bool retval[FeatureCount] = { false, false, false, false, false, false,
	                                           true, // SubgroupBallotFindLSB_MSB
16009 false, false, false, false,
16010 true, // SubgroupMemBarrier - replaced with workgroup memory barriers
16011 false, false, true, false };
16012
16013 return retval[feature];
16014}
16015
16016CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgroupSupportHelper::
16017 get_KHR_extension_for_feature(Feature feature)
16018{
16019 static const Candidate extensions[FeatureCount] = {
16020 KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
16021 KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote,
16022 KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic,
16023 KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot
16024 };
16025
16026 return extensions[feature];
16027}
16028
16029void CompilerGLSL::ShaderSubgroupSupportHelper::request_feature(Feature feature)
16030{
16031 feature_mask |= (FeatureMask(1) << feature) | get_feature_dependency_mask(feature);
16032}
16033
16034bool CompilerGLSL::ShaderSubgroupSupportHelper::is_feature_requested(Feature feature) const
16035{
16036 return (feature_mask & (1u << feature)) != 0;
16037}
16038
16039CompilerGLSL::ShaderSubgroupSupportHelper::Result CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const
16040{
16041 Result res;
16042
16043 for (uint32_t i = 0u; i < FeatureCount; ++i)
16044 {
16045 if (feature_mask & (1u << i))
16046 {
16047 auto feature = static_cast<Feature>(i);
16048 std::unordered_set<uint32_t> unique_candidates;
16049
16050 auto candidates = get_candidates_for_feature(feature);
16051 unique_candidates.insert(candidates.begin(), candidates.end());
16052
16053 auto deps = get_feature_dependencies(feature);
16054 for (Feature d : deps)
16055 {
16056 candidates = get_candidates_for_feature(d);
16057 if (!candidates.empty())
16058 unique_candidates.insert(candidates.begin(), candidates.end());
16059 }
16060
16061 for (uint32_t c : unique_candidates)
16062 ++res.weights[static_cast<Candidate>(c)];
16063 }
16064 }
16065
16066 return res;
16067}
16068
16069CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
16070 get_candidates_for_feature(Feature ft, const Result &r)
16071{
16072 auto c = get_candidates_for_feature(ft);
16073 auto cmp = [&r](Candidate a, Candidate b) {
16074 if (r.weights[a] == r.weights[b])
16075 return a < b; // Prefer candidates with lower enum value
16076 return r.weights[a] > r.weights[b];
16077 };
16078 std::sort(c.begin(), c.end(), cmp);
16079 return c;
16080}
16081
16082CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper::
16083 get_candidates_for_feature(Feature feature)
16084{
16085 switch (feature)
16086 {
16087 case SubgroupMask:
16088 return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
16089 case SubgroupSize:
16090 return { KHR_shader_subgroup_basic, NV_shader_thread_group, AMD_gcn_shader, ARB_shader_ballot };
16091 case SubgroupInvocationID:
16092 return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot };
16093 case SubgroupID:
16094 return { KHR_shader_subgroup_basic, NV_shader_thread_group };
16095 case NumSubgroups:
16096 return { KHR_shader_subgroup_basic, NV_shader_thread_group };
16097 case SubgroupBroadcast_First:
16098 return { KHR_shader_subgroup_ballot, NV_shader_thread_shuffle, ARB_shader_ballot };
16099 case SubgroupBallotFindLSB_MSB:
16100 return { KHR_shader_subgroup_ballot, NV_shader_thread_group };
16101 case SubgroupAll_Any_AllEqualBool:
16102 return { KHR_shader_subgroup_vote, NV_gpu_shader_5, ARB_shader_group_vote, AMD_gcn_shader };
16103 case SubgroupAllEqualT:
16104 return {}; // depends on other features only
16105 case SubgroupElect:
16106 return {}; // depends on other features only
16107 case SubgroupBallot:
16108 return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot };
16109 case SubgroupBarrier:
16110 return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot, AMD_gcn_shader };
16111 case SubgroupMemBarrier:
16112 return { KHR_shader_subgroup_basic };
16113 case SubgroupInverseBallot_InclBitCount_ExclBitCout:
16114 return {};
16115 case SubgroupBallotBitExtract:
16116 return { NV_shader_thread_group };
16117 case SubgroupBallotBitCount:
16118 return {};
16119 default:
16120 return {};
16121 }
16122}
16123
16124CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::build_mask(
16125 const SmallVector<Feature> &features)
16126{
16127 FeatureMask mask = 0;
16128 for (Feature f : features)
16129 mask |= FeatureMask(1) << f;
16130 return mask;
16131}
16132
16133CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result()
16134{
16135 for (auto &weight : weights)
16136 weight = 0;
16137
	// Make sure KHR_shader_subgroup extensions are always preferred.
16139 const uint32_t big_num = FeatureCount;
16140 weights[KHR_shader_subgroup_ballot] = big_num;
16141 weights[KHR_shader_subgroup_basic] = big_num;
16142 weights[KHR_shader_subgroup_vote] = big_num;
16143}
16144
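// Records a loaded type which must go through the spvWorkaroundRowMajor() wrapper and
// forces a recompile so the wrapper overload can be declared on the next pass.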
16145void CompilerGLSL::request_workaround_wrapper_overload(TypeID id)
16146{
16147 // Must be ordered to maintain deterministic output, so vector is appropriate.
16148 if (find(begin(workaround_ubo_load_overload_types), end(workaround_ubo_load_overload_types), id) ==
16149 end(workaround_ubo_load_overload_types))
16150 {
16151 force_recompile();
16152 workaround_ubo_load_overload_types.push_back(id);
16153 }
16154}
16155
16156void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr)
16157{
16158 // Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic.
	// To load these types correctly, we must first wrap them in a dummy function whose only purpose is to
	// ensure the row_major decoration is actually respected.
16161 auto *var = maybe_get_backing_variable(ptr);
16162 if (!var)
16163 return;
16164
16165 auto &backing_type = get<SPIRType>(var->basetype);
16166 bool is_ubo = backing_type.basetype == SPIRType::Struct && backing_type.storage == StorageClassUniform &&
16167 has_decoration(backing_type.self, DecorationBlock);
16168 if (!is_ubo)
16169 return;
16170
16171 auto *type = &get<SPIRType>(loaded_type);
16172 bool rewrite = false;
16173
16174 if (is_matrix(*type))
16175 {
16176 // To avoid adding a lot of unnecessary meta tracking to forward the row_major state,
16177 // we will simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state.
16178 // If there is any row-major action going on, we apply the workaround.
16179 // It is harmless to apply the workaround to column-major matrices, so this is still a valid solution.
		// If an access chain occurred, the workaround is not required, so loads of vectors or scalars do not need the workaround.
16181 type = &backing_type;
16182 }
16183
16184 if (type->basetype == SPIRType::Struct)
16185 {
16186 // If we're loading a struct where any member is a row-major matrix, apply the workaround.
16187 for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++)
16188 {
16189 if (combined_decoration_for_member(*type, i).get(DecorationRowMajor))
16190 {
16191 rewrite = true;
16192 break;
16193 }
16194 }
16195 }
16196
16197 if (rewrite)
16198 {
16199 request_workaround_wrapper_overload(loaded_type);
16200 expr = join("spvWorkaroundRowMajor(", expr, ")");
16201 }
16202}
16203
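// Stage output masking: outputs registered here (by location/component or by builtin)
// are reported as masked by the is_stage_output_*_masked() queries below.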
16204void CompilerGLSL::mask_stage_output_by_location(uint32_t location, uint32_t component)
16205{
16206 masked_output_locations.insert({ location, component });
16207}
16208
16209void CompilerGLSL::mask_stage_output_by_builtin(BuiltIn builtin)
16210{
16211 masked_output_builtins.insert(builtin);
16212}
16213
16214bool CompilerGLSL::is_stage_output_variable_masked(const SPIRVariable &var) const
16215{
16216 auto &type = get<SPIRType>(var.basetype);
16217 bool is_block = has_decoration(type.self, DecorationBlock);
16218 // Blocks by themselves are never masked. Must be masked per-member.
16219 if (is_block)
16220 return false;
16221
16222 bool is_builtin = has_decoration(var.self, DecorationBuiltIn);
16223
16224 if (is_builtin)
16225 {
16226 return is_stage_output_builtin_masked(BuiltIn(get_decoration(var.self, DecorationBuiltIn)));
16227 }
16228 else
16229 {
16230 if (!has_decoration(var.self, DecorationLocation))
16231 return false;
16232
16233 return is_stage_output_location_masked(
16234 get_decoration(var.self, DecorationLocation),
16235 get_decoration(var.self, DecorationComponent));
16236 }
16237}
16238
16239bool CompilerGLSL::is_stage_output_block_member_masked(const SPIRVariable &var, uint32_t index, bool strip_array) const
16240{
16241 auto &type = get<SPIRType>(var.basetype);
16242 bool is_block = has_decoration(type.self, DecorationBlock);
16243 if (!is_block)
16244 return false;
16245
16246 BuiltIn builtin = BuiltInMax;
16247 if (is_member_builtin(type, index, &builtin))
16248 {
16249 return is_stage_output_builtin_masked(builtin);
16250 }
16251 else
16252 {
16253 uint32_t location = get_declared_member_location(var, index, strip_array);
16254 uint32_t component = get_member_decoration(type.self, index, DecorationComponent);
16255 return is_stage_output_location_masked(location, component);
16256 }
16257}
16258
16259bool CompilerGLSL::is_stage_output_location_masked(uint32_t location, uint32_t component) const
16260{
16261 return masked_output_locations.count({ location, component }) != 0;
16262}
16263
16264bool CompilerGLSL::is_stage_output_builtin_masked(spv::BuiltIn builtin) const
16265{
16266 return masked_output_builtins.count(builtin) != 0;
16267}
16268
16269uint32_t CompilerGLSL::get_declared_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const
16270{
16271 auto &block_type = get<SPIRType>(var.basetype);
16272 if (has_member_decoration(block_type.self, mbr_idx, DecorationLocation))
16273 return get_member_decoration(block_type.self, mbr_idx, DecorationLocation);
16274 else
16275 return get_accumulated_member_location(var, mbr_idx, strip_array);
16276}
16277
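// For a block member without an explicit Location decoration, derive its location by
// starting from the variable's Location and accumulating the location counts of all
// preceding members, restarting whenever an earlier member has its own Location.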
16278uint32_t CompilerGLSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const
16279{
16280 auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var);
16281 uint32_t location = get_decoration(var.self, DecorationLocation);
16282
16283 for (uint32_t i = 0; i < mbr_idx; i++)
16284 {
16285 auto &mbr_type = get<SPIRType>(type.member_types[i]);
16286
16287 // Start counting from any place we have a new location decoration.
		if (has_member_decoration(type.self, i, DecorationLocation))
			location = get_member_decoration(type.self, i, DecorationLocation);
16290
16291 uint32_t location_count = type_to_location_count(mbr_type);
16292 location += location_count;
16293 }
16294
16295 return location;
16296}
16297
16298StorageClass CompilerGLSL::get_expression_effective_storage_class(uint32_t ptr)
16299{
16300 auto *var = maybe_get_backing_variable(ptr);
16301
16302 // If the expression has been lowered to a temporary, we need to use the Generic storage class.
16303 // We're looking for the effective storage class of a given expression.
16304 // An access chain or forwarded OpLoads from such access chains
16305 // will generally have the storage class of the underlying variable, but if the load was not forwarded
16306 // we have lost any address space qualifiers.
16307 bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get<SPIRExpression>(ptr).access_chain &&
16308 (forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0);
16309
16310 if (var && !forced_temporary)
16311 {
16312 if (variable_decl_is_remapped_storage(*var, StorageClassWorkgroup))
16313 return StorageClassWorkgroup;
16314 if (variable_decl_is_remapped_storage(*var, StorageClassStorageBuffer))
16315 return StorageClassStorageBuffer;
16316
16317 // Normalize SSBOs to StorageBuffer here.
16318 if (var->storage == StorageClassUniform &&
16319 has_decoration(get<SPIRType>(var->basetype).self, DecorationBufferBlock))
16320 return StorageClassStorageBuffer;
16321 else
16322 return var->storage;
16323 }
16324 else
16325 return expression_type(ptr).storage;
16326}
16327
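// Location counting as used above: structs sum the counts of their members, matrices
// consume one location per column, any other type consumes one, and each array
// dimension multiplies the result by its literal size.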
16328uint32_t CompilerGLSL::type_to_location_count(const SPIRType &type) const
16329{
16330 uint32_t count;
16331 if (type.basetype == SPIRType::Struct)
16332 {
16333 uint32_t mbr_count = uint32_t(type.member_types.size());
16334 count = 0;
16335 for (uint32_t i = 0; i < mbr_count; i++)
16336 count += type_to_location_count(get<SPIRType>(type.member_types[i]));
16337 }
16338 else
16339 {
16340 count = type.columns > 1 ? type.columns : 1;
16341 }
16342
16343 uint32_t dim_count = uint32_t(type.array.size());
16344 for (uint32_t i = 0; i < dim_count; i++)
16345 count *= to_array_size_literal(type, i);
16346
16347 return count;
16348}
16349