1 | /* |
2 | * Copyright 2015-2021 Arm Limited |
3 | * SPDX-License-Identifier: Apache-2.0 OR MIT |
4 | * |
5 | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | * you may not use this file except in compliance with the License. |
7 | * You may obtain a copy of the License at |
8 | * |
9 | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | * |
11 | * Unless required by applicable law or agreed to in writing, software |
12 | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | * See the License for the specific language governing permissions and |
15 | * limitations under the License. |
16 | */ |
17 | |
18 | /* |
19 | * At your option, you may choose to accept this material under either: |
20 | * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or |
21 | * 2. The MIT License, found at <http://opensource.org/licenses/MIT>. |
22 | */ |
23 | |
24 | #include "spirv_cross.hpp" |
25 | #include "GLSL.std.450.h" |
26 | #include "spirv_cfg.hpp" |
27 | #include "spirv_common.hpp" |
28 | #include "spirv_parser.hpp" |
29 | #include <algorithm> |
30 | #include <cstring> |
31 | #include <utility> |
32 | |
33 | using namespace std; |
34 | using namespace spv; |
35 | using namespace SPIRV_CROSS_NAMESPACE; |
36 | |
37 | Compiler::Compiler(vector<uint32_t> ir_) |
38 | { |
39 | Parser parser(move(ir_)); |
40 | parser.parse(); |
41 | set_ir(move(parser.get_parsed_ir())); |
42 | } |
43 | |
44 | Compiler::Compiler(const uint32_t *ir_, size_t word_count) |
45 | { |
46 | Parser parser(ir_, word_count); |
47 | parser.parse(); |
48 | set_ir(move(parser.get_parsed_ir())); |
49 | } |
50 | |
51 | Compiler::Compiler(const ParsedIR &ir_) |
52 | { |
53 | set_ir(ir_); |
54 | } |
55 | |
56 | Compiler::Compiler(ParsedIR &&ir_) |
57 | { |
58 | set_ir(move(ir_)); |
59 | } |
60 | |
61 | void Compiler::set_ir(ParsedIR &&ir_) |
62 | { |
63 | ir = move(ir_); |
64 | parse_fixup(); |
65 | } |
66 | |
67 | void Compiler::set_ir(const ParsedIR &ir_) |
68 | { |
69 | ir = ir_; |
70 | parse_fixup(); |
71 | } |
72 | |
73 | string Compiler::compile() |
74 | { |
75 | return "" ; |
76 | } |
77 | |
78 | bool Compiler::variable_storage_is_aliased(const SPIRVariable &v) |
79 | { |
80 | auto &type = get<SPIRType>(v.basetype); |
81 | bool ssbo = v.storage == StorageClassStorageBuffer || |
82 | ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); |
83 | bool image = type.basetype == SPIRType::Image; |
84 | bool counter = type.basetype == SPIRType::AtomicCounter; |
85 | bool buffer_reference = type.storage == StorageClassPhysicalStorageBufferEXT; |
86 | |
87 | bool is_restrict; |
88 | if (ssbo) |
89 | is_restrict = ir.get_buffer_block_flags(v).get(DecorationRestrict); |
90 | else |
91 | is_restrict = has_decoration(v.self, DecorationRestrict); |
92 | |
93 | return !is_restrict && (ssbo || image || counter || buffer_reference); |
94 | } |
95 | |
// Returns true if executing the block has no side effects observable outside
// the current invocation's function-local state: no stores to non-local
// memory, no atomics, barriers, vertex emission, ray-tracing state changes,
// and no calls to impure functions.
bool Compiler::block_is_pure(const SPIRBlock &block)
{
	// This is a global side effect of the function.
	if (block.terminator == SPIRBlock::Kill ||
	    block.terminator == SPIRBlock::TerminateRay ||
	    block.terminator == SPIRBlock::IgnoreIntersection)
		return false;

	for (auto &i : block.ops)
	{
		auto ops = stream(i);
		auto op = static_cast<Op>(i.op);

		switch (op)
		{
		case OpFunctionCall:
		{
			// Purity is transitive: calling an impure function makes this block impure.
			uint32_t func = ops[2];
			if (!function_is_pure(get<SPIRFunction>(func)))
				return false;
			break;
		}

		case OpCopyMemory:
		case OpStore:
		{
			// Only writes to function-local storage are invisible from outside.
			auto &type = expression_type(ops[0]);
			if (type.storage != StorageClassFunction)
				return false;
			break;
		}

		case OpImageWrite:
			return false;

		// Atomics are impure.
		case OpAtomicLoad:
		case OpAtomicStore:
		case OpAtomicExchange:
		case OpAtomicCompareExchange:
		case OpAtomicCompareExchangeWeak:
		case OpAtomicIIncrement:
		case OpAtomicIDecrement:
		case OpAtomicIAdd:
		case OpAtomicISub:
		case OpAtomicSMin:
		case OpAtomicUMin:
		case OpAtomicSMax:
		case OpAtomicUMax:
		case OpAtomicAnd:
		case OpAtomicOr:
		case OpAtomicXor:
			return false;

		// Geometry shader builtins modify global state.
		case OpEndPrimitive:
		case OpEmitStreamVertex:
		case OpEndStreamPrimitive:
		case OpEmitVertex:
			return false;

		// Barriers disallow any reordering, so we should treat blocks with barrier as writing.
		case OpControlBarrier:
		case OpMemoryBarrier:
			return false;

		// Ray tracing builtins are impure.
		case OpReportIntersectionKHR:
		case OpIgnoreIntersectionNV:
		case OpTerminateRayNV:
		case OpTraceNV:
		case OpTraceRayKHR:
		case OpExecuteCallableNV:
		case OpExecuteCallableKHR:
		case OpRayQueryInitializeKHR:
		case OpRayQueryTerminateKHR:
		case OpRayQueryGenerateIntersectionKHR:
		case OpRayQueryConfirmIntersectionKHR:
		case OpRayQueryProceedKHR:
			// There are various getters in ray query, but they are considered pure.
			return false;

		// OpExtInst is potentially impure depending on extension, but GLSL builtins are at least pure.

		case OpDemoteToHelperInvocationEXT:
			// This is a global side effect of the function.
			return false;

		case OpExtInst:
		{
			uint32_t extension_set = ops[2];
			if (get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
			{
				auto op_450 = static_cast<GLSLstd450>(ops[3]);
				switch (op_450)
				{
				// Modf and Frexp write their secondary result through a pointer
				// operand (ops[5]); that is only pure when the pointee is local.
				case GLSLstd450Modf:
				case GLSLstd450Frexp:
				{
					auto &type = expression_type(ops[5]);
					if (type.storage != StorageClassFunction)
						return false;
					break;
				}

				default:
					break;
				}
			}
			break;
		}

		default:
			break;
		}
	}

	return true;
}
215 | |
216 | string Compiler::to_name(uint32_t id, bool allow_alias) const |
217 | { |
218 | if (allow_alias && ir.ids[id].get_type() == TypeType) |
219 | { |
220 | // If this type is a simple alias, emit the |
221 | // name of the original type instead. |
222 | // We don't want to override the meta alias |
223 | // as that can be overridden by the reflection APIs after parse. |
224 | auto &type = get<SPIRType>(id); |
225 | if (type.type_alias) |
226 | { |
227 | // If the alias master has been specially packed, we will have emitted a clean variant as well, |
228 | // so skip the name aliasing here. |
229 | if (!has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) |
230 | return to_name(type.type_alias); |
231 | } |
232 | } |
233 | |
234 | auto &alias = ir.get_name(id); |
235 | if (alias.empty()) |
236 | return join("_" , id); |
237 | else |
238 | return alias; |
239 | } |
240 | |
241 | bool Compiler::function_is_pure(const SPIRFunction &func) |
242 | { |
243 | for (auto block : func.blocks) |
244 | { |
245 | if (!block_is_pure(get<SPIRBlock>(block))) |
246 | { |
247 | //fprintf(stderr, "Function %s is impure!\n", to_name(func.self).c_str()); |
248 | return false; |
249 | } |
250 | } |
251 | |
252 | //fprintf(stderr, "Function %s is pure!\n", to_name(func.self).c_str()); |
253 | return true; |
254 | } |
255 | |
// Walks a block and records, on every non-local variable read by it
// (OpLoad / OpImageRead), that expression |id| depends on that variable.
// Recurses through function calls so reads inside callees are attributed
// to |id| as well.
void Compiler::register_global_read_dependencies(const SPIRBlock &block, uint32_t id)
{
	for (auto &i : block.ops)
	{
		auto ops = stream(i);
		auto op = static_cast<Op>(i.op);

		switch (op)
		{
		case OpFunctionCall:
		{
			uint32_t func = ops[2];
			register_global_read_dependencies(get<SPIRFunction>(func), id);
			break;
		}

		case OpLoad:
		case OpImageRead:
		{
			// If we're in a storage class which does not get invalidated, adding dependencies here is no big deal.
			auto *var = maybe_get_backing_variable(ops[2]);
			if (var && var->storage != StorageClassFunction)
			{
				auto &type = get<SPIRType>(var->basetype);

				// InputTargets are immutable.
				// NOTE(review): the condition below skips dependency tracking
				// for *every* image type, not only subpass inputs — presumably
				// intentional, but confirm against upstream behavior.
				if (type.basetype != SPIRType::Image && type.image.dim != DimSubpassData)
					var->dependees.push_back(id);
			}
			break;
		}

		default:
			break;
		}
	}
}
293 | |
294 | void Compiler::register_global_read_dependencies(const SPIRFunction &func, uint32_t id) |
295 | { |
296 | for (auto block : func.blocks) |
297 | register_global_read_dependencies(get<SPIRBlock>(block), id); |
298 | } |
299 | |
300 | SPIRVariable *Compiler::maybe_get_backing_variable(uint32_t chain) |
301 | { |
302 | auto *var = maybe_get<SPIRVariable>(chain); |
303 | if (!var) |
304 | { |
305 | auto *cexpr = maybe_get<SPIRExpression>(chain); |
306 | if (cexpr) |
307 | var = maybe_get<SPIRVariable>(cexpr->loaded_from); |
308 | |
309 | auto *access_chain = maybe_get<SPIRAccessChain>(chain); |
310 | if (access_chain) |
311 | var = maybe_get<SPIRVariable>(access_chain->loaded_from); |
312 | } |
313 | |
314 | return var; |
315 | } |
316 | |
317 | void Compiler::register_read(uint32_t expr, uint32_t chain, bool forwarded) |
318 | { |
319 | auto &e = get<SPIRExpression>(expr); |
320 | auto *var = maybe_get_backing_variable(chain); |
321 | |
322 | if (var) |
323 | { |
324 | e.loaded_from = var->self; |
325 | |
326 | // If the backing variable is immutable, we do not need to depend on the variable. |
327 | if (forwarded && !is_immutable(var->self)) |
328 | var->dependees.push_back(e.self); |
329 | |
330 | // If we load from a parameter, make sure we create "inout" if we also write to the parameter. |
331 | // The default is "in" however, so we never invalidate our compilation by reading. |
332 | if (var && var->parameter) |
333 | var->parameter->read_count++; |
334 | } |
335 | } |
336 | |
// Records that memory reachable through |chain| has been written.
// Any forwarded expression which depends on the affected variable(s) is
// invalidated so that it will be re-emitted from a fresh load.
void Compiler::register_write(uint32_t chain)
{
	auto *var = maybe_get<SPIRVariable>(chain);
	if (!var)
	{
		// If we're storing through an access chain, invalidate the backing variable instead.
		auto *expr = maybe_get<SPIRExpression>(chain);
		if (expr && expr->loaded_from)
			var = maybe_get<SPIRVariable>(expr->loaded_from);

		auto *access_chain = maybe_get<SPIRAccessChain>(chain);
		if (access_chain && access_chain->loaded_from)
			var = maybe_get<SPIRVariable>(access_chain->loaded_from);
	}

	auto &chain_type = expression_type(chain);

	if (var)
	{
		bool check_argument_storage_qualifier = true;
		// NOTE: this is the same lookup as chain_type above, kept as written.
		auto &type = expression_type(chain);

		// If our variable is in a storage class which can alias with other buffers,
		// invalidate all variables which depend on aliased variables. And if this is a
		// variable pointer, then invalidate all variables regardless.
		if (get_variable_data_type(*var).pointer)
		{
			flush_all_active_variables();

			if (type.pointer_depth == 1)
			{
				// We have a backing variable which is a pointer-to-pointer type.
				// We are storing some data through a pointer acquired through that variable,
				// but we are not writing to the value of the variable itself,
				// i.e., we are not modifying the pointer directly.
				// If we are storing a non-pointer type (pointer_depth == 1),
				// we know that we are storing some unrelated data.
				// A case here would be
				// void foo(Foo * const *arg) {
				//   Foo *bar = *arg;
				//   bar->unrelated = 42;
				// }
				// arg, the argument is constant.
				check_argument_storage_qualifier = false;
			}
		}

		// Aliased storage means other variables may observe the write as well.
		if (type.storage == StorageClassPhysicalStorageBufferEXT || variable_storage_is_aliased(*var))
			flush_all_aliased_variables();
		else if (var)
			flush_dependees(*var);

		// We tried to write to a parameter which is not marked with out qualifier, force a recompile.
		if (check_argument_storage_qualifier && var->parameter && var->parameter->write_count == 0)
		{
			var->parameter->write_count++;
			force_recompile();
		}
	}
	else if (chain_type.pointer)
	{
		// If we stored through a variable pointer, then we don't know which
		// variable we stored to. So *all* expressions after this point need to
		// be invalidated.
		// FIXME: If we can prove that the variable pointer will point to
		// only certain variables, we can invalidate only those.
		flush_all_active_variables();
	}

	// If chain_type.pointer is false, we're not writing to memory backed variables, but temporaries instead.
	// This can happen in copy_logical_type where we unroll complex reads and writes to temporaries.
}
409 | |
410 | void Compiler::flush_dependees(SPIRVariable &var) |
411 | { |
412 | for (auto expr : var.dependees) |
413 | invalid_expressions.insert(expr); |
414 | var.dependees.clear(); |
415 | } |
416 | |
417 | void Compiler::flush_all_aliased_variables() |
418 | { |
419 | for (auto aliased : aliased_variables) |
420 | flush_dependees(get<SPIRVariable>(aliased)); |
421 | } |
422 | |
423 | void Compiler::flush_all_atomic_capable_variables() |
424 | { |
425 | for (auto global : global_variables) |
426 | flush_dependees(get<SPIRVariable>(global)); |
427 | flush_all_aliased_variables(); |
428 | } |
429 | |
430 | void Compiler::flush_control_dependent_expressions(uint32_t block_id) |
431 | { |
432 | auto &block = get<SPIRBlock>(block_id); |
433 | for (auto &expr : block.invalidate_expressions) |
434 | invalid_expressions.insert(expr); |
435 | block.invalidate_expressions.clear(); |
436 | } |
437 | |
438 | void Compiler::flush_all_active_variables() |
439 | { |
440 | // Invalidate all temporaries we read from variables in this block since they were forwarded. |
441 | // Invalidate all temporaries we read from globals. |
442 | for (auto &v : current_function->local_variables) |
443 | flush_dependees(get<SPIRVariable>(v)); |
444 | for (auto &arg : current_function->arguments) |
445 | flush_dependees(get<SPIRVariable>(arg.id)); |
446 | for (auto global : global_variables) |
447 | flush_dependees(get<SPIRVariable>(global)); |
448 | |
449 | flush_all_aliased_variables(); |
450 | } |
451 | |
// Resolves the type ID of any expression-like ID, regardless of which IR
// object kind backs it. Throws if the ID kind cannot carry a type.
uint32_t Compiler::expression_type_id(uint32_t id) const
{
	switch (ir.ids[id].get_type())
	{
	case TypeVariable:
		// Variables are typed by their (pointer) base type.
		return get<SPIRVariable>(id).basetype;

	case TypeExpression:
		return get<SPIRExpression>(id).expression_type;

	case TypeConstant:
		return get<SPIRConstant>(id).constant_type;

	case TypeConstantOp:
		return get<SPIRConstantOp>(id).basetype;

	case TypeUndef:
		return get<SPIRUndef>(id).basetype;

	case TypeCombinedImageSampler:
		return get<SPIRCombinedImageSampler>(id).combined_type;

	case TypeAccessChain:
		return get<SPIRAccessChain>(id).basetype;

	default:
		SPIRV_CROSS_THROW("Cannot resolve expression type.");
	}
}
481 | |
482 | const SPIRType &Compiler::expression_type(uint32_t id) const |
483 | { |
484 | return get<SPIRType>(expression_type_id(id)); |
485 | } |
486 | |
487 | bool Compiler::expression_is_lvalue(uint32_t id) const |
488 | { |
489 | auto &type = expression_type(id); |
490 | switch (type.basetype) |
491 | { |
492 | case SPIRType::SampledImage: |
493 | case SPIRType::Image: |
494 | case SPIRType::Sampler: |
495 | return false; |
496 | |
497 | default: |
498 | return true; |
499 | } |
500 | } |
501 | |
502 | bool Compiler::is_immutable(uint32_t id) const |
503 | { |
504 | if (ir.ids[id].get_type() == TypeVariable) |
505 | { |
506 | auto &var = get<SPIRVariable>(id); |
507 | |
508 | // Anything we load from the UniformConstant address space is guaranteed to be immutable. |
509 | bool pointer_to_const = var.storage == StorageClassUniformConstant; |
510 | return pointer_to_const || var.phi_variable || !expression_is_lvalue(id); |
511 | } |
512 | else if (ir.ids[id].get_type() == TypeAccessChain) |
513 | return get<SPIRAccessChain>(id).immutable; |
514 | else if (ir.ids[id].get_type() == TypeExpression) |
515 | return get<SPIRExpression>(id).immutable; |
516 | else if (ir.ids[id].get_type() == TypeConstant || ir.ids[id].get_type() == TypeConstantOp || |
517 | ir.ids[id].get_type() == TypeUndef) |
518 | return true; |
519 | else |
520 | return false; |
521 | } |
522 | |
523 | static inline bool storage_class_is_interface(spv::StorageClass storage) |
524 | { |
525 | switch (storage) |
526 | { |
527 | case StorageClassInput: |
528 | case StorageClassOutput: |
529 | case StorageClassUniform: |
530 | case StorageClassUniformConstant: |
531 | case StorageClassAtomicCounter: |
532 | case StorageClassPushConstant: |
533 | case StorageClassStorageBuffer: |
534 | return true; |
535 | |
536 | default: |
537 | return false; |
538 | } |
539 | } |
540 | |
541 | bool Compiler::is_hidden_variable(const SPIRVariable &var, bool include_builtins) const |
542 | { |
543 | if ((is_builtin_variable(var) && !include_builtins) || var.remapped_variable) |
544 | return true; |
545 | |
546 | // Combined image samplers are always considered active as they are "magic" variables. |
547 | if (find_if(begin(combined_image_samplers), end(combined_image_samplers), [&var](const CombinedImageSampler &samp) { |
548 | return samp.combined_id == var.self; |
549 | }) != end(combined_image_samplers)) |
550 | { |
551 | return false; |
552 | } |
553 | |
554 | // In SPIR-V 1.4 and up we must also use the active variable interface to disable global variables |
555 | // which are not part of the entry point. |
556 | if (ir.get_spirv_version() >= 0x10400 && var.storage != spv::StorageClassGeneric && |
557 | var.storage != spv::StorageClassFunction && !interface_variable_exists_in_entry_point(var.self)) |
558 | { |
559 | return true; |
560 | } |
561 | |
562 | return check_active_interface_variables && storage_class_is_interface(var.storage) && |
563 | active_interface_variables.find(var.self) == end(active_interface_variables); |
564 | } |
565 | |
566 | bool Compiler::is_builtin_type(const SPIRType &type) const |
567 | { |
568 | auto *type_meta = ir.find_meta(type.self); |
569 | |
570 | // We can have builtin structs as well. If one member of a struct is builtin, the struct must also be builtin. |
571 | if (type_meta) |
572 | for (auto &m : type_meta->members) |
573 | if (m.builtin) |
574 | return true; |
575 | |
576 | return false; |
577 | } |
578 | |
579 | bool Compiler::is_builtin_variable(const SPIRVariable &var) const |
580 | { |
581 | auto *m = ir.find_meta(var.self); |
582 | |
583 | if (var.compat_builtin || (m && m->decoration.builtin)) |
584 | return true; |
585 | else |
586 | return is_builtin_type(get<SPIRType>(var.basetype)); |
587 | } |
588 | |
589 | bool Compiler::is_member_builtin(const SPIRType &type, uint32_t index, BuiltIn *builtin) const |
590 | { |
591 | auto *type_meta = ir.find_meta(type.self); |
592 | |
593 | if (type_meta) |
594 | { |
595 | auto &memb = type_meta->members; |
596 | if (index < memb.size() && memb[index].builtin) |
597 | { |
598 | if (builtin) |
599 | *builtin = memb[index].builtin_type; |
600 | return true; |
601 | } |
602 | } |
603 | |
604 | return false; |
605 | } |
606 | |
607 | bool Compiler::is_scalar(const SPIRType &type) const |
608 | { |
609 | return type.basetype != SPIRType::Struct && type.vecsize == 1 && type.columns == 1; |
610 | } |
611 | |
612 | bool Compiler::is_vector(const SPIRType &type) const |
613 | { |
614 | return type.vecsize > 1 && type.columns == 1; |
615 | } |
616 | |
617 | bool Compiler::is_matrix(const SPIRType &type) const |
618 | { |
619 | return type.vecsize > 1 && type.columns > 1; |
620 | } |
621 | |
622 | bool Compiler::is_array(const SPIRType &type) const |
623 | { |
624 | return !type.array.empty(); |
625 | } |
626 | |
627 | ShaderResources Compiler::get_shader_resources() const |
628 | { |
629 | return get_shader_resources(nullptr); |
630 | } |
631 | |
632 | ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> &active_variables) const |
633 | { |
634 | return get_shader_resources(&active_variables); |
635 | } |
636 | |
// Instruction visitor which collects every interface variable (Input/Output/
// Uniform/PushConstant/SSBO/atomic counter, per storage_class_is_interface)
// referenced by the traversed opcodes into |variables|.
// Returns false only on malformed SPIR-V (instruction shorter than expected).
bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
{
	uint32_t variable = 0;
	switch (opcode)
	{
	// Need this first, otherwise, GCC complains about unhandled switch statements.
	default:
		break;

	case OpFunctionCall:
	{
		// Invalid SPIR-V.
		if (length < 3)
			return false;

		// Any interface variable passed directly as a call argument counts as accessed.
		uint32_t count = length - 3;
		args += 3;
		for (uint32_t i = 0; i < count; i++)
		{
			auto *var = compiler.maybe_get<SPIRVariable>(args[i]);
			if (var && storage_class_is_interface(var->storage))
				variables.insert(args[i]);
		}
		break;
	}

	case OpSelect:
	{
		// Invalid SPIR-V.
		if (length < 5)
			return false;

		// Selecting between pointers may select interface variables directly.
		uint32_t count = length - 3;
		args += 3;
		for (uint32_t i = 0; i < count; i++)
		{
			auto *var = compiler.maybe_get<SPIRVariable>(args[i]);
			if (var && storage_class_is_interface(var->storage))
				variables.insert(args[i]);
		}
		break;
	}

	case OpPhi:
	{
		// Invalid SPIR-V.
		if (length < 2)
			return false;

		// Phi operands come in (value, parent-block) pairs; only the values matter here.
		uint32_t count = length - 2;
		args += 2;
		for (uint32_t i = 0; i < count; i += 2)
		{
			auto *var = compiler.maybe_get<SPIRVariable>(args[i]);
			if (var && storage_class_is_interface(var->storage))
				variables.insert(args[i]);
		}
		break;
	}

	case OpAtomicStore:
	case OpStore:
		// Invalid SPIR-V.
		if (length < 1)
			return false;
		// The store target pointer is operand 0.
		variable = args[0];
		break;

	case OpCopyMemory:
	{
		if (length < 2)
			return false;

		// Both the destination and the source pointer may be interface variables.
		auto *var = compiler.maybe_get<SPIRVariable>(args[0]);
		if (var && storage_class_is_interface(var->storage))
			variables.insert(args[0]);

		var = compiler.maybe_get<SPIRVariable>(args[1]);
		if (var && storage_class_is_interface(var->storage))
			variables.insert(args[1]);
		break;
	}

	case OpExtInst:
	{
		if (length < 5)
			return false;
		auto &extension_set = compiler.get<SPIRExtension>(args[2]);
		switch (extension_set.ext)
		{
		case SPIRExtension::GLSL:
		{
			auto op = static_cast<GLSLstd450>(args[3]);

			switch (op)
			{
			case GLSLstd450InterpolateAtCentroid:
			case GLSLstd450InterpolateAtSample:
			case GLSLstd450InterpolateAtOffset:
			{
				// The interpolant operand lives at args[4].
				auto *var = compiler.maybe_get<SPIRVariable>(args[4]);
				if (var && storage_class_is_interface(var->storage))
					variables.insert(args[4]);
				break;
			}

			case GLSLstd450Modf:
			case GLSLstd450Fract:
			{
				// Modf writes its integral part through a pointer at args[5].
				// NOTE(review): GLSLstd450Fract takes only a single operand, so
				// args[5] looks out of range for Fract — confirm against the
				// GLSL.std.450 spec / upstream before relying on this path.
				auto *var = compiler.maybe_get<SPIRVariable>(args[5]);
				if (var && storage_class_is_interface(var->storage))
					variables.insert(args[5]);
				break;
			}

			default:
				break;
			}
			break;
		}
		case SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter:
		{
			enum AMDShaderExplicitVertexParameter
			{
				InterpolateAtVertexAMD = 1
			};

			auto op = static_cast<AMDShaderExplicitVertexParameter>(args[3]);

			switch (op)
			{
			case InterpolateAtVertexAMD:
			{
				auto *var = compiler.maybe_get<SPIRVariable>(args[4]);
				if (var && storage_class_is_interface(var->storage))
					variables.insert(args[4]);
				break;
			}

			default:
				break;
			}
			break;
		}
		default:
			break;
		}
		break;
	}

	// All of these read through a pointer found at operand index 2.
	case OpAccessChain:
	case OpInBoundsAccessChain:
	case OpPtrAccessChain:
	case OpLoad:
	case OpCopyObject:
	case OpImageTexelPointer:
	case OpAtomicLoad:
	case OpAtomicExchange:
	case OpAtomicCompareExchange:
	case OpAtomicCompareExchangeWeak:
	case OpAtomicIIncrement:
	case OpAtomicIDecrement:
	case OpAtomicIAdd:
	case OpAtomicISub:
	case OpAtomicSMin:
	case OpAtomicUMin:
	case OpAtomicSMax:
	case OpAtomicUMax:
	case OpAtomicAnd:
	case OpAtomicOr:
	case OpAtomicXor:
	case OpArrayLength:
		// Invalid SPIR-V.
		if (length < 3)
			return false;
		variable = args[2];
		break;
	}

	// Common tail for the single-pointer cases above.
	if (variable)
	{
		auto *var = compiler.maybe_get<SPIRVariable>(variable);
		if (var && storage_class_is_interface(var->storage))
			variables.insert(variable);
	}
	return true;
}
824 | |
// Traverse the call graph and find all interface variables which are in use.
unordered_set<VariableID> Compiler::get_active_interface_variables() const
{
	unordered_set<VariableID> variables;
	InterfaceVariableAccessHandler handler(*this, variables);
	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);

	// Output variables need extra care: they can matter to later pipeline
	// stages even when this module never touches them.
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		if (var.storage != StorageClassOutput)
			return;
		if (!interface_variable_exists_in_entry_point(var.self))
			return;

		// An output variable which is just declared (but uninitialized) might be read by subsequent stages
		// so we should force-enable these outputs,
		// since compilation will fail if a subsequent stage attempts to read from the variable in question.
		// Also, make sure we preserve output variables which are only initialized, but never accessed by any code.
		if (var.initializer != ID(0) || get_execution_model() != ExecutionModelFragment)
			variables.insert(var.self);
	});

	// If we needed to create one, we'll need it.
	if (dummy_sampler_id)
		variables.insert(dummy_sampler_id);

	return variables;
}
852 | |
853 | void Compiler::set_enabled_interface_variables(std::unordered_set<VariableID> active_variables) |
854 | { |
855 | active_interface_variables = move(active_variables); |
856 | check_active_interface_variables = true; |
857 | } |
858 | |
859 | ShaderResources Compiler::get_shader_resources(const unordered_set<VariableID> *active_variables) const |
860 | { |
861 | ShaderResources res; |
862 | |
863 | bool ssbo_instance_name = reflection_ssbo_instance_name_is_significant(); |
864 | |
865 | ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) { |
866 | auto &type = this->get<SPIRType>(var.basetype); |
867 | |
868 | // It is possible for uniform storage classes to be passed as function parameters, so detect |
869 | // that. To detect function parameters, check of StorageClass of variable is function scope. |
870 | if (var.storage == StorageClassFunction || !type.pointer) |
871 | return; |
872 | |
873 | if (active_variables && active_variables->find(var.self) == end(*active_variables)) |
874 | return; |
875 | |
876 | // In SPIR-V 1.4 and up, every global must be present in the entry point interface list, |
877 | // not just IO variables. |
878 | bool active_in_entry_point = true; |
879 | if (ir.get_spirv_version() < 0x10400) |
880 | { |
881 | if (var.storage == StorageClassInput || var.storage == StorageClassOutput) |
882 | active_in_entry_point = interface_variable_exists_in_entry_point(var.self); |
883 | } |
884 | else |
885 | active_in_entry_point = interface_variable_exists_in_entry_point(var.self); |
886 | |
887 | if (!active_in_entry_point) |
888 | return; |
889 | |
890 | bool is_builtin = is_builtin_variable(var); |
891 | |
892 | if (is_builtin) |
893 | { |
894 | if (var.storage != StorageClassInput && var.storage != StorageClassOutput) |
895 | return; |
896 | |
897 | auto &list = var.storage == StorageClassInput ? res.builtin_inputs : res.builtin_outputs; |
898 | BuiltInResource resource; |
899 | |
900 | if (has_decoration(type.self, DecorationBlock)) |
901 | { |
902 | resource.resource = { var.self, var.basetype, type.self, |
903 | get_remapped_declared_block_name(var.self, false) }; |
904 | |
905 | for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) |
906 | { |
907 | resource.value_type_id = type.member_types[i]; |
908 | resource.builtin = BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn)); |
909 | list.push_back(resource); |
910 | } |
911 | } |
912 | else |
913 | { |
914 | bool strip_array = |
915 | !has_decoration(var.self, DecorationPatch) && ( |
916 | get_execution_model() == ExecutionModelTessellationControl || |
917 | (get_execution_model() == ExecutionModelTessellationEvaluation && |
918 | var.storage == StorageClassInput)); |
919 | |
920 | resource.resource = { var.self, var.basetype, type.self, get_name(var.self) }; |
921 | |
922 | if (strip_array && !type.array.empty()) |
923 | resource.value_type_id = get_variable_data_type(var).parent_type; |
924 | else |
925 | resource.value_type_id = get_variable_data_type_id(var); |
926 | |
927 | assert(resource.value_type_id); |
928 | |
929 | resource.builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); |
930 | list.push_back(std::move(resource)); |
931 | } |
932 | return; |
933 | } |
934 | |
935 | // Input |
936 | if (var.storage == StorageClassInput) |
937 | { |
938 | if (has_decoration(type.self, DecorationBlock)) |
939 | { |
940 | res.stage_inputs.push_back( |
941 | { var.self, var.basetype, type.self, |
942 | get_remapped_declared_block_name(var.self, false) }); |
943 | } |
944 | else |
945 | res.stage_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); |
946 | } |
947 | // Subpass inputs |
948 | else if (var.storage == StorageClassUniformConstant && type.image.dim == DimSubpassData) |
949 | { |
950 | res.subpass_inputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); |
951 | } |
952 | // Outputs |
953 | else if (var.storage == StorageClassOutput) |
954 | { |
955 | if (has_decoration(type.self, DecorationBlock)) |
956 | { |
957 | res.stage_outputs.push_back( |
958 | { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) }); |
959 | } |
960 | else |
961 | res.stage_outputs.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); |
962 | } |
963 | // UBOs |
964 | else if (type.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock)) |
965 | { |
966 | res.uniform_buffers.push_back( |
967 | { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, false) }); |
968 | } |
969 | // Old way to declare SSBOs. |
970 | else if (type.storage == StorageClassUniform && has_decoration(type.self, DecorationBufferBlock)) |
971 | { |
972 | res.storage_buffers.push_back( |
973 | { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, ssbo_instance_name) }); |
974 | } |
975 | // Modern way to declare SSBOs. |
976 | else if (type.storage == StorageClassStorageBuffer) |
977 | { |
978 | res.storage_buffers.push_back( |
979 | { var.self, var.basetype, type.self, get_remapped_declared_block_name(var.self, ssbo_instance_name) }); |
980 | } |
981 | // Push constant blocks |
982 | else if (type.storage == StorageClassPushConstant) |
983 | { |
984 | // There can only be one push constant block, but keep the vector in case this restriction is lifted |
985 | // in the future. |
986 | res.push_constant_buffers.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); |
987 | } |
988 | // Images |
989 | else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Image && |
990 | type.image.sampled == 2) |
991 | { |
992 | res.storage_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); |
993 | } |
994 | // Separate images |
995 | else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Image && |
996 | type.image.sampled == 1) |
997 | { |
998 | res.separate_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); |
999 | } |
1000 | // Separate samplers |
1001 | else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::Sampler) |
1002 | { |
1003 | res.separate_samplers.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); |
1004 | } |
1005 | // Textures |
1006 | else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::SampledImage) |
1007 | { |
1008 | res.sampled_images.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); |
1009 | } |
1010 | // Atomic counters |
1011 | else if (type.storage == StorageClassAtomicCounter) |
1012 | { |
1013 | res.atomic_counters.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); |
1014 | } |
1015 | // Acceleration structures |
1016 | else if (type.storage == StorageClassUniformConstant && type.basetype == SPIRType::AccelerationStructure) |
1017 | { |
1018 | res.acceleration_structures.push_back({ var.self, var.basetype, type.self, get_name(var.self) }); |
1019 | } |
1020 | }); |
1021 | |
1022 | return res; |
1023 | } |
1024 | |
1025 | bool Compiler::type_is_block_like(const SPIRType &type) const |
1026 | { |
1027 | if (type.basetype != SPIRType::Struct) |
1028 | return false; |
1029 | |
1030 | if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)) |
1031 | { |
1032 | return true; |
1033 | } |
1034 | |
1035 | // Block-like types may have Offset decorations. |
1036 | for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) |
1037 | if (has_member_decoration(type.self, i, DecorationOffset)) |
1038 | return true; |
1039 | |
1040 | return false; |
1041 | } |
1042 | |
1043 | void Compiler::parse_fixup() |
1044 | { |
1045 | // Figure out specialization constants for work group sizes. |
1046 | for (auto id_ : ir.ids_for_constant_or_variable) |
1047 | { |
1048 | auto &id = ir.ids[id_]; |
1049 | |
1050 | if (id.get_type() == TypeConstant) |
1051 | { |
1052 | auto &c = id.get<SPIRConstant>(); |
1053 | if (has_decoration(c.self, DecorationBuiltIn) && |
1054 | BuiltIn(get_decoration(c.self, DecorationBuiltIn)) == BuiltInWorkgroupSize) |
1055 | { |
1056 | // In current SPIR-V, there can be just one constant like this. |
1057 | // All entry points will receive the constant value. |
1058 | // WorkgroupSize take precedence over LocalSizeId. |
1059 | for (auto &entry : ir.entry_points) |
1060 | { |
1061 | entry.second.workgroup_size.constant = c.self; |
1062 | entry.second.workgroup_size.x = c.scalar(0, 0); |
1063 | entry.second.workgroup_size.y = c.scalar(0, 1); |
1064 | entry.second.workgroup_size.z = c.scalar(0, 2); |
1065 | } |
1066 | } |
1067 | } |
1068 | else if (id.get_type() == TypeVariable) |
1069 | { |
1070 | auto &var = id.get<SPIRVariable>(); |
1071 | if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup || |
1072 | var.storage == StorageClassOutput) |
1073 | global_variables.push_back(var.self); |
1074 | if (variable_storage_is_aliased(var)) |
1075 | aliased_variables.push_back(var.self); |
1076 | } |
1077 | } |
1078 | } |
1079 | |
1080 | void Compiler::update_name_cache(unordered_set<string> &cache_primary, const unordered_set<string> &cache_secondary, |
1081 | string &name) |
1082 | { |
1083 | if (name.empty()) |
1084 | return; |
1085 | |
1086 | const auto find_name = [&](const string &n) -> bool { |
1087 | if (cache_primary.find(n) != end(cache_primary)) |
1088 | return true; |
1089 | |
1090 | if (&cache_primary != &cache_secondary) |
1091 | if (cache_secondary.find(n) != end(cache_secondary)) |
1092 | return true; |
1093 | |
1094 | return false; |
1095 | }; |
1096 | |
1097 | const auto insert_name = [&](const string &n) { cache_primary.insert(n); }; |
1098 | |
1099 | if (!find_name(name)) |
1100 | { |
1101 | insert_name(name); |
1102 | return; |
1103 | } |
1104 | |
1105 | uint32_t counter = 0; |
1106 | auto tmpname = name; |
1107 | |
1108 | bool use_linked_underscore = true; |
1109 | |
1110 | if (tmpname == "_" ) |
1111 | { |
1112 | // We cannot just append numbers, as we will end up creating internally reserved names. |
1113 | // Make it like _0_<counter> instead. |
1114 | tmpname += "0" ; |
1115 | } |
1116 | else if (tmpname.back() == '_') |
1117 | { |
1118 | // The last_character is an underscore, so we don't need to link in underscore. |
1119 | // This would violate double underscore rules. |
1120 | use_linked_underscore = false; |
1121 | } |
1122 | |
1123 | // If there is a collision (very rare), |
1124 | // keep tacking on extra identifier until it's unique. |
1125 | do |
1126 | { |
1127 | counter++; |
1128 | name = tmpname + (use_linked_underscore ? "_" : "" ) + convert_to_string(counter); |
1129 | } while (find_name(name)); |
1130 | insert_name(name); |
1131 | } |
1132 | |
// Convenience overload: check and resolve collisions against a single cache.
void Compiler::update_name_cache(unordered_set<string> &cache, string &name)
{
	update_name_cache(cache, cache, name);
}
1137 | |
// Assigns a debug name to an ID; forwards to the parsed IR's name table.
void Compiler::set_name(ID id, const std::string &name)
{
	ir.set_name(id, name);
}
1142 | |
// Looks up a type by its type ID; throws if the ID is not a type.
const SPIRType &Compiler::get_type(TypeID id) const
{
	return get<SPIRType>(id);
}
1147 | |
// Returns the declared (usually pointer) type of a variable ID.
const SPIRType &Compiler::get_type_from_variable(VariableID id) const
{
	return get<SPIRType>(get<SPIRVariable>(id).basetype);
}
1152 | |
1153 | uint32_t Compiler::get_pointee_type_id(uint32_t type_id) const |
1154 | { |
1155 | auto *p_type = &get<SPIRType>(type_id); |
1156 | if (p_type->pointer) |
1157 | { |
1158 | assert(p_type->parent_type); |
1159 | type_id = p_type->parent_type; |
1160 | } |
1161 | return type_id; |
1162 | } |
1163 | |
1164 | const SPIRType &Compiler::get_pointee_type(const SPIRType &type) const |
1165 | { |
1166 | auto *p_type = &type; |
1167 | if (p_type->pointer) |
1168 | { |
1169 | assert(p_type->parent_type); |
1170 | p_type = &get<SPIRType>(p_type->parent_type); |
1171 | } |
1172 | return *p_type; |
1173 | } |
1174 | |
// ID-based convenience overload of get_pointee_type.
const SPIRType &Compiler::get_pointee_type(uint32_t type_id) const
{
	return get_pointee_type(get<SPIRType>(type_id));
}
1179 | |
1180 | uint32_t Compiler::get_variable_data_type_id(const SPIRVariable &var) const |
1181 | { |
1182 | if (var.phi_variable) |
1183 | return var.basetype; |
1184 | return get_pointee_type_id(var.basetype); |
1185 | } |
1186 | |
// Returns the (non-pointer) data type of a variable, mutable variant.
SPIRType &Compiler::get_variable_data_type(const SPIRVariable &var)
{
	return get<SPIRType>(get_variable_data_type_id(var));
}
1191 | |
// Returns the (non-pointer) data type of a variable, const variant.
const SPIRType &Compiler::get_variable_data_type(const SPIRVariable &var) const
{
	return get<SPIRType>(get_variable_data_type_id(var));
}
1196 | |
1197 | SPIRType &Compiler::get_variable_element_type(const SPIRVariable &var) |
1198 | { |
1199 | SPIRType *type = &get_variable_data_type(var); |
1200 | if (is_array(*type)) |
1201 | type = &get<SPIRType>(type->parent_type); |
1202 | return *type; |
1203 | } |
1204 | |
1205 | const SPIRType &Compiler::get_variable_element_type(const SPIRVariable &var) const |
1206 | { |
1207 | const SPIRType *type = &get_variable_data_type(var); |
1208 | if (is_array(*type)) |
1209 | type = &get<SPIRType>(type->parent_type); |
1210 | return *type; |
1211 | } |
1212 | |
1213 | bool Compiler::is_sampled_image_type(const SPIRType &type) |
1214 | { |
1215 | return (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage) && type.image.sampled == 1 && |
1216 | type.image.dim != DimBuffer; |
1217 | } |
1218 | |
// Attaches a string-valued decoration to a struct member; forwards to parsed IR.
void Compiler::set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration,
                                            const std::string &argument)
{
	ir.set_member_decoration_string(id, index, decoration, argument);
}
1224 | |
// Attaches an integer-valued decoration to a struct member; forwards to parsed IR.
void Compiler::set_member_decoration(TypeID id, uint32_t index, Decoration decoration, uint32_t argument)
{
	ir.set_member_decoration(id, index, decoration, argument);
}
1229 | |
// Sets the debug name of a struct member; forwards to parsed IR.
void Compiler::set_member_name(TypeID id, uint32_t index, const std::string &name)
{
	ir.set_member_name(id, index, name);
}
1234 | |
// Returns the debug name of a struct member; forwards to parsed IR.
const std::string &Compiler::get_member_name(TypeID id, uint32_t index) const
{
	return ir.get_member_name(id, index);
}
1239 | |
// Records the fully qualified alias a backend should emit for this ID.
void Compiler::set_qualified_name(uint32_t id, const string &name)
{
	ir.meta[id].decoration.qualified_alias = name;
}
1244 | |
1245 | void Compiler::set_member_qualified_name(uint32_t type_id, uint32_t index, const std::string &name) |
1246 | { |
1247 | ir.meta[type_id].members.resize(max(ir.meta[type_id].members.size(), size_t(index) + 1)); |
1248 | ir.meta[type_id].members[index].qualified_alias = name; |
1249 | } |
1250 | |
1251 | const string &Compiler::get_member_qualified_name(TypeID type_id, uint32_t index) const |
1252 | { |
1253 | auto *m = ir.find_meta(type_id); |
1254 | if (m && index < m->members.size()) |
1255 | return m->members[index].qualified_alias; |
1256 | else |
1257 | return ir.get_empty_string(); |
1258 | } |
1259 | |
// Reads an integer-valued decoration of a struct member; forwards to parsed IR.
uint32_t Compiler::get_member_decoration(TypeID id, uint32_t index, Decoration decoration) const
{
	return ir.get_member_decoration(id, index, decoration);
}
1264 | |
// Returns the full decoration bitset of a struct member; forwards to parsed IR.
const Bitset &Compiler::get_member_decoration_bitset(TypeID id, uint32_t index) const
{
	return ir.get_member_decoration_bitset(id, index);
}
1269 | |
// Tests whether a struct member carries a given decoration; forwards to parsed IR.
bool Compiler::has_member_decoration(TypeID id, uint32_t index, Decoration decoration) const
{
	return ir.has_member_decoration(id, index, decoration);
}
1274 | |
// Removes a decoration from a struct member; forwards to parsed IR.
void Compiler::unset_member_decoration(TypeID id, uint32_t index, Decoration decoration)
{
	ir.unset_member_decoration(id, index, decoration);
}
1279 | |
// Attaches a string-valued decoration to an ID; forwards to parsed IR.
void Compiler::set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument)
{
	ir.set_decoration_string(id, decoration, argument);
}
1284 | |
// Attaches an integer-valued decoration to an ID; forwards to parsed IR.
void Compiler::set_decoration(ID id, Decoration decoration, uint32_t argument)
{
	ir.set_decoration(id, decoration, argument);
}
1289 | |
1290 | void Compiler::set_extended_decoration(uint32_t id, ExtendedDecorations decoration, uint32_t value) |
1291 | { |
1292 | auto &dec = ir.meta[id].decoration; |
1293 | dec.extended.flags.set(decoration); |
1294 | dec.extended.values[decoration] = value; |
1295 | } |
1296 | |
1297 | void Compiler::set_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration, |
1298 | uint32_t value) |
1299 | { |
1300 | ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1)); |
1301 | auto &dec = ir.meta[type].members[index]; |
1302 | dec.extended.flags.set(decoration); |
1303 | dec.extended.values[decoration] = value; |
1304 | } |
1305 | |
1306 | static uint32_t get_default_extended_decoration(ExtendedDecorations decoration) |
1307 | { |
1308 | switch (decoration) |
1309 | { |
1310 | case SPIRVCrossDecorationResourceIndexPrimary: |
1311 | case SPIRVCrossDecorationResourceIndexSecondary: |
1312 | case SPIRVCrossDecorationResourceIndexTertiary: |
1313 | case SPIRVCrossDecorationResourceIndexQuaternary: |
1314 | case SPIRVCrossDecorationInterfaceMemberIndex: |
1315 | return ~(0u); |
1316 | |
1317 | default: |
1318 | return 0; |
1319 | } |
1320 | } |
1321 | |
1322 | uint32_t Compiler::get_extended_decoration(uint32_t id, ExtendedDecorations decoration) const |
1323 | { |
1324 | auto *m = ir.find_meta(id); |
1325 | if (!m) |
1326 | return 0; |
1327 | |
1328 | auto &dec = m->decoration; |
1329 | |
1330 | if (!dec.extended.flags.get(decoration)) |
1331 | return get_default_extended_decoration(decoration); |
1332 | |
1333 | return dec.extended.values[decoration]; |
1334 | } |
1335 | |
1336 | uint32_t Compiler::get_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const |
1337 | { |
1338 | auto *m = ir.find_meta(type); |
1339 | if (!m) |
1340 | return 0; |
1341 | |
1342 | if (index >= m->members.size()) |
1343 | return 0; |
1344 | |
1345 | auto &dec = m->members[index]; |
1346 | if (!dec.extended.flags.get(decoration)) |
1347 | return get_default_extended_decoration(decoration); |
1348 | return dec.extended.values[decoration]; |
1349 | } |
1350 | |
1351 | bool Compiler::has_extended_decoration(uint32_t id, ExtendedDecorations decoration) const |
1352 | { |
1353 | auto *m = ir.find_meta(id); |
1354 | if (!m) |
1355 | return false; |
1356 | |
1357 | auto &dec = m->decoration; |
1358 | return dec.extended.flags.get(decoration); |
1359 | } |
1360 | |
1361 | bool Compiler::has_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) const |
1362 | { |
1363 | auto *m = ir.find_meta(type); |
1364 | if (!m) |
1365 | return false; |
1366 | |
1367 | if (index >= m->members.size()) |
1368 | return false; |
1369 | |
1370 | auto &dec = m->members[index]; |
1371 | return dec.extended.flags.get(decoration); |
1372 | } |
1373 | |
1374 | void Compiler::unset_extended_decoration(uint32_t id, ExtendedDecorations decoration) |
1375 | { |
1376 | auto &dec = ir.meta[id].decoration; |
1377 | dec.extended.flags.clear(decoration); |
1378 | dec.extended.values[decoration] = 0; |
1379 | } |
1380 | |
1381 | void Compiler::unset_extended_member_decoration(uint32_t type, uint32_t index, ExtendedDecorations decoration) |
1382 | { |
1383 | ir.meta[type].members.resize(max(ir.meta[type].members.size(), size_t(index) + 1)); |
1384 | auto &dec = ir.meta[type].members[index]; |
1385 | dec.extended.flags.clear(decoration); |
1386 | dec.extended.values[decoration] = 0; |
1387 | } |
1388 | |
// Returns the storage class (Input, Output, Uniform, ...) of a variable.
StorageClass Compiler::get_storage_class(VariableID id) const
{
	return get<SPIRVariable>(id).storage;
}
1393 | |
// Returns the debug name of an ID (may be empty); forwards to parsed IR.
const std::string &Compiler::get_name(ID id) const
{
	return ir.get_name(id);
}
1398 | |
// Synthesizes a fallback name "_<id>" for IDs that have no debug name.
const std::string Compiler::get_fallback_name(ID id) const
{
	return join("_" , id);
}
1403 | |
1404 | const std::string Compiler::get_block_fallback_name(VariableID id) const |
1405 | { |
1406 | auto &var = get<SPIRVariable>(id); |
1407 | if (get_name(id).empty()) |
1408 | return join("_" , get<SPIRType>(var.basetype).self, "_" , id); |
1409 | else |
1410 | return get_name(id); |
1411 | } |
1412 | |
// Returns the full decoration bitset of an ID; forwards to parsed IR.
const Bitset &Compiler::get_decoration_bitset(ID id) const
{
	return ir.get_decoration_bitset(id);
}
1417 | |
// Tests whether an ID carries a given decoration; forwards to parsed IR.
bool Compiler::has_decoration(ID id, Decoration decoration) const
{
	return ir.has_decoration(id, decoration);
}
1422 | |
// Reads a string-valued decoration of an ID; forwards to parsed IR.
const string &Compiler::get_decoration_string(ID id, Decoration decoration) const
{
	return ir.get_decoration_string(id, decoration);
}
1427 | |
// Reads a string-valued decoration of a struct member; forwards to parsed IR.
const string &Compiler::get_member_decoration_string(TypeID id, uint32_t index, Decoration decoration) const
{
	return ir.get_member_decoration_string(id, index, decoration);
}
1432 | |
// Reads an integer-valued decoration of an ID; forwards to parsed IR.
uint32_t Compiler::get_decoration(ID id, Decoration decoration) const
{
	return ir.get_decoration(id, decoration);
}
1437 | |
// Removes a decoration from an ID; forwards to parsed IR.
void Compiler::unset_decoration(ID id, Decoration decoration)
{
	ir.unset_decoration(id, decoration);
}
1442 | |
1443 | bool Compiler::get_binary_offset_for_decoration(VariableID id, spv::Decoration decoration, uint32_t &word_offset) const |
1444 | { |
1445 | auto *m = ir.find_meta(id); |
1446 | if (!m) |
1447 | return false; |
1448 | |
1449 | auto &word_offsets = m->decoration_word_offset; |
1450 | auto itr = word_offsets.find(decoration); |
1451 | if (itr == end(word_offsets)) |
1452 | return false; |
1453 | |
1454 | word_offset = itr->second; |
1455 | return true; |
1456 | } |
1457 | |
// Heuristically decides whether a loop header block can be emitted as the
// requested structured loop form (for/while) instead of the generic
// for(;;) + break lowering. Returns false whenever any phi flushing or
// non-trivial branching would make the compact form incorrect.
bool Compiler::block_is_loop_candidate(const SPIRBlock &block, SPIRBlock::Method method) const
{
	// Tried and failed.
	if (block.disable_block_optimization || block.complex_continue)
		return false;

	if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
	{
		// Try to detect common for loop pattern
		// which the code backend can use to create cleaner code.
		// for(;;) { if (cond) { some_body; } else { break; } }
		// is the pattern we're looking for.
		const auto *false_block = maybe_get<SPIRBlock>(block.false_block);
		const auto *true_block = maybe_get<SPIRBlock>(block.true_block);
		const auto *merge_block = maybe_get<SPIRBlock>(block.merge_block);

		// "Is merge" also allows a chain of empty blocks falling through to the merge.
		bool false_block_is_merge = block.false_block == block.merge_block ||
		                            (false_block && merge_block && execution_is_noop(*false_block, *merge_block));

		bool true_block_is_merge = block.true_block == block.merge_block ||
		                           (true_block && merge_block && execution_is_noop(*true_block, *merge_block));

		// Positive: loop body on the true path; negative: body on the false path (inverted condition).
		bool positive_candidate =
		    block.true_block != block.merge_block && block.true_block != block.self && false_block_is_merge;

		bool negative_candidate =
		    block.false_block != block.merge_block && block.false_block != block.self && true_block_is_merge;

		bool ret = block.terminator == SPIRBlock::Select && block.merge == SPIRBlock::MergeLoop &&
		           (positive_candidate || negative_candidate);

		// For the continue-variant, the body side must branch straight to the continue block.
		if (ret && positive_candidate && method == SPIRBlock::MergeToSelectContinueForLoop)
			ret = block.true_block == block.continue_block;
		else if (ret && negative_candidate && method == SPIRBlock::MergeToSelectContinueForLoop)
			ret = block.false_block == block.continue_block;

		// If we have OpPhi which depends on branches which came from our own block,
		// we need to flush phi variables in else block instead of a trivial break,
		// so we cannot assume this is a for loop candidate.
		if (ret)
		{
			for (auto &phi : block.phi_variables)
				if (phi.parent == block.self)
					return false;

			auto *merge = maybe_get<SPIRBlock>(block.merge_block);
			if (merge)
				for (auto &phi : merge->phi_variables)
					if (phi.parent == block.self)
						return false;
		}
		return ret;
	}
	else if (method == SPIRBlock::MergeToDirectForLoop)
	{
		// Empty loop header that just sets up merge target
		// and branches to loop body.
		bool ret = block.terminator == SPIRBlock::Direct && block.merge == SPIRBlock::MergeLoop && block.ops.empty();

		if (!ret)
			return false;

		// The selection happens in the immediate successor ("child") of the empty header.
		auto &child = get<SPIRBlock>(block.next_block);

		const auto *false_block = maybe_get<SPIRBlock>(child.false_block);
		const auto *true_block = maybe_get<SPIRBlock>(child.true_block);
		const auto *merge_block = maybe_get<SPIRBlock>(block.merge_block);

		bool false_block_is_merge = child.false_block == block.merge_block ||
		                            (false_block && merge_block && execution_is_noop(*false_block, *merge_block));

		bool true_block_is_merge = child.true_block == block.merge_block ||
		                           (true_block && merge_block && execution_is_noop(*true_block, *merge_block));

		bool positive_candidate =
		    child.true_block != block.merge_block && child.true_block != block.self && false_block_is_merge;

		bool negative_candidate =
		    child.false_block != block.merge_block && child.false_block != block.self && true_block_is_merge;

		ret = child.terminator == SPIRBlock::Select && child.merge == SPIRBlock::MergeNone &&
		      (positive_candidate || negative_candidate);

		// If we have OpPhi which depends on branches which came from our own block,
		// we need to flush phi variables in else block instead of a trivial break,
		// so we cannot assume this is a for loop candidate.
		if (ret)
		{
			for (auto &phi : block.phi_variables)
				if (phi.parent == block.self || phi.parent == child.self)
					return false;

			for (auto &phi : child.phi_variables)
				if (phi.parent == block.self)
					return false;

			auto *merge = maybe_get<SPIRBlock>(block.merge_block);
			if (merge)
				for (auto &phi : merge->phi_variables)
					if (phi.parent == block.self || phi.parent == child.false_block)
						return false;
		}

		return ret;
	}
	else
		return false;
}
1566 | |
// Returns true if walking from 'from' to 'to' executes no opcodes and flushes
// no phi variables, i.e. the path is observationally a no-op and can be elided.
bool Compiler::execution_is_noop(const SPIRBlock &from, const SPIRBlock &to) const
{
	// Must at least be a straight-line chain of Direct branches.
	if (!execution_is_branchless(from, to))
		return false;

	auto *start = &from;
	for (;;)
	{
		if (start->self == to.self)
			return true;

		// Any real instruction along the way disqualifies the path.
		if (!start->ops.empty())
			return false;

		auto &next = get<SPIRBlock>(start->next_block);
		// Flushing phi variables does not count as noop.
		for (auto &phi : next.phi_variables)
			if (phi.parent == start->self)
				return false;

		start = &next;
	}
}
1590 | |
1591 | bool Compiler::execution_is_branchless(const SPIRBlock &from, const SPIRBlock &to) const |
1592 | { |
1593 | auto *start = &from; |
1594 | for (;;) |
1595 | { |
1596 | if (start->self == to.self) |
1597 | return true; |
1598 | |
1599 | if (start->terminator == SPIRBlock::Direct && start->merge == SPIRBlock::MergeNone) |
1600 | start = &get<SPIRBlock>(start->next_block); |
1601 | else |
1602 | return false; |
1603 | } |
1604 | } |
1605 | |
// Returns true if 'from' unconditionally branches straight to 'to' with no merge.
bool Compiler::execution_is_direct_branch(const SPIRBlock &from, const SPIRBlock &to) const
{
	return from.terminator == SPIRBlock::Direct && from.merge == SPIRBlock::MergeNone && from.next_block == to.self;
}
1610 | |
// Classifies a loop's continue block so backends know which structured loop
// form (while / for / do-while) can be emitted, falling back to ComplexLoop
// whenever the CFG shape does not match any simple pattern.
SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &block) const
{
	// The block was deemed too complex during code emit, pick conservative fallback paths.
	if (block.complex_continue)
		return SPIRBlock::ComplexLoop;

	// In older glslang output continue block can be equal to the loop header.
	// In this case, execution is clearly branchless, so just assume a while loop header here.
	if (block.merge == SPIRBlock::MergeLoop)
		return SPIRBlock::WhileLoop;

	if (block.loop_dominator == BlockID(SPIRBlock::NoDominator))
	{
		// Continue block is never reached from CFG.
		return SPIRBlock::ComplexLoop;
	}

	auto &dominator = get<SPIRBlock>(block.loop_dominator);

	// A no-op path back to the header is a plain while-loop; a branchless but
	// non-empty path carries the loop increment, i.e. a for-loop.
	if (execution_is_noop(block, dominator))
		return SPIRBlock::WhileLoop;
	else if (execution_is_branchless(block, dominator))
		return SPIRBlock::ForLoop;
	else
	{
		const auto *false_block = maybe_get<SPIRBlock>(block.false_block);
		const auto *true_block = maybe_get<SPIRBlock>(block.true_block);
		const auto *merge_block = maybe_get<SPIRBlock>(dominator.merge_block);

		// If we need to flush Phi in this block, we cannot have a DoWhile loop.
		bool flush_phi_to_false = false_block && flush_phi_required(block.self, block.false_block);
		bool flush_phi_to_true = true_block && flush_phi_required(block.self, block.true_block);
		if (flush_phi_to_false || flush_phi_to_true)
			return SPIRBlock::ComplexLoop;

		// do-while: the continue block itself selects between looping back to the
		// header and exiting to the merge block (possibly via a no-op chain).
		bool positive_do_while = block.true_block == dominator.self &&
		                         (block.false_block == dominator.merge_block ||
		                          (false_block && merge_block && execution_is_noop(*false_block, *merge_block)));

		bool negative_do_while = block.false_block == dominator.self &&
		                         (block.true_block == dominator.merge_block ||
		                          (true_block && merge_block && execution_is_noop(*true_block, *merge_block)));

		if (block.merge == SPIRBlock::MergeNone && block.terminator == SPIRBlock::Select &&
		    (positive_do_while || negative_do_while))
		{
			return SPIRBlock::DoWhileLoop;
		}
		else
			return SPIRBlock::ComplexLoop;
	}
}
1663 | |
1664 | const SmallVector<SPIRBlock::Case> &Compiler::get_case_list(const SPIRBlock &block) const |
1665 | { |
1666 | uint32_t width = 0; |
1667 | |
1668 | // First we check if we can get the type directly from the block.condition |
1669 | // since it can be a SPIRConstant or a SPIRVariable. |
1670 | if (const auto *constant = maybe_get<SPIRConstant>(block.condition)) |
1671 | { |
1672 | const auto &type = get<SPIRType>(constant->constant_type); |
1673 | width = type.width; |
1674 | } |
1675 | else if (const auto *var = maybe_get<SPIRVariable>(block.condition)) |
1676 | { |
1677 | const auto &type = get<SPIRType>(var->basetype); |
1678 | width = type.width; |
1679 | } |
1680 | else |
1681 | { |
1682 | auto search = ir.load_type_width.find(block.condition); |
1683 | if (search == ir.load_type_width.end()) |
1684 | { |
1685 | SPIRV_CROSS_THROW("Use of undeclared variable on a switch statement." ); |
1686 | } |
1687 | |
1688 | width = search->second; |
1689 | } |
1690 | |
1691 | if (width > 32) |
1692 | return block.cases_64bit; |
1693 | |
1694 | return block.cases_32bit; |
1695 | } |
1696 | |
// Feeds every opcode in 'block' to 'handler', recursing into called functions
// when the handler asks for it. Returns false as soon as the handler aborts.
bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const
{
	handler.set_current_block(block);
	handler.rearm_current_block(block);

	// Ideally, perhaps traverse the CFG instead of all blocks in order to eliminate dead blocks,
	// but this shouldn't be a problem in practice unless the SPIR-V is doing insane things like recursing
	// inside dead blocks ...
	for (auto &i : block.ops)
	{
		auto ops = stream(i);
		auto op = static_cast<Op>(i.op);

		if (!handler.handle(op, ops, i.length))
			return false;

		if (op == OpFunctionCall)
		{
			auto &func = get<SPIRFunction>(ops[2]);
			if (handler.follow_function_call(func))
			{
				if (!handler.begin_function_scope(ops, i.length))
					return false;
				if (!traverse_all_reachable_opcodes(get<SPIRFunction>(ops[2]), handler))
					return false;
				if (!handler.end_function_scope(ops, i.length))
					return false;

				// Restore the handler's notion of the current block after the nested traversal.
				handler.rearm_current_block(block);
			}
		}
	}

	if (!handler.handle_terminator(block))
		return false;

	return true;
}
1735 | |
1736 | bool Compiler::traverse_all_reachable_opcodes(const SPIRFunction &func, OpcodeHandler &handler) const |
1737 | { |
1738 | for (auto block : func.blocks) |
1739 | if (!traverse_all_reachable_opcodes(get<SPIRBlock>(block), handler)) |
1740 | return false; |
1741 | |
1742 | return true; |
1743 | } |
1744 | |
1745 | uint32_t Compiler::type_struct_member_offset(const SPIRType &type, uint32_t index) const |
1746 | { |
1747 | auto *type_meta = ir.find_meta(type.self); |
1748 | if (type_meta) |
1749 | { |
1750 | // Decoration must be set in valid SPIR-V, otherwise throw. |
1751 | auto &dec = type_meta->members[index]; |
1752 | if (dec.decoration_flags.get(DecorationOffset)) |
1753 | return dec.offset; |
1754 | else |
1755 | SPIRV_CROSS_THROW("Struct member does not have Offset set." ); |
1756 | } |
1757 | else |
1758 | SPIRV_CROSS_THROW("Struct member does not have Offset set." ); |
1759 | } |
1760 | |
1761 | uint32_t Compiler::type_struct_member_array_stride(const SPIRType &type, uint32_t index) const |
1762 | { |
1763 | auto *type_meta = ir.find_meta(type.member_types[index]); |
1764 | if (type_meta) |
1765 | { |
1766 | // Decoration must be set in valid SPIR-V, otherwise throw. |
1767 | // ArrayStride is part of the array type not OpMemberDecorate. |
1768 | auto &dec = type_meta->decoration; |
1769 | if (dec.decoration_flags.get(DecorationArrayStride)) |
1770 | return dec.array_stride; |
1771 | else |
1772 | SPIRV_CROSS_THROW("Struct member does not have ArrayStride set." ); |
1773 | } |
1774 | else |
1775 | SPIRV_CROSS_THROW("Struct member does not have ArrayStride set." ); |
1776 | } |
1777 | |
1778 | uint32_t Compiler::type_struct_member_matrix_stride(const SPIRType &type, uint32_t index) const |
1779 | { |
1780 | auto *type_meta = ir.find_meta(type.self); |
1781 | if (type_meta) |
1782 | { |
1783 | // Decoration must be set in valid SPIR-V, otherwise throw. |
1784 | // MatrixStride is part of OpMemberDecorate. |
1785 | auto &dec = type_meta->members[index]; |
1786 | if (dec.decoration_flags.get(DecorationMatrixStride)) |
1787 | return dec.matrix_stride; |
1788 | else |
1789 | SPIRV_CROSS_THROW("Struct member does not have MatrixStride set." ); |
1790 | } |
1791 | else |
1792 | SPIRV_CROSS_THROW("Struct member does not have MatrixStride set." ); |
1793 | } |
1794 | |
1795 | size_t Compiler::get_declared_struct_size(const SPIRType &type) const |
1796 | { |
1797 | if (type.member_types.empty()) |
1798 | SPIRV_CROSS_THROW("Declared struct in block cannot be empty." ); |
1799 | |
1800 | // Offsets can be declared out of order, so we need to deduce the actual size |
1801 | // based on last member instead. |
1802 | uint32_t member_index = 0; |
1803 | size_t highest_offset = 0; |
1804 | for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) |
1805 | { |
1806 | size_t offset = type_struct_member_offset(type, i); |
1807 | if (offset > highest_offset) |
1808 | { |
1809 | highest_offset = offset; |
1810 | member_index = i; |
1811 | } |
1812 | } |
1813 | |
1814 | size_t size = get_declared_struct_member_size(type, member_index); |
1815 | return highest_offset + size; |
1816 | } |
1817 | |
1818 | size_t Compiler::get_declared_struct_size_runtime_array(const SPIRType &type, size_t array_size) const |
1819 | { |
1820 | if (type.member_types.empty()) |
1821 | SPIRV_CROSS_THROW("Declared struct in block cannot be empty." ); |
1822 | |
1823 | size_t size = get_declared_struct_size(type); |
1824 | auto &last_type = get<SPIRType>(type.member_types.back()); |
1825 | if (!last_type.array.empty() && last_type.array_size_literal[0] && last_type.array[0] == 0) // Runtime array |
1826 | size += array_size * type_struct_member_array_stride(type, uint32_t(type.member_types.size() - 1)); |
1827 | |
1828 | return size; |
1829 | } |
1830 | |
// Evaluates an OpSpecConstantOp expression to a concrete 32-bit value.
// Only scalar 32-bit integer/boolean expressions are supported, which covers
// the typical use case of specialization-constant-sized arrays.
// Throws on unsupported types/opcodes and on division/modulo by zero.
uint32_t Compiler::evaluate_spec_constant_u32(const SPIRConstantOp &spec) const
{
	auto &result_type = get<SPIRType>(spec.basetype);
	if (result_type.basetype != SPIRType::UInt && result_type.basetype != SPIRType::Int &&
	    result_type.basetype != SPIRType::Boolean)
	{
		SPIRV_CROSS_THROW(
		    "Only 32-bit integers and booleans are currently supported when evaluating specialization constants.\n" );
	}

	if (!is_scalar(result_type))
		SPIRV_CROSS_THROW("Spec constant evaluation must be a scalar.\n" );

	uint32_t value = 0;

	// Resolves an operand ID to a u32: plain constants are read directly,
	// nested spec-constant expressions are evaluated recursively.
	const auto eval_u32 = [&](uint32_t id) -> uint32_t {
		auto &type = expression_type(id);
		if (type.basetype != SPIRType::UInt && type.basetype != SPIRType::Int && type.basetype != SPIRType::Boolean)
		{
			SPIRV_CROSS_THROW("Only 32-bit integers and booleans are currently supported when evaluating "
			                  "specialization constants.\n" );
		}

		if (!is_scalar(type))
			SPIRV_CROSS_THROW("Spec constant evaluation must be a scalar.\n" );
		if (const auto *c = this->maybe_get<SPIRConstant>(id))
			return c->scalar();
		else
			return evaluate_spec_constant_u32(this->get<SPIRConstantOp>(id));
	};

// Expands to a case applying a plain C binary operator to both operands.
#define binary_spec_op(op, binary_op) \
	case Op##op: \
		value = eval_u32(spec.arguments[0]) binary_op eval_u32(spec.arguments[1]); \
		break
// Same, but operands are first cast through 'type' (for signed semantics),
// and the result is cast back to u32.
#define binary_spec_op_cast(op, binary_op, type) \
	case Op##op: \
		value = uint32_t(type(eval_u32(spec.arguments[0])) binary_op type(eval_u32(spec.arguments[1]))); \
		break

	// Support the basic opcodes which are typically used when computing array sizes.
	switch (spec.opcode)
	{
	binary_spec_op(IAdd, +);
	binary_spec_op(ISub, -);
	binary_spec_op(IMul, *);
	binary_spec_op(BitwiseAnd, &);
	binary_spec_op(BitwiseOr, |);
	binary_spec_op(BitwiseXor, ^);
	// Logical ops use bitwise operators here; in valid SPIR-V the operands
	// are booleans (0/1), so the result matches logical evaluation.
	binary_spec_op(LogicalAnd, &);
	binary_spec_op(LogicalOr, |);
	binary_spec_op(ShiftLeftLogical, <<);
	binary_spec_op(ShiftRightLogical, >>);
	binary_spec_op_cast(ShiftRightArithmetic, >>, int32_t);
	binary_spec_op(LogicalEqual, ==);
	binary_spec_op(LogicalNotEqual, !=);
	binary_spec_op(IEqual, ==);
	binary_spec_op(INotEqual, !=);
	binary_spec_op(ULessThan, <);
	binary_spec_op(ULessThanEqual, <=);
	binary_spec_op(UGreaterThan, >);
	binary_spec_op(UGreaterThanEqual, >=);
	binary_spec_op_cast(SLessThan, <, int32_t);
	binary_spec_op_cast(SLessThanEqual, <=, int32_t);
	binary_spec_op_cast(SGreaterThan, >, int32_t);
	binary_spec_op_cast(SGreaterThanEqual, >=, int32_t);
#undef binary_spec_op
#undef binary_spec_op_cast

	case OpLogicalNot:
		value = uint32_t(!eval_u32(spec.arguments[0]));
		break;

	case OpNot:
		value = ~eval_u32(spec.arguments[0]);
		break;

	case OpSNegate:
		value = uint32_t(-int32_t(eval_u32(spec.arguments[0])));
		break;

	case OpSelect:
		// All three operands are evaluated eagerly; evaluation is side-effect
		// free apart from throwing, so this matches Select semantics.
		value = eval_u32(spec.arguments[0]) ? eval_u32(spec.arguments[1]) : eval_u32(spec.arguments[2]);
		break;

	case OpUMod:
	{
		uint32_t a = eval_u32(spec.arguments[0]);
		uint32_t b = eval_u32(spec.arguments[1]);
		if (b == 0)
			SPIRV_CROSS_THROW("Undefined behavior in UMod, b == 0.\n" );
		value = a % b;
		break;
	}

	case OpSRem:
	{
		// SRem: result takes the sign of operand a, matching C's % operator.
		auto a = int32_t(eval_u32(spec.arguments[0]));
		auto b = int32_t(eval_u32(spec.arguments[1]));
		if (b == 0)
			SPIRV_CROSS_THROW("Undefined behavior in SRem, b == 0.\n" );
		value = a % b;
		break;
	}

	case OpSMod:
	{
		// SMod: result takes the sign of operand b, unlike C's % operator,
		// so the raw remainder is corrected below.
		auto a = int32_t(eval_u32(spec.arguments[0]));
		auto b = int32_t(eval_u32(spec.arguments[1]));
		if (b == 0)
			SPIRV_CROSS_THROW("Undefined behavior in SMod, b == 0.\n" );
		auto v = a % b;

		// Makes sure we match the sign of b, not a.
		if ((b < 0 && v > 0) || (b > 0 && v < 0))
			v += b;
		value = v;
		break;
	}

	case OpUDiv:
	{
		uint32_t a = eval_u32(spec.arguments[0]);
		uint32_t b = eval_u32(spec.arguments[1]);
		if (b == 0)
			SPIRV_CROSS_THROW("Undefined behavior in UDiv, b == 0.\n" );
		value = a / b;
		break;
	}

	case OpSDiv:
	{
		auto a = int32_t(eval_u32(spec.arguments[0]));
		auto b = int32_t(eval_u32(spec.arguments[1]));
		if (b == 0)
			SPIRV_CROSS_THROW("Undefined behavior in SDiv, b == 0.\n" );
		value = a / b;
		break;
	}

	default:
		SPIRV_CROSS_THROW("Unsupported spec constant opcode for evaluation.\n" );
	}

	return value;
}
1977 | |
1978 | uint32_t Compiler::evaluate_constant_u32(uint32_t id) const |
1979 | { |
1980 | if (const auto *c = maybe_get<SPIRConstant>(id)) |
1981 | return c->scalar(); |
1982 | else |
1983 | return evaluate_spec_constant_u32(get<SPIRConstantOp>(id)); |
1984 | } |
1985 | |
// Returns the declared size in bytes of struct member 'index', taking
// ArrayStride/MatrixStride decorations into account.
// Opaque types (images, samplers, ...) have no well-defined size and throw.
size_t Compiler::get_declared_struct_member_size(const SPIRType &struct_type, uint32_t index) const
{
	if (struct_type.member_types.empty())
		SPIRV_CROSS_THROW("Declared struct in block cannot be empty." );

	auto &flags = get_member_decoration_bitset(struct_type.self, index);
	auto &type = get<SPIRType>(struct_type.member_types[index]);

	switch (type.basetype)
	{
	case SPIRType::Unknown:
	case SPIRType::Void:
	case SPIRType::Boolean: // Bools are purely logical, and cannot be used for externally visible types.
	case SPIRType::AtomicCounter:
	case SPIRType::Image:
	case SPIRType::SampledImage:
	case SPIRType::Sampler:
		SPIRV_CROSS_THROW("Querying size for object with opaque size." );

	default:
		break;
	}

	if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer)
	{
		// Check if this is a top-level pointer type, and not an array of pointers.
		// PhysicalStorageBuffer pointers are 64-bit, i.e. 8 bytes.
		if (type.pointer_depth > get<SPIRType>(type.parent_type).pointer_depth)
			return 8;
	}

	if (!type.array.empty())
	{
		// For arrays, we can use ArrayStride to get an easy check.
		// A non-literal dimension is a spec constant which must be evaluated.
		bool array_size_literal = type.array_size_literal.back();
		uint32_t array_size = array_size_literal ? type.array.back() : evaluate_constant_u32(type.array.back());
		return type_struct_member_array_stride(struct_type, index) * array_size;
	}
	else if (type.basetype == SPIRType::Struct)
	{
		// Nested struct: defer to the struct-size computation.
		return get_declared_struct_size(type);
	}
	else
	{
		unsigned vecsize = type.vecsize;
		unsigned columns = type.columns;

		// Vectors.
		if (columns == 1)
		{
			size_t component_size = type.width / 8;
			return vecsize * component_size;
		}
		else
		{
			// Matrices: size depends on MatrixStride and majorness.
			uint32_t matrix_stride = type_struct_member_matrix_stride(struct_type, index);

			// Per SPIR-V spec, matrices must be tightly packed and aligned up for vec3 accesses.
			if (flags.get(DecorationRowMajor))
				return matrix_stride * vecsize;
			else if (flags.get(DecorationColMajor))
				return matrix_stride * columns;
			else
				SPIRV_CROSS_THROW("Either row-major or column-major must be declared for matrices." );
		}
	}
}
2052 | |
// Opcode visitor which records which struct members of a tracked buffer
// variable are statically accessed via access chains.
// Returning true continues traversal; false aborts (invalid SPIR-V).
bool Compiler::BufferAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
{
	if (opcode != OpAccessChain && opcode != OpInBoundsAccessChain && opcode != OpPtrAccessChain)
		return true;

	bool ptr_chain = (opcode == OpPtrAccessChain);

	// Invalid SPIR-V.
	// OpPtrAccessChain carries an extra Element operand before the indices.
	if (length < (ptr_chain ? 5u : 4u))
		return false;

	// args[2] is the base pointer; skip chains rooted at other variables.
	if (args[2] != id)
		return true;

	// Don't bother traversing the entire access chain tree yet.
	// If we access a struct member, assume we access the entire member.
	uint32_t index = compiler.get<SPIRConstant>(args[ptr_chain ? 4 : 3]).scalar();

	// Seen this index already.
	if (seen.find(index) != end(seen))
		return true;
	seen.insert(index);

	auto &type = compiler.expression_type(id);
	uint32_t offset = compiler.type_struct_member_offset(type, index);

	size_t range;
	// If we have another member in the struct, deduce the range by looking at the next member.
	// This is okay since structs in SPIR-V can have padding, but Offset decoration must be
	// monotonically increasing.
	// Of course, this doesn't take into account if the SPIR-V for some reason decided to add
	// very large amounts of padding, but that's not really a big deal.
	if (index + 1 < type.member_types.size())
	{
		range = compiler.type_struct_member_offset(type, index + 1) - offset;
	}
	else
	{
		// No padding, so just deduce it from the size of the member directly.
		range = compiler.get_declared_struct_member_size(type, index);
	}

	ranges.push_back({ index, offset, range });
	return true;
}
2098 | |
2099 | SmallVector<BufferRange> Compiler::get_active_buffer_ranges(VariableID id) const |
2100 | { |
2101 | SmallVector<BufferRange> ranges; |
2102 | BufferAccessHandler handler(*this, ranges, id); |
2103 | traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler); |
2104 | return ranges; |
2105 | } |
2106 | |
2107 | bool Compiler::types_are_logically_equivalent(const SPIRType &a, const SPIRType &b) const |
2108 | { |
2109 | if (a.basetype != b.basetype) |
2110 | return false; |
2111 | if (a.width != b.width) |
2112 | return false; |
2113 | if (a.vecsize != b.vecsize) |
2114 | return false; |
2115 | if (a.columns != b.columns) |
2116 | return false; |
2117 | if (a.array.size() != b.array.size()) |
2118 | return false; |
2119 | |
2120 | size_t array_count = a.array.size(); |
2121 | if (array_count && memcmp(a.array.data(), b.array.data(), array_count * sizeof(uint32_t)) != 0) |
2122 | return false; |
2123 | |
2124 | if (a.basetype == SPIRType::Image || a.basetype == SPIRType::SampledImage) |
2125 | { |
2126 | if (memcmp(&a.image, &b.image, sizeof(SPIRType::Image)) != 0) |
2127 | return false; |
2128 | } |
2129 | |
2130 | if (a.member_types.size() != b.member_types.size()) |
2131 | return false; |
2132 | |
2133 | size_t member_types = a.member_types.size(); |
2134 | for (size_t i = 0; i < member_types; i++) |
2135 | { |
2136 | if (!types_are_logically_equivalent(get<SPIRType>(a.member_types[i]), get<SPIRType>(b.member_types[i]))) |
2137 | return false; |
2138 | } |
2139 | |
2140 | return true; |
2141 | } |
2142 | |
2143 | const Bitset &Compiler::get_execution_mode_bitset() const |
2144 | { |
2145 | return get_entry_point().flags; |
2146 | } |
2147 | |
2148 | void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t arg1, uint32_t arg2) |
2149 | { |
2150 | auto &execution = get_entry_point(); |
2151 | |
2152 | execution.flags.set(mode); |
2153 | switch (mode) |
2154 | { |
2155 | case ExecutionModeLocalSize: |
2156 | execution.workgroup_size.x = arg0; |
2157 | execution.workgroup_size.y = arg1; |
2158 | execution.workgroup_size.z = arg2; |
2159 | break; |
2160 | |
2161 | case ExecutionModeLocalSizeId: |
2162 | execution.workgroup_size.id_x = arg0; |
2163 | execution.workgroup_size.id_y = arg1; |
2164 | execution.workgroup_size.id_z = arg2; |
2165 | break; |
2166 | |
2167 | case ExecutionModeInvocations: |
2168 | execution.invocations = arg0; |
2169 | break; |
2170 | |
2171 | case ExecutionModeOutputVertices: |
2172 | execution.output_vertices = arg0; |
2173 | break; |
2174 | |
2175 | default: |
2176 | break; |
2177 | } |
2178 | } |
2179 | |
2180 | void Compiler::unset_execution_mode(ExecutionMode mode) |
2181 | { |
2182 | auto &execution = get_entry_point(); |
2183 | execution.flags.clear(mode); |
2184 | } |
2185 | |
2186 | uint32_t Compiler::get_work_group_size_specialization_constants(SpecializationConstant &x, SpecializationConstant &y, |
2187 | SpecializationConstant &z) const |
2188 | { |
2189 | auto &execution = get_entry_point(); |
2190 | x = { 0, 0 }; |
2191 | y = { 0, 0 }; |
2192 | z = { 0, 0 }; |
2193 | |
2194 | // WorkgroupSize builtin takes precedence over LocalSize / LocalSizeId. |
2195 | if (execution.workgroup_size.constant != 0) |
2196 | { |
2197 | auto &c = get<SPIRConstant>(execution.workgroup_size.constant); |
2198 | |
2199 | if (c.m.c[0].id[0] != ID(0)) |
2200 | { |
2201 | x.id = c.m.c[0].id[0]; |
2202 | x.constant_id = get_decoration(c.m.c[0].id[0], DecorationSpecId); |
2203 | } |
2204 | |
2205 | if (c.m.c[0].id[1] != ID(0)) |
2206 | { |
2207 | y.id = c.m.c[0].id[1]; |
2208 | y.constant_id = get_decoration(c.m.c[0].id[1], DecorationSpecId); |
2209 | } |
2210 | |
2211 | if (c.m.c[0].id[2] != ID(0)) |
2212 | { |
2213 | z.id = c.m.c[0].id[2]; |
2214 | z.constant_id = get_decoration(c.m.c[0].id[2], DecorationSpecId); |
2215 | } |
2216 | } |
2217 | else if (execution.flags.get(ExecutionModeLocalSizeId)) |
2218 | { |
2219 | auto &cx = get<SPIRConstant>(execution.workgroup_size.id_x); |
2220 | if (cx.specialization) |
2221 | { |
2222 | x.id = execution.workgroup_size.id_x; |
2223 | x.constant_id = get_decoration(execution.workgroup_size.id_x, DecorationSpecId); |
2224 | } |
2225 | |
2226 | auto &cy = get<SPIRConstant>(execution.workgroup_size.id_y); |
2227 | if (cy.specialization) |
2228 | { |
2229 | y.id = execution.workgroup_size.id_y; |
2230 | y.constant_id = get_decoration(execution.workgroup_size.id_y, DecorationSpecId); |
2231 | } |
2232 | |
2233 | auto &cz = get<SPIRConstant>(execution.workgroup_size.id_z); |
2234 | if (cz.specialization) |
2235 | { |
2236 | z.id = execution.workgroup_size.id_z; |
2237 | z.constant_id = get_decoration(execution.workgroup_size.id_z, DecorationSpecId); |
2238 | } |
2239 | } |
2240 | |
2241 | return execution.workgroup_size.constant; |
2242 | } |
2243 | |
2244 | uint32_t Compiler::get_execution_mode_argument(spv::ExecutionMode mode, uint32_t index) const |
2245 | { |
2246 | auto &execution = get_entry_point(); |
2247 | switch (mode) |
2248 | { |
2249 | case ExecutionModeLocalSizeId: |
2250 | if (execution.flags.get(ExecutionModeLocalSizeId)) |
2251 | { |
2252 | switch (index) |
2253 | { |
2254 | case 0: |
2255 | return execution.workgroup_size.id_x; |
2256 | case 1: |
2257 | return execution.workgroup_size.id_y; |
2258 | case 2: |
2259 | return execution.workgroup_size.id_z; |
2260 | default: |
2261 | return 0; |
2262 | } |
2263 | } |
2264 | else |
2265 | return 0; |
2266 | |
2267 | case ExecutionModeLocalSize: |
2268 | switch (index) |
2269 | { |
2270 | case 0: |
2271 | if (execution.flags.get(ExecutionModeLocalSizeId) && execution.workgroup_size.id_x != 0) |
2272 | return get<SPIRConstant>(execution.workgroup_size.id_x).scalar(); |
2273 | else |
2274 | return execution.workgroup_size.x; |
2275 | case 1: |
2276 | if (execution.flags.get(ExecutionModeLocalSizeId) && execution.workgroup_size.id_y != 0) |
2277 | return get<SPIRConstant>(execution.workgroup_size.id_y).scalar(); |
2278 | else |
2279 | return execution.workgroup_size.y; |
2280 | case 2: |
2281 | if (execution.flags.get(ExecutionModeLocalSizeId) && execution.workgroup_size.id_z != 0) |
2282 | return get<SPIRConstant>(execution.workgroup_size.id_z).scalar(); |
2283 | else |
2284 | return execution.workgroup_size.z; |
2285 | default: |
2286 | return 0; |
2287 | } |
2288 | |
2289 | case ExecutionModeInvocations: |
2290 | return execution.invocations; |
2291 | |
2292 | case ExecutionModeOutputVertices: |
2293 | return execution.output_vertices; |
2294 | |
2295 | default: |
2296 | return 0; |
2297 | } |
2298 | } |
2299 | |
2300 | ExecutionModel Compiler::get_execution_model() const |
2301 | { |
2302 | auto &execution = get_entry_point(); |
2303 | return execution.model; |
2304 | } |
2305 | |
2306 | bool Compiler::is_tessellation_shader(ExecutionModel model) |
2307 | { |
2308 | return model == ExecutionModelTessellationControl || model == ExecutionModelTessellationEvaluation; |
2309 | } |
2310 | |
2311 | bool Compiler::is_vertex_like_shader() const |
2312 | { |
2313 | auto model = get_execution_model(); |
2314 | return model == ExecutionModelVertex || model == ExecutionModelGeometry || |
2315 | model == ExecutionModelTessellationControl || model == ExecutionModelTessellationEvaluation; |
2316 | } |
2317 | |
2318 | bool Compiler::is_tessellation_shader() const |
2319 | { |
2320 | return is_tessellation_shader(get_execution_model()); |
2321 | } |
2322 | |
2323 | void Compiler::set_remapped_variable_state(VariableID id, bool remap_enable) |
2324 | { |
2325 | get<SPIRVariable>(id).remapped_variable = remap_enable; |
2326 | } |
2327 | |
2328 | bool Compiler::get_remapped_variable_state(VariableID id) const |
2329 | { |
2330 | return get<SPIRVariable>(id).remapped_variable; |
2331 | } |
2332 | |
2333 | void Compiler::set_subpass_input_remapped_components(VariableID id, uint32_t components) |
2334 | { |
2335 | get<SPIRVariable>(id).remapped_components = components; |
2336 | } |
2337 | |
2338 | uint32_t Compiler::get_subpass_input_remapped_components(VariableID id) const |
2339 | { |
2340 | return get<SPIRVariable>(id).remapped_components; |
2341 | } |
2342 | |
2343 | void Compiler::add_implied_read_expression(SPIRExpression &e, uint32_t source) |
2344 | { |
2345 | auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source)); |
2346 | if (itr == end(e.implied_read_expressions)) |
2347 | e.implied_read_expressions.push_back(source); |
2348 | } |
2349 | |
2350 | void Compiler::add_implied_read_expression(SPIRAccessChain &e, uint32_t source) |
2351 | { |
2352 | auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source)); |
2353 | if (itr == end(e.implied_read_expressions)) |
2354 | e.implied_read_expressions.push_back(source); |
2355 | } |
2356 | |
2357 | void Compiler::inherit_expression_dependencies(uint32_t dst, uint32_t source_expression) |
2358 | { |
2359 | // Don't inherit any expression dependencies if the expression in dst |
2360 | // is not a forwarded temporary. |
2361 | if (forwarded_temporaries.find(dst) == end(forwarded_temporaries) || |
2362 | forced_temporaries.find(dst) != end(forced_temporaries)) |
2363 | { |
2364 | return; |
2365 | } |
2366 | |
2367 | auto &e = get<SPIRExpression>(dst); |
2368 | auto *phi = maybe_get<SPIRVariable>(source_expression); |
2369 | if (phi && phi->phi_variable) |
2370 | { |
2371 | // We have used a phi variable, which can change at the end of the block, |
2372 | // so make sure we take a dependency on this phi variable. |
2373 | phi->dependees.push_back(dst); |
2374 | } |
2375 | |
2376 | auto *s = maybe_get<SPIRExpression>(source_expression); |
2377 | if (!s) |
2378 | return; |
2379 | |
2380 | auto &e_deps = e.expression_dependencies; |
2381 | auto &s_deps = s->expression_dependencies; |
2382 | |
2383 | // If we depend on a expression, we also depend on all sub-dependencies from source. |
2384 | e_deps.push_back(source_expression); |
2385 | e_deps.insert(end(e_deps), begin(s_deps), end(s_deps)); |
2386 | |
2387 | // Eliminate duplicated dependencies. |
2388 | sort(begin(e_deps), end(e_deps)); |
2389 | e_deps.erase(unique(begin(e_deps), end(e_deps)), end(e_deps)); |
2390 | } |
2391 | |
2392 | SmallVector<EntryPoint> Compiler::get_entry_points_and_stages() const |
2393 | { |
2394 | SmallVector<EntryPoint> entries; |
2395 | for (auto &entry : ir.entry_points) |
2396 | entries.push_back({ entry.second.orig_name, entry.second.model }); |
2397 | return entries; |
2398 | } |
2399 | |
2400 | void Compiler::rename_entry_point(const std::string &old_name, const std::string &new_name, spv::ExecutionModel model) |
2401 | { |
2402 | auto &entry = get_entry_point(old_name, model); |
2403 | entry.orig_name = new_name; |
2404 | entry.name = new_name; |
2405 | } |
2406 | |
2407 | void Compiler::set_entry_point(const std::string &name, spv::ExecutionModel model) |
2408 | { |
2409 | auto &entry = get_entry_point(name, model); |
2410 | ir.default_entry_point = entry.self; |
2411 | } |
2412 | |
2413 | SPIREntryPoint &Compiler::get_first_entry_point(const std::string &name) |
2414 | { |
2415 | auto itr = find_if( |
2416 | begin(ir.entry_points), end(ir.entry_points), |
2417 | [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool { return entry.second.orig_name == name; }); |
2418 | |
2419 | if (itr == end(ir.entry_points)) |
2420 | SPIRV_CROSS_THROW("Entry point does not exist." ); |
2421 | |
2422 | return itr->second; |
2423 | } |
2424 | |
2425 | const SPIREntryPoint &Compiler::get_first_entry_point(const std::string &name) const |
2426 | { |
2427 | auto itr = find_if( |
2428 | begin(ir.entry_points), end(ir.entry_points), |
2429 | [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool { return entry.second.orig_name == name; }); |
2430 | |
2431 | if (itr == end(ir.entry_points)) |
2432 | SPIRV_CROSS_THROW("Entry point does not exist." ); |
2433 | |
2434 | return itr->second; |
2435 | } |
2436 | |
2437 | SPIREntryPoint &Compiler::get_entry_point(const std::string &name, ExecutionModel model) |
2438 | { |
2439 | auto itr = find_if(begin(ir.entry_points), end(ir.entry_points), |
2440 | [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool { |
2441 | return entry.second.orig_name == name && entry.second.model == model; |
2442 | }); |
2443 | |
2444 | if (itr == end(ir.entry_points)) |
2445 | SPIRV_CROSS_THROW("Entry point does not exist." ); |
2446 | |
2447 | return itr->second; |
2448 | } |
2449 | |
2450 | const SPIREntryPoint &Compiler::get_entry_point(const std::string &name, ExecutionModel model) const |
2451 | { |
2452 | auto itr = find_if(begin(ir.entry_points), end(ir.entry_points), |
2453 | [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool { |
2454 | return entry.second.orig_name == name && entry.second.model == model; |
2455 | }); |
2456 | |
2457 | if (itr == end(ir.entry_points)) |
2458 | SPIRV_CROSS_THROW("Entry point does not exist." ); |
2459 | |
2460 | return itr->second; |
2461 | } |
2462 | |
2463 | const string &Compiler::get_cleansed_entry_point_name(const std::string &name, ExecutionModel model) const |
2464 | { |
2465 | return get_entry_point(name, model).name; |
2466 | } |
2467 | |
2468 | const SPIREntryPoint &Compiler::get_entry_point() const |
2469 | { |
2470 | return ir.entry_points.find(ir.default_entry_point)->second; |
2471 | } |
2472 | |
2473 | SPIREntryPoint &Compiler::get_entry_point() |
2474 | { |
2475 | return ir.entry_points.find(ir.default_entry_point)->second; |
2476 | } |
2477 | |
2478 | bool Compiler::interface_variable_exists_in_entry_point(uint32_t id) const |
2479 | { |
2480 | auto &var = get<SPIRVariable>(id); |
2481 | |
2482 | if (ir.get_spirv_version() < 0x10400) |
2483 | { |
2484 | if (var.storage != StorageClassInput && var.storage != StorageClassOutput && |
2485 | var.storage != StorageClassUniformConstant) |
2486 | SPIRV_CROSS_THROW("Only Input, Output variables and Uniform constants are part of a shader linking interface." ); |
2487 | |
2488 | // This is to avoid potential problems with very old glslang versions which did |
2489 | // not emit input/output interfaces properly. |
2490 | // We can assume they only had a single entry point, and single entry point |
2491 | // shaders could easily be assumed to use every interface variable anyways. |
2492 | if (ir.entry_points.size() <= 1) |
2493 | return true; |
2494 | } |
2495 | |
2496 | // In SPIR-V 1.4 and later, all global resource variables must be present. |
2497 | |
2498 | auto &execution = get_entry_point(); |
2499 | return find(begin(execution.interface_variables), end(execution.interface_variables), VariableID(id)) != |
2500 | end(execution.interface_variables); |
2501 | } |
2502 | |
2503 | void Compiler::CombinedImageSamplerHandler::push_remap_parameters(const SPIRFunction &func, const uint32_t *args, |
2504 | uint32_t length) |
2505 | { |
2506 | // If possible, pipe through a remapping table so that parameters know |
2507 | // which variables they actually bind to in this scope. |
2508 | unordered_map<uint32_t, uint32_t> remapping; |
2509 | for (uint32_t i = 0; i < length; i++) |
2510 | remapping[func.arguments[i].id] = remap_parameter(args[i]); |
2511 | parameter_remapping.push(move(remapping)); |
2512 | } |
2513 | |
2514 | void Compiler::CombinedImageSamplerHandler::pop_remap_parameters() |
2515 | { |
2516 | parameter_remapping.pop(); |
2517 | } |
2518 | |
2519 | uint32_t Compiler::CombinedImageSamplerHandler::remap_parameter(uint32_t id) |
2520 | { |
2521 | auto *var = compiler.maybe_get_backing_variable(id); |
2522 | if (var) |
2523 | id = var->self; |
2524 | |
2525 | if (parameter_remapping.empty()) |
2526 | return id; |
2527 | |
2528 | auto &remapping = parameter_remapping.top(); |
2529 | auto itr = remapping.find(id); |
2530 | if (itr != end(remapping)) |
2531 | return itr->second; |
2532 | else |
2533 | return id; |
2534 | } |
2535 | |
2536 | bool Compiler::CombinedImageSamplerHandler::begin_function_scope(const uint32_t *args, uint32_t length) |
2537 | { |
2538 | if (length < 3) |
2539 | return false; |
2540 | |
2541 | auto &callee = compiler.get<SPIRFunction>(args[2]); |
2542 | args += 3; |
2543 | length -= 3; |
2544 | push_remap_parameters(callee, args, length); |
2545 | functions.push(&callee); |
2546 | return true; |
2547 | } |
2548 | |
// Leaving an OpFunctionCall. Propagates any combined image/sampler
// parameters the callee discovered up into the caller, resolving
// parameter-index references against the actual call arguments.
bool Compiler::CombinedImageSamplerHandler::end_function_scope(const uint32_t *args, uint32_t length)
{
	if (length < 3)
		return false;

	auto &callee = compiler.get<SPIRFunction>(args[2]);
	// Skip result type / result id / function id; args now points at the call arguments.
	args += 3;

	// There are two types of cases we have to handle,
	// a callee might call sampler2D(texture2D, sampler) directly where
	// one or more parameters originate from parameters.
	// Alternatively, we need to provide combined image samplers to our callees,
	// and in this case we need to add those as well.

	pop_remap_parameters();

	// Our callee has now been processed at least once.
	// No point in doing it again.
	callee.do_combined_parameters = false;

	auto &params = functions.top()->combined_parameters;
	functions.pop();
	if (functions.empty())
		return true;

	auto &caller = *functions.top();
	if (caller.do_combined_parameters)
	{
		for (auto &param : params)
		{
			// For non-global parameters, image_id/sampler_id hold an *argument
			// index* into this call, not a variable ID; resolve via args[].
			VariableID image_id = param.global_image ? param.image_id : VariableID(args[param.image_id]);
			VariableID sampler_id = param.global_sampler ? param.sampler_id : VariableID(args[param.sampler_id]);

			// Canonicalize to the backing variables before registering.
			auto *i = compiler.maybe_get_backing_variable(image_id);
			auto *s = compiler.maybe_get_backing_variable(sampler_id);
			if (i)
				image_id = i->self;
			if (s)
				sampler_id = s->self;

			register_combined_image_sampler(caller, 0, image_id, sampler_id, param.depth);
		}
	}

	return true;
}
2595 | |
// Registers a combined image/sampler pair used by 'caller'. If either the
// image or the sampler comes in as a function parameter, the caller must
// receive the combined sampler as a (shadow) parameter too; a new
// SampledImage type, pointer type and variable are synthesized for it.
// combined_module_id is the OpSampledImage result ID if one exists (0 otherwise),
// used only to inherit RelaxedPrecision.
void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIRFunction &caller,
                                                                            VariableID combined_module_id,
                                                                            VariableID image_id, VariableID sampler_id,
                                                                            bool depth)
{
	// We now have a texture ID and a sampler ID which will either be found as a global
	// or a parameter in our own function. If both are global, they will not need a parameter,
	// otherwise, add it to our list.
	SPIRFunction::CombinedImageSamplerParameter param = {
		0u, image_id, sampler_id, true, true, depth,
	};

	auto texture_itr = find_if(begin(caller.arguments), end(caller.arguments),
	                           [image_id](const SPIRFunction::Parameter &p) { return p.id == image_id; });
	auto sampler_itr = find_if(begin(caller.arguments), end(caller.arguments),
	                           [sampler_id](const SPIRFunction::Parameter &p) { return p.id == sampler_id; });

	// When the image/sampler is a parameter, store its argument *index*
	// instead of its ID, so callers can resolve it per call site.
	if (texture_itr != end(caller.arguments))
	{
		param.global_image = false;
		param.image_id = uint32_t(texture_itr - begin(caller.arguments));
	}

	if (sampler_itr != end(caller.arguments))
	{
		param.global_sampler = false;
		param.sampler_id = uint32_t(sampler_itr - begin(caller.arguments));
	}

	// Both global: nothing needs to be threaded through this function.
	if (param.global_image && param.global_sampler)
		return;

	auto itr = find_if(begin(caller.combined_parameters), end(caller.combined_parameters),
	                   [&param](const SPIRFunction::CombinedImageSamplerParameter &p) {
		                   return param.image_id == p.image_id && param.sampler_id == p.sampler_id &&
		                          param.global_image == p.global_image && param.global_sampler == p.global_sampler;
	                   });

	if (itr == end(caller.combined_parameters))
	{
		// Allocate three fresh IDs: sampled-image type, pointer type, variable.
		uint32_t id = compiler.ir.increase_bound_by(3);
		auto type_id = id + 0;
		auto ptr_type_id = id + 1;
		auto combined_id = id + 2;
		auto &base = compiler.expression_type(image_id);
		auto &type = compiler.set<SPIRType>(type_id);
		auto &ptr_type = compiler.set<SPIRType>(ptr_type_id);

		// The combined type mirrors the image type, promoted to SampledImage.
		type = base;
		type.self = type_id;
		type.basetype = SPIRType::SampledImage;
		type.pointer = false;
		type.storage = StorageClassGeneric;
		type.image.depth = depth;

		ptr_type = type;
		ptr_type.pointer = true;
		ptr_type.storage = StorageClassUniformConstant;
		ptr_type.parent_type = type_id;

		// Build new variable.
		compiler.set<SPIRVariable>(combined_id, ptr_type_id, StorageClassFunction, 0);

		// Inherit RelaxedPrecision.
		// If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration.
		bool relaxed_precision =
		    compiler.has_decoration(sampler_id, DecorationRelaxedPrecision) ||
		    compiler.has_decoration(image_id, DecorationRelaxedPrecision) ||
		    (combined_module_id && compiler.has_decoration(combined_module_id, DecorationRelaxedPrecision));

		if (relaxed_precision)
			compiler.set_decoration(combined_id, DecorationRelaxedPrecision);

		param.id = combined_id;

		compiler.set_name(combined_id,
		                  join("SPIRV_Cross_Combined" , compiler.to_name(image_id), compiler.to_name(sampler_id)));

		caller.combined_parameters.push_back(param);
		caller.shadow_arguments.push_back({ ptr_type_id, combined_id, 0u, 0u, true });
	}
}
2678 | |
2679 | bool Compiler::DummySamplerForCombinedImageHandler::handle(Op opcode, const uint32_t *args, uint32_t length) |
2680 | { |
2681 | if (need_dummy_sampler) |
2682 | { |
2683 | // No need to traverse further, we know the result. |
2684 | return false; |
2685 | } |
2686 | |
2687 | switch (opcode) |
2688 | { |
2689 | case OpLoad: |
2690 | { |
2691 | if (length < 3) |
2692 | return false; |
2693 | |
2694 | uint32_t result_type = args[0]; |
2695 | |
2696 | auto &type = compiler.get<SPIRType>(result_type); |
2697 | bool separate_image = |
2698 | type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer; |
2699 | |
2700 | // If not separate image, don't bother. |
2701 | if (!separate_image) |
2702 | return true; |
2703 | |
2704 | uint32_t id = args[1]; |
2705 | uint32_t ptr = args[2]; |
2706 | compiler.set<SPIRExpression>(id, "" , result_type, true); |
2707 | compiler.register_read(id, ptr, true); |
2708 | break; |
2709 | } |
2710 | |
2711 | case OpImageFetch: |
2712 | case OpImageQuerySizeLod: |
2713 | case OpImageQuerySize: |
2714 | case OpImageQueryLevels: |
2715 | case OpImageQuerySamples: |
2716 | { |
2717 | // If we are fetching or querying LOD from a plain OpTypeImage, we must pre-combine with our dummy sampler. |
2718 | auto *var = compiler.maybe_get_backing_variable(args[2]); |
2719 | if (var) |
2720 | { |
2721 | auto &type = compiler.get<SPIRType>(var->basetype); |
2722 | if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer) |
2723 | need_dummy_sampler = true; |
2724 | } |
2725 | |
2726 | break; |
2727 | } |
2728 | |
2729 | case OpInBoundsAccessChain: |
2730 | case OpAccessChain: |
2731 | case OpPtrAccessChain: |
2732 | { |
2733 | if (length < 3) |
2734 | return false; |
2735 | |
2736 | uint32_t result_type = args[0]; |
2737 | auto &type = compiler.get<SPIRType>(result_type); |
2738 | bool separate_image = |
2739 | type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer; |
2740 | if (!separate_image) |
2741 | return true; |
2742 | |
2743 | uint32_t id = args[1]; |
2744 | uint32_t ptr = args[2]; |
2745 | compiler.set<SPIRExpression>(id, "" , result_type, true); |
2746 | compiler.register_read(id, ptr, true); |
2747 | |
2748 | // Other backends might use SPIRAccessChain for this later. |
2749 | compiler.ir.ids[id].set_allow_type_rewrite(); |
2750 | break; |
2751 | } |
2752 | |
2753 | default: |
2754 | break; |
2755 | } |
2756 | |
2757 | return true; |
2758 | } |
2759 | |
// Discovers every (image, sampler) pair which is combined in the shader — either
// explicitly via OpSampledImage, or implicitly via sampler-less fetch/query which must
// use the dummy sampler — and registers a synthesized combined image-sampler variable
// for each unique pair. Also records combined parameters on the current callee so they
// can be propagated down the call stack.
bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
{
	// We need to figure out where samplers and images are loaded from, so do only the bare bones compilation we need.
	bool is_fetch = false;

	switch (opcode)
	{
	case OpLoad:
	{
		if (length < 3)
			return false;

		uint32_t result_type = args[0];

		auto &type = compiler.get<SPIRType>(result_type);
		bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
		bool separate_sampler = type.basetype == SPIRType::Sampler;

		// If not separate image or sampler, don't bother.
		if (!separate_image && !separate_sampler)
			return true;

		// Register the loaded ID as an expression so remap_parameter can later trace it
		// back to its backing variable.
		uint32_t id = args[1];
		uint32_t ptr = args[2];
		compiler.set<SPIRExpression>(id, "" , result_type, true);
		compiler.register_read(id, ptr, true);
		return true;
	}

	case OpInBoundsAccessChain:
	case OpAccessChain:
	case OpPtrAccessChain:
	{
		if (length < 3)
			return false;

		// Technically, it is possible to have arrays of textures and arrays of samplers and combine them, but this becomes essentially
		// impossible to implement, since we don't know which concrete sampler we are accessing.
		// One potential way is to create a combinatorial explosion where N textures and M samplers are combined into N * M sampler2Ds,
		// but this seems ridiculously complicated for a problem which is easy to work around.
		// Checking access chains like this assumes we don't have samplers or textures inside uniform structs, but this makes no sense.

		uint32_t result_type = args[0];

		auto &type = compiler.get<SPIRType>(result_type);
		bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
		bool separate_sampler = type.basetype == SPIRType::Sampler;
		if (separate_sampler)
			SPIRV_CROSS_THROW(
			    "Attempting to use arrays or structs of separate samplers. This is not possible to statically "
			    "remap to plain GLSL." );

		if (separate_image)
		{
			// Arrays of separate images are fine; track the chain result like a load.
			uint32_t id = args[1];
			uint32_t ptr = args[2];
			compiler.set<SPIRExpression>(id, "" , result_type, true);
			compiler.register_read(id, ptr, true);
		}
		return true;
	}

	case OpImageFetch:
	case OpImageQuerySizeLod:
	case OpImageQuerySize:
	case OpImageQueryLevels:
	case OpImageQuerySamples:
	{
		// If we are fetching from a plain OpTypeImage or querying LOD, we must pre-combine with our dummy sampler.
		auto *var = compiler.maybe_get_backing_variable(args[2]);
		if (!var)
			return true;

		auto &type = compiler.get<SPIRType>(var->basetype);
		if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
		{
			if (compiler.dummy_sampler_id == 0)
				SPIRV_CROSS_THROW("texelFetch without sampler was found, but no dummy sampler has been created with "
				                  "build_dummy_sampler_for_combined_images()." );

			// Do it outside.
			is_fetch = true;
			break;
		}

		return true;
	}

	case OpSampledImage:
		// Do it outside.
		break;

	default:
		return true;
	}

	// Registers sampler2D calls used in case they are parameters so
	// that their callees know which combined image samplers to propagate down the call stack.
	if (!functions.empty())
	{
		auto &callee = *functions.top();
		if (callee.do_combined_parameters)
		{
			uint32_t image_id = args[2];

			auto *image = compiler.maybe_get_backing_variable(image_id);
			if (image)
				image_id = image->self;

			// For a fetch, the sampler operand does not exist; substitute the dummy sampler.
			uint32_t sampler_id = is_fetch ? compiler.dummy_sampler_id : args[3];
			auto *sampler = compiler.maybe_get_backing_variable(sampler_id);
			if (sampler)
				sampler_id = sampler->self;

			uint32_t combined_id = args[1];

			auto &combined_type = compiler.get<SPIRType>(args[0]);
			register_combined_image_sampler(callee, combined_id, image_id, sampler_id, combined_type.image.depth);
		}
	}

	// For function calls, we need to remap IDs which are function parameters into global variables.
	// This information is statically known from the current place in the call stack.
	// Function parameters are not necessarily pointers, so if we don't have a backing variable, remapping will know
	// which backing variable the image/sample came from.
	VariableID image_id = remap_parameter(args[2]);
	VariableID sampler_id = is_fetch ? compiler.dummy_sampler_id : remap_parameter(args[3]);

	// Only create one combined variable per unique (image, sampler) pair.
	auto itr = find_if(begin(compiler.combined_image_samplers), end(compiler.combined_image_samplers),
	                   [image_id, sampler_id](const CombinedImageSampler &combined) {
		                   return combined.image_id == image_id && combined.sampler_id == sampler_id;
	                   });

	if (itr == end(compiler.combined_image_samplers))
	{
		uint32_t sampled_type;
		uint32_t combined_module_id;
		if (is_fetch)
		{
			// Have to invent the sampled image type.
			sampled_type = compiler.ir.increase_bound_by(1);
			auto &type = compiler.set<SPIRType>(sampled_type);
			type = compiler.expression_type(args[2]);
			type.self = sampled_type;
			type.basetype = SPIRType::SampledImage;
			type.image.depth = false;
			combined_module_id = 0;
		}
		else
		{
			sampled_type = args[0];
			combined_module_id = args[1];
		}

		auto id = compiler.ir.increase_bound_by(2);
		auto type_id = id + 0;
		auto combined_id = id + 1;

		// Make a new type, pointer to OpTypeSampledImage, so we can make a variable of this type.
		// We will probably have this type lying around, but it doesn't hurt to make duplicates for internal purposes.
		auto &type = compiler.set<SPIRType>(type_id);
		auto &base = compiler.get<SPIRType>(sampled_type);
		type = base;
		type.pointer = true;
		type.storage = StorageClassUniformConstant;
		type.parent_type = type_id;

		// Build new variable.
		compiler.set<SPIRVariable>(combined_id, type_id, StorageClassUniformConstant, 0);

		// Inherit RelaxedPrecision (and potentially other useful flags if deemed relevant).
		// If any of OpSampledImage, underlying image or sampler are marked, inherit the decoration.
		bool relaxed_precision =
		    (sampler_id && compiler.has_decoration(sampler_id, DecorationRelaxedPrecision)) ||
		    (image_id && compiler.has_decoration(image_id, DecorationRelaxedPrecision)) ||
		    (combined_module_id && compiler.has_decoration(combined_module_id, DecorationRelaxedPrecision));

		if (relaxed_precision)
			compiler.set_decoration(combined_id, DecorationRelaxedPrecision);

		// Propagate the array type for the original image as well.
		auto *var = compiler.maybe_get_backing_variable(image_id);
		if (var)
		{
			auto &parent_type = compiler.get<SPIRType>(var->basetype);
			type.array = parent_type.array;
			type.array_size_literal = parent_type.array_size_literal;
		}

		compiler.combined_image_samplers.push_back({ combined_id, image_id, sampler_id });
	}

	return true;
}
2954 | |
2955 | VariableID Compiler::build_dummy_sampler_for_combined_images() |
2956 | { |
2957 | DummySamplerForCombinedImageHandler handler(*this); |
2958 | traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler); |
2959 | if (handler.need_dummy_sampler) |
2960 | { |
2961 | uint32_t offset = ir.increase_bound_by(3); |
2962 | auto type_id = offset + 0; |
2963 | auto ptr_type_id = offset + 1; |
2964 | auto var_id = offset + 2; |
2965 | |
2966 | SPIRType sampler_type; |
2967 | auto &sampler = set<SPIRType>(type_id); |
2968 | sampler.basetype = SPIRType::Sampler; |
2969 | |
2970 | auto &ptr_sampler = set<SPIRType>(ptr_type_id); |
2971 | ptr_sampler = sampler; |
2972 | ptr_sampler.self = type_id; |
2973 | ptr_sampler.storage = StorageClassUniformConstant; |
2974 | ptr_sampler.pointer = true; |
2975 | ptr_sampler.parent_type = type_id; |
2976 | |
2977 | set<SPIRVariable>(var_id, ptr_type_id, StorageClassUniformConstant, 0); |
2978 | set_name(var_id, "SPIRV_Cross_DummySampler" ); |
2979 | dummy_sampler_id = var_id; |
2980 | return var_id; |
2981 | } |
2982 | else |
2983 | return 0; |
2984 | } |
2985 | |
2986 | void Compiler::build_combined_image_samplers() |
2987 | { |
2988 | ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) { |
2989 | func.combined_parameters.clear(); |
2990 | func.shadow_arguments.clear(); |
2991 | func.do_combined_parameters = true; |
2992 | }); |
2993 | |
2994 | combined_image_samplers.clear(); |
2995 | CombinedImageSamplerHandler handler(*this); |
2996 | traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler); |
2997 | } |
2998 | |
2999 | SmallVector<SpecializationConstant> Compiler::get_specialization_constants() const |
3000 | { |
3001 | SmallVector<SpecializationConstant> spec_consts; |
3002 | ir.for_each_typed_id<SPIRConstant>([&](uint32_t, const SPIRConstant &c) { |
3003 | if (c.specialization && has_decoration(c.self, DecorationSpecId)) |
3004 | spec_consts.push_back({ c.self, get_decoration(c.self, DecorationSpecId) }); |
3005 | }); |
3006 | return spec_consts; |
3007 | } |
3008 | |
// Looks up the SPIRConstant for the given constant ID (mutable access).
SPIRConstant &Compiler::get_constant(ConstantID id)
{
	return get<SPIRConstant>(id);
}
3013 | |
// Looks up the SPIRConstant for the given constant ID (read-only access).
const SPIRConstant &Compiler::get_constant(ConstantID id) const
{
	return get<SPIRConstant>(id);
}
3018 | |
3019 | static bool exists_unaccessed_path_to_return(const CFG &cfg, uint32_t block, const unordered_set<uint32_t> &blocks, |
3020 | unordered_set<uint32_t> &visit_cache) |
3021 | { |
3022 | // This block accesses the variable. |
3023 | if (blocks.find(block) != end(blocks)) |
3024 | return false; |
3025 | |
3026 | // We are at the end of the CFG. |
3027 | if (cfg.get_succeeding_edges(block).empty()) |
3028 | return true; |
3029 | |
3030 | // If any of our successors have a path to the end, there exists a path from block. |
3031 | for (auto &succ : cfg.get_succeeding_edges(block)) |
3032 | { |
3033 | if (visit_cache.count(succ) == 0) |
3034 | { |
3035 | if (exists_unaccessed_path_to_return(cfg, succ, blocks, visit_cache)) |
3036 | return true; |
3037 | visit_cache.insert(succ); |
3038 | } |
3039 | } |
3040 | |
3041 | return false; |
3042 | } |
3043 | |
3044 | void Compiler::analyze_parameter_preservation( |
3045 | SPIRFunction &entry, const CFG &cfg, const unordered_map<uint32_t, unordered_set<uint32_t>> &variable_to_blocks, |
3046 | const unordered_map<uint32_t, unordered_set<uint32_t>> &complete_write_blocks) |
3047 | { |
3048 | for (auto &arg : entry.arguments) |
3049 | { |
3050 | // Non-pointers are always inputs. |
3051 | auto &type = get<SPIRType>(arg.type); |
3052 | if (!type.pointer) |
3053 | continue; |
3054 | |
3055 | // Opaque argument types are always in |
3056 | bool potential_preserve; |
3057 | switch (type.basetype) |
3058 | { |
3059 | case SPIRType::Sampler: |
3060 | case SPIRType::Image: |
3061 | case SPIRType::SampledImage: |
3062 | case SPIRType::AtomicCounter: |
3063 | potential_preserve = false; |
3064 | break; |
3065 | |
3066 | default: |
3067 | potential_preserve = true; |
3068 | break; |
3069 | } |
3070 | |
3071 | if (!potential_preserve) |
3072 | continue; |
3073 | |
3074 | auto itr = variable_to_blocks.find(arg.id); |
3075 | if (itr == end(variable_to_blocks)) |
3076 | { |
3077 | // Variable is never accessed. |
3078 | continue; |
3079 | } |
3080 | |
3081 | // We have accessed a variable, but there was no complete writes to that variable. |
3082 | // We deduce that we must preserve the argument. |
3083 | itr = complete_write_blocks.find(arg.id); |
3084 | if (itr == end(complete_write_blocks)) |
3085 | { |
3086 | arg.read_count++; |
3087 | continue; |
3088 | } |
3089 | |
3090 | // If there is a path through the CFG where no block completely writes to the variable, the variable will be in an undefined state |
3091 | // when the function returns. We therefore need to implicitly preserve the variable in case there are writers in the function. |
3092 | // Major case here is if a function is |
3093 | // void foo(int &var) { if (cond) var = 10; } |
3094 | // Using read/write counts, we will think it's just an out variable, but it really needs to be inout, |
3095 | // because if we don't write anything whatever we put into the function must return back to the caller. |
3096 | unordered_set<uint32_t> visit_cache; |
3097 | if (exists_unaccessed_path_to_return(cfg, entry.entry_block, itr->second, visit_cache)) |
3098 | arg.read_count++; |
3099 | } |
3100 | } |
3101 | |
// Captures the compiler and the function whose variable scopes are being analyzed.
Compiler::AnalyzeVariableScopeAccessHandler::AnalyzeVariableScopeAccessHandler(Compiler &compiler_,
                                                                               SPIRFunction &entry_)
    : compiler(compiler_)
    , entry(entry_)
{
}
3108 | |
// Variable-scope analysis is strictly per-function; never descend into callees.
bool Compiler::AnalyzeVariableScopeAccessHandler::follow_function_call(const SPIRFunction &)
{
	// Only analyze within this function.
	return false;
}
3114 | |
// Called when traversal enters a new block. Besides tracking the current block,
// this accounts for the implicit variable writes which OpPhi in successor blocks
// introduces at the end of this block.
void Compiler::AnalyzeVariableScopeAccessHandler::set_current_block(const SPIRBlock &block)
{
	current_block = &block;

	// If we're branching to a block which uses OpPhi, in GLSL
	// this will be a variable write when we branch,
	// so we need to track access to these variables as well to
	// have a complete picture.
	const auto test_phi = [this, &block](uint32_t to) {
		auto &next = compiler.get<SPIRBlock>(to);
		for (auto &phi : next.phi_variables)
		{
			if (phi.parent == block.self)
			{
				accessed_variables_to_block[phi.function_variable].insert(block.self);
				// Phi variables are also accessed in our target branch block.
				accessed_variables_to_block[phi.function_variable].insert(next.self);

				notify_variable_access(phi.local_variable, block.self);
			}
		}
	};

	// Test all potential branch targets for phi nodes fed from this block.
	// (notify_variable_access ignores a zero condition ID, so the Direct case is harmless.)
	switch (block.terminator)
	{
	case SPIRBlock::Direct:
		notify_variable_access(block.condition, block.self);
		test_phi(block.next_block);
		break;

	case SPIRBlock::Select:
		notify_variable_access(block.condition, block.self);
		test_phi(block.true_block);
		test_phi(block.false_block);
		break;

	case SPIRBlock::MultiSelect:
	{
		notify_variable_access(block.condition, block.self);
		auto &cases = compiler.get_case_list(block);
		for (auto &target : cases)
			test_phi(target.block);
		if (block.default_block)
			test_phi(block.default_block);
		break;
	}

	default:
		break;
	}
}
3166 | |
3167 | void Compiler::AnalyzeVariableScopeAccessHandler::notify_variable_access(uint32_t id, uint32_t block) |
3168 | { |
3169 | if (id == 0) |
3170 | return; |
3171 | |
3172 | // Access chains used in multiple blocks mean hoisting all the variables used to construct the access chain as not all backends can use pointers. |
3173 | auto itr = access_chain_children.find(id); |
3174 | if (itr != end(access_chain_children)) |
3175 | for (auto child_id : itr->second) |
3176 | notify_variable_access(child_id, block); |
3177 | |
3178 | if (id_is_phi_variable(id)) |
3179 | accessed_variables_to_block[id].insert(block); |
3180 | else if (id_is_potential_temporary(id)) |
3181 | accessed_temporaries_to_block[id].insert(block); |
3182 | } |
3183 | |
3184 | bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_phi_variable(uint32_t id) const |
3185 | { |
3186 | if (id >= compiler.get_current_id_bound()) |
3187 | return false; |
3188 | auto *var = compiler.maybe_get<SPIRVariable>(id); |
3189 | return var && var->phi_variable; |
3190 | } |
3191 | |
3192 | bool Compiler::AnalyzeVariableScopeAccessHandler::id_is_potential_temporary(uint32_t id) const |
3193 | { |
3194 | if (id >= compiler.get_current_id_bound()) |
3195 | return false; |
3196 | |
3197 | // Temporaries are not created before we start emitting code. |
3198 | return compiler.ir.ids[id].empty() || (compiler.ir.ids[id].get_type() == TypeExpression); |
3199 | } |
3200 | |
3201 | bool Compiler::AnalyzeVariableScopeAccessHandler::handle_terminator(const SPIRBlock &block) |
3202 | { |
3203 | switch (block.terminator) |
3204 | { |
3205 | case SPIRBlock::Return: |
3206 | if (block.return_value) |
3207 | notify_variable_access(block.return_value, block.self); |
3208 | break; |
3209 | |
3210 | case SPIRBlock::Select: |
3211 | case SPIRBlock::MultiSelect: |
3212 | notify_variable_access(block.condition, block.self); |
3213 | break; |
3214 | |
3215 | default: |
3216 | break; |
3217 | } |
3218 | |
3219 | return true; |
3220 | } |
3221 | |
// Per-opcode access analysis. Records, for every instruction in the current block,
// which variables are accessed and whether writes are complete or partial, and which
// IDs are temporaries. This feeds the dominator analysis which decides where variables
// are declared and which temporaries must be hoisted.
// Opcodes carrying literal operands are special-cased so literals are not mistaken for IDs.
bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length)
{
	// Keep track of the types of temporaries, so we can hoist them out as necessary.
	uint32_t result_type, result_id;
	if (compiler.instruction_to_result_type(result_type, result_id, op, args, length))
		result_id_to_type[result_id] = result_type;

	switch (op)
	{
	case OpStore:
	{
		if (length < 2)
			return false;

		ID ptr = args[0];
		auto *var = compiler.maybe_get_backing_variable(ptr);

		// If we store through an access chain, we have a partial write.
		if (var)
		{
			accessed_variables_to_block[var->self].insert(current_block->self);
			if (var->self == ptr)
				complete_write_variables_to_block[var->self].insert(current_block->self);
			else
				partial_write_variables_to_block[var->self].insert(current_block->self);
		}

		// args[0] might be an access chain we have to track use of.
		notify_variable_access(args[0], current_block->self);
		// Might try to store a Phi variable here.
		notify_variable_access(args[1], current_block->self);
		break;
	}

	case OpAccessChain:
	case OpInBoundsAccessChain:
	case OpPtrAccessChain:
	{
		if (length < 3)
			return false;

		// Access chains used in multiple blocks mean hoisting all the variables used to construct the access chain as not all backends can use pointers.
		uint32_t ptr = args[2];
		auto *var = compiler.maybe_get<SPIRVariable>(ptr);
		if (var)
		{
			accessed_variables_to_block[var->self].insert(current_block->self);
			access_chain_children[args[1]].insert(var->self);
		}

		// args[2] might be another access chain we have to track use of.
		// All indices (args[3]...) are also tracked as children of this chain.
		for (uint32_t i = 2; i < length; i++)
		{
			notify_variable_access(args[i], current_block->self);
			access_chain_children[args[1]].insert(args[i]);
		}

		// Also keep track of the access chain pointer itself.
		// In exceptionally rare cases, we can end up with a case where
		// the access chain is generated in the loop body, but is consumed in continue block.
		// This means we need complex loop workarounds, and we must detect this via CFG analysis.
		notify_variable_access(args[1], current_block->self);

		// The result of an access chain is a fixed expression and is not really considered a temporary.
		auto &e = compiler.set<SPIRExpression>(args[1], "" , args[0], true);
		auto *backing_variable = compiler.maybe_get_backing_variable(ptr);
		e.loaded_from = backing_variable ? VariableID(backing_variable->self) : VariableID(0);

		// Other backends might use SPIRAccessChain for this later.
		compiler.ir.ids[args[1]].set_allow_type_rewrite();
		access_chain_expressions.insert(args[1]);
		break;
	}

	case OpCopyMemory:
	{
		if (length < 2)
			return false;

		ID lhs = args[0];
		ID rhs = args[1];
		auto *var = compiler.maybe_get_backing_variable(lhs);

		// If we store through an access chain, we have a partial write.
		if (var)
		{
			accessed_variables_to_block[var->self].insert(current_block->self);
			if (var->self == lhs)
				complete_write_variables_to_block[var->self].insert(current_block->self);
			else
				partial_write_variables_to_block[var->self].insert(current_block->self);
		}

		// args[0:1] might be access chains we have to track use of.
		for (uint32_t i = 0; i < 2; i++)
			notify_variable_access(args[i], current_block->self);

		// The source side is a plain read access.
		var = compiler.maybe_get_backing_variable(rhs);
		if (var)
			accessed_variables_to_block[var->self].insert(current_block->self);
		break;
	}

	case OpCopyObject:
	{
		if (length < 3)
			return false;

		auto *var = compiler.maybe_get_backing_variable(args[2]);
		if (var)
			accessed_variables_to_block[var->self].insert(current_block->self);

		// Might be an access chain which we have to keep track of.
		notify_variable_access(args[1], current_block->self);
		// Copying an access chain expression yields another access chain expression.
		if (access_chain_expressions.count(args[2]))
			access_chain_expressions.insert(args[1]);

		// Might try to copy a Phi variable here.
		notify_variable_access(args[2], current_block->self);
		break;
	}

	case OpLoad:
	{
		if (length < 3)
			return false;
		uint32_t ptr = args[2];
		auto *var = compiler.maybe_get_backing_variable(ptr);
		if (var)
			accessed_variables_to_block[var->self].insert(current_block->self);

		// Loaded value is a temporary.
		notify_variable_access(args[1], current_block->self);

		// Might be an access chain we have to track use of.
		notify_variable_access(args[2], current_block->self);
		break;
	}

	case OpFunctionCall:
	{
		if (length < 3)
			return false;

		// Return value may be a temporary.
		if (compiler.get_type(args[0]).basetype != SPIRType::Void)
			notify_variable_access(args[1], current_block->self);

		// Skip result type, result ID and function ID; remaining operands are the call arguments.
		length -= 3;
		args += 3;

		for (uint32_t i = 0; i < length; i++)
		{
			auto *var = compiler.maybe_get_backing_variable(args[i]);
			if (var)
			{
				accessed_variables_to_block[var->self].insert(current_block->self);
				// Assume we can get partial writes to this variable.
				partial_write_variables_to_block[var->self].insert(current_block->self);
			}

			// Cannot easily prove if argument we pass to a function is completely written.
			// Usually, functions write to a dummy variable,
			// which is then copied to in full to the real argument.

			// Might try to copy a Phi variable here.
			notify_variable_access(args[i], current_block->self);
		}
		break;
	}

	case OpSelect:
	{
		// In case of variable pointers, we might access a variable here.
		// We cannot prove anything about these accesses however.
		for (uint32_t i = 1; i < length; i++)
		{
			// Operands from index 3 on are the selected values, which may be pointers.
			if (i >= 3)
			{
				auto *var = compiler.maybe_get_backing_variable(args[i]);
				if (var)
				{
					accessed_variables_to_block[var->self].insert(current_block->self);
					// Assume we can get partial writes to this variable.
					partial_write_variables_to_block[var->self].insert(current_block->self);
				}
			}

			// Might try to copy a Phi variable here.
			notify_variable_access(args[i], current_block->self);
		}
		break;
	}

	case OpExtInst:
	{
		// Operands from index 4 on are the extended instruction's arguments; args[1] is the result.
		for (uint32_t i = 4; i < length; i++)
			notify_variable_access(args[i], current_block->self);
		notify_variable_access(args[1], current_block->self);

		uint32_t extension_set = args[2];
		if (compiler.get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
		{
			auto op_450 = static_cast<GLSLstd450>(args[3]);
			switch (op_450)
			{
			case GLSLstd450Modf:
			case GLSLstd450Frexp:
			{
				// These write through a pointer operand, so treat it like a store.
				uint32_t ptr = args[5];
				auto *var = compiler.maybe_get_backing_variable(ptr);
				if (var)
				{
					accessed_variables_to_block[var->self].insert(current_block->self);
					if (var->self == ptr)
						complete_write_variables_to_block[var->self].insert(current_block->self);
					else
						partial_write_variables_to_block[var->self].insert(current_block->self);
				}
				break;
			}

			default:
				break;
			}
		}
		break;
	}

	case OpArrayLength:
		// Only result is a temporary.
		notify_variable_access(args[1], current_block->self);
		break;

	case OpLine:
	case OpNoLine:
		// Uses literals, but cannot be a phi variable or temporary, so ignore.
		break;

	// Atomics shouldn't be able to access function-local variables.
	// Some GLSL builtins access a pointer.

	case OpCompositeInsert:
	case OpVectorShuffle:
		// Specialize for opcode which contains literals.
		for (uint32_t i = 1; i < 4; i++)
			notify_variable_access(args[i], current_block->self);
		break;

	case OpCompositeExtract:
		// Specialize for opcode which contains literals.
		for (uint32_t i = 1; i < 3; i++)
			notify_variable_access(args[i], current_block->self);
		break;

	case OpImageWrite:
		for (uint32_t i = 0; i < length; i++)
		{
			// Argument 3 is a literal.
			if (i != 3)
				notify_variable_access(args[i], current_block->self);
		}
		break;

	case OpImageSampleImplicitLod:
	case OpImageSampleExplicitLod:
	case OpImageSparseSampleImplicitLod:
	case OpImageSparseSampleExplicitLod:
	case OpImageSampleProjImplicitLod:
	case OpImageSampleProjExplicitLod:
	case OpImageSparseSampleProjImplicitLod:
	case OpImageSparseSampleProjExplicitLod:
	case OpImageFetch:
	case OpImageSparseFetch:
	case OpImageRead:
	case OpImageSparseRead:
		for (uint32_t i = 1; i < length; i++)
		{
			// Argument 4 is a literal.
			if (i != 4)
				notify_variable_access(args[i], current_block->self);
		}
		break;

	case OpImageSampleDrefImplicitLod:
	case OpImageSampleDrefExplicitLod:
	case OpImageSparseSampleDrefImplicitLod:
	case OpImageSparseSampleDrefExplicitLod:
	case OpImageSampleProjDrefImplicitLod:
	case OpImageSampleProjDrefExplicitLod:
	case OpImageSparseSampleProjDrefImplicitLod:
	case OpImageSparseSampleProjDrefExplicitLod:
	case OpImageGather:
	case OpImageSparseGather:
	case OpImageDrefGather:
	case OpImageSparseDrefGather:
		for (uint32_t i = 1; i < length; i++)
		{
			// Argument 5 is a literal.
			if (i != 5)
				notify_variable_access(args[i], current_block->self);
		}
		break;

	default:
	{
		// Rather dirty way of figuring out where Phi variables are used.
		// As long as only IDs are used, we can scan through instructions and try to find any evidence that
		// the ID of a variable has been used.
		// There are potential false positives here where a literal is used in-place of an ID,
		// but worst case, it does not affect the correctness of the compile.
		// Exhaustive analysis would be better here, but it's not worth it for now.
		for (uint32_t i = 0; i < length; i++)
			notify_variable_access(args[i], current_block->self);
		break;
	}
	}
	return true;
}
3541 | |
// Tracks accesses to a single candidate variable to decide whether it can be
// replaced by a single static expression (its last stored value).
Compiler::StaticExpressionAccessHandler::StaticExpressionAccessHandler(Compiler &compiler_, uint32_t variable_id_)
    : compiler(compiler_)
    , variable_id(variable_id_)
{
}
3547 | |
// Static-expression analysis is strictly per-function; never descend into callees.
bool Compiler::StaticExpressionAccessHandler::follow_function_call(const SPIRFunction &)
{
	return false;
}
3552 | |
3553 | bool Compiler::StaticExpressionAccessHandler::handle(spv::Op op, const uint32_t *args, uint32_t length) |
3554 | { |
3555 | switch (op) |
3556 | { |
3557 | case OpStore: |
3558 | if (length < 2) |
3559 | return false; |
3560 | if (args[0] == variable_id) |
3561 | { |
3562 | static_expression = args[1]; |
3563 | write_count++; |
3564 | } |
3565 | break; |
3566 | |
3567 | case OpLoad: |
3568 | if (length < 3) |
3569 | return false; |
3570 | if (args[2] == variable_id && static_expression == 0) // Tried to read from variable before it was initialized. |
3571 | return false; |
3572 | break; |
3573 | |
3574 | case OpAccessChain: |
3575 | case OpInBoundsAccessChain: |
3576 | case OpPtrAccessChain: |
3577 | if (length < 3) |
3578 | return false; |
3579 | if (args[2] == variable_id) // If we try to access chain our candidate variable before we store to it, bail. |
3580 | return false; |
3581 | break; |
3582 | |
3583 | default: |
3584 | break; |
3585 | } |
3586 | |
3587 | return true; |
3588 | } |
3589 | |
// Detects function-local (or, for single-function modules, Private) array variables
// which are provably written exactly once with a constant expression, or only
// initialized with one. Such variables are flagged as LUTs (lookup tables) so
// backends can emit them as constants rather than as mutable local arrays.
void Compiler::find_function_local_luts(SPIRFunction &entry, const AnalyzeVariableScopeAccessHandler &handler,
                                        bool single_function)
{
	auto &cfg = *function_cfgs.find(entry.self)->second;

	// For each variable which is statically accessed.
	for (auto &accessed_var : handler.accessed_variables_to_block)
	{
		auto &blocks = accessed_var.second;
		auto &var = get<SPIRVariable>(accessed_var.first);
		auto &type = expression_type(accessed_var.first);

		// Only consider function local variables here.
		// If we only have a single function in our CFG, private storage is also fine,
		// since it behaves like a function local variable.
		bool allow_lut = var.storage == StorageClassFunction || (single_function && var.storage == StorageClassPrivate);
		if (!allow_lut)
			continue;

		// We cannot be a phi variable.
		if (var.phi_variable)
			continue;

		// Only consider arrays here.
		if (type.array.empty())
			continue;

		// If the variable has an initializer, make sure it is a constant expression.
		uint32_t static_constant_expression = 0;
		if (var.initializer)
		{
			if (ir.ids[var.initializer].get_type() != TypeConstant)
				continue;
			static_constant_expression = var.initializer;

			// There can be no stores to this variable, we have now proved we have a LUT.
			if (handler.complete_write_variables_to_block.count(var.self) != 0 ||
			    handler.partial_write_variables_to_block.count(var.self) != 0)
				continue;
		}
		else
		{
			// We can have one, and only one write to the variable, and that write needs to be a constant.

			// No partial writes allowed.
			if (handler.partial_write_variables_to_block.count(var.self) != 0)
				continue;

			auto itr = handler.complete_write_variables_to_block.find(var.self);

			// No writes?
			if (itr == end(handler.complete_write_variables_to_block))
				continue;

			// We write to the variable in more than one block.
			auto &write_blocks = itr->second;
			if (write_blocks.size() != 1)
				continue;

			// The write needs to happen in the dominating block.
			DominatorBuilder builder(cfg);
			for (auto &block : blocks)
				builder.add_block(block);
			uint32_t dominator = builder.get_dominator();

			// The complete write happened in a branch or similar, cannot deduce static expression.
			if (write_blocks.count(dominator) == 0)
				continue;

			// Find the static expression for this variable.
			StaticExpressionAccessHandler static_expression_handler(*this, var.self);
			traverse_all_reachable_opcodes(get<SPIRBlock>(dominator), static_expression_handler);

			// We want one, and exactly one write
			if (static_expression_handler.write_count != 1 || static_expression_handler.static_expression == 0)
				continue;

			// Is it a constant expression?
			if (ir.ids[static_expression_handler.static_expression].get_type() != TypeConstant)
				continue;

			// We found a LUT!
			static_constant_expression = static_expression_handler.static_expression;
		}

		// Mark the constant and the variable so backends can emit the LUT in place.
		get<SPIRConstant>(static_constant_expression).is_used_as_lut = true;
		var.static_expression = static_constant_expression;
		var.statically_assigned = true;
		var.remapped_variable = true;
	}
}
3681 | |
3682 | void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeAccessHandler &handler) |
3683 | { |
3684 | // First, we map out all variable access within a function. |
3685 | // Essentially a map of block -> { variables accessed in the basic block } |
3686 | traverse_all_reachable_opcodes(entry, handler); |
3687 | |
3688 | auto &cfg = *function_cfgs.find(entry.self)->second; |
3689 | |
3690 | // Analyze if there are parameters which need to be implicitly preserved with an "in" qualifier. |
3691 | analyze_parameter_preservation(entry, cfg, handler.accessed_variables_to_block, |
3692 | handler.complete_write_variables_to_block); |
3693 | |
3694 | unordered_map<uint32_t, uint32_t> potential_loop_variables; |
3695 | |
3696 | // Find the loop dominator block for each block. |
3697 | for (auto &block_id : entry.blocks) |
3698 | { |
3699 | auto &block = get<SPIRBlock>(block_id); |
3700 | |
3701 | auto itr = ir.continue_block_to_loop_header.find(block_id); |
3702 | if (itr != end(ir.continue_block_to_loop_header) && itr->second != block_id) |
3703 | { |
3704 | // Continue block might be unreachable in the CFG, but we still like to know the loop dominator. |
3705 | // Edge case is when continue block is also the loop header, don't set the dominator in this case. |
3706 | block.loop_dominator = itr->second; |
3707 | } |
3708 | else |
3709 | { |
3710 | uint32_t loop_dominator = cfg.find_loop_dominator(block_id); |
3711 | if (loop_dominator != block_id) |
3712 | block.loop_dominator = loop_dominator; |
3713 | else |
3714 | block.loop_dominator = SPIRBlock::NoDominator; |
3715 | } |
3716 | } |
3717 | |
3718 | // For each variable which is statically accessed. |
3719 | for (auto &var : handler.accessed_variables_to_block) |
3720 | { |
3721 | // Only deal with variables which are considered local variables in this function. |
3722 | if (find(begin(entry.local_variables), end(entry.local_variables), VariableID(var.first)) == |
3723 | end(entry.local_variables)) |
3724 | continue; |
3725 | |
3726 | DominatorBuilder builder(cfg); |
3727 | auto &blocks = var.second; |
3728 | auto &type = expression_type(var.first); |
3729 | |
3730 | // Figure out which block is dominating all accesses of those variables. |
3731 | for (auto &block : blocks) |
3732 | { |
3733 | // If we're accessing a variable inside a continue block, this variable might be a loop variable. |
3734 | // We can only use loop variables with scalars, as we cannot track static expressions for vectors. |
3735 | if (is_continue(block)) |
3736 | { |
3737 | // Potentially awkward case to check for. |
3738 | // We might have a variable inside a loop, which is touched by the continue block, |
3739 | // but is not actually a loop variable. |
3740 | // The continue block is dominated by the inner part of the loop, which does not make sense in high-level |
3741 | // language output because it will be declared before the body, |
3742 | // so we will have to lift the dominator up to the relevant loop header instead. |
3743 | builder.add_block(ir.continue_block_to_loop_header[block]); |
3744 | |
3745 | // Arrays or structs cannot be loop variables. |
3746 | if (type.vecsize == 1 && type.columns == 1 && type.basetype != SPIRType::Struct && type.array.empty()) |
3747 | { |
3748 | // The variable is used in multiple continue blocks, this is not a loop |
3749 | // candidate, signal that by setting block to -1u. |
3750 | auto &potential = potential_loop_variables[var.first]; |
3751 | |
3752 | if (potential == 0) |
3753 | potential = block; |
3754 | else |
3755 | potential = ~(0u); |
3756 | } |
3757 | } |
3758 | builder.add_block(block); |
3759 | } |
3760 | |
3761 | builder.lift_continue_block_dominator(); |
3762 | |
3763 | // Add it to a per-block list of variables. |
3764 | BlockID dominating_block = builder.get_dominator(); |
3765 | |
3766 | // For variables whose dominating block is inside a loop, there is a risk that these variables |
3767 | // actually need to be preserved across loop iterations. We can express this by adding |
3768 | // a "read" access to the loop header. |
3769 | // In the dominating block, we must see an OpStore or equivalent as the first access of an OpVariable. |
3770 | // Should that fail, we look for the outermost loop header and tack on an access there. |
3771 | // Phi nodes cannot have this problem. |
3772 | if (dominating_block) |
3773 | { |
3774 | auto &variable = get<SPIRVariable>(var.first); |
3775 | if (!variable.phi_variable) |
3776 | { |
3777 | auto *block = &get<SPIRBlock>(dominating_block); |
3778 | bool preserve = may_read_undefined_variable_in_block(*block, var.first); |
3779 | if (preserve) |
3780 | { |
3781 | // Find the outermost loop scope. |
3782 | while (block->loop_dominator != BlockID(SPIRBlock::NoDominator)) |
3783 | block = &get<SPIRBlock>(block->loop_dominator); |
3784 | |
3785 | if (block->self != dominating_block) |
3786 | { |
3787 | builder.add_block(block->self); |
3788 | dominating_block = builder.get_dominator(); |
3789 | } |
3790 | } |
3791 | } |
3792 | } |
3793 | |
3794 | // If all blocks here are dead code, this will be 0, so the variable in question |
3795 | // will be completely eliminated. |
3796 | if (dominating_block) |
3797 | { |
3798 | auto &block = get<SPIRBlock>(dominating_block); |
3799 | block.dominated_variables.push_back(var.first); |
3800 | get<SPIRVariable>(var.first).dominator = dominating_block; |
3801 | } |
3802 | } |
3803 | |
3804 | for (auto &var : handler.accessed_temporaries_to_block) |
3805 | { |
3806 | auto itr = handler.result_id_to_type.find(var.first); |
3807 | |
3808 | if (itr == end(handler.result_id_to_type)) |
3809 | { |
3810 | // We found a false positive ID being used, ignore. |
3811 | // This should probably be an assert. |
3812 | continue; |
3813 | } |
3814 | |
3815 | // There is no point in doing domination analysis for opaque types. |
3816 | auto &type = get<SPIRType>(itr->second); |
3817 | if (type_is_opaque_value(type)) |
3818 | continue; |
3819 | |
3820 | DominatorBuilder builder(cfg); |
3821 | bool force_temporary = false; |
3822 | bool = false; |
3823 | |
3824 | // Figure out which block is dominating all accesses of those temporaries. |
3825 | auto &blocks = var.second; |
3826 | for (auto &block : blocks) |
3827 | { |
3828 | builder.add_block(block); |
3829 | |
3830 | if (blocks.size() != 1 && is_continue(block)) |
3831 | { |
3832 | // The risk here is that inner loop can dominate the continue block. |
3833 | // Any temporary we access in the continue block must be declared before the loop. |
3834 | // This is moot for complex loops however. |
3835 | auto & = get<SPIRBlock>(ir.continue_block_to_loop_header[block]); |
3836 | assert(loop_header_block.merge == SPIRBlock::MergeLoop); |
3837 | builder.add_block(loop_header_block.self); |
3838 | used_in_header_hoisted_continue_block = true; |
3839 | } |
3840 | } |
3841 | |
3842 | uint32_t dominating_block = builder.get_dominator(); |
3843 | |
3844 | if (blocks.size() != 1 && is_single_block_loop(dominating_block)) |
3845 | { |
3846 | // Awkward case, because the loop header is also the continue block, |
3847 | // so hoisting to loop header does not help. |
3848 | force_temporary = true; |
3849 | } |
3850 | |
3851 | if (dominating_block) |
3852 | { |
3853 | // If we touch a variable in the dominating block, this is the expected setup. |
3854 | // SPIR-V normally mandates this, but we have extra cases for temporary use inside loops. |
3855 | bool first_use_is_dominator = blocks.count(dominating_block) != 0; |
3856 | |
3857 | if (!first_use_is_dominator || force_temporary) |
3858 | { |
3859 | if (handler.access_chain_expressions.count(var.first)) |
3860 | { |
3861 | // Exceptionally rare case. |
3862 | // We cannot declare temporaries of access chains (except on MSL perhaps with pointers). |
3863 | // Rather than do that, we force the indexing expressions to be declared in the right scope by |
3864 | // tracking their usage to that end. There is no temporary to hoist. |
3865 | // However, we still need to observe declaration order of the access chain. |
3866 | |
3867 | if (used_in_header_hoisted_continue_block) |
3868 | { |
3869 | // For this scenario, we used an access chain inside a continue block where we also registered an access to header block. |
3870 | // This is a problem as we need to declare an access chain properly first with full definition. |
3871 | // We cannot use temporaries for these expressions, |
3872 | // so we must make sure the access chain is declared ahead of time. |
3873 | // Force a complex for loop to deal with this. |
3874 | // TODO: Out-of-order declaring for loops where continue blocks are emitted last might be another option. |
3875 | auto & = get<SPIRBlock>(dominating_block); |
3876 | assert(loop_header_block.merge == SPIRBlock::MergeLoop); |
3877 | loop_header_block.complex_continue = true; |
3878 | } |
3879 | } |
3880 | else |
3881 | { |
3882 | // This should be very rare, but if we try to declare a temporary inside a loop, |
3883 | // and that temporary is used outside the loop as well (spirv-opt inliner likes this) |
3884 | // we should actually emit the temporary outside the loop. |
3885 | hoisted_temporaries.insert(var.first); |
3886 | forced_temporaries.insert(var.first); |
3887 | |
3888 | auto &block_temporaries = get<SPIRBlock>(dominating_block).declare_temporary; |
3889 | block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first); |
3890 | } |
3891 | } |
3892 | else if (blocks.size() > 1) |
3893 | { |
3894 | // Keep track of the temporary as we might have to declare this temporary. |
3895 | // This can happen if the loop header dominates a temporary, but we have a complex fallback loop. |
3896 | // In this case, the header is actually inside the for (;;) {} block, and we have problems. |
3897 | // What we need to do is hoist the temporaries outside the for (;;) {} block in case the header block |
3898 | // declares the temporary. |
3899 | auto &block_temporaries = get<SPIRBlock>(dominating_block).potential_declare_temporary; |
3900 | block_temporaries.emplace_back(handler.result_id_to_type[var.first], var.first); |
3901 | } |
3902 | } |
3903 | } |
3904 | |
3905 | unordered_set<uint32_t> seen_blocks; |
3906 | |
3907 | // Now, try to analyze whether or not these variables are actually loop variables. |
3908 | for (auto &loop_variable : potential_loop_variables) |
3909 | { |
3910 | auto &var = get<SPIRVariable>(loop_variable.first); |
3911 | auto dominator = var.dominator; |
3912 | BlockID block = loop_variable.second; |
3913 | |
3914 | // The variable was accessed in multiple continue blocks, ignore. |
3915 | if (block == BlockID(~(0u)) || block == BlockID(0)) |
3916 | continue; |
3917 | |
3918 | // Dead code. |
3919 | if (dominator == ID(0)) |
3920 | continue; |
3921 | |
3922 | BlockID = 0; |
3923 | |
3924 | // Find the loop header for this block if we are a continue block. |
3925 | { |
3926 | auto itr = ir.continue_block_to_loop_header.find(block); |
3927 | if (itr != end(ir.continue_block_to_loop_header)) |
3928 | { |
3929 | header = itr->second; |
3930 | } |
3931 | else if (get<SPIRBlock>(block).continue_block == block) |
3932 | { |
3933 | // Also check for self-referential continue block. |
3934 | header = block; |
3935 | } |
3936 | } |
3937 | |
3938 | assert(header); |
3939 | auto & = get<SPIRBlock>(header); |
3940 | auto &blocks = handler.accessed_variables_to_block[loop_variable.first]; |
3941 | |
3942 | // If a loop variable is not used before the loop, it's probably not a loop variable. |
3943 | bool has_accessed_variable = blocks.count(header) != 0; |
3944 | |
3945 | // Now, there are two conditions we need to meet for the variable to be a loop variable. |
3946 | // 1. The dominating block must have a branch-free path to the loop header, |
3947 | // this way we statically know which expression should be part of the loop variable initializer. |
3948 | |
3949 | // Walk from the dominator, if there is one straight edge connecting |
3950 | // dominator and loop header, we statically know the loop initializer. |
3951 | bool static_loop_init = true; |
3952 | while (dominator != header) |
3953 | { |
3954 | if (blocks.count(dominator) != 0) |
3955 | has_accessed_variable = true; |
3956 | |
3957 | auto &succ = cfg.get_succeeding_edges(dominator); |
3958 | if (succ.size() != 1) |
3959 | { |
3960 | static_loop_init = false; |
3961 | break; |
3962 | } |
3963 | |
3964 | auto &pred = cfg.get_preceding_edges(succ.front()); |
3965 | if (pred.size() != 1 || pred.front() != dominator) |
3966 | { |
3967 | static_loop_init = false; |
3968 | break; |
3969 | } |
3970 | |
3971 | dominator = succ.front(); |
3972 | } |
3973 | |
3974 | if (!static_loop_init || !has_accessed_variable) |
3975 | continue; |
3976 | |
3977 | // The second condition we need to meet is that no access after the loop |
3978 | // merge can occur. Walk the CFG to see if we find anything. |
3979 | |
3980 | seen_blocks.clear(); |
3981 | cfg.walk_from(seen_blocks, header_block.merge_block, [&](uint32_t walk_block) -> bool { |
3982 | // We found a block which accesses the variable outside the loop. |
3983 | if (blocks.find(walk_block) != end(blocks)) |
3984 | static_loop_init = false; |
3985 | return true; |
3986 | }); |
3987 | |
3988 | if (!static_loop_init) |
3989 | continue; |
3990 | |
3991 | // We have a loop variable. |
3992 | header_block.loop_variables.push_back(loop_variable.first); |
3993 | // Need to sort here as variables come from an unordered container, and pushing stuff in wrong order |
3994 | // will break reproducability in regression runs. |
3995 | sort(begin(header_block.loop_variables), end(header_block.loop_variables)); |
3996 | get<SPIRVariable>(loop_variable.first).loop_variable = true; |
3997 | } |
3998 | } |
3999 | |
4000 | bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var) |
4001 | { |
4002 | for (auto &op : block.ops) |
4003 | { |
4004 | auto *ops = stream(op); |
4005 | switch (op.op) |
4006 | { |
4007 | case OpStore: |
4008 | case OpCopyMemory: |
4009 | if (ops[0] == var) |
4010 | return false; |
4011 | break; |
4012 | |
4013 | case OpAccessChain: |
4014 | case OpInBoundsAccessChain: |
4015 | case OpPtrAccessChain: |
4016 | // Access chains are generally used to partially read and write. It's too hard to analyze |
4017 | // if all constituents are written fully before continuing, so just assume it's preserved. |
4018 | // This is the same as the parameter preservation analysis. |
4019 | if (ops[2] == var) |
4020 | return true; |
4021 | break; |
4022 | |
4023 | case OpSelect: |
4024 | // Variable pointers. |
4025 | // We might read before writing. |
4026 | if (ops[3] == var || ops[4] == var) |
4027 | return true; |
4028 | break; |
4029 | |
4030 | case OpPhi: |
4031 | { |
4032 | // Variable pointers. |
4033 | // We might read before writing. |
4034 | if (op.length < 2) |
4035 | break; |
4036 | |
4037 | uint32_t count = op.length - 2; |
4038 | for (uint32_t i = 0; i < count; i += 2) |
4039 | if (ops[i + 2] == var) |
4040 | return true; |
4041 | break; |
4042 | } |
4043 | |
4044 | case OpCopyObject: |
4045 | case OpLoad: |
4046 | if (ops[2] == var) |
4047 | return true; |
4048 | break; |
4049 | |
4050 | case OpFunctionCall: |
4051 | { |
4052 | if (op.length < 3) |
4053 | break; |
4054 | |
4055 | // May read before writing. |
4056 | uint32_t count = op.length - 3; |
4057 | for (uint32_t i = 0; i < count; i++) |
4058 | if (ops[i + 3] == var) |
4059 | return true; |
4060 | break; |
4061 | } |
4062 | |
4063 | default: |
4064 | break; |
4065 | } |
4066 | } |
4067 | |
4068 | // Not accessed somehow, at least not in a usual fashion. |
4069 | // It's likely accessed in a branch, so assume we must preserve. |
4070 | return true; |
4071 | } |
4072 | |
// Convenience wrapper: look up the variable by ID and query its buffer block
// decoration flags from the parsed IR.
Bitset Compiler::get_buffer_block_flags(VariableID id) const
{
	return ir.get_buffer_block_flags(get<SPIRVariable>(id));
}
4077 | |
4078 | bool Compiler::get_common_basic_type(const SPIRType &type, SPIRType::BaseType &base_type) |
4079 | { |
4080 | if (type.basetype == SPIRType::Struct) |
4081 | { |
4082 | base_type = SPIRType::Unknown; |
4083 | for (auto &member_type : type.member_types) |
4084 | { |
4085 | SPIRType::BaseType member_base; |
4086 | if (!get_common_basic_type(get<SPIRType>(member_type), member_base)) |
4087 | return false; |
4088 | |
4089 | if (base_type == SPIRType::Unknown) |
4090 | base_type = member_base; |
4091 | else if (base_type != member_base) |
4092 | return false; |
4093 | } |
4094 | return true; |
4095 | } |
4096 | else |
4097 | { |
4098 | base_type = type.basetype; |
4099 | return true; |
4100 | } |
4101 | } |
4102 | |
4103 | void Compiler::ActiveBuiltinHandler::handle_builtin(const SPIRType &type, BuiltIn builtin, |
4104 | const Bitset &decoration_flags) |
4105 | { |
4106 | // If used, we will need to explicitly declare a new array size for these builtins. |
4107 | |
4108 | if (builtin == BuiltInClipDistance) |
4109 | { |
4110 | if (!type.array_size_literal[0]) |
4111 | SPIRV_CROSS_THROW("Array size for ClipDistance must be a literal." ); |
4112 | uint32_t array_size = type.array[0]; |
4113 | if (array_size == 0) |
4114 | SPIRV_CROSS_THROW("Array size for ClipDistance must not be unsized." ); |
4115 | compiler.clip_distance_count = array_size; |
4116 | } |
4117 | else if (builtin == BuiltInCullDistance) |
4118 | { |
4119 | if (!type.array_size_literal[0]) |
4120 | SPIRV_CROSS_THROW("Array size for CullDistance must be a literal." ); |
4121 | uint32_t array_size = type.array[0]; |
4122 | if (array_size == 0) |
4123 | SPIRV_CROSS_THROW("Array size for CullDistance must not be unsized." ); |
4124 | compiler.cull_distance_count = array_size; |
4125 | } |
4126 | else if (builtin == BuiltInPosition) |
4127 | { |
4128 | if (decoration_flags.get(DecorationInvariant)) |
4129 | compiler.position_invariant = true; |
4130 | } |
4131 | } |
4132 | |
4133 | void Compiler::ActiveBuiltinHandler::add_if_builtin(uint32_t id, bool allow_blocks) |
4134 | { |
4135 | // Only handle plain variables here. |
4136 | // Builtins which are part of a block are handled in AccessChain. |
4137 | // If allow_blocks is used however, this is to handle initializers of blocks, |
4138 | // which implies that all members are written to. |
4139 | |
4140 | auto *var = compiler.maybe_get<SPIRVariable>(id); |
4141 | auto *m = compiler.ir.find_meta(id); |
4142 | if (var && m) |
4143 | { |
4144 | auto &type = compiler.get<SPIRType>(var->basetype); |
4145 | auto &decorations = m->decoration; |
4146 | auto &flags = type.storage == StorageClassInput ? |
4147 | compiler.active_input_builtins : compiler.active_output_builtins; |
4148 | if (decorations.builtin) |
4149 | { |
4150 | flags.set(decorations.builtin_type); |
4151 | handle_builtin(type, decorations.builtin_type, decorations.decoration_flags); |
4152 | } |
4153 | else if (allow_blocks && compiler.has_decoration(type.self, DecorationBlock)) |
4154 | { |
4155 | uint32_t member_count = uint32_t(type.member_types.size()); |
4156 | for (uint32_t i = 0; i < member_count; i++) |
4157 | { |
4158 | if (compiler.has_member_decoration(type.self, i, DecorationBuiltIn)) |
4159 | { |
4160 | auto &member_type = compiler.get<SPIRType>(type.member_types[i]); |
4161 | BuiltIn builtin = BuiltIn(compiler.get_member_decoration(type.self, i, DecorationBuiltIn)); |
4162 | flags.set(builtin); |
4163 | handle_builtin(member_type, builtin, compiler.get_member_decoration_bitset(type.self, i)); |
4164 | } |
4165 | } |
4166 | } |
4167 | } |
4168 | } |
4169 | |
// Convenience overload: considers only the variable itself, not block members.
void Compiler::ActiveBuiltinHandler::add_if_builtin(uint32_t id)
{
	add_if_builtin(id, false);
}
4174 | |
// Variant which also marks builtin members of a Block-decorated struct as active.
// Used for block initializers, where all members are considered written.
void Compiler::ActiveBuiltinHandler::add_if_builtin_or_block(uint32_t id)
{
	add_if_builtin(id, true);
}
4179 | |
// Traversal callback: marks builtin variables as active based on how each opcode
// reads or writes IDs, and walks access chains into Block-decorated structs to
// find builtin members accessed through them.
// Returns false only for malformed (too short) instructions, aborting traversal.
bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args, uint32_t length)
{
	switch (opcode)
	{
	case OpStore:
		if (length < 1)
			return false;

		// Pointer operand may be a builtin being written.
		add_if_builtin(args[0]);
		break;

	case OpCopyMemory:
		if (length < 2)
			return false;

		// Both target and source pointers may be builtins.
		add_if_builtin(args[0]);
		add_if_builtin(args[1]);
		break;

	case OpCopyObject:
	case OpLoad:
		if (length < 3)
			return false;

		// Operand 2 is the source pointer being read.
		add_if_builtin(args[2]);
		break;

	case OpSelect:
		if (length < 5)
			return false;

		// Variable pointers: either select operand may reference a builtin.
		add_if_builtin(args[3]);
		add_if_builtin(args[4]);
		break;

	case OpPhi:
	{
		if (length < 2)
			return false;

		// Phi sources come in (value, parent block) pairs after the first two operands.
		uint32_t count = length - 2;
		args += 2;
		for (uint32_t i = 0; i < count; i += 2)
			add_if_builtin(args[i]);
		break;
	}

	case OpFunctionCall:
	{
		if (length < 3)
			return false;

		// Any call argument may pass a builtin by pointer.
		uint32_t count = length - 3;
		args += 3;
		for (uint32_t i = 0; i < count; i++)
			add_if_builtin(args[i]);
		break;
	}

	case OpAccessChain:
	case OpInBoundsAccessChain:
	case OpPtrAccessChain:
	{
		if (length < 4)
			return false;

		// Only consider global variables, cannot consider variables in functions yet, or other
		// access chains as they have not been created yet.
		auto *var = compiler.maybe_get<SPIRVariable>(args[2]);
		if (!var)
			break;

		// Required if we access chain into builtins like gl_GlobalInvocationID.
		add_if_builtin(args[2]);

		// Start traversing type hierarchy at the proper non-pointer types.
		auto *type = &compiler.get_variable_data_type(*var);

		auto &flags =
		    var->storage == StorageClassInput ? compiler.active_input_builtins : compiler.active_output_builtins;

		uint32_t count = length - 3;
		args += 3;
		for (uint32_t i = 0; i < count; i++)
		{
			// Pointers
			// PtrAccessChain's first index is an element offset, not a member index.
			if (opcode == OpPtrAccessChain && i == 0)
			{
				type = &compiler.get<SPIRType>(type->parent_type);
				continue;
			}

			// Arrays
			if (!type->array.empty())
			{
				type = &compiler.get<SPIRType>(type->parent_type);
			}
			// Structs
			else if (type->basetype == SPIRType::Struct)
			{
				// Struct member indices must be constants; resolve and check for a builtin member.
				uint32_t index = compiler.get<SPIRConstant>(args[i]).scalar();

				if (index < uint32_t(compiler.ir.meta[type->self].members.size()))
				{
					auto &decorations = compiler.ir.meta[type->self].members[index];
					if (decorations.builtin)
					{
						flags.set(decorations.builtin_type);
						handle_builtin(compiler.get<SPIRType>(type->member_types[index]), decorations.builtin_type,
						               decorations.decoration_flags);
					}
				}

				type = &compiler.get<SPIRType>(type->member_types[index]);
			}
			else
			{
				// No point in traversing further. We won't find any extra builtins.
				break;
			}
		}
		break;
	}

	default:
		break;
	}

	return true;
}
4310 | |
4311 | void Compiler::update_active_builtins() |
4312 | { |
4313 | active_input_builtins.reset(); |
4314 | active_output_builtins.reset(); |
4315 | cull_distance_count = 0; |
4316 | clip_distance_count = 0; |
4317 | ActiveBuiltinHandler handler(*this); |
4318 | traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler); |
4319 | |
4320 | ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) { |
4321 | if (var.storage != StorageClassOutput) |
4322 | return; |
4323 | if (!interface_variable_exists_in_entry_point(var.self)) |
4324 | return; |
4325 | |
4326 | // Also, make sure we preserve output variables which are only initialized, but never accessed by any code. |
4327 | if (var.initializer != ID(0)) |
4328 | handler.add_if_builtin_or_block(var.self); |
4329 | }); |
4330 | } |
4331 | |
4332 | // Returns whether this shader uses a builtin of the storage class |
4333 | bool Compiler::has_active_builtin(BuiltIn builtin, StorageClass storage) const |
4334 | { |
4335 | const Bitset *flags; |
4336 | switch (storage) |
4337 | { |
4338 | case StorageClassInput: |
4339 | flags = &active_input_builtins; |
4340 | break; |
4341 | case StorageClassOutput: |
4342 | flags = &active_output_builtins; |
4343 | break; |
4344 | |
4345 | default: |
4346 | return false; |
4347 | } |
4348 | return flags->get(builtin); |
4349 | } |
4350 | |
// Determines which images/samplers are used for depth comparison and whether
// subpass inputs are needed, propagating that usage across the call graph.
void Compiler::analyze_image_and_sampler_usage()
{
	// First, collect all sampled images used with Dref (depth-compare) operations.
	CombinedImageSamplerDrefHandler dref_handler(*this);
	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), dref_handler);

	CombinedImageSamplerUsageHandler handler(*this, dref_handler.dref_combined_samplers);
	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);

	// Need to run this traversal twice. First time, we propagate any comparison sampler usage from leaf functions
	// down to main().
	// In the second pass, we can propagate up forced depth state coming from main() up into leaf functions.
	handler.dependency_hierarchy.clear();
	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);

	comparison_ids = move(handler.comparison_ids);
	need_subpass_input = handler.need_subpass_input;

	// Forward information from separate images and samplers into combined image samplers.
	for (auto &combined : combined_image_samplers)
		if (comparison_ids.count(combined.sampler_id))
			comparison_ids.insert(combined.combined_id);
}
4373 | |
4374 | bool Compiler::CombinedImageSamplerDrefHandler::handle(spv::Op opcode, const uint32_t *args, uint32_t) |
4375 | { |
4376 | // Mark all sampled images which are used with Dref. |
4377 | switch (opcode) |
4378 | { |
4379 | case OpImageSampleDrefExplicitLod: |
4380 | case OpImageSampleDrefImplicitLod: |
4381 | case OpImageSampleProjDrefExplicitLod: |
4382 | case OpImageSampleProjDrefImplicitLod: |
4383 | case OpImageSparseSampleProjDrefImplicitLod: |
4384 | case OpImageSparseSampleDrefImplicitLod: |
4385 | case OpImageSparseSampleProjDrefExplicitLod: |
4386 | case OpImageSparseSampleDrefExplicitLod: |
4387 | case OpImageDrefGather: |
4388 | case OpImageSparseDrefGather: |
4389 | dref_combined_samplers.insert(args[2]); |
4390 | return true; |
4391 | |
4392 | default: |
4393 | break; |
4394 | } |
4395 | |
4396 | return true; |
4397 | } |
4398 | |
// Returns the CFG for the function currently being compiled.
// Must only be called while a function is actively being processed (current_function set).
const CFG &Compiler::get_cfg_for_current_function() const
{
	assert(current_function);
	return get_cfg_for_function(current_function->self);
}
4404 | |
4405 | const CFG &Compiler::get_cfg_for_function(uint32_t id) const |
4406 | { |
4407 | auto cfg_itr = function_cfgs.find(id); |
4408 | assert(cfg_itr != end(function_cfgs)); |
4409 | assert(cfg_itr->second); |
4410 | return *cfg_itr->second; |
4411 | } |
4412 | |
4413 | void Compiler::build_function_control_flow_graphs_and_analyze() |
4414 | { |
4415 | CFGBuilder handler(*this); |
4416 | handler.function_cfgs[ir.default_entry_point].reset(new CFG(*this, get<SPIRFunction>(ir.default_entry_point))); |
4417 | traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler); |
4418 | function_cfgs = move(handler.function_cfgs); |
4419 | bool single_function = function_cfgs.size() <= 1; |
4420 | |
4421 | for (auto &f : function_cfgs) |
4422 | { |
4423 | auto &func = get<SPIRFunction>(f.first); |
4424 | AnalyzeVariableScopeAccessHandler scope_handler(*this, func); |
4425 | analyze_variable_scope(func, scope_handler); |
4426 | find_function_local_luts(func, scope_handler, single_function); |
4427 | |
4428 | // Check if we can actually use the loop variables we found in analyze_variable_scope. |
4429 | // To use multiple initializers, we need the same type and qualifiers. |
4430 | for (auto block : func.blocks) |
4431 | { |
4432 | auto &b = get<SPIRBlock>(block); |
4433 | if (b.loop_variables.size() < 2) |
4434 | continue; |
4435 | |
4436 | auto &flags = get_decoration_bitset(b.loop_variables.front()); |
4437 | uint32_t type = get<SPIRVariable>(b.loop_variables.front()).basetype; |
4438 | bool invalid_initializers = false; |
4439 | for (auto loop_variable : b.loop_variables) |
4440 | { |
4441 | if (flags != get_decoration_bitset(loop_variable) || |
4442 | type != get<SPIRVariable>(b.loop_variables.front()).basetype) |
4443 | { |
4444 | invalid_initializers = true; |
4445 | break; |
4446 | } |
4447 | } |
4448 | |
4449 | if (invalid_initializers) |
4450 | { |
4451 | for (auto loop_variable : b.loop_variables) |
4452 | get<SPIRVariable>(loop_variable).loop_variable = false; |
4453 | b.loop_variables.clear(); |
4454 | } |
4455 | } |
4456 | } |
4457 | } |
4458 | |
// Traversal handler which lazily builds one CFG per reachable function (see follow_function_call).
Compiler::CFGBuilder::CFGBuilder(Compiler &compiler_)
    : compiler(compiler_)
{
}
4463 | |
// No per-opcode work is needed; CFG construction happens in follow_function_call().
// Returning true keeps the traversal going.
bool Compiler::CFGBuilder::handle(spv::Op, const uint32_t *, uint32_t)
{
	return true;
}
4468 | |
4469 | bool Compiler::CFGBuilder::follow_function_call(const SPIRFunction &func) |
4470 | { |
4471 | if (function_cfgs.find(func.self) == end(function_cfgs)) |
4472 | { |
4473 | function_cfgs[func.self].reset(new CFG(compiler, func)); |
4474 | return true; |
4475 | } |
4476 | else |
4477 | return false; |
4478 | } |
4479 | |
4480 | void Compiler::CombinedImageSamplerUsageHandler::add_dependency(uint32_t dst, uint32_t src) |
4481 | { |
4482 | dependency_hierarchy[dst].insert(src); |
4483 | // Propagate up any comparison state if we're loading from one such variable. |
4484 | if (comparison_ids.count(src)) |
4485 | comparison_ids.insert(dst); |
4486 | } |
4487 | |
4488 | bool Compiler::CombinedImageSamplerUsageHandler::begin_function_scope(const uint32_t *args, uint32_t length) |
4489 | { |
4490 | if (length < 3) |
4491 | return false; |
4492 | |
4493 | auto &func = compiler.get<SPIRFunction>(args[2]); |
4494 | const auto *arg = &args[3]; |
4495 | length -= 3; |
4496 | |
4497 | for (uint32_t i = 0; i < length; i++) |
4498 | { |
4499 | auto &argument = func.arguments[i]; |
4500 | add_dependency(argument.id, arg[i]); |
4501 | } |
4502 | |
4503 | return true; |
4504 | } |
4505 | |
4506 | void Compiler::CombinedImageSamplerUsageHandler::add_hierarchy_to_comparison_ids(uint32_t id) |
4507 | { |
4508 | // Traverse the variable dependency hierarchy and tag everything in its path with comparison ids. |
4509 | comparison_ids.insert(id); |
4510 | |
4511 | for (auto &dep_id : dependency_hierarchy[id]) |
4512 | add_hierarchy_to_comparison_ids(dep_id); |
4513 | } |
4514 | |
// Walks opcodes to (a) build the dependency hierarchy between image/sampler ids,
// (b) detect subpass input usage, and (c) propagate depth-comparison state from
// Dref usage sites (collected earlier by CombinedImageSamplerDrefHandler) up to
// the underlying image and sampler variables.
bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
{
	switch (opcode)
	{
	case OpAccessChain:
	case OpInBoundsAccessChain:
	case OpPtrAccessChain:
	case OpLoad:
	{
		if (length < 3)
			return false;

		// args[1] is the result id, args[2] the pointer/variable being read through.
		add_dependency(args[1], args[2]);

		// Ideally defer this to OpImageRead, but then we'd need to track loaded IDs.
		// If we load an image, we're going to use it and there is little harm in declaring an unused gl_FragCoord.
		auto &type = compiler.get<SPIRType>(args[0]);
		if (type.image.dim == DimSubpassData)
			need_subpass_input = true;

		// If we load a SampledImage and it will be used with Dref, propagate the state up.
		if (dref_combined_samplers.count(args[1]) != 0)
			add_hierarchy_to_comparison_ids(args[1]);
		break;
	}

	case OpSampledImage:
	{
		if (length < 4)
			return false;

		// If the underlying resource has been used for comparison then duplicate loads of that resource must be too.
		// This image must be a depth image.
		uint32_t result_id = args[1];
		uint32_t image = args[2];
		uint32_t sampler = args[3];

		if (dref_combined_samplers.count(result_id) != 0)
		{
			add_hierarchy_to_comparison_ids(image);

			// This sampler must be a SamplerComparisonState, and not a regular SamplerState.
			add_hierarchy_to_comparison_ids(sampler);

			// Mark the OpSampledImage itself as being comparison state.
			comparison_ids.insert(result_id);
		}
		return true;
	}

	default:
		break;
	}

	return true;
}
4571 | |
4572 | bool Compiler::buffer_is_hlsl_counter_buffer(VariableID id) const |
4573 | { |
4574 | auto *m = ir.find_meta(id); |
4575 | return m && m->hlsl_is_magic_counter_buffer; |
4576 | } |
4577 | |
4578 | bool Compiler::buffer_get_hlsl_counter_buffer(VariableID id, uint32_t &counter_id) const |
4579 | { |
4580 | auto *m = ir.find_meta(id); |
4581 | |
4582 | // First, check for the proper decoration. |
4583 | if (m && m->hlsl_magic_counter_buffer != 0) |
4584 | { |
4585 | counter_id = m->hlsl_magic_counter_buffer; |
4586 | return true; |
4587 | } |
4588 | else |
4589 | return false; |
4590 | } |
4591 | |
// Synthesizes a null (zero-initialized) constant of the given type under the given id,
// recursing through arrays and structs by allocating fresh ids for element/member constants.
void Compiler::make_constant_null(uint32_t id, uint32_t type)
{
	auto &constant_type = get<SPIRType>(type);

	if (constant_type.pointer)
	{
		// Null pointer constant.
		auto &constant = set<SPIRConstant>(id, type);
		constant.make_null(constant_type);
	}
	else if (!constant_type.array.empty())
	{
		// Array: build one null constant for the element type, then replicate its id
		// for every array element.
		assert(constant_type.parent_type);
		uint32_t parent_id = ir.increase_bound_by(1);
		make_constant_null(parent_id, constant_type.parent_type);

		// Specialization-constant array sizes cannot be expanded here.
		if (!constant_type.array_size_literal.back())
			SPIRV_CROSS_THROW("Array size of OpConstantNull must be a literal." );

		SmallVector<uint32_t> elements(constant_type.array.back());
		for (uint32_t i = 0; i < constant_type.array.back(); i++)
			elements[i] = parent_id;
		set<SPIRConstant>(id, type, elements.data(), uint32_t(elements.size()), false);
	}
	else if (!constant_type.member_types.empty())
	{
		// Struct: allocate one id per member and recurse per member type.
		uint32_t member_ids = ir.increase_bound_by(uint32_t(constant_type.member_types.size()));
		SmallVector<uint32_t> elements(constant_type.member_types.size());
		for (uint32_t i = 0; i < constant_type.member_types.size(); i++)
		{
			make_constant_null(member_ids + i, constant_type.member_types[i]);
			elements[i] = member_ids + i;
		}
		set<SPIRConstant>(id, type, elements.data(), uint32_t(elements.size()), false);
	}
	else
	{
		// Scalar/vector/matrix: plain zero value.
		auto &constant = set<SPIRConstant>(id, type);
		constant.make_null(constant_type);
	}
}
4632 | |
// Returns the OpCapability declarations collected by the parser.
const SmallVector<spv::Capability> &Compiler::get_declared_capabilities() const
{
	return ir.declared_capabilities;
}
4637 | |
// Returns the OpExtension declarations collected by the parser.
const SmallVector<std::string> &Compiler::get_declared_extensions() const
{
	return ir.declared_extensions;
}
4642 | |
// Public overload: resolve a block name, preferring the block type's name over the instance name.
std::string Compiler::get_remapped_declared_block_name(VariableID id) const
{
	return get_remapped_declared_block_name(id, false);
}
4647 | |
4648 | std::string Compiler::get_remapped_declared_block_name(uint32_t id, bool fallback_prefer_instance_name) const |
4649 | { |
4650 | auto itr = declared_block_names.find(id); |
4651 | if (itr != end(declared_block_names)) |
4652 | { |
4653 | return itr->second; |
4654 | } |
4655 | else |
4656 | { |
4657 | auto &var = get<SPIRVariable>(id); |
4658 | |
4659 | if (fallback_prefer_instance_name) |
4660 | { |
4661 | return to_name(var.self); |
4662 | } |
4663 | else |
4664 | { |
4665 | auto &type = get<SPIRType>(var.basetype); |
4666 | auto *type_meta = ir.find_meta(type.self); |
4667 | auto *block_name = type_meta ? &type_meta->decoration.alias : nullptr; |
4668 | return (!block_name || block_name->empty()) ? get_block_fallback_name(id) : *block_name; |
4669 | } |
4670 | } |
4671 | } |
4672 | |
// Decide whether reflection should report SSBO instance names instead of block type names.
// Uses OpSource language info when present; otherwise falls back to a heuristic based on
// whether multiple SSBO variables alias the same block type (typical of HLSL UAVs).
bool Compiler::reflection_ssbo_instance_name_is_significant() const
{
	if (ir.source.known)
	{
		// UAVs from HLSL source tend to be declared in a way where the type is reused
		// but the instance name is significant, and that's the name we should report.
		// For GLSL, SSBOs each have their own block type as that's how GLSL is written.
		return ir.source.hlsl;
	}

	unordered_set<uint32_t> ssbo_type_ids;
	bool aliased_ssbo_types = false;

	// If we don't have any OpSource information, we need to perform some shaky heuristics.
	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) {
		auto &type = this->get<SPIRType>(var.basetype);
		// Only consider pointer variables with non-function storage (actual resources).
		if (!type.pointer || var.storage == StorageClassFunction)
			return;

		// SSBO: either StorageBuffer storage class, or Uniform + BufferBlock (legacy style).
		bool ssbo = var.storage == StorageClassStorageBuffer ||
		            (var.storage == StorageClassUniform && has_decoration(type.self, DecorationBufferBlock));

		if (ssbo)
		{
			if (ssbo_type_ids.count(type.self))
				aliased_ssbo_types = true;
			else
				ssbo_type_ids.insert(type.self);
		}
	});

	// If the block name is aliased, assume we have HLSL-style UAV declarations.
	return aliased_ssbo_types;
}
4707 | |
4708 | bool Compiler::instruction_to_result_type(uint32_t &result_type, uint32_t &result_id, spv::Op op, const uint32_t *args, |
4709 | uint32_t length) |
4710 | { |
4711 | // Most instructions follow the pattern of <result-type> <result-id> <arguments>. |
4712 | // There are some exceptions. |
4713 | switch (op) |
4714 | { |
4715 | case OpStore: |
4716 | case OpCopyMemory: |
4717 | case OpCopyMemorySized: |
4718 | case OpImageWrite: |
4719 | case OpAtomicStore: |
4720 | case OpAtomicFlagClear: |
4721 | case OpEmitStreamVertex: |
4722 | case OpEndStreamPrimitive: |
4723 | case OpControlBarrier: |
4724 | case OpMemoryBarrier: |
4725 | case OpGroupWaitEvents: |
4726 | case OpRetainEvent: |
4727 | case OpReleaseEvent: |
4728 | case OpSetUserEventStatus: |
4729 | case OpCaptureEventProfilingInfo: |
4730 | case OpCommitReadPipe: |
4731 | case OpCommitWritePipe: |
4732 | case OpGroupCommitReadPipe: |
4733 | case OpGroupCommitWritePipe: |
4734 | case OpLine: |
4735 | case OpNoLine: |
4736 | return false; |
4737 | |
4738 | default: |
4739 | if (length > 1 && maybe_get<SPIRType>(args[0]) != nullptr) |
4740 | { |
4741 | result_type = args[0]; |
4742 | result_id = args[1]; |
4743 | return true; |
4744 | } |
4745 | else |
4746 | return false; |
4747 | } |
4748 | } |
4749 | |
// Computes the union of decorations which apply to a struct member, merging in the
// decorations of all nested (non-pointer) struct members recursively. Returns an
// empty bitset when no metadata exists or the index is out of range.
Bitset Compiler::combined_decoration_for_member(const SPIRType &type, uint32_t index) const
{
	Bitset flags;
	auto *type_meta = ir.find_meta(type.self);

	if (type_meta)
	{
		auto &members = type_meta->members;
		// Out-of-range member index: no decorations to report.
		if (index >= members.size())
			return flags;
		auto &dec = members[index];

		flags.merge_or(dec.decoration_flags);

		auto &member_type = get<SPIRType>(type.member_types[index]);

		// If our member type is a struct, traverse all the child members as well recursively.
		auto &member_childs = member_type.member_types;
		for (uint32_t i = 0; i < member_childs.size(); i++)
		{
			auto &child_member_type = get<SPIRType>(member_childs[i]);
			// Pointer members are opaque; don't recurse through them.
			if (!child_member_type.pointer)
				flags.merge_or(combined_decoration_for_member(member_type, i));
		}
	}

	return flags;
}
4778 | |
4779 | bool Compiler::is_desktop_only_format(spv::ImageFormat format) |
4780 | { |
4781 | switch (format) |
4782 | { |
4783 | // Desktop-only formats |
4784 | case ImageFormatR11fG11fB10f: |
4785 | case ImageFormatR16f: |
4786 | case ImageFormatRgb10A2: |
4787 | case ImageFormatR8: |
4788 | case ImageFormatRg8: |
4789 | case ImageFormatR16: |
4790 | case ImageFormatRg16: |
4791 | case ImageFormatRgba16: |
4792 | case ImageFormatR16Snorm: |
4793 | case ImageFormatRg16Snorm: |
4794 | case ImageFormatRgba16Snorm: |
4795 | case ImageFormatR8Snorm: |
4796 | case ImageFormatRg8Snorm: |
4797 | case ImageFormatR8ui: |
4798 | case ImageFormatRg8ui: |
4799 | case ImageFormatR16ui: |
4800 | case ImageFormatRgb10a2ui: |
4801 | case ImageFormatR8i: |
4802 | case ImageFormatRg8i: |
4803 | case ImageFormatR16i: |
4804 | return true; |
4805 | default: |
4806 | break; |
4807 | } |
4808 | |
4809 | return false; |
4810 | } |
4811 | |
4812 | // An image is determined to be a depth image if it is marked as a depth image and is not also |
4813 | // explicitly marked with a color format, or if there are any sample/gather compare operations on it. |
4814 | bool Compiler::is_depth_image(const SPIRType &type, uint32_t id) const |
4815 | { |
4816 | return (type.image.depth && type.image.format == ImageFormatUnknown) || comparison_ids.count(id); |
4817 | } |
4818 | |
4819 | bool Compiler::type_is_opaque_value(const SPIRType &type) const |
4820 | { |
4821 | return !type.pointer && (type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Image || |
4822 | type.basetype == SPIRType::Sampler); |
4823 | } |
4824 | |
4825 | // Make these member functions so we can easily break on any force_recompile events. |
// Request another compilation pass; the backend loops until no pass sets this flag.
void Compiler::force_recompile()
{
	is_force_recompile = true;
}
4830 | |
// Like force_recompile(), but also asserts that the triggering pass made forward progress,
// so the recompile loop cannot spin forever on the same state.
void Compiler::force_recompile_guarantee_forward_progress()
{
	force_recompile();
	is_force_recompile_forward_progress = true;
}
4836 | |
// Query whether any pass has requested another compilation iteration.
bool Compiler::is_forcing_recompilation() const
{
	return is_force_recompile;
}
4841 | |
// Reset both recompile flags before starting a fresh compilation pass.
void Compiler::clear_force_recompile()
{
	is_force_recompile = false;
	is_force_recompile_forward_progress = false;
}
4847 | |
// Traversal handler which discovers buffer-device-address (PhysicalStorageBuffer) pointer
// usage and deduces per-block alignment requirements.
Compiler::PhysicalStorageBufferPointerHandler::PhysicalStorageBufferPointerHandler(Compiler &compiler_)
    : compiler(compiler_)
{
}
4852 | |
4853 | Compiler::PhysicalBlockMeta *Compiler::PhysicalStorageBufferPointerHandler::find_block_meta(uint32_t id) const |
4854 | { |
4855 | auto chain_itr = access_chain_to_physical_block.find(id); |
4856 | if (chain_itr != access_chain_to_physical_block.end()) |
4857 | return chain_itr->second; |
4858 | else |
4859 | return nullptr; |
4860 | } |
4861 | |
// Parses the Memory Operands of an OpLoad/OpStore touching a BDA pointer and records the
// maximum Aligned operand observed for the pointed-to block.
// args points at the memory-operand mask word; extra words follow in bit order
// (Volatile has no literal; Aligned carries one literal word).
void Compiler::PhysicalStorageBufferPointerHandler::mark_aligned_access(uint32_t id, const uint32_t *args, uint32_t length)
{
	uint32_t mask = *args;
	args++;
	length--;
	// Skip the (literal-free) Volatile flag; per spec, operand literals appear in bit order,
	// so any Aligned literal follows after it.
	if (length && (mask & MemoryAccessVolatileMask) != 0)
	{
		args++;
		length--;
	}

	if (length && (mask & MemoryAccessAlignedMask) != 0)
	{
		uint32_t alignment = *args;
		auto *meta = find_block_meta(id);

		// This makes the assumption that the application does not rely on insane edge cases like:
		// Bind buffer with ADDR = 8, use block offset of 8 bytes, load/store with 16 byte alignment.
		// If we emit the buffer with alignment = 16 here, the first element at offset = 0 should
		// actually have alignment of 8 bytes, but this is too theoretical and awkward to support.
		// We could potentially keep track of any offset in the access chain, but it's
		// practically impossible for high level compilers to emit code like that,
		// so deducing overall alignment requirement based on maximum observed Alignment value is probably fine.
		if (meta && alignment > meta->alignment)
			meta->alignment = alignment;
	}
}
4889 | |
4890 | bool Compiler::PhysicalStorageBufferPointerHandler::type_is_bda_block_entry(uint32_t type_id) const |
4891 | { |
4892 | auto &type = compiler.get<SPIRType>(type_id); |
4893 | return type.storage == StorageClassPhysicalStorageBufferEXT && type.pointer && |
4894 | type.pointer_depth == 1 && !compiler.type_is_array_of_pointers(type); |
4895 | } |
4896 | |
4897 | uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_minimum_scalar_alignment(const SPIRType &type) const |
4898 | { |
4899 | if (type.storage == spv::StorageClassPhysicalStorageBufferEXT) |
4900 | return 8; |
4901 | else if (type.basetype == SPIRType::Struct) |
4902 | { |
4903 | uint32_t alignment = 0; |
4904 | for (auto &member_type : type.member_types) |
4905 | { |
4906 | uint32_t member_align = get_minimum_scalar_alignment(compiler.get<SPIRType>(member_type)); |
4907 | if (member_align > alignment) |
4908 | alignment = member_align; |
4909 | } |
4910 | return alignment; |
4911 | } |
4912 | else |
4913 | return type.width / 8; |
4914 | } |
4915 | |
// Begins tracking a BDA pointer chain: associates the SSA id with (possibly fresh)
// block metadata for its pointer type, and seeds a default alignment.
void Compiler::PhysicalStorageBufferPointerHandler::setup_meta_chain(uint32_t type_id, uint32_t var_id)
{
	if (type_is_bda_block_entry(type_id))
	{
		// operator[] creates the metadata entry on first sight of this type.
		auto &meta = physical_block_type_meta[type_id];
		access_chain_to_physical_block[var_id] = &meta;

		auto &type = compiler.get<SPIRType>(type_id);
		// Non-struct pointees need a synthesized wrapper block later; remember them.
		if (type.basetype != SPIRType::Struct)
			non_block_types.insert(type_id);

		// Seed with the scalar minimum; explicit Aligned operands may raise it later.
		if (meta.alignment == 0)
			meta.alignment = get_minimum_scalar_alignment(compiler.get_pointee_type(type));
	}
}
4931 | |
// When a BDA pointer comes to life, we need to keep a mapping of SSA ID -> type ID for the pointer type.
// For every load and store, we'll need to be able to look up the type ID being accessed and mark any alignment
// requirements.
bool Compiler::PhysicalStorageBufferPointerHandler::handle(Op op, const uint32_t *args, uint32_t length)
{
	switch (op)
	{
	case OpConvertUToPtr:
	case OpBitcast:
	case OpCompositeExtract:
		// Extract can begin a new chain if we had a struct or array of pointers as input.
		// We don't begin chains before we have a pure scalar pointer.
		setup_meta_chain(args[0], args[1]);
		break;

	case OpAccessChain:
	case OpInBoundsAccessChain:
	case OpPtrAccessChain:
	case OpCopyObject:
	{
		// These just forward an existing chain: map the new result id to the same block meta.
		auto itr = access_chain_to_physical_block.find(args[2]);
		if (itr != access_chain_to_physical_block.end())
			access_chain_to_physical_block[args[1]] = itr->second;
		break;
	}

	case OpLoad:
	{
		// Loading *through* a BDA pointer may also produce a new BDA pointer (pointer-to-pointer).
		setup_meta_chain(args[0], args[1]);
		// Words 3+ are optional Memory Operands; args[2] is the pointer being loaded from.
		if (length >= 4)
			mark_aligned_access(args[2], args + 3, length - 3);
		break;
	}

	case OpStore:
	{
		// Words 2+ are optional Memory Operands; args[0] is the pointer being stored to.
		if (length >= 3)
			mark_aligned_access(args[0], args + 2, length - 2);
		break;
	}

	default:
		break;
	}

	return true;
}
4979 | |
// Walks up parent_type links from an array-of-pointer (or multi-level pointer) type
// until it reaches the single-level BDA pointer that serves as the block entry.
// Precondition (asserted): the chain does terminate in a BDA block entry.
uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_base_non_block_type_id(uint32_t type_id) const
{
	auto *type = &compiler.get<SPIRType>(type_id);
	while (type->pointer &&
	       type->storage == StorageClassPhysicalStorageBufferEXT &&
	       !type_is_bda_block_entry(type_id))
	{
		type_id = type->parent_type;
		type = &compiler.get<SPIRType>(type_id);
	}

	assert(type_is_bda_block_entry(type_id));
	return type_id;
}
4994 | |
4995 | void Compiler::PhysicalStorageBufferPointerHandler::analyze_non_block_types_from_block(const SPIRType &type) |
4996 | { |
4997 | for (auto &member : type.member_types) |
4998 | { |
4999 | auto &subtype = compiler.get<SPIRType>(member); |
5000 | if (subtype.basetype != SPIRType::Struct && subtype.pointer && |
5001 | subtype.storage == spv::StorageClassPhysicalStorageBufferEXT) |
5002 | { |
5003 | non_block_types.insert(get_base_non_block_type_id(member)); |
5004 | } |
5005 | else if (subtype.basetype == SPIRType::Struct && !subtype.pointer) |
5006 | analyze_non_block_types_from_block(subtype); |
5007 | } |
5008 | } |
5009 | |
// Runs the BDA pointer analysis over the entry point and over all declared blocks,
// then publishes a sorted list of non-block pointee types plus per-type alignment info.
void Compiler::analyze_non_block_pointer_types()
{
	PhysicalStorageBufferPointerHandler handler(*this);
	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);

	// Analyze any block declaration we have to make. It might contain
	// physical pointers to POD types which we never used, and thus never added to the list.
	// We'll need to add those pointer types to the set of types we declare.
	ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) {
		if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))
			handler.analyze_non_block_types_from_block(type);
	});

	// Sort for deterministic output order across runs.
	physical_storage_non_block_pointer_types.reserve(handler.non_block_types.size());
	for (auto type : handler.non_block_types)
		physical_storage_non_block_pointer_types.push_back(type);
	sort(begin(physical_storage_non_block_pointer_types), end(physical_storage_non_block_pointer_types));
	physical_storage_type_to_alignment = move(handler.physical_block_type_meta);
}
5029 | |
// Prepass over the shader: find which function contains the Begin/EndInvocationInterlockEXT pair
// and classify how complex the interlock usage is (multiple functions, or inside control flow).
bool Compiler::InterlockedResourceAccessPrepassHandler::handle(Op op, const uint32_t *, uint32_t)
{
	if (op == OpBeginInvocationInterlockEXT || op == OpEndInvocationInterlockEXT)
	{
		// Interlock ops seen in more than one function.
		if (interlock_function_id != 0 && interlock_function_id != call_stack.back())
		{
			// Most complex case, we have no sensible way of dealing with this
			// other than taking the 100% conservative approach, exit early.
			split_function_case = true;
			return false;
		}
		else
		{
			interlock_function_id = call_stack.back();
			// If this call is performed inside control flow we have a problem.
			auto &cfg = compiler.get_cfg_for_function(interlock_function_id);

			uint32_t from_block_id = compiler.get<SPIRFunction>(interlock_function_id).entry_block;
			bool outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(from_block_id, current_block_id);
			if (!outside_control_flow)
				control_flow_interlock = true;
		}
	}
	return true;
}
5055 | |
// Track the block currently being traversed so handle() can test whether
// interlock ops sit inside control flow.
void Compiler::InterlockedResourceAccessPrepassHandler::rearm_current_block(const SPIRBlock &block)
{
	current_block_id = block.self;
}
5060 | |
// Maintain the call stack so handle() can attribute interlock ops to their containing function.
// OpFunctionCall word 2 is the callee id.
bool Compiler::InterlockedResourceAccessPrepassHandler::begin_function_scope(const uint32_t *args, uint32_t length)
{
	if (length < 3)
		return false;
	call_stack.push_back(args[2]);
	return true;
}
5068 | |
// Pop the callee pushed in begin_function_scope when the traversal returns from it.
bool Compiler::InterlockedResourceAccessPrepassHandler::end_function_scope(const uint32_t *, uint32_t)
{
	call_stack.pop_back();
	return true;
}
5074 | |
5075 | bool Compiler::InterlockedResourceAccessHandler::begin_function_scope(const uint32_t *args, uint32_t length) |
5076 | { |
5077 | if (length < 3) |
5078 | return false; |
5079 | |
5080 | if (args[2] == interlock_function_id) |
5081 | call_stack_is_interlocked = true; |
5082 | |
5083 | call_stack.push_back(args[2]); |
5084 | return true; |
5085 | } |
5086 | |
5087 | bool Compiler::InterlockedResourceAccessHandler::end_function_scope(const uint32_t *, uint32_t) |
5088 | { |
5089 | if (call_stack.back() == interlock_function_id) |
5090 | call_stack_is_interlocked = false; |
5091 | |
5092 | call_stack.pop_back(); |
5093 | return true; |
5094 | } |
5095 | |
5096 | void Compiler::InterlockedResourceAccessHandler::access_potential_resource(uint32_t id) |
5097 | { |
5098 | if ((use_critical_section && in_crit_sec) || (control_flow_interlock && call_stack_is_interlocked) || |
5099 | split_function_case) |
5100 | { |
5101 | compiler.interlocked_resources.insert(id); |
5102 | } |
5103 | } |
5104 | |
// Main pass: find every buffer/image resource which is accessed inside the interlocked
// region (see access_potential_resource for the gating). Performs a bare-bones partial
// compilation (SPIRExpression + register_read) just so maybe_get_backing_variable can
// trace loads/access chains back to their variables.
bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
{
	// Only care about critical section analysis if we have simple case.
	if (use_critical_section)
	{
		if (opcode == OpBeginInvocationInterlockEXT)
		{
			in_crit_sec = true;
			return true;
		}

		if (opcode == OpEndInvocationInterlockEXT)
		{
			// End critical section--nothing more to do.
			return false;
		}
	}

	// We need to figure out where images and buffers are loaded from, so do only the bare bones compilation we need.
	switch (opcode)
	{
	case OpLoad:
	{
		if (length < 3)
			return false;

		uint32_t ptr = args[2];
		auto *var = compiler.maybe_get_backing_variable(ptr);

		// We're only concerned with buffer and image memory here.
		if (!var)
			break;

		switch (var->storage)
		{
		default:
			break;

		case StorageClassUniformConstant:
		{
			// Images/samplers: record the load as an expression so later
			// image operations can trace back to the resource variable.
			uint32_t result_type = args[0];
			uint32_t id = args[1];
			compiler.set<SPIRExpression>(id, "" , result_type, true);
			compiler.register_read(id, ptr, true);
			break;
		}

		case StorageClassUniform:
			// Must have BufferBlock; we only care about SSBOs.
			if (!compiler.has_decoration(compiler.get<SPIRType>(var->basetype).self, DecorationBufferBlock))
				break;
			// fallthrough
		case StorageClassStorageBuffer:
			access_potential_resource(var->self);
			break;
		}
		break;
	}

	case OpInBoundsAccessChain:
	case OpAccessChain:
	case OpPtrAccessChain:
	{
		if (length < 3)
			return false;

		uint32_t result_type = args[0];

		// Track chains into resource memory so later loads/stores can find the backing variable.
		auto &type = compiler.get<SPIRType>(result_type);
		if (type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant ||
		    type.storage == StorageClassStorageBuffer)
		{
			uint32_t id = args[1];
			uint32_t ptr = args[2];
			compiler.set<SPIRExpression>(id, "" , result_type, true);
			compiler.register_read(id, ptr, true);
			compiler.ir.ids[id].set_allow_type_rewrite();
		}
		break;
	}

	case OpImageTexelPointer:
	{
		if (length < 3)
			return false;

		// Remember which image variable the texel pointer refers to,
		// so atomics on it resolve to the right resource.
		uint32_t result_type = args[0];
		uint32_t id = args[1];
		uint32_t ptr = args[2];
		auto &e = compiler.set<SPIRExpression>(id, "" , result_type, true);
		auto *var = compiler.maybe_get_backing_variable(ptr);
		if (var)
			e.loaded_from = var->self;
		break;
	}

	case OpStore:
	case OpImageWrite:
	case OpAtomicStore:
	{
		if (length < 1)
			return false;

		// args[0] is the pointer/image being written.
		uint32_t ptr = args[0];
		auto *var = compiler.maybe_get_backing_variable(ptr);
		if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
		            var->storage == StorageClassStorageBuffer))
		{
			access_potential_resource(var->self);
		}

		break;
	}

	case OpCopyMemory:
	{
		if (length < 2)
			return false;

		uint32_t dst = args[0];
		uint32_t src = args[1];
		auto *dst_var = compiler.maybe_get_backing_variable(dst);
		auto *src_var = compiler.maybe_get_backing_variable(src);

		if (dst_var && (dst_var->storage == StorageClassUniform || dst_var->storage == StorageClassStorageBuffer))
			access_potential_resource(dst_var->self);

		if (src_var)
		{
			if (src_var->storage != StorageClassUniform && src_var->storage != StorageClassStorageBuffer)
				break;

			// Plain UBO reads (Uniform without BufferBlock) are not interlock-relevant.
			if (src_var->storage == StorageClassUniform &&
			    !compiler.has_decoration(compiler.get<SPIRType>(src_var->basetype).self, DecorationBufferBlock))
			{
				break;
			}

			access_potential_resource(src_var->self);
		}

		break;
	}

	case OpImageRead:
	case OpAtomicLoad:
	{
		if (length < 3)
			return false;

		uint32_t ptr = args[2];
		auto *var = compiler.maybe_get_backing_variable(ptr);

		// We're only concerned with buffer and image memory here.
		if (!var)
			break;

		switch (var->storage)
		{
		default:
			break;

		case StorageClassUniform:
			// Must have BufferBlock; we only care about SSBOs.
			if (!compiler.has_decoration(compiler.get<SPIRType>(var->basetype).self, DecorationBufferBlock))
				break;
			// fallthrough
		case StorageClassUniformConstant:
		case StorageClassStorageBuffer:
			access_potential_resource(var->self);
			break;
		}
		break;
	}

	case OpAtomicExchange:
	case OpAtomicCompareExchange:
	case OpAtomicIIncrement:
	case OpAtomicIDecrement:
	case OpAtomicIAdd:
	case OpAtomicISub:
	case OpAtomicSMin:
	case OpAtomicUMin:
	case OpAtomicSMax:
	case OpAtomicUMax:
	case OpAtomicAnd:
	case OpAtomicOr:
	case OpAtomicXor:
	{
		if (length < 3)
			return false;

		// All of these atomics read and write through args[2] (the pointer operand).
		uint32_t ptr = args[2];
		auto *var = compiler.maybe_get_backing_variable(ptr);
		if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
		            var->storage == StorageClassStorageBuffer))
		{
			access_potential_resource(var->self);
		}

		break;
	}

	default:
		break;
	}

	return true;
}
5314 | |
// Runs the two-phase interlock analysis for fragment shaders which declare any
// pixel/sample interlock execution mode: a prepass to classify the interlock shape,
// then the main pass to collect interlocked resources.
void Compiler::analyze_interlocked_resource_usage()
{
	if (get_execution_model() == ExecutionModelFragment &&
	    (get_entry_point().flags.get(ExecutionModePixelInterlockOrderedEXT) ||
	     get_entry_point().flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
	     get_entry_point().flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
	     get_entry_point().flags.get(ExecutionModeSampleInterlockUnorderedEXT)))
	{
		InterlockedResourceAccessPrepassHandler prepass_handler(*this, ir.default_entry_point);
		traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), prepass_handler);

		// Forward the prepass classification; the simple critical-section approach is only
		// usable when the interlock is neither split across functions nor inside control flow.
		InterlockedResourceAccessHandler handler(*this, ir.default_entry_point);
		handler.interlock_function_id = prepass_handler.interlock_function_id;
		handler.split_function_case = prepass_handler.split_function_case;
		handler.control_flow_interlock = prepass_handler.control_flow_interlock;
		handler.use_critical_section = !handler.split_function_case && !handler.control_flow_interlock;

		traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);

		// For GLSL. If we hit any of these cases, we have to fall back to conservative approach.
		interlocked_is_complex =
		    !handler.use_critical_section || handler.interlock_function_id != ir.default_entry_point;
	}
}
5339 | |
5340 | bool Compiler::type_is_array_of_pointers(const SPIRType &type) const |
5341 | { |
5342 | if (!type.pointer) |
5343 | return false; |
5344 | |
5345 | // If parent type has same pointer depth, we must have an array of pointers. |
5346 | return type.pointer_depth == get<SPIRType>(type.parent_type).pointer_depth; |
5347 | } |
5348 | |
5349 | bool Compiler::type_is_top_level_physical_pointer(const SPIRType &type) const |
5350 | { |
5351 | return type.pointer && type.storage == StorageClassPhysicalStorageBuffer && |
5352 | type.pointer_depth > get<SPIRType>(type.parent_type).pointer_depth; |
5353 | } |
5354 | |
5355 | bool Compiler::flush_phi_required(BlockID from, BlockID to) const |
5356 | { |
5357 | auto &child = get<SPIRBlock>(to); |
5358 | for (auto &phi : child.phi_variables) |
5359 | if (phi.parent == from) |
5360 | return true; |
5361 | return false; |
5362 | } |
5363 | |
5364 | void Compiler::add_loop_level() |
5365 | { |
5366 | current_loop_level++; |
5367 | } |
5368 | |