@@ -383,11 +383,6 @@ pub struct Writer<W> {
383
383
/// Set of (struct type, struct field index) denoting which fields require
384
384
/// padding inserted **before** them (i.e. between fields at index - 1 and index)
385
385
struct_member_pads : FastHashSet < ( Handle < crate :: Type > , u32 ) > ,
386
-
387
- /// Name of the force-bounded-loop macro.
388
- ///
389
- /// See `emit_force_bounded_loop_macro` for details.
390
- force_bounded_loop_macro_name : String ,
391
386
}
392
387
393
388
impl crate :: Scalar {
@@ -601,7 +596,7 @@ struct ExpressionContext<'a> {
601
596
/// accesses. These may need to be cached in temporary variables. See
602
597
/// `index::find_checked_indexes` for details.
603
598
guarded_indices : HandleSet < crate :: Expression > ,
604
- /// See [`Writer::emit_force_bounded_loop_macro `] for details.
599
+ /// See [`Writer::gen_force_bounded_loop_statements`] for details.
605
600
force_loop_bounding : bool ,
606
601
}
607
602
@@ -685,7 +680,6 @@ impl<W: Write> Writer<W> {
685
680
#[ cfg( test) ]
686
681
put_block_stack_pointers : Default :: default ( ) ,
687
682
struct_member_pads : FastHashSet :: default ( ) ,
688
- force_bounded_loop_macro_name : String :: default ( ) ,
689
683
}
690
684
}
691
685
@@ -696,17 +690,11 @@ impl<W: Write> Writer<W> {
696
690
self . out
697
691
}
698
692
699
- /// Define a macro to invoke at the bottom of each loop body, to
700
- /// defeat MSL infinite loop reasoning.
701
- ///
702
- /// If we haven't done so already, emit the definition of a preprocessor
703
- /// macro to be invoked at the end of each loop body in the generated MSL,
704
- /// to ensure that the MSL compiler's optimizations do not remove bounds
705
- /// checks.
706
- ///
707
- /// Only the first call to this function for a given module actually causes
708
- /// the macro definition to be written. Subsequent loops can simply use the
709
- /// prior macro definition, since macros aren't block-scoped.
693
+ /// Generates statements to be inserted immediately before and at the very
694
+ /// start of the body of each loop, to defeat MSL infinite loop reasoning.
695
+ /// The 0th item of the returned tuple should be inserted immediately prior
696
+ /// to the loop and the 1st item should be inserted at the very start of
697
+ /// the loop body.
710
698
///
711
699
/// # What is this trying to solve?
712
700
///
@@ -774,7 +762,8 @@ impl<W: Write> Writer<W> {
774
762
/// but which in fact generates no instructions. Unfortunately, inline
775
763
/// assembly is not handled correctly by some Metal device drivers.
776
764
///
777
- /// Instead, we add the following code to the bottom of every loop:
765
+ /// A previously used approach was to add the following code to the bottom
766
+ /// of every loop:
778
767
///
779
768
/// ```ignore
780
769
/// if (volatile bool unpredictable = false; unpredictable)
@@ -785,37 +774,47 @@ impl<W: Write> Writer<W> {
785
774
/// the `volatile` qualifier prevents the compiler from assuming this. Thus,
786
775
/// it must assume that the `break` might be reached, and hence that the
787
776
/// loop is not unbounded. This prevents the range analysis impact described
788
- /// above.
777
+ /// above. Unfortunately this prevented the compiler from making important,
778
+ /// and safe, optimizations such as loop unrolling and was observed to
779
+ /// significantly hurt performance.
789
780
///
790
- /// Unfortunately, what makes this a kludge, not a hack, is that this
791
- /// solution leaves the GPU executing a pointless conditional branch, at
792
- /// runtime, in every iteration of the loop. There's no part of the system
793
- /// that has a global enough view to be sure that `unpredictable` is true,
794
- /// and remove it from the code. Adding the branch also affects
795
- /// optimization: for example, it's impossible to unroll this loop. This
796
- /// transformation has been observed to significantly hurt performance.
781
+ /// Our current approach declares a counter before every loop and
782
+ /// increments it every iteration, breaking after 2^64 iterations:
783
+ ///
784
+ /// ```ignore
785
+ /// uint2 loop_bound = uint2(0u);
786
+ /// while (true) {
787
+ /// if (metal::all(loop_bound == uint2(4294967295u))) { break; }
788
+ /// loop_bound += uint2(loop_bound.y == 4294967295u, 1u);
789
+ /// }
790
+ /// ```
797
791
///
798
- /// To make our output a bit more legible, we pull the condition out into a
799
- /// preprocessor macro defined at the top of the module.
792
+ /// This convinces the compiler that the loop is finite and therefore may
793
+ /// execute, whilst at the same time allowing optimizations such as loop
794
+ /// unrolling. Furthermore, the 64-bit counter is large enough that it seems
795
+ /// implausible that it would affect the execution of any shader.
800
796
///
801
797
/// This approach is also used by Chromium WebGPU's Dawn shader compiler:
802
- /// <https://dawn.googlesource.com/dawn/+/a37557db581c2b60fb1cd2c01abdb232927dd961/src/tint/lang/msl/writer/printer/printer.cc#222>
803
- fn emit_force_bounded_loop_macro ( & mut self ) -> BackendResult {
804
- if !self . force_bounded_loop_macro_name . is_empty ( ) {
805
- return Ok ( ( ) ) ;
798
+ /// <https://dawn.googlesource.com/dawn/+/d9e2d1f718678ebee0728b999830576c410cce0a/src/tint/lang/core/ir/transform/prevent_infinite_loops.cc>
799
+ fn gen_force_bounded_loop_statements (
800
+ & mut self ,
801
+ level : back:: Level ,
802
+ context : & StatementContext ,
803
+ ) -> Option < ( String , String ) > {
804
+ if !context. expression . force_loop_bounding { // loop bounding is off for this context: nothing to emit
805
+ return None ;
806
806
}
807
807
808
- self . force_bounded_loop_macro_name = self . namer . call ( "LOOP_IS_BOUNDED" ) ;
809
- let loop_bounded_volatile_name = self . namer . call ( "unpredictable_break_from_loop" ) ;
810
- writeln ! (
811
- self . out,
812
- "#define {} {{ volatile bool {} = false; if ({}) break; }}" ,
813
- self . force_bounded_loop_macro_name,
814
- loop_bounded_volatile_name,
815
- loop_bounded_volatile_name,
816
- ) ?;
808
+ let loop_bound_name = self . namer . call ( "loop_bound" ) ; // identifier for the counter; presumably the namer keeps it distinct from other names (TODO confirm)
809
+ let decl = format ! ( "{level}uint2 {loop_bound_name} = uint2(0u);" ) ; // tuple item 0: counter declaration, formatted at the indentation passed in
810
+ let level = level. next ( ) ; // shadowed: the remaining text is formatted one level deeper, for the loop body
811
+ let max = u32:: MAX ; // both counter components are compared against this value
812
+ let break_and_inc = format ! (
813
+ "{level}if ({NAMESPACE}::all({loop_bound_name} == uint2({max}u))) {{ break; }}
814
+ {level}{loop_bound_name} += uint2({loop_bound_name}.y == {max}u, 1u);"
815
+ ) ; // tuple item 1: break once both components equal max, otherwise add 1 to .y and add the (.y == max) flag to .x
817
816
818
- Ok ( ( ) )
817
+ Some ( ( decl , break_and_inc ) ) // (text for before the loop, text for the start of the loop body)
819
818
}
820
819
821
820
fn put_call_parameters (
@@ -3201,10 +3200,23 @@ impl<W: Write> Writer<W> {
3201
3200
ref continuing,
3202
3201
break_if,
3203
3202
} => {
3204
- if !continuing. is_empty ( ) || break_if. is_some ( ) {
3205
- let gate_name = self . namer . call ( "loop_init" ) ;
3203
+ let force_loop_bound_statements =
3204
+ self . gen_force_bounded_loop_statements ( level, context) ;
3205
+ let gate_name = ( !continuing. is_empty ( ) || break_if. is_some ( ) )
3206
+ . then ( || self . namer . call ( "loop_init" ) ) ;
3207
+
3208
+ if let Some ( ( ref decl, _) ) = force_loop_bound_statements {
3209
+ writeln ! ( self . out, "{decl}" ) ?;
3210
+ }
3211
+ if let Some ( ref gate_name) = gate_name {
3206
3212
writeln ! ( self . out, "{level}bool {gate_name} = true;" ) ?;
3207
- writeln ! ( self . out, "{level}while(true) {{" , ) ?;
3213
+ }
3214
+
3215
+ writeln ! ( self . out, "{level}while(true) {{" , ) ?;
3216
+ if let Some ( ( _, ref break_and_inc) ) = force_loop_bound_statements {
3217
+ writeln ! ( self . out, "{break_and_inc}" ) ?;
3218
+ }
3219
+ if let Some ( ref gate_name) = gate_name {
3208
3220
let lif = level. next ( ) ;
3209
3221
let lcontinuing = lif. next ( ) ;
3210
3222
writeln ! ( self . out, "{lif}if (!{gate_name}) {{" ) ?;
@@ -3218,19 +3230,9 @@ impl<W: Write> Writer<W> {
3218
3230
}
3219
3231
writeln ! ( self . out, "{lif}}}" ) ?;
3220
3232
writeln ! ( self . out, "{lif}{gate_name} = false;" ) ?;
3221
- } else {
3222
- writeln ! ( self . out, "{level}while(true) {{" , ) ?;
3223
3233
}
3224
3234
self . put_block ( level. next ( ) , body, context) ?;
3225
- if context. expression . force_loop_bounding {
3226
- self . emit_force_bounded_loop_macro ( ) ?;
3227
- writeln ! (
3228
- self . out,
3229
- "{}{}" ,
3230
- level. next( ) ,
3231
- self . force_bounded_loop_macro_name
3232
- ) ?;
3233
- }
3235
+
3234
3236
writeln ! ( self . out, "{level}}}" ) ?;
3235
3237
}
3236
3238
crate :: Statement :: Break => {
@@ -3724,7 +3726,6 @@ impl<W: Write> Writer<W> {
3724
3726
& [ CLAMPED_LOD_LOAD_PREFIX ] ,
3725
3727
& mut self . names ,
3726
3728
) ;
3727
- self . force_bounded_loop_macro_name . clear ( ) ;
3728
3729
self . struct_member_pads . clear ( ) ;
3729
3730
3730
3731
writeln ! (
0 commit comments