Skip to content

[Perf] Windows/x86: 6 Improvements on 3/13/2023 6:11:08 PM #14242

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
performanceautofiler bot opened this issue Mar 21, 2023 · 2 comments
Closed

[Perf] Windows/x86: 6 Improvements on 3/13/2023 6:11:08 PM #14242

performanceautofiler bot opened this issue Mar 21, 2023 · 2 comments

Comments

@performanceautofiler
Copy link

performanceautofiler bot commented Mar 21, 2023

Run Information

Name Value
Architecture x86
OS Windows 10.0.18362
Queue TigerWindows
Baseline 7e3bf878c105ea6114e56d3e6e3014c5480cb060
Compare 1c8d37af80667daffb3cb80ce0fe915621e8f039
Diff Diff
Configs CompilationMode:tiered, RunKind:micro

Improvements in System.Buffers.Text.Tests.Utf8ParserTests

Benchmark Baseline Test Test/Base Test Quality Edge Detector Baseline IR Compare IR IR Ratio Baseline ETL Compare ETL
TryParseSingle - Duration of single invocation 119.96 ns 109.42 ns 0.91 0.00 False Trace Trace
TryParseSingle - Duration of single invocation 119.45 ns 108.59 ns 0.91 0.00 False Trace Trace
TryParseDouble - Duration of single invocation 155.32 ns 144.82 ns 0.93 0.00 False Trace Trace

graph
graph
graph
Test Report

Repro

General Docs link: https://github.com/dotnet/performance/blob/main/docs/benchmarking-workflow-dotnet-runtime.md

Payloads

Baseline
Compare

git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f net8.0 --filter 'System.Buffers.Text.Tests.Utf8ParserTests*'

Payloads

Baseline
Compare

Histogram

System.Buffers.Text.Tests.Utf8ParserTests.TryParseSingle(value: -3.4028235E+38)


Description of detection logic

IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 109.41553414063989 < 113.99170304524004.
IsChangePoint: Marked as a change because one of 3/13/2023 2:14:21 PM, 3/21/2023 3:33:39 AM falls between 3/11/2023 9:11:51 PM and 3/21/2023 3:33:39 AM.
IsImprovementStdDev: Marked as improvement because 70.29001117991471 (T) = (0 -108.45736527964182) / Math.Sqrt((1.132660825972663 / (299)) + (0.999022431739063 / (40))) is greater than 1.9670282846691574 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (40) - 2, .975) and 0.09903013779840154 = (120.37846084509063 - 108.45736527964182) / 120.37846084509063 is greater than 0.05.
IsChangeEdgeDetector: Marked not as a regression because Edge Detector said so.

### Baseline BDN Disasm

```assembly
; System.Buffers.Text.Tests.Utf8ParserTests.TryParseSingle(Utf8TestCase)
       push      ebp
       mov       ebp,esp
       sub       esp,8
       xor       eax,eax
       mov       [ebp-4],eax
       mov       [ebp-8],eax
       mov       ecx,[edx+4]
       test      ecx,ecx
       je        short M00_L01
       lea       edx,[ecx+8]
       mov       eax,[ecx+4]
M00_L00:
       push      eax
       push      edx
       push      0
       lea       ecx,[ebp-4]
       lea       edx,[ebp-8]
       call      dword ptr ds:[0B8AD3D8]; System.Buffers.Text.Utf8Parser.TryParse(System.ReadOnlySpan`1<Byte>, Single ByRef, Int32 ByRef, Char)
       mov       esp,ebp
       pop       ebp
       ret
M00_L01:
       xor       edx,edx
       xor       eax,eax
       jmp       short M00_L00
; Total bytes of code 53
; System.Buffers.Text.Utf8Parser.TryParse(System.ReadOnlySpan`1<Byte>, Single ByRef, Int32 ByRef, Char)
       push      ebp
       mov       ebp,esp
       push      edi
       push      esi
       sub       esp,20
       vzeroupper
       vxorps    xmm4,xmm4,xmm4
       vmovdqu   xmmword ptr [ebp-20],xmm4
       xor       eax,eax
       mov       [ebp-10],eax
       mov       [ebp-0C],esp
       mov       dword ptr [ebp-24],90004D49
       mov       edi,ecx
       mov       esi,edx
       test      [esp],esp
       sub       esp,74
       lea       ecx,[esp]
       mov       [ebp-0C],esp
       mov       byte ptr [ebp-16],3
       mov       [ebp-14],ecx
       mov       dword ptr [ebp-10],72
       lea       ecx,[ebp-14]
       cmp       dword ptr [ecx+4],0
       jbe       near ptr M01_L09
       mov       ecx,[ecx]
       mov       byte ptr [ecx],0
       push      dword ptr [ebp+10]
       push      dword ptr [ebp+0C]
       movzx     ecx,word ptr [ebp+8]
       push      ecx
       lea       ecx,[ebp-20]
       mov       edx,esi
       call      dword ptr ds:[0B8AD408]; System.Buffers.Text.Utf8Parser.TryParseNormalAsFloatingPoint(System.ReadOnlySpan`1<Byte>, NumberBuffer ByRef, Int32 ByRef, Char)
       test      eax,eax
       je        short M01_L07
       cmp       dword ptr [ebp-20],0
       je        short M01_L00
       mov       ecx,[ebp-1C]
       cmp       ecx,0FFFFFFD3
       jge       short M01_L01
M01_L00:
       vxorps    xmm0,xmm0,xmm0
       jmp       short M01_L03
M01_L01:
       cmp       ecx,27
       jle       short M01_L02
       vmovss    xmm0,dword ptr ds:[0B8B2110]
       jmp       short M01_L03
M01_L02:
       lea       ecx,[ebp-20]
       mov       edx,8718388
       call      dword ptr ds:[8D4DDF8]; System.Number.NumberToSingleFloatingPointBits(NumberBuffer ByRef, FloatingPointInfo ByRef)
       vmovd     xmm0,eax
M01_L03:
       cmp       byte ptr [ebp-18],0
       jne       short M01_L04
       jmp       short M01_L05
M01_L04:
       vxorps    xmm0,xmm0,ds:[0B8B2120]
M01_L05:
       vmovss    dword ptr [edi],xmm0
       mov       eax,1
       cmp       dword ptr [ebp-24],90004D49
       je        short M01_L06
       call      CORINFO_HELP_FAIL_FAST
M01_L06:
       lea       esp,[ebp-8]
       pop       esi
       pop       edi
       pop       ebp
       ret       0C
M01_L07:
       push      dword ptr [ebp+10]
       push      dword ptr [ebp+0C]
       vmovss    xmm0,dword ptr ds:[0B8B2110]
       sub       esp,4
       vmovss    dword ptr [esp],xmm0
       vmovss    xmm0,dword ptr ds:[0B8B2130]
       sub       esp,4
       vmovss    dword ptr [esp],xmm0
       vmovss    xmm0,dword ptr ds:[0B8B2134]
       sub       esp,4
       vmovss    dword ptr [esp],xmm0
       mov       ecx,edi
       mov       edx,esi
       call      dword ptr ds:[0B8D3450]
       cmp       dword ptr [ebp-24],90004D49
       je        short M01_L08
       call      CORINFO_HELP_FAIL_FAST
M01_L08:
       lea       esp,[ebp-8]
       pop       esi
       pop       edi
       pop       ebp
       ret       0C
M01_L09:
       call      CORINFO_HELP_RNGCHKFAIL
       int       3
; Total bytes of code 303

Compare BDN Disasm

; System.Buffers.Text.Tests.Utf8ParserTests.TryParseSingle(Utf8TestCase)
       push      ebp
       mov       ebp,esp
       sub       esp,8
       xor       eax,eax
       mov       [ebp-4],eax
       mov       [ebp-8],eax
       mov       ecx,[edx+4]
       test      ecx,ecx
       je        short M00_L01
       lea       edx,[ecx+8]
       mov       eax,[ecx+4]
M00_L00:
       push      eax
       push      edx
       push      0
       lea       ecx,[ebp-4]
       lea       edx,[ebp-8]
       call      dword ptr ds:[0B73D3D8]; System.Buffers.Text.Utf8Parser.TryParse(System.ReadOnlySpan`1<Byte>, Single ByRef, Int32 ByRef, Char)
       mov       esp,ebp
       pop       ebp
       ret
M00_L01:
       xor       edx,edx
       xor       eax,eax
       jmp       short M00_L00
; Total bytes of code 53
; System.Buffers.Text.Utf8Parser.TryParse(System.ReadOnlySpan`1<Byte>, Single ByRef, Int32 ByRef, Char)
       push      ebp
       mov       ebp,esp
       push      edi
       push      esi
       sub       esp,20
       vzeroupper
       vxorps    xmm4,xmm4,xmm4
       vmovdqu   xmmword ptr [ebp-20],xmm4
       xor       eax,eax
       mov       [ebp-10],eax
       mov       [ebp-0C],esp
       mov       dword ptr [ebp-24],0D0DE933D
       mov       edi,ecx
       mov       esi,edx
       test      [esp],esp
       sub       esp,74
       lea       ecx,[esp]
       mov       [ebp-0C],esp
       mov       byte ptr [ebp-16],3
       mov       [ebp-14],ecx
       mov       dword ptr [ebp-10],72
       lea       ecx,[ebp-14]
       cmp       dword ptr [ecx+4],0
       jbe       near ptr M01_L09
       mov       ecx,[ecx]
       mov       byte ptr [ecx],0
       push      dword ptr [ebp+10]
       push      dword ptr [ebp+0C]
       movzx     ecx,word ptr [ebp+8]
       push      ecx
       lea       ecx,[ebp-20]
       mov       edx,esi
       call      dword ptr ds:[0B73D408]; System.Buffers.Text.Utf8Parser.TryParseNormalAsFloatingPoint(System.ReadOnlySpan`1<Byte>, NumberBuffer ByRef, Int32 ByRef, Char)
       test      eax,eax
       je        short M01_L07
       cmp       dword ptr [ebp-20],0
       je        short M01_L00
       mov       ecx,[ebp-1C]
       cmp       ecx,0FFFFFFD3
       jge       short M01_L01
M01_L00:
       vxorps    xmm0,xmm0,xmm0
       jmp       short M01_L03
M01_L01:
       cmp       ecx,27
       jle       short M01_L02
       vmovss    xmm0,dword ptr ds:[0B741DF0]
       jmp       short M01_L03
M01_L02:
       lea       ecx,[ebp-20]
       mov       edx,86182E0
       call      dword ptr ds:[8C4DDF8]; System.Number.NumberToSingleFloatingPointBits(NumberBuffer ByRef, FloatingPointInfo ByRef)
       vmovd     xmm0,eax
M01_L03:
       cmp       byte ptr [ebp-18],0
       jne       short M01_L04
       jmp       short M01_L05
M01_L04:
       vxorps    xmm0,xmm0,ds:[0B741E00]
M01_L05:
       vmovss    dword ptr [edi],xmm0
       mov       eax,1
       cmp       dword ptr [ebp-24],0D0DE933D
       je        short M01_L06
       call      CORINFO_HELP_FAIL_FAST
M01_L06:
       lea       esp,[ebp-8]
       pop       esi
       pop       edi
       pop       ebp
       ret       0C
M01_L07:
       push      dword ptr [ebp+10]
       push      dword ptr [ebp+0C]
       vmovss    xmm0,dword ptr ds:[0B741DF0]
       sub       esp,4
       vmovss    dword ptr [esp],xmm0
       vmovss    xmm0,dword ptr ds:[0B741E10]
       sub       esp,4
       vmovss    dword ptr [esp],xmm0
       vmovss    xmm0,dword ptr ds:[0B741E14]
       sub       esp,4
       vmovss    dword ptr [esp],xmm0
       mov       ecx,edi
       mov       edx,esi
       call      dword ptr ds:[8D03450]
       cmp       dword ptr [ebp-24],0D0DE933D
       je        short M01_L08
       call      CORINFO_HELP_FAIL_FAST
M01_L08:
       lea       esp,[ebp-8]
       pop       esi
       pop       edi
       pop       ebp
       ret       0C
M01_L09:
       call      CORINFO_HELP_RNGCHKFAIL
       int       3
; Total bytes of code 303

System.Buffers.Text.Tests.Utf8ParserTests.TryParseSingle(value: 3.4028235E+38)


Description of detection logic

IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 108.58721430764844 < 113.58832355683091.
IsChangePoint: Marked as a change because one of 3/13/2023 2:14:21 PM, 3/21/2023 3:33:39 AM falls between 3/11/2023 9:11:51 PM and 3/21/2023 3:33:39 AM.
IsImprovementStdDev: Marked as improvement because 70.92277304974526 (T) = (0 -107.74075207392741) / Math.Sqrt((1.4502663391790787 / (299)) + (0.9508004005151528 / (40))) is greater than 1.9670282846691574 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (40) - 2, .975) and 0.1002045289753794 = (119.73915800136247 - 107.74075207392741) / 119.73915800136247 is greater than 0.05.
IsChangeEdgeDetector: Marked not as a regression because Edge Detector said so.

### Baseline BDN Disasm

```assembly
; System.Buffers.Text.Tests.Utf8ParserTests.TryParseSingle(Utf8TestCase)
       push      ebp
       mov       ebp,esp
       sub       esp,8
       xor       eax,eax
       mov       [ebp-4],eax
       mov       [ebp-8],eax
       mov       ecx,[edx+4]
       test      ecx,ecx
       je        short M00_L01
       lea       edx,[ecx+8]
       mov       eax,[ecx+4]
M00_L00:
       push      eax
       push      edx
       push      0
       lea       ecx,[ebp-4]
       lea       edx,[ebp-8]
       call      dword ptr ds:[0AE5D3D8]; System.Buffers.Text.Utf8Parser.TryParse(System.ReadOnlySpan`1<Byte>, Single ByRef, Int32 ByRef, Char)
       mov       esp,ebp
       pop       ebp
       ret
M00_L01:
       xor       edx,edx
       xor       eax,eax
       jmp       short M00_L00
; Total bytes of code 53
; System.Buffers.Text.Utf8Parser.TryParse(System.ReadOnlySpan`1<Byte>, Single ByRef, Int32 ByRef, Char)
       push      ebp
       mov       ebp,esp
       push      edi
       push      esi
       sub       esp,20
       vzeroupper
       vxorps    xmm4,xmm4,xmm4
       vmovdqu   xmmword ptr [ebp-20],xmm4
       xor       eax,eax
       mov       [ebp-10],eax
       mov       [ebp-0C],esp
       mov       dword ptr [ebp-24],0EDDFF450
       mov       edi,ecx
       mov       esi,edx
       test      [esp],esp
       sub       esp,74
       lea       ecx,[esp]
       mov       [ebp-0C],esp
       mov       byte ptr [ebp-16],3
       mov       [ebp-14],ecx
       mov       dword ptr [ebp-10],72
       lea       ecx,[ebp-14]
       cmp       dword ptr [ecx+4],0
       jbe       near ptr M01_L09
       mov       ecx,[ecx]
       mov       byte ptr [ecx],0
       push      dword ptr [ebp+10]
       push      dword ptr [ebp+0C]
       movzx     ecx,word ptr [ebp+8]
       push      ecx
       lea       ecx,[ebp-20]
       mov       edx,esi
       call      dword ptr ds:[0AE5D408]; System.Buffers.Text.Utf8Parser.TryParseNormalAsFloatingPoint(System.ReadOnlySpan`1<Byte>, NumberBuffer ByRef, Int32 ByRef, Char)
       test      eax,eax
       je        short M01_L07
       cmp       dword ptr [ebp-20],0
       je        short M01_L00
       mov       ecx,[ebp-1C]
       cmp       ecx,0FFFFFFD3
       jge       short M01_L01
M01_L00:
       vxorps    xmm0,xmm0,xmm0
       jmp       short M01_L03
M01_L01:
       cmp       ecx,27
       jle       short M01_L02
       vmovss    xmm0,dword ptr ds:[0AE62110]
       jmp       short M01_L03
M01_L02:
       lea       ecx,[ebp-20]
       mov       edx,7D38388
       call      dword ptr ds:[836DDF8]; System.Number.NumberToSingleFloatingPointBits(NumberBuffer ByRef, FloatingPointInfo ByRef)
       vmovd     xmm0,eax
M01_L03:
       cmp       byte ptr [ebp-18],0
       jne       short M01_L04
       jmp       short M01_L05
M01_L04:
       vxorps    xmm0,xmm0,ds:[0AE62120]
M01_L05:
       vmovss    dword ptr [edi],xmm0
       mov       eax,1
       cmp       dword ptr [ebp-24],0EDDFF450
       je        short M01_L06
       call      CORINFO_HELP_FAIL_FAST
M01_L06:
       lea       esp,[ebp-8]
       pop       esi
       pop       edi
       pop       ebp
       ret       0C
M01_L07:
       push      dword ptr [ebp+10]
       push      dword ptr [ebp+0C]
       vmovss    xmm0,dword ptr ds:[0AE62110]
       sub       esp,4
       vmovss    dword ptr [esp],xmm0
       vmovss    xmm0,dword ptr ds:[0AE62130]
       sub       esp,4
       vmovss    dword ptr [esp],xmm0
       vmovss    xmm0,dword ptr ds:[0AE62134]
       sub       esp,4
       vmovss    dword ptr [esp],xmm0
       mov       ecx,edi
       mov       edx,esi
       call      dword ptr ds:[0AE83450]
       cmp       dword ptr [ebp-24],0EDDFF450
       je        short M01_L08
       call      CORINFO_HELP_FAIL_FAST
M01_L08:
       lea       esp,[ebp-8]
       pop       esi
       pop       edi
       pop       ebp
       ret       0C
M01_L09:
       call      CORINFO_HELP_RNGCHKFAIL
       int       3
; Total bytes of code 303

Compare BDN Disasm

; System.Buffers.Text.Tests.Utf8ParserTests.TryParseSingle(Utf8TestCase)
       push      ebp
       mov       ebp,esp
       sub       esp,8
       xor       eax,eax
       mov       [ebp-4],eax
       mov       [ebp-8],eax
       mov       ecx,[edx+4]
       test      ecx,ecx
       je        short M00_L01
       lea       edx,[ecx+8]
       mov       eax,[ecx+4]
M00_L00:
       push      eax
       push      edx
       push      0
       lea       ecx,[ebp-4]
       lea       edx,[ebp-8]
       call      dword ptr ds:[0B85D3D8]; System.Buffers.Text.Utf8Parser.TryParse(System.ReadOnlySpan`1<Byte>, Single ByRef, Int32 ByRef, Char)
       mov       esp,ebp
       pop       ebp
       ret
M00_L01:
       xor       edx,edx
       xor       eax,eax
       jmp       short M00_L00
; Total bytes of code 53
; System.Buffers.Text.Utf8Parser.TryParse(System.ReadOnlySpan`1<Byte>, Single ByRef, Int32 ByRef, Char)
       push      ebp
       mov       ebp,esp
       push      edi
       push      esi
       sub       esp,20
       vzeroupper
       vxorps    xmm4,xmm4,xmm4
       vmovdqu   xmmword ptr [ebp-20],xmm4
       xor       eax,eax
       mov       [ebp-10],eax
       mov       [ebp-0C],esp
       mov       dword ptr [ebp-24],0BE02A1E
       mov       edi,ecx
       mov       esi,edx
       test      [esp],esp
       sub       esp,74
       lea       ecx,[esp]
       mov       [ebp-0C],esp
       mov       byte ptr [ebp-16],3
       mov       [ebp-14],ecx
       mov       dword ptr [ebp-10],72
       lea       ecx,[ebp-14]
       cmp       dword ptr [ecx+4],0
       jbe       near ptr M01_L09
       mov       ecx,[ecx]
       mov       byte ptr [ecx],0
       push      dword ptr [ebp+10]
       push      dword ptr [ebp+0C]
       movzx     ecx,word ptr [ebp+8]
       push      ecx
       lea       ecx,[ebp-20]
       mov       edx,esi
       call      dword ptr ds:[0B85D408]; System.Buffers.Text.Utf8Parser.TryParseNormalAsFloatingPoint(System.ReadOnlySpan`1<Byte>, NumberBuffer ByRef, Int32 ByRef, Char)
       test      eax,eax
       je        short M01_L07
       cmp       dword ptr [ebp-20],0
       je        short M01_L00
       mov       ecx,[ebp-1C]
       cmp       ecx,0FFFFFFD3
       jge       short M01_L01
M01_L00:
       vxorps    xmm0,xmm0,xmm0
       jmp       short M01_L03
M01_L01:
       cmp       ecx,27
       jle       short M01_L02
       vmovss    xmm0,dword ptr ds:[0B861DF0]
       jmp       short M01_L03
M01_L02:
       lea       ecx,[ebp-20]
       mov       edx,86C82E0
       call      dword ptr ds:[8CFDDF8]; System.Number.NumberToSingleFloatingPointBits(NumberBuffer ByRef, FloatingPointInfo ByRef)
       vmovd     xmm0,eax
M01_L03:
       cmp       byte ptr [ebp-18],0
       jne       short M01_L04
       jmp       short M01_L05
M01_L04:
       vxorps    xmm0,xmm0,ds:[0B861E00]
M01_L05:
       vmovss    dword ptr [edi],xmm0
       mov       eax,1
       cmp       dword ptr [ebp-24],0BE02A1E
       je        short M01_L06
       call      CORINFO_HELP_FAIL_FAST
M01_L06:
       lea       esp,[ebp-8]
       pop       esi
       pop       edi
       pop       ebp
       ret       0C
M01_L07:
       push      dword ptr [ebp+10]
       push      dword ptr [ebp+0C]
       vmovss    xmm0,dword ptr ds:[0B861DF0]
       sub       esp,4
       vmovss    dword ptr [esp],xmm0
       vmovss    xmm0,dword ptr ds:[0B861E10]
       sub       esp,4
       vmovss    dword ptr [esp],xmm0
       vmovss    xmm0,dword ptr ds:[0B861E14]
       sub       esp,4
       vmovss    dword ptr [esp],xmm0
       mov       ecx,edi
       mov       edx,esi
       call      dword ptr ds:[8DB3450]
       cmp       dword ptr [ebp-24],0BE02A1E
       je        short M01_L08
       call      CORINFO_HELP_FAIL_FAST
M01_L08:
       lea       esp,[ebp-8]
       pop       esi
       pop       edi
       pop       ebp
       ret       0C
M01_L09:
       call      CORINFO_HELP_RNGCHKFAIL
       int       3
; Total bytes of code 303

System.Buffers.Text.Tests.Utf8ParserTests.TryParseDouble(value: -1.7976931348623157e+308)


Description of detection logic

IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 144.82258224628544 < 147.54377592457058.
IsChangePoint: Marked as a change because one of 2/9/2023 4:27:10 AM, 3/13/2023 2:14:21 PM, 3/21/2023 3:33:39 AM falls between 3/11/2023 9:11:51 PM and 3/21/2023 3:33:39 AM.
IsImprovementStdDev: Marked as improvement because 67.86503950349572 (T) = (0 -144.7772247083128) / Math.Sqrt((6.214194891313082 / (299)) + (0.5046326480682843 / (40))) is greater than 1.9670282846691574 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (40) - 2, .975) and 0.07890717394649131 = (157.1798418283439 - 144.7772247083128) / 157.1798418283439 is greater than 0.05.
IsChangeEdgeDetector: Marked not as a regression because Edge Detector said so.

### Baseline BDN Disasm

```assembly
; System.Buffers.Text.Tests.Utf8ParserTests.TryParseDouble(Utf8TestCase)
       push      ebp
       mov       ebp,esp
       sub       esp,0C
       xor       eax,eax
       mov       [ebp-8],eax
       mov       [ebp-4],eax
       mov       [ebp-0C],eax
       mov       ecx,[edx+4]
       test      ecx,ecx
       je        short M00_L01
       lea       edx,[ecx+8]
       mov       eax,[ecx+4]
M00_L00:
       push      eax
       push      edx
       push      0
       lea       ecx,[ebp-8]
       lea       edx,[ebp-0C]
       call      dword ptr ds:[0B06F138]; System.Buffers.Text.Utf8Parser.TryParse(System.ReadOnlySpan`1<Byte>, Double ByRef, Int32 ByRef, Char)
       mov       esp,ebp
       pop       ebp
       ret
M00_L01:
       xor       edx,edx
       xor       eax,eax
       jmp       short M00_L00
; Total bytes of code 56
; System.Buffers.Text.Utf8Parser.TryParse(System.ReadOnlySpan`1<Byte>, Double ByRef, Int32 ByRef, Char)
       push      ebp
       mov       ebp,esp
       push      edi
       push      esi
       sub       esp,24
       vzeroupper
       vxorps    xmm4,xmm4,xmm4
       vmovdqu   xmmword ptr [ebp-20],xmm4
       xor       eax,eax
       mov       [ebp-10],eax
       mov       [ebp-0C],esp
       mov       dword ptr [ebp-24],0D9066C05
       mov       edi,ecx
       mov       esi,edx
       test      [esp],esp
       sub       esp,304
       lea       ecx,[esp]
       mov       [ebp-0C],esp
       mov       byte ptr [ebp-16],3
       mov       [ebp-14],ecx
       mov       dword ptr [ebp-10],301
       lea       ecx,[ebp-14]
       cmp       dword ptr [ecx+4],0
       jbe       near ptr M01_L03
       mov       ecx,[ecx]
       mov       byte ptr [ecx],0
       push      dword ptr [ebp+10]
       push      dword ptr [ebp+0C]
       movzx     ecx,word ptr [ebp+8]
       push      ecx
       lea       ecx,[ebp-20]
       mov       edx,esi
       call      dword ptr ds:[0B06F150]; System.Buffers.Text.Utf8Parser.TryParseNormalAsFloatingPoint(System.ReadOnlySpan`1<Byte>, NumberBuffer ByRef, Int32 ByRef, Char)
       test      eax,eax
       je        short M01_L01
       lea       ecx,[ebp-20]
       call      dword ptr ds:[853F438]; System.Number.NumberToDouble(NumberBuffer ByRef)
       fstp      qword ptr [ebp-2C]
       vmovsd    xmm0,qword ptr [ebp-2C]
       vmovsd    qword ptr [edi],xmm0
       mov       eax,1
       cmp       dword ptr [ebp-24],0D9066C05
       je        short M01_L00
       call      CORINFO_HELP_FAIL_FAST
M01_L00:
       lea       esp,[ebp-8]
       pop       esi
       pop       edi
       pop       ebp
       ret       0C
M01_L01:
       push      dword ptr [ebp+10]
       push      dword ptr [ebp+0C]
       vmovsd    xmm0,qword ptr ds:[86D2BD8]
       sub       esp,8
       vmovsd    qword ptr [esp],xmm0
       vmovsd    xmm0,qword ptr ds:[86D2BE0]
       sub       esp,8
       vmovsd    qword ptr [esp],xmm0
       vmovsd    xmm0,qword ptr ds:[86D2BE8]
       sub       esp,8
       vmovsd    qword ptr [esp],xmm0
       mov       ecx,edi
       mov       edx,esi
       call      dword ptr ds:[0B0C50A8]
       cmp       dword ptr [ebp-24],0D9066C05
       je        short M01_L02
       call      CORINFO_HELP_FAIL_FAST
M01_L02:
       lea       esp,[ebp-8]
       pop       esi
       pop       edi
       pop       ebp
       ret       0C
M01_L03:
       call      CORINFO_HELP_RNGCHKFAIL
       int       3
; Total bytes of code 254

Compare BDN Disasm

; System.Buffers.Text.Tests.Utf8ParserTests.TryParseDouble(Utf8TestCase)
       push      ebp
       mov       ebp,esp
       sub       esp,0C
       xor       eax,eax
       mov       [ebp-8],eax
       mov       [ebp-4],eax
       mov       [ebp-0C],eax
       mov       ecx,[edx+4]
       test      ecx,ecx
       je        short M00_L01
       lea       edx,[ecx+8]
       mov       eax,[ecx+4]
M00_L00:
       push      eax
       push      edx
       push      0
       lea       ecx,[ebp-8]
       lea       edx,[ebp-0C]
       call      dword ptr ds:[0B8DF138]; System.Buffers.Text.Utf8Parser.TryParse(System.ReadOnlySpan`1<Byte>, Double ByRef, Int32 ByRef, Char)
       mov       esp,ebp
       pop       ebp
       ret
M00_L01:
       xor       edx,edx
       xor       eax,eax
       jmp       short M00_L00
; Total bytes of code 56
; System.Buffers.Text.Utf8Parser.TryParse(System.ReadOnlySpan`1<Byte>, Double ByRef, Int32 ByRef, Char)
       push      ebp
       mov       ebp,esp
       push      edi
       push      esi
       sub       esp,24
       vzeroupper
       vxorps    xmm4,xmm4,xmm4
       vmovdqu   xmmword ptr [ebp-20],xmm4
       xor       eax,eax
       mov       [ebp-10],eax
       mov       [ebp-0C],esp
       mov       dword ptr [ebp-24],1972E3C2
       mov       edi,ecx
       mov       esi,edx
       test      [esp],esp
       sub       esp,304
       lea       ecx,[esp]
       mov       [ebp-0C],esp
       mov       byte ptr [ebp-16],3
       mov       [ebp-14],ecx
       mov       dword ptr [ebp-10],301
       lea       ecx,[ebp-14]
       cmp       dword ptr [ecx+4],0
       jbe       near ptr M01_L03
       mov       ecx,[ecx]
       mov       byte ptr [ecx],0
       push      dword ptr [ebp+10]
       push      dword ptr [ebp+0C]
       movzx     ecx,word ptr [ebp+8]
       push      ecx
       lea       ecx,[ebp-20]
       mov       edx,esi
       call      dword ptr ds:[0B8DF150]; System.Buffers.Text.Utf8Parser.TryParseNormalAsFloatingPoint(System.ReadOnlySpan`1<Byte>, NumberBuffer ByRef, Int32 ByRef, Char)
       test      eax,eax
       je        short M01_L01
       lea       ecx,[ebp-20]
       call      dword ptr ds:[8DFF438]; System.Number.NumberToDouble(NumberBuffer ByRef)
       fstp      qword ptr [ebp-2C]
       vmovsd    xmm0,qword ptr [ebp-2C]
       vmovsd    qword ptr [edi],xmm0
       mov       eax,1
       cmp       dword ptr [ebp-24],1972E3C2
       je        short M01_L00
       call      CORINFO_HELP_FAIL_FAST
M01_L00:
       lea       esp,[ebp-8]
       pop       esi
       pop       edi
       pop       ebp
       ret       0C
M01_L01:
       push      dword ptr [ebp+10]
       push      dword ptr [ebp+0C]
       vmovsd    xmm0,qword ptr ds:[0B8E2A58]
       sub       esp,8
       vmovsd    qword ptr [esp],xmm0
       vmovsd    xmm0,qword ptr ds:[0B8E2A60]
       sub       esp,8
       vmovsd    qword ptr [esp],xmm0
       vmovsd    xmm0,qword ptr ds:[0B8E2A68]
       sub       esp,8
       vmovsd    qword ptr [esp],xmm0
       mov       ecx,edi
       mov       edx,esi
       call      dword ptr ds:[0B9150F0]
       cmp       dword ptr [ebp-24],1972E3C2
       je        short M01_L02
       call      CORINFO_HELP_FAIL_FAST
M01_L02:
       lea       esp,[ebp-8]
       pop       esi
       pop       edi
       pop       ebp
       ret       0C
M01_L03:
       call      CORINFO_HELP_RNGCHKFAIL
       int       3
; Total bytes of code 254

Docs

Profiling workflow for dotnet/runtime repository
Benchmarking workflow for dotnet/runtime repository

Run Information

Name Value
Architecture x86
OS Windows 10.0.18362
Queue TigerWindows
Baseline 7e3bf878c105ea6114e56d3e6e3014c5480cb060
Compare 1c8d37af80667daffb3cb80ce0fe915621e8f039
Diff Diff
Configs CompilationMode:tiered, RunKind:micro

Improvements in HardwareIntrinsics.RayTracer.SoA

Benchmark Baseline Test Test/Base Test Quality Edge Detector Baseline IR Compare IR IR Ratio Baseline ETL Compare ETL
Render - Duration of single invocation 10.25 ms 8.27 ms 0.81 0.07 False Trace Trace

graph
Test Report

Repro

General Docs link: https://github.com/dotnet/performance/blob/main/docs/benchmarking-workflow-dotnet-runtime.md

Payloads

Baseline
Compare

git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f net8.0 --filter 'HardwareIntrinsics.RayTracer.SoA*'

Payloads

Baseline
Compare

Histogram

HardwareIntrinsics.RayTracer.SoA.Render


Description of detection logic

IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 8.269010107526881 < 9.82156054347826.
IsChangePoint: Marked as a change because one of 3/13/2023 2:14:21 PM, 3/21/2023 3:33:39 AM falls between 3/11/2023 9:11:51 PM and 3/21/2023 3:33:39 AM.
IsImprovementStdDev: Marked as improvement because 74.56907915483032 (T) = (0 -8207030.911006687) / Math.Sqrt((40514341296.62523 / (299)) + (22999639214.799816 / (40))) is greater than 1.9670282846691574 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (40) - 2, .975) and 0.1949685928474675 = (10194671.708568096 - 8207030.911006687) / 10194671.708568096 is greater than 0.05.
IsChangeEdgeDetector: Marked not as a regression because Edge Detector said so.

### Baseline BDN Disasm

```assembly
; HardwareIntrinsics.RayTracer.SoA.Render()
; Baseline-build JIT listing (32-bit x86, Intel operand order, hex immediates).
; Visible flow: allocate a Packet256Tracer, run its constructor with 0F8/0F8,
; rewrite one 32-byte field of an object reached through the tracer, then call
; RenderVectorized with a pointer derived from [esi+4].
       push      ebp
       mov       ebp,esp
       push      edi
       push      esi
       push      ebx
       sub       esp,2C
       vzeroupper
       xor       eax,eax
       ; Zero the stack slot [ebp-30] before it is used further down.
       mov       [ebp-30],eax
       ; Keep the incoming ecx in esi (presumably `this`; the listing does not name it).
       mov       esi,ecx
       mov       ecx,offset MT_HardwareIntrinsics.RayTracer.Packet256Tracer
       call      CORINFO_HELP_NEWSFAST
       ; edi = newly allocated object returned in eax.
       mov       edi,eax
       ; Constructor call: one 0F8 goes on the stack, the other in edx, the object in ecx.
       push      0F8
       mov       ecx,edi
       mov       edx,0F8
       call      dword ptr ds:[0B5AB690]; HardwareIntrinsics.RayTracer.Packet256Tracer..ctor(Int32, Int32)
       ; ebx = field at tracer+4; eax = field at [ebx+4].
       mov       ebx,[edi+4]
       mov       eax,[ebx+4]
       ; A zero dword at [eax+4] diverts to M00_L05, the range-check failure helper.
       cmp       dword ptr [eax+4],0
       jbe       near ptr M00_L05
       ; Load the dword at [eax+8] and spill it to [ebp-34] so it survives the Sin call.
       mov       edx,[eax+8]
       mov       eax,edx
       mov       [ebp-34],eax
       ; Null skips the type check; otherwise the first dword of the object must equal
       ; the SpherePacket256 method table, else control goes to M00_L04.
       test      eax,eax
       je        short M00_L00
       mov       ecx,offset MT_HardwareIntrinsics.RayTracer.SpherePacket256
       cmp       [eax],ecx
       jne       near ptr M00_L04
M00_L00:
       ; Copy the 32 bytes at [eax+84] into [eax+44], keeping a copy at [ebp-2C].
       vmovups   ymm0,[eax+84]
       vmovups   [ebp-2C],ymm0
       vmovups   [eax+44],ymm0
       ; MathF.Sin takes its single argument on the stack and returns on the x87 stack
       ; (hence the fstp into [ebp-38]).
       vmovss    xmm0,dword ptr ds:[0B587980]
       sub       esp,4
       vmovss    dword ptr [esp],xmm0
       call      System.MathF.Sin(Single)
       fstp      dword ptr [ebp-38]
       vmovss    xmm0,dword ptr [ebp-38]
       ; AND with a memory constant (presumably an absolute-value mask; its contents
       ; are not shown), scale by another constant, broadcast to all eight lanes.
       vandps    xmm0,xmm0,ds:[0B587990]
       vmulss    xmm0,xmm0,dword ptr ds:[0B5879A0]
       vbroadcastss ymm0,xmm0
       ; Add the broadcast value to the saved vector and store it to offset 44 of the
       ; object whose address was spilled to [ebp-34].
       vmovups   ymm1,[ebp-2C]
       vaddps    ymm0,ymm1,ymm0
       mov       ecx,[ebp-34]
       vmovups   [ecx+44],ymm0
       ; Build the pointer argument from [esi+4]: 0 when that reference is null or its
       ; dword at +4 is zero, otherwise the address reference+8.
       mov       ecx,[esi+4]
       mov       [ebp-30],ecx
       test      ecx,ecx
       je        short M00_L01
       mov       ecx,[ebp-30]
       cmp       dword ptr [ecx+4],0
       jne       short M00_L02
M00_L01:
       xor       ecx,ecx
       jmp       short M00_L03
M00_L02:
       mov       ecx,[ebp-30]
       cmp       dword ptr [ecx+4],0
       jbe       short M00_L05
       mov       ecx,[ebp-30]
       add       ecx,8
M00_L03:
       ; Pointer on the stack, tracer in ecx, the [tracer+4] object in edx.
       push      ecx
       mov       ecx,edi
       mov       edx,ebx
       call      dword ptr ds:[0B5AB6A8]; HardwareIntrinsics.RayTracer.Packet256Tracer.RenderVectorized(HardwareIntrinsics.RayTracer.Scene, Int32*)
       ; Clear the [ebp-30] slot again before returning.
       xor       ecx,ecx
       mov       [ebp-30],ecx
       vzeroupper
       lea       esp,[ebp-0C]
       pop       ebx
       pop       esi
       pop       edi
       pop       ebp
       ret
M00_L04:
       ; Type-check failure path: unnamed helper (presumably a cast-failure throw), never returns here.
       call      dword ptr ds:[844B8B8]
       int       3
M00_L05:
       ; Range-check failure path.
       call      CORINFO_HELP_RNGCHKFAIL
       int       3
; Total bytes of code 253
; HardwareIntrinsics.RayTracer.Packet256Tracer..ctor(Int32, Int32)
; Baseline-build JIT listing. Stores the CreateDefaultScene() result at [this+4],
; a rounded copy of the edx argument at [this+8], and the stack argument at [this+0C].
       push      edi
       push      esi
       ; edi = incoming ecx (the object being constructed), esi = incoming edx.
       mov       edi,ecx
       mov       esi,edx
       call      dword ptr ds:[0B5AB768]; HardwareIntrinsics.RayTracer.Packet256Tracer.CreateDefaultScene()
       ; Store the returned reference (eax) into [edi+4] through the ASSIGN_REF helper
       ; (presumably the GC write barrier).
       lea       edx,[edi+4]
       call      CORINFO_HELP_ASSIGN_REF_EAX
       ; If the low three bits of esi are clear it is already a multiple of 8: skip the rounding.
       test      esi,7
       je        short M01_L00
       ; eax = esi rounded toward zero to a multiple of 8 (the sar/and/add biases negatives).
       mov       eax,esi
       sar       eax,1F
       and       eax,7
       add       eax,esi
       and       eax,0FFFFFFF8
       ; edx = esi - eax, i.e. the signed remainder of esi divided by 8.
       mov       edx,esi
       sub       edx,eax
       mov       eax,edx
       neg       eax
       ; esi = esi - remainder + 8.
       lea       esi,[eax+esi+8]
M01_L00:
       mov       [edi+8],esi
       ; [esp+0C] is the stack argument: two saved registers plus the return address lie below it.
       mov       esi,[esp+0C]
       mov       [edi+0C],esi
       pop       esi
       pop       edi
       ; Callee pops the one stack argument.
       ret       4
; Total bytes of code 68
; HardwareIntrinsics.RayTracer.Packet256Tracer.RenderVectorized(HardwareIntrinsics.RayTracer.Scene, Int32*)
; Baseline-build JIT listing. Two nested counted loops: the outer counter runs in
; ecx up to [ebx+0C], the inner counter lives in [ebp-18] and advances by 8 up to
; [ebx+8]. Each inner iteration calls GetPoints, builds a RayPacket256, calls
; TraceRay and ConvertToIntRGB, and stores 60h bytes through the pointer at [ebp+8].
; Stack slots used below: [ebp-10] outer counter, [ebp-14] outer counter * [ebx+8],
; [ebp-18] inner counter, [ebp-158] broadcast outer counter, [ebp-15C] copy of [ebx+8],
; [ebp-160] spilled esi, [ebp-164] value loaded from [esi+0C].
       push      ebp
       mov       ebp,esp
       push      edi
       push      esi
       push      ebx
       sub       esp,158
       vzeroupper
       ; Zero 120h bytes of locals, [ebp-138] up to [ebp-18], 30h bytes per pass;
       ; eax counts from -120h up to 0.
       vxorps    xmm4,xmm4,xmm4
       mov       eax,0FFFFFEE0
M02_L00:
       vmovdqu   xmmword ptr [ebp+eax-18],xmm4
       vmovdqu   xmmword ptr [ebp+eax-8],xmm4
       vmovdqu   xmmword ptr [ebp+eax+8],xmm4
       add       eax,30
       jne       short M02_L00
       ; ebx = incoming ecx, esi = incoming edx (presumably `this` and the Scene argument).
       mov       ebx,ecx
       mov       esi,edx
       ; Cache the reference at [esi+0C]; it is later passed to GetPoints and read for
       ; the RayPacket256 copy.
       mov       eax,[esi+0C]
       mov       [ebp-164],eax
       xor       edx,edx
       ; ecx = outer counter, starting at 0; skip everything if [ebx+0C] <= 0.
       xor       ecx,ecx
       cmp       dword ptr [ebx+0C],0
       jle       near ptr M02_L04
M02_L01:
       ; Outer loop body: edi = outer counter * [ebx+8], saved to [ebp-14];
       ; inner counter [ebp-18] reset to 0.
       mov       edx,[ebx+8]
       mov       [ebp-15C],edx
       mov       edi,ecx
       imul      edi,edx
       mov       [ebp-14],edi
       xor       edx,edx
       mov       [ebp-18],edx
       cmp       dword ptr [ebp-15C],0
       jle       near ptr M02_L03
       ; Spill the outer counter; the helper call below clobbers ecx.
       mov       [ebp-10],ecx
       mov       ecx,0ACC5DB8
       mov       edx,0C6
       call      CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE
       ; ymm0 = outer counter converted to float and broadcast; kept at [ebp-158].
       ; The vxorps clears xmm0 first so the convert does not depend on its old value.
       vxorps    xmm0,xmm0,xmm0
       vcvtsi2ss xmm0,xmm0,dword ptr [ebp-10]
       vbroadcastss ymm0,xmm0
       vmovups   [ebp-158],ymm0
       ; esi is overwritten by the rep movsb below, so keep a copy in [ebp-160].
       mov       [ebp-160],esi
M02_L02:
       ; Inner loop body. ymm1 = broadcast(float(inner counter)) + a constant vector
       ; (contents of ds:[8488400] are not shown).
       vxorps    xmm1,xmm1,xmm1
       vcvtsi2ss xmm1,xmm1,dword ptr [ebp-18]
       vbroadcastss ymm1,xmm1
       vaddps    ymm1,ymm1,ds:[8488400]
       ; GetPoints: both 32-byte vectors and the cached [ebp-164] reference go on the
       ; stack; edx = address of the 60h-byte local at [ebp-78] (presumably the return buffer).
       sub       esp,20
       vmovups   [esp],ymm1
       sub       esp,20
       vmovups   [esp],ymm0
       push      dword ptr [ebp-164]
       lea       edx,[ebp-78]
       mov       ecx,ebx
       call      dword ptr ds:[0B5AB750]; HardwareIntrinsics.RayTracer.Packet256Tracer.GetPoints(System.Runtime.Intrinsics.Vector256`1<Single>, System.Runtime.Intrinsics.Vector256`1<Single>, HardwareIntrinsics.RayTracer.Camera)
       mov       ecx,offset MT_HardwareIntrinsics.RayTracer.RayPacket256
       call      CORINFO_HELP_NEWSFAST
       ; Fill the new object (eax): 60h bytes from offset 4 of the cached reference go
       ; to [eax+4] as three 32-byte vector moves.
       mov       edx,[ebp-164]
       lea       ecx,[edx+4]
       vmovups   ymm0,[ecx]
       vmovups   ymm1,[ecx+20]
       vmovups   ymm2,[ecx+40]
       lea       ecx,[eax+4]
       vmovups   [ecx],ymm0
       vmovups   [ecx+20],ymm1
       vmovups   [ecx+40],ymm2
       ; The 60h bytes at [ebp-78] are copied to [eax+64] with a byte-wise rep movsb,
       ; which consumes edi, esi and ecx.
       lea       edi,[eax+64]
       lea       esi,[ebp-78]
       mov       ecx,60
       rep movsb
       ; TraceRay: the new packet, the reloaded esi and 0 go on the stack;
       ; edx = address of the 60h-byte local at [ebp-0D8].
       push      eax
       mov       esi,[ebp-160]
       push      esi
       push      0
       lea       edx,[ebp-0D8]
       mov       ecx,ebx
       call      dword ptr ds:[0B5AB6C0]; HardwareIntrinsics.RayTracer.Packet256Tracer.TraceRay(HardwareIntrinsics.RayTracer.RayPacket256, HardwareIntrinsics.RayTracer.Scene, Int32)
       ; Shuffle the three 32-byte results at [ebp-0D8], [ebp-0B8] and [ebp-98] within
       ; and across 128-bit lanes (presumably an SoA-to-interleaved transpose; the
       ; managed source is not shown here).
       vmovups   ymm0,[ebp-0D8]
       vmovups   ymm1,[ebp-0B8]
       vshufps   ymm0,ymm0,ymm1,88
       vmovups   ymm1,[ebp-0B8]
       vmovups   ymm2,[ebp-98]
       vshufps   ymm1,ymm1,ymm2,0DD
       vmovups   ymm2,[ebp-98]
       vmovups   ymm3,[ebp-0D8]
       vshufps   ymm2,ymm2,ymm3,0D8
       vshufps   ymm3,ymm0,ymm2,88
       vshufps   ymm0,ymm1,ymm0,0D8
       vshufps   ymm1,ymm2,ymm1,0DD
       vmovaps   ymm2,ymm3
       vmovaps   ymm4,ymm0
       vmovaps   ymm5,ymm1
       vextractf128 xmm3,ymm3,1
       vextractf128 xmm0,ymm0,1
       vextractf128 xmm1,ymm1,1
       vinsertf128 ymm2,ymm2,xmm4,1
       vinsertf128 ymm3,ymm5,xmm3,1
       vinsertf128 ymm0,ymm0,xmm1,1
       ; ConvertToIntRGB: the three vectors go on the stack; ecx = address of the
       ; 60h-byte local at [ebp-138].
       sub       esp,20
       vmovups   [esp],ymm0
       sub       esp,20
       vmovups   [esp],ymm3
       sub       esp,20
       vmovups   [esp],ymm2
       lea       ecx,[ebp-138]
       call      dword ptr ds:[0B5ADB10]; HardwareIntrinsics.RayTracer.ColorPacket256Helper.ConvertToIntRGB(HardwareIntrinsics.RayTracer.VectorPacket256)
       ; Destination = [ebp+8] + 12 * (inner counter + [ebp-14]): the two lea steps
       ; multiply by 3 and then by 4.
       mov       edi,[ebp-18]
       mov       eax,[ebp-14]
       lea       edx,[edi+eax]
       lea       edx,[edx+edx*2]
       mov       ecx,[ebp+8]
       lea       edx,[ecx+edx*4]
       ; Store the 60h bytes at [ebp-138] to the destination.
       vmovups   ymm0,[ebp-138]
       vmovups   [edx],ymm0
       vmovups   ymm0,[ebp-118]
       vmovups   [edx+20],ymm0
       vmovups   ymm0,[ebp-0F8]
       vmovups   [edx+40],ymm0
       ; Advance the inner counter by 8 and compare with [ebx+8]; the moves that
       ; follow leave the flags untouched, so the jl still tests this cmp.
       add       edi,8
       cmp       edi,[ebx+8]
       mov       [ebp-18],edi
       mov       [ebp-14],eax
       mov       [ebp+8],ecx
       vmovups   ymm0,[ebp-158]
       jl        short M02_L05
       ; Inner loop finished: reload the outer counter.
       mov       eax,[ebp-164]
       mov       ecx,[ebp-10]
M02_L03:
       inc       ecx
       cmp       ecx,[ebx+0C]
       jl        near ptr M02_L01
M02_L04:
       vzeroupper
       lea       esp,[ebp-0C]
       pop       ebx
       pop       esi
       pop       edi
       pop       ebp
       ; Callee pops the one stack argument.
       ret       4
M02_L05:
       ; Inner-loop back edge: re-spill esi, then jump back to the top of the inner loop.
       mov       [ebp-160],esi
       jmp       near ptr M02_L02
; Total bytes of code 583

Compare BDN Disasm

; HardwareIntrinsics.RayTracer.SoA.Render()
; Compare-build JIT listing (32-bit x86, Intel operand order, hex immediates).
; Steps shown: create a Packet256Tracer and construct it with 0F8/0F8, update a
; 32-byte field on an object reached via the tracer, and invoke RenderVectorized.
       push      ebp
       mov       ebp,esp
       push      edi
       push      esi
       push      ebx
       sub       esp,2C
       vzeroupper
       xor       eax,eax
       ; Stack slot [ebp-30] starts out as zero.
       mov       [ebp-30],eax
       ; esi preserves the incoming ecx across the calls below (presumably `this`).
       mov       esi,ecx
       mov       ecx,offset MT_HardwareIntrinsics.RayTracer.Packet256Tracer
       call      CORINFO_HELP_NEWSFAST
       ; The allocation result (eax) is kept in edi.
       mov       edi,eax
       ; Constructor arguments: 0F8 on the stack, 0F8 in edx, the object in ecx.
       push      0F8
       mov       ecx,edi
       mov       edx,0F8
       call      dword ptr ds:[0BC5D690]; HardwareIntrinsics.RayTracer.Packet256Tracer..ctor(Int32, Int32)
       ; ebx = [tracer+4]; eax = [ebx+4].
       mov       ebx,[edi+4]
       mov       eax,[ebx+4]
       ; If the dword at [eax+4] is zero, jump to the range-check failure at M00_L05.
       cmp       dword ptr [eax+4],0
       jbe       near ptr M00_L05
       ; Fetch the dword at [eax+8]; it is also saved to [ebp-34] for use after the Sin call.
       mov       edx,[eax+8]
       mov       eax,edx
       mov       [ebp-34],eax
       ; Non-null values must have the SpherePacket256 method table in their first
       ; dword; a mismatch goes to M00_L04.
       test      eax,eax
       je        short M00_L00
       mov       ecx,offset MT_HardwareIntrinsics.RayTracer.SpherePacket256
       cmp       [eax],ecx
       jne       near ptr M00_L04
M00_L00:
       ; 32 bytes from [eax+84] are written to [eax+44] and also saved at [ebp-2C].
       vmovups   ymm0,[eax+84]
       vmovups   [ebp-2C],ymm0
       vmovups   [eax+44],ymm0
       ; The single-precision argument for MathF.Sin goes on the stack; the result
       ; comes back on the x87 stack and is popped into [ebp-38].
       vmovss    xmm0,dword ptr ds:[0BC67C50]
       sub       esp,4
       vmovss    dword ptr [esp],xmm0
       call      System.MathF.Sin(Single)
       fstp      dword ptr [ebp-38]
       vmovss    xmm0,dword ptr [ebp-38]
       ; Mask with a memory constant (presumably clearing the sign bit; the constant
       ; is not shown), multiply by a second constant, then broadcast.
       vandps    xmm0,xmm0,ds:[0BC67C60]
       vmulss    xmm0,xmm0,dword ptr ds:[0BC67C70]
       vbroadcastss ymm0,xmm0
       ; saved vector + broadcast value -> offset 44 of the object saved in [ebp-34].
       vmovups   ymm1,[ebp-2C]
       vaddps    ymm0,ymm1,ymm0
       mov       ecx,[ebp-34]
       vmovups   [ecx+44],ymm0
       ; Pointer argument: 0 if the reference at [esi+4] is null or has a zero dword
       ; at +4, otherwise reference+8.
       mov       ecx,[esi+4]
       mov       [ebp-30],ecx
       test      ecx,ecx
       je        short M00_L01
       mov       ecx,[ebp-30]
       cmp       dword ptr [ecx+4],0
       jne       short M00_L02
M00_L01:
       xor       ecx,ecx
       jmp       short M00_L03
M00_L02:
       mov       ecx,[ebp-30]
       cmp       dword ptr [ecx+4],0
       jbe       short M00_L05
       mov       ecx,[ebp-30]
       add       ecx,8
M00_L03:
       ; Call with the pointer pushed, ecx = tracer, edx = the [tracer+4] object.
       push      ecx
       mov       ecx,edi
       mov       edx,ebx
       call      dword ptr ds:[0BC5D6A8]; HardwareIntrinsics.RayTracer.Packet256Tracer.RenderVectorized(HardwareIntrinsics.RayTracer.Scene, Int32*)
       ; Reset [ebp-30] to zero before the epilogue.
       xor       ecx,ecx
       mov       [ebp-30],ecx
       vzeroupper
       lea       esp,[ebp-0C]
       pop       ebx
       pop       esi
       pop       edi
       pop       ebp
       ret
M00_L04:
       ; Reached when the method-table compare fails; unnamed helper (presumably throws).
       call      dword ptr ds:[568B8B8]
       int       3
M00_L05:
       ; Reached when a length check fails.
       call      CORINFO_HELP_RNGCHKFAIL
       int       3
; Total bytes of code 253
; HardwareIntrinsics.RayTracer.Packet256Tracer..ctor(Int32, Int32)
; Compare-build JIT listing. Writes three fields of the object passed in ecx:
; [+4] = result of CreateDefaultScene(), [+8] = edx argument after rounding,
; [+0C] = the argument passed on the stack.
       push      edi
       push      esi
       ; Preserve both register arguments across the call: edi = ecx, esi = edx.
       mov       edi,ecx
       mov       esi,edx
       call      dword ptr ds:[0BC5D768]; HardwareIntrinsics.RayTracer.Packet256Tracer.CreateDefaultScene()
       ; The helper stores eax at the address in edx, i.e. [edi+4]
       ; (presumably a reference store with a GC write barrier).
       lea       edx,[edi+4]
       call      CORINFO_HELP_ASSIGN_REF_EAX
       ; Values that are already a multiple of 8 are stored unchanged.
       test      esi,7
       je        short M01_L00
       ; Compute esi truncated toward zero to a multiple of 8.
       mov       eax,esi
       sar       eax,1F
       and       eax,7
       add       eax,esi
       and       eax,0FFFFFFF8
       ; edx = esi minus that multiple, i.e. the signed remainder modulo 8.
       mov       edx,esi
       sub       edx,eax
       mov       eax,edx
       neg       eax
       ; New esi = esi - remainder + 8.
       lea       esi,[eax+esi+8]
M01_L00:
       mov       [edi+8],esi
       ; The stack argument sits above the two pushed registers and the return address.
       mov       esi,[esp+0C]
       mov       [edi+0C],esi
       pop       esi
       pop       edi
       ; Return and release the 4-byte stack argument.
       ret       4
; Total bytes of code 68
; HardwareIntrinsics.RayTracer.Packet256Tracer.RenderVectorized(HardwareIntrinsics.RayTracer.Scene, Int32*)
; Compare-build JIT listing. Outer loop: counter in ecx, bound [esi+0C]. Inner loop:
; counter in [ebp-14], step 8, bound [esi+8]. Each inner iteration runs GetPoints,
; allocates and fills a RayPacket256, runs TraceRay and ConvertToIntRGB, then writes
; 60h bytes through the pointer at [ebp+8].
; Stack slots used below: [ebp-10] outer counter, [ebp-14] inner counter,
; [ebp-154] broadcast outer counter, [ebp-158] copy of [esi+8],
; [ebp-15C] value loaded from [edi+0C]. ebx holds outer counter * [esi+8].
       push      ebp
       mov       ebp,esp
       push      edi
       push      esi
       push      ebx
       sub       esp,150
       vzeroupper
       ; Clear 120h bytes of locals, [ebp-134] up to [ebp-14], three 16-byte stores
       ; per pass while eax climbs from -120h to 0.
       vxorps    xmm4,xmm4,xmm4
       mov       eax,0FFFFFEE0
M02_L00:
       vmovdqu   xmmword ptr [ebp+eax-14],xmm4
       vmovdqu   xmmword ptr [ebp+eax-4],xmm4
       vmovdqu   xmmword ptr [ebp+eax+0C],xmm4
       add       eax,30
       jne       short M02_L00
       ; esi = incoming ecx, edi = incoming edx (presumably `this` and the Scene argument).
       mov       esi,ecx
       mov       edi,edx
       ; Keep the reference at [edi+0C] in a stack slot; GetPoints and the packet copy use it.
       mov       eax,[edi+0C]
       mov       [ebp-15C],eax
       ; Outer counter starts at 0; nothing to do if [esi+0C] <= 0.
       xor       ecx,ecx
       cmp       dword ptr [esi+0C],0
       jle       near ptr M02_L04
M02_L01:
       ; Outer loop body: ebx = outer counter * [esi+8]; inner counter reset to 0.
       mov       edx,[esi+8]
       mov       [ebp-158],edx
       mov       ebx,ecx
       imul      ebx,edx
       xor       edx,edx
       mov       [ebp-14],edx
       cmp       dword ptr [ebp-158],0
       jle       near ptr M02_L03
       ; Save the outer counter, since ecx is reused for the helper call.
       mov       [ebp-10],ecx
       mov       ecx,0B375F10
       mov       edx,0C6
       call      CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE
       ; ymm0 = float(outer counter) in every lane, also stored at [ebp-154];
       ; xmm0 is cleared before the convert so it carries no stale dependency.
       vxorps    xmm0,xmm0,xmm0
       vcvtsi2ss xmm0,xmm0,dword ptr [ebp-10]
       vbroadcastss ymm0,xmm0
       vmovups   [ebp-154],ymm0
M02_L02:
       ; Inner loop body. ymm1 = float(inner counter) in every lane, plus the constant
       ; vector at ds:[8BA8358] (contents not shown).
       vxorps    xmm1,xmm1,xmm1
       vcvtsi2ss xmm1,xmm1,dword ptr [ebp-14]
       vbroadcastss ymm1,xmm1
       vaddps    ymm1,ymm1,ds:[8BA8358]
       ; GetPoints: ymm1, ymm0 and the cached reference are pushed; edx points at the
       ; 60h-byte local at [ebp-74] (presumably where the result is written).
       sub       esp,20
       vmovups   [esp],ymm1
       sub       esp,20
       vmovups   [esp],ymm0
       push      dword ptr [ebp-15C]
       lea       edx,[ebp-74]
       mov       ecx,esi
       call      dword ptr ds:[0BC5D750]; HardwareIntrinsics.RayTracer.Packet256Tracer.GetPoints(System.Runtime.Intrinsics.Vector256`1<Single>, System.Runtime.Intrinsics.Vector256`1<Single>, HardwareIntrinsics.RayTracer.Camera)
       mov       ecx,offset MT_HardwareIntrinsics.RayTracer.RayPacket256
       call      CORINFO_HELP_NEWSFAST
       ; Copy 60h bytes from offset 4 of the cached reference to offset 4 of the new object (eax).
       mov       ecx,[ebp-15C]
       lea       edx,[ecx+4]
       vmovups   ymm0,[edx]
       vmovups   ymm1,[edx+20]
       vmovups   ymm2,[edx+40]
       lea       edx,[eax+4]
       vmovups   [edx],ymm0
       vmovups   [edx+20],ymm1
       vmovups   [edx+40],ymm2
       ; Copy the 60h bytes at [ebp-74] to [eax+64] as three 32-byte load/store pairs;
       ; edi and esi are not touched, so no reload is needed afterwards.
       vmovdqu   ymm0,ymmword ptr [ebp-74]
       vmovdqu   ymmword ptr [eax+64],ymm0
       vmovdqu   ymm0,ymmword ptr [ebp-54]
       vmovdqu   ymmword ptr [eax+84],ymm0
       vmovdqu   ymm0,ymmword ptr [ebp-34]
       vmovdqu   ymmword ptr [eax+0A4],ymm0
       ; TraceRay: the packet, edi and 0 are pushed; edx points at the 60h-byte local at [ebp-0D4].
       push      eax
       push      edi
       push      0
       lea       edx,[ebp-0D4]
       mov       ecx,esi
       call      dword ptr ds:[0BC5D6C0]; HardwareIntrinsics.RayTracer.Packet256Tracer.TraceRay(HardwareIntrinsics.RayTracer.RayPacket256, HardwareIntrinsics.RayTracer.Scene, Int32)
       ; Rearrange the three 32-byte results at [ebp-0D4], [ebp-0B4] and [ebp-94] with
       ; in-lane shuffles and 128-bit lane swaps (presumably interleaving the three
       ; channels; the managed source is not shown here).
       vmovups   ymm0,[ebp-0D4]
       vmovups   ymm1,[ebp-0B4]
       vshufps   ymm0,ymm0,ymm1,88
       vmovups   ymm1,[ebp-0B4]
       vmovups   ymm2,[ebp-94]
       vshufps   ymm1,ymm1,ymm2,0DD
       vmovups   ymm2,[ebp-94]
       vmovups   ymm3,[ebp-0D4]
       vshufps   ymm2,ymm2,ymm3,0D8
       vshufps   ymm3,ymm0,ymm2,88
       vshufps   ymm0,ymm1,ymm0,0D8
       vshufps   ymm1,ymm2,ymm1,0DD
       vmovaps   ymm2,ymm3
       vmovaps   ymm4,ymm0
       vmovaps   ymm5,ymm1
       vextractf128 xmm3,ymm3,1
       vextractf128 xmm0,ymm0,1
       vextractf128 xmm1,ymm1,1
       vinsertf128 ymm2,ymm2,xmm4,1
       vinsertf128 ymm3,ymm5,xmm3,1
       vinsertf128 ymm0,ymm0,xmm1,1
       ; ConvertToIntRGB: the three vectors are pushed; ecx points at the 60h-byte local at [ebp-134].
       sub       esp,20
       vmovups   [esp],ymm0
       sub       esp,20
       vmovups   [esp],ymm3
       sub       esp,20
       vmovups   [esp],ymm2
       lea       ecx,[ebp-134]
       call      dword ptr ds:[0BC5FB10]; HardwareIntrinsics.RayTracer.ColorPacket256Helper.ConvertToIntRGB(HardwareIntrinsics.RayTracer.VectorPacket256)
       ; Destination = [ebp+8] + 12 * (inner counter + ebx): times 3, then times 4.
       mov       ecx,[ebp-14]
       lea       eax,[ecx+ebx]
       lea       eax,[eax+eax*2]
       mov       edx,[ebp+8]
       lea       eax,[edx+eax*4]
       ; Write the 60h bytes at [ebp-134] to the destination.
       vmovups   ymm0,[ebp-134]
       vmovups   [eax],ymm0
       vmovups   ymm0,[ebp-114]
       vmovups   [eax+20],ymm0
       vmovups   ymm0,[ebp-0F4]
       vmovups   [eax+40],ymm0
       ; Inner counter += 8, compared with [esi+8]; the moves in between do not alter
       ; the flags, so the jl tests this cmp and loops straight back to M02_L02.
       add       ecx,8
       cmp       ecx,[esi+8]
       mov       [ebp-14],ecx
       mov       [ebp+8],edx
       vmovups   ymm0,[ebp-154]
       jl        near ptr M02_L02
       ; Inner loop done: restore the outer counter into ecx.
       mov       eax,[ebp-15C]
       mov       ecx,[ebp-10]
M02_L03:
       inc       ecx
       cmp       ecx,[esi+0C]
       jl        near ptr M02_L01
M02_L04:
       vzeroupper
       lea       esp,[ebp-0C]
       pop       ebx
       pop       esi
       pop       edi
       pop       ebp
       ; Return and release the 4-byte stack argument.
       ret       4
; Total bytes of code 576

Docs

Profiling workflow for dotnet/runtime repository
Benchmarking workflow for dotnet/runtime repository

@performanceautofiler
Copy link
Author

performanceautofiler bot commented Mar 21, 2023

Run Information

Name Value
Architecture x86
OS Windows 10.0.18362
Queue TigerWindows
Baseline 7e3bf878c105ea6114e56d3e6e3014c5480cb060
Compare 1c8d37af80667daffb3cb80ce0fe915621e8f039
Diff Diff
Configs CompilationMode:tiered, RunKind:micro

Improvements in StoreBlock.LocalAddress

Benchmark Baseline Test Test/Base Test Quality Edge Detector Baseline IR Compare IR IR Ratio Baseline ETL Compare ETL
CopyBlock128 - Duration of single invocation 11.17 ns 1.90 ns 0.17 0.00 False Trace Trace

graph
Test Report

Repro

General Docs link: https://github.com/dotnet/performance/blob/main/docs/benchmarking-workflow-dotnet-runtime.md

Payloads

Baseline
Compare

git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f net8.0 --filter 'StoreBlock.LocalAddress*'

Payloads

Baseline
Compare

Histogram

StoreBlock.LocalAddress.CopyBlock128


Description of detection logic

IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 1.8958149384720073 < 10.616326453874086.
IsChangePoint: Marked as a change because one of 3/13/2023 2:14:21 PM, 3/21/2023 3:33:39 AM falls between 3/11/2023 9:11:51 PM and 3/21/2023 3:33:39 AM.
IsImprovementStdDev: Marked as improvement because 6702.799183344415 (T) = (0 -1.8990353615283921) / Math.Sqrt((0.00045510608243344824 / (299)) + (1.594769733798873E-05 / (40))) is greater than 1.9670282846691574 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (40) - 2, .975) and 0.8302706335752279 = (11.188608085508223 - 1.8990353615283921) / 11.188608085508223 is greater than 0.05.
IsChangeEdgeDetector: Marked not as a regression because Edge Detector said so.

### Baseline BDN Disasm

```assembly
; StoreBlock.LocalAddress.CopyBlock128()
; Baseline-build JIT listing. Copies the 80h (128) bytes at [ecx+7C] into one
; stack local, copies that local into a second stack local 64h (100) times, then
; copies the second local back to [ecx+7C]. Every copy is a byte-wise rep movsb.
       push      ebp
       mov       ebp,esp
       push      edi
       push      esi
       sub       esp,100
       ; eax = address of the 128-byte field; ecx itself is needed as the rep count.
       lea       eax,[ecx+7C]
       ; First copy: field -> local at [ebp-88].
       lea       edi,[ebp-88]
       mov       esi,eax
       mov       ecx,80
       rep movsb
       ; edx = loop counter.
       xor       edx,edx
       nop
M00_L00:
       ; Loop body: local at [ebp-88] -> local at [ebp-108], 80h bytes.
       lea       edi,[ebp-108]
       lea       esi,[ebp-88]
       mov       ecx,80
       rep movsb
       inc       edx
       cmp       edx,64
       jl        short M00_L00
       ; Final copy: local at [ebp-108] -> the field.
       mov       edi,eax
       lea       esi,[ebp-108]
       mov       ecx,80
       rep movsb
       lea       esp,[ebp-8]
       pop       esi
       pop       edi
       pop       ebp
       ret
; Total bytes of code 79

Compare BDN Disasm

; StoreBlock.LocalAddress.CopyBlock128()
; Compare-build JIT listing. Same three-stage copy of the 80h (128) bytes at
; [ecx+7C] (field -> local, local -> second local 64h times, second local -> field),
; with each 128-byte copy unrolled into four 32-byte vmovdqu load/store pairs.
; No edi/esi are used, so none are saved.
       push      ebp
       mov       ebp,esp
       sub       esp,100
       vzeroupper
       ; ecx = address of the 128-byte field.
       add       ecx,7C
       ; First copy: field -> local at [ebp-80].
       vmovdqu   ymm0,ymmword ptr [ecx]
       vmovdqu   ymmword ptr [ebp-80],ymm0
       vmovdqu   ymm0,ymmword ptr [ecx+20]
       vmovdqu   ymmword ptr [ebp-60],ymm0
       vmovdqu   ymm0,ymmword ptr [ecx+40]
       vmovdqu   ymmword ptr [ebp-40],ymm0
       vmovdqu   ymm0,ymmword ptr [ecx+60]
       vmovdqu   ymmword ptr [ebp-20],ymm0
       ; eax = loop counter.
       xor       eax,eax
       ; Eight one-byte nops precede the loop label (presumably alignment padding for the loop head).
       nop
       nop
       nop
       nop
       nop
       nop
       nop
       nop
M00_L00:
       ; Loop body: local at [ebp-80] -> local at [ebp-100], 80h bytes.
       vmovdqu   ymm0,ymmword ptr [ebp-80]
       vmovdqu   ymmword ptr [ebp-100],ymm0
       vmovdqu   ymm0,ymmword ptr [ebp-60]
       vmovdqu   ymmword ptr [ebp-0E0],ymm0
       vmovdqu   ymm0,ymmword ptr [ebp-40]
       vmovdqu   ymmword ptr [ebp-0C0],ymm0
       vmovdqu   ymm0,ymmword ptr [ebp-20]
       vmovdqu   ymmword ptr [ebp-0A0],ymm0
       inc       eax
       cmp       eax,64
       jl        short M00_L00
       ; Final copy: local at [ebp-100] -> the field.
       vmovdqu   ymm0,ymmword ptr [ebp-100]
       vmovdqu   ymmword ptr [ecx],ymm0
       vmovdqu   ymm0,ymmword ptr [ebp-0E0]
       vmovdqu   ymmword ptr [ecx+20],ymm0
       vmovdqu   ymm0,ymmword ptr [ebp-0C0]
       vmovdqu   ymmword ptr [ecx+40],ymm0
       vmovdqu   ymm0,ymmword ptr [ebp-0A0]
       vmovdqu   ymmword ptr [ecx+60],ymm0
       mov       esp,ebp
       pop       ebp
       ret
; Total bytes of code 177

Docs

Profiling workflow for dotnet/runtime repository
Benchmarking workflow for dotnet/runtime repository

Run Information

Name Value
Architecture x86
OS Windows 10.0.18362
Queue TigerWindows
Baseline 7e3bf878c105ea6114e56d3e6e3014c5480cb060
Compare 1c8d37af80667daffb3cb80ce0fe915621e8f039
Diff Diff
Configs CompilationMode:tiered, RunKind:micro

Improvements in StoreBlock.AnyLocation

Benchmark Baseline Test Test/Base Test Quality Edge Detector Baseline IR Compare IR IR Ratio Baseline ETL Compare ETL
CopyBlock128 - Duration of single invocation 12.29 ns 3.07 ns 0.25 0.04 False 13.918682587270409 19.212804265486486 1.3803608312081141 Trace Trace

graph
Test Report

Repro

General Docs link: https://github.com/dotnet/performance/blob/main/docs/benchmarking-workflow-dotnet-runtime.md

Payloads

Baseline
Compare

git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f net8.0 --filter 'StoreBlock.AnyLocation*'

Payloads

Baseline
Compare

Histogram

StoreBlock.AnyLocation.CopyBlock128


Description of detection logic

IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsRegressionBase: Marked as not a regression because the compare was not 5% greater than the baseline, or the value was too small.
IsImprovementBase: Marked as improvement because the compare was 5% less than the baseline, and the value was not too small.
IsImprovementCheck: Marked as improvement because the three check build points were 0.05 less than the baseline.
IsImprovementWindowed: Marked as improvement because 3.0726536154038437 < 11.67212581849152.
IsChangePoint: Marked as a change because one of 3/13/2023 2:14:21 PM, 3/21/2023 3:33:39 AM falls between 3/11/2023 9:11:51 PM and 3/21/2023 3:33:39 AM.
IsImprovementStdDev: Marked as improvement because 203.51225935804538 (T) = (0 -3.0829169369888465) / Math.Sqrt((0.0017321750189426277 / (299)) + (0.08118575251602794 / (40))) is greater than 1.9670282846691574 = MathNet.Numerics.Distributions.StudentT.InvCDF(0, 1, (299) + (40) - 2, .975) and 0.7486316497071687 = (12.264539005795301 - 3.0829169369888465) / 12.264539005795301 is greater than 0.05.
IsChangeEdgeDetector: Marked not as a regression because Edge Detector said so.

### Baseline BDN Disasm

```assembly
; StoreBlock.AnyLocation.CopyBlock128()
; Baseline-build JIT listing. Walks an offset (edx) from 0 to 1000h in steps of 80h.
; On each step it copies 80h (128) bytes from offset+8 inside the object at [eax+4]
; to offset+8 inside the object at [eax+8], using rep movsb.
       push      ebp
       mov       ebp,esp
       push      edi
       push      esi
       ; eax = incoming ecx; ecx is needed as scratch and as the rep count.
       mov       eax,ecx
       ; edx = running offset.
       xor       edx,edx
M00_L00:
       ; Destination: object at [eax+8]; the offset is compared (unsigned) against its
       ; dword at +4 and fails over to M00_L01 when not below it.
       mov       ecx,[eax+8]
       cmp       edx,[ecx+4]
       jae       short M00_L01
       lea       edi,[ecx+edx+8]
       ; Source: object at [eax+4], checked the same way.
       mov       ecx,[eax+4]
       cmp       edx,[ecx+4]
       jae       short M00_L01
       lea       esi,[ecx+edx+8]
       ; Copy 80h bytes one byte at a time.
       mov       ecx,80
       rep movsb
       add       edx,80
       cmp       edx,1000
       jl        short M00_L00
       pop       esi
       pop       edi
       pop       ebp
       ret
M00_L01:
       ; Range-check failure path.
       call      CORINFO_HELP_RNGCHKFAIL
       int       3
; Total bytes of code 64

Compare BDN Disasm

; StoreBlock.AnyLocation.CopyBlock128()
; Compare-build JIT listing. Walks an offset (eax) from 0 to 1000h in steps of 80h.
; On each step it copies 80h (128) bytes from offset+8 inside the object at [ecx+4]
; to offset+8 inside the object at [ecx+8], as four 32-byte vmovdqu load/store pairs.
       push      ebp
       mov       ebp,esp
       push      esi
       vzeroupper
       ; eax = running offset; ecx keeps the incoming object pointer for the whole loop.
       xor       eax,eax
M00_L00:
       ; Destination: object at [ecx+8]; unsigned compare of the offset against its
       ; dword at +4, failing over to M00_L01 when not below it.
       mov       edx,[ecx+8]
       cmp       eax,[edx+4]
       jae       short M00_L01
       lea       edx,[edx+eax+8]
       ; Source: object at [ecx+4], checked the same way.
       mov       esi,[ecx+4]
       cmp       eax,[esi+4]
       jae       short M00_L01
       lea       esi,[esi+eax+8]
       ; Unrolled 128-byte copy from [esi] to [edx].
       vmovdqu   ymm0,ymmword ptr [esi]
       vmovdqu   ymmword ptr [edx],ymm0
       vmovdqu   ymm0,ymmword ptr [esi+20]
       vmovdqu   ymmword ptr [edx+20],ymm0
       vmovdqu   ymm0,ymmword ptr [esi+40]
       vmovdqu   ymmword ptr [edx+40],ymm0
       vmovdqu   ymm0,ymmword ptr [esi+60]
       vmovdqu   ymmword ptr [edx+60],ymm0
       add       eax,80
       cmp       eax,1000
       jl        short M00_L00
       pop       esi
       pop       ebp
       ret
M00_L01:
       ; Range-check failure path.
       call      CORINFO_HELP_RNGCHKFAIL
       int       3
; Total bytes of code 92

Docs

Profiling workflow for dotnet/runtime repository
Benchmarking workflow for dotnet/runtime repository

@kunalspathak
Copy link
Member

dotnet/runtime#83274

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

1 participant