Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cpu/drcbearm64.cpp: Add disassembled UML comments to logged assembly #13472

Merged
merged 1 commit into from
Mar 13, 2025

Conversation

cuavas
Copy link
Member

@cuavas cuavas commented Mar 13, 2025

If you use -drc_log_native on x86-64, you get output like this, with disassembled UML following the binary representation (before resolving forward labels):

    call 0x18E76A10C42                      ; E836F2FFFF
read32mask:                                 ;                         | handle  read32mask
    short jmp L1                            ; EB..
    lea rsp, [rsp-0x28]                     ; 488D6424D8
L1:
    and ebx, 0x7FFFFFFF                     ; 81E3FFFFFF7F            | and     i0,i0,$7FFFFFFF
    cmp ebx, 0xFF01FFFF                     ; 81FBFFFF01FF            | cmp     i0,$FF01FFFF,ZC
    ja PC$1                                 ; 0F87........            | jmp     $       1,a
    cmp ebx, 0xFF000000                     ; 81FB000000FF            | cmp     i0,$FF000000,C
    jb PC$1                                 ; 0F82........            | jmp     $       1,c
    mov rdx, 0x18D7B027090                  ; 48BA9070027B8D010000    | load    i0,[[$0x18d7b027090]],i0,dword_x1
    movsxd rbx, ebx                         ; 4863DB
    mov ebx, dword ptr [rdx+rbx]            ; 8B1C1A
    lea rsp, [rsp+0x28]                     ; 488D642428              | ret
    ret                                     ; C3
PC$1:                                       ;                         | label   $       1
    mov edx, ebx                            ; 89DA                    | readm   i0,i0,i2,program_dword
    mov r8d, edi                            ; 4189F8
    and edx, 0x7FFFFFFC                     ; 81E2FCFFFF7F
    lea rax, [rip+0x40255B3]                ; 488D05B3550204
    mov r10d, edx                           ; 4189D2
    shr edx, 0xE                            ; C1EA0E
    mov rcx, [rax+rdx*8]                    ; 488B0CD0
    mov edx, r10d                           ; 4489D2
    mov rax, [rcx]                          ; 488B01
    call [rax+0x40]                         ; FF5040
    mov ebx, eax                            ; 89C3
    lea rsp, [rsp+0x28]                     ; 488D642428              | ret
    ret                                     ; C3
read32align:                                ;                         | handle  read32align
    short jmp L1                            ; EB..

However, I just found out the other day that for AArch64 it looks like this, without the UML:

    bl 0x7FFEB884CB44                       ; 63FCFF97
    str w28, [x27, 0x1200]                  ; 7C0312B9
read32mask:
    b L1                                    ; 00000014
    stp x29, x30, [sp, 0xFFFFFFFFFFFFFFF0]! ; FD7BBFA9
L1:
    ands w19, w19, 0x7FFFFFFF               ; 737A0072
    nop                                     ; 1F2003D5
    mov w9, w19                             ; E903132A
    mov w10, 0xFF01FFFF                     ; CA1FA012
    cmp w9, w10                             ; 3F010A6B
    cset x12, 5                             ; EC279F9A
    bfi x28, x12, 0, 1                      ; 9C0140B3
    mrs x12, 0xDA10                         ; 0C423BD5
    bfi x12, x28, 0x1D, 1                   ; 8C0363B3
    eor x12, x12, 0x20000000                ; 8C0163D2
    msr 0xDA10, x12                         ; 0C421BD5
    b.hi PC$1                               ; 08000054
    mov w9, w19                             ; E903132A
    mov w10, 0xFF000000                     ; 0AE0BF52
    cmp w9, w10                             ; 3F010A6B
    cset x12, 5                             ; EC279F9A
    bfi x28, x12, 0, 1                      ; 9C0140B3
    mrs x12, 0xDA10                         ; 0C423BD5
    bfi x12, x28, 0x1D, 1                   ; 8C0363B3
    eor x12, x12, 0x20000000                ; 8C0163D2
    msr 0xDA10, x12                         ; 0C421BD5
    b.lo PC$1                               ; 03000054
    nop                                     ; 1F2003D5
    mov x9, 0x555515FA7750                  ; 09EA8ED249BFA2F2A9AACAF2
    ldr w19, [x9, w19]                      ; 336973B8
    ldp x29, x30, [sp], 0x10                ; FD7BC1A8
    ret x30                                 ; C0035FD6
PC$1:
    mov w1, w19                             ; E103132A
    adrp x8, 0x7FFEB67D0000                 ; 08FCFEF0
    add x8, x8, 0x10                        ; 08410091
    ubfx w7, w1, 0xE, 0x11                  ; 27780E53
    and w1, w1, 0x7FFFFFFC                  ; 21701E12
    ldr x0, [x8, x7 lsl 3]                  ; 007967F8
    mov w2, w21                             ; E203152A
    ldr x8, [x0]                            ; 080040F9
    ldr x8, [x8, 0x40]                      ; 082140F9
    blr x8                                  ; 00013FD6
    mov w19, w0                             ; F303002A
    ldp x29, x30, [sp], 0x10                ; FD7BC1A8
    ret x30                                 ; C0035FD6
    str w28, [x27, 0x1200]                  ; 7C0312B9
read32align:
    b L1                                    ; 00000014

This PR is supposed to add the disassembled UML to the comments for AArch64. Can someone test that it does so, and that it doesn’t cause stuff to blow up? Just do something like `mame -drc_log_native -bench 1 fiveside` and then check `drcbearm64_asmjit_ppc403ga.asm` to see if it gains UML in the comments.

Also, I’m somewhat concerned about those `str w28, [x27, 0x1200]` instructions that immediately precede the named labels for the UML `handle` instructions. Where are they coming from? I can’t see anything in `op_handle`, `op_exh` or `op_ret` (the UML instructions immediately preceding the `handle` instructions are `exh` and `ret`). What am I missing?

@cuavas
Copy link
Member Author

cuavas commented Mar 13, 2025

The random `nop` after the `ands` is also concerning. I don’t see `op_and` or `op_cmp` generating that. (And yes, `op_cmp` could generate more optimal code – it doesn’t need to always copy register operands to temporary registers.)

@rb6502
Copy link
Contributor

rb6502 commented Mar 13, 2025

Looks good.

entry:                                      ;                         | handle  entry
    b L1                                    ; 00000014
    stp x29, x30, [sp, 0xFFFFFFFFFFFFFFF0]! ; FD7BBFA9
L1:
    ldr w9, [x27, 0xA8]                     ; 69AB40B9                | and     i0,[fpscr],$3
    ands w19, w9, 3                         ; 33050072
    adrp x9, 0x138015000                    ; A97B0FB0                | load    i0,[fpmode],i0,byte
    add x9, x9, 0x510                       ; 29411491
    ldrb w19, [x9, w19]                     ; 33697338
    and w15, w19, 3                         ; 6F060012                | setfmod i0
    adr x14, 0x1190012F4                    ; AE92B010
    strb w15, [x14]                         ; CF010039
    ldur w24, [x27, 0xFFFFFFFFFFFFFF04]     ; 784350B8                | mov     i5,[r0]
    ldur w25, [x27, 0xFFFFFFFFFFFFFF08]     ; 798350B8                | mov     i6,[r1]
    ldur w26, [x27, 0xFFFFFFFFFFFFFF0C]     ; 7AC350B8                | mov     i7,[r2]
    ldur d11, [x27, 0xFFFFFFFFFFFFFF88]     ; 6B8358FC                | fdmov   f3,[fpr0]
    ldur d12, [x27, 0xFFFFFFFFFFFFFF90]     ; 6C0359FC                | fdmov   f4,[fpr1]
    ldur d13, [x27, 0xFFFFFFFFFFFFFF98]     ; 6D8359FC                | fdmov   f5,[fpr2]
    ldur d14, [x27, 0xFFFFFFFFFFFFFFA0]     ; 6E035AFC                | fdmov   f6,[fpr3]
    ldr d15, [x27, 0x78]                    ; 6F3F40FD                | fdmov   f7,[fpr30]
    ldr w9, [x27, 0x10FC]                   ; 69FF50B9                | test    [irq_pending],$FFFFFFFF,Z
    mov w10, 0xFFFFFFFFFFFFFFFF             ; 0A008012
    tst w9, w10                             ; 3F010A6A
    b.eq PC$1                               ; 00000054                | jmp     $       1,z
    ldr w9, [x27, 0xAC]                     ; 69AF40B9                | test    [msr],$8000,Z
    tst w9, 0x8000                          ; 3F011172
    b.eq PC$1                               ; 00000054                | jmp     $       1,z
    ldur w19, [x27, 0xFFFFFFFFFFFFFF00]     ; 730350B8                | mov     i0,[pc]
    mov w20, 0                              ; 14008052                | mov     i1,$0
    ldr x12, [x27, 0x1208]                  ; 6C0749F9                | callh   exception_ei_norecover
    blr x12                                 ; 80013FD6

@cuavas
Copy link
Member Author

cuavas commented Mar 13, 2025

Thanks.

As for the apparent spurious instructions, I wonder if we’re running into asmjit bugs. @987123879113 and I saw a bug in the function prologue it generates where it sets the frame pointer twice.

@cuavas cuavas merged commit e6c8f8c into mamedev:master Mar 13, 2025
5 checks passed
@cuavas cuavas deleted the armumlcomment branch March 13, 2025 12:41
cuavas referenced this pull request Mar 14, 2025
* Correctly identify valid immediate constants for add/sub/cmp (it was
  too conservative).
* Don't unnecessarily copy UML register values kept in host registers
  for CMP.
* Fixed detection of TST against immediate zero and optimised generated
  code.
* Optimised TST against immediate with all bits set.

-cpu/alto2: Follow the same pattern as the other things that have been
 altered to avoid problematic memsets in this device.

-cpu/powerpc: Realigned some comments that had drifted.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants