11 | 11 | include <AsmMacros.inc>
12 | 12 | include AsmConstants.inc
13 | 13 |
| 14 | +extern CreateThreadBlockThrow:proc |
14 | 15 | extern TheUMEntryPrestubWorker:proc
15 | 16 | extern UMEntryPrestubUnwindFrameChainHandler:proc
| 17 | +extern UMThunkStubUnwindFrameChainHandler:proc |
| 18 | +extern g_TrapReturningThreads:dword |
| 19 | +extern UMThunkStubRareDisableWorker:proc |
| 20 | +extern ReversePInvokeBadTransition:proc |
16 | 21 |
17 | 22 | ;
18 | 23 | ; METHODDESC_REGISTER: UMEntryThunk*
@@ -73,4 +78,240 @@ endif
73 | 78 |
74 | 79 | NESTED_END TheUMEntryPrestub, _TEXT
75 | 80 |
| 81 | + |
| 82 | +; |
| 83 | +; METHODDESC_REGISTER: UMEntryThunk* |
| 84 | +; |
| 85 | +NESTED_ENTRY UMThunkStub, _TEXT, UMThunkStubUnwindFrameChainHandler |
| 86 | + |
| 87 | +UMThunkStubAMD64_STACK_FRAME_SIZE = 0 |
| 88 | + |
| 89 | +; number of integer registers saved in prologue |
| 90 | +UMThunkStubAMD64_NUM_REG_PUSHES = 2 |
| 91 | +UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + (UMThunkStubAMD64_NUM_REG_PUSHES * 8) |
| 92 | + |
| 93 | +; rare path spill area |
| 94 | +UMThunkStubAMD64_RARE_PATH_SPILL_SIZE = 10h |
| 95 | +UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + UMThunkStubAMD64_RARE_PATH_SPILL_SIZE |
| 96 | +UMThunkStubAMD64_RARE_PATH_SPILL_NEGOFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE |
| 97 | + |
| 98 | + |
| 99 | + |
| 100 | +; HOST_NOTIFY_FLAG |
| 101 | +UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + 8 |
| 102 | +UMThunkStubAMD64_HOST_NOTIFY_FLAG_NEGOFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE |
| 103 | + |
| 104 | +; XMM save area |
| 105 | +UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + SIZEOF_MAX_FP_ARG_SPILL |
| 106 | + |
| 107 | +; Ensure that the offset of the XMM save area will be 16-byte aligned. |
| 108 | +if ((UMThunkStubAMD64_STACK_FRAME_SIZE + 8) MOD 16) ne 0 ; +8 for caller-pushed return address |
| 109 | +UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + 8 |
| 110 | +endif |
| 111 | + |
| 112 | +UMThunkStubAMD64_XMM_SAVE_NEGOFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE |
| 113 | + |
| 114 | +; Add in the callee scratch area size. |
| 115 | +UMThunkStubAMD64_CALLEE_SCRATCH_SIZE = SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES |
| 116 | +UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + UMThunkStubAMD64_CALLEE_SCRATCH_SIZE |
| 117 | + |
| 118 | +; Now we have the full size of the stack frame. The offsets have been computed relative to the |
| 119 | +; top, so negate them to make them relative to the post-prologue rsp. |
| 120 | +UMThunkStubAMD64_FRAME_OFFSET = UMThunkStubAMD64_CALLEE_SCRATCH_SIZE |
| 121 | +UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE - UMThunkStubAMD64_FRAME_OFFSET - UMThunkStubAMD64_RARE_PATH_SPILL_NEGOFFSET |
| 122 | +UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE - UMThunkStubAMD64_FRAME_OFFSET - UMThunkStubAMD64_HOST_NOTIFY_FLAG_NEGOFFSET |
| 123 | +UMThunkStubAMD64_XMM_SAVE_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE - UMThunkStubAMD64_FRAME_OFFSET - UMThunkStubAMD64_XMM_SAVE_NEGOFFSET |
| 124 | +UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE + 8 - UMThunkStubAMD64_FRAME_OFFSET ; +8 for return address |
| 125 | +UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE - (UMThunkStubAMD64_NUM_REG_PUSHES * 8) |
| 126 | + |
| 127 | +.errnz UMTHUNKSTUB_HOST_NOTIFY_FLAG_RBPOFFSET - UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET, update UMTHUNKSTUB_HOST_NOTIFY_FLAG_RBPOFFSET |
| 128 | + |
| 129 | + |
| 130 | +; |
| 131 | +; [ callee scratch ] <-- new RSP |
| 132 | +; [ callee scratch ] |
| 133 | +; [ callee scratch ] |
| 134 | +; [ callee scratch ] |
| 135 | +; {optional stack args passed to callee} |
| 136 | +; xmm0 <-- RBP |
| 137 | +; xmm1 |
| 138 | +; xmm2 |
| 139 | +; xmm3 |
| 140 | +; {optional padding to align xmm regs} |
| 141 | +; HOST_NOTIFY_FLAG (flag: a ReverseLeaveRuntime call needs to be made) |
| 142 | +; [rare path spill area] |
| 143 | +; [rare path spill area] |
| 144 | +; rbp save |
| 145 | +; r12 save |
| 146 | +; return address <-- entry RSP |
| 147 | +; [rcx home] |
| 148 | +; [rdx home] |
| 149 | +; [r8 home] |
| 150 | +; [r9 home] |
| 151 | +; stack arg 0 |
| 152 | +; stack arg 1 |
| 153 | +; ... |
| 154 | + |
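
For readers following the frame-size computation and the layout diagram above, here is a small, hedged C sketch that redoes the same arithmetic. The two SIZEOF_* constants are assumed values standing in for AsmConstants.inc (4 xmm registers of 16 bytes, and 4 outgoing argument homes of 8 bytes); under those assumptions the xmm save area lands at rbp + 0, which matches the "xmm0 <-- RBP" line in the diagram.

#include <stdio.h>

int main(void)
{
    const int SIZEOF_MAX_FP_ARG_SPILL            = 4 * 16;  /* assumption: xmm0-xmm3 */
    const int SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES = 4 * 8;   /* assumption: rcx/rdx/r8/r9 homes */

    int frame = 0;
    const int num_reg_pushes = 2;                 /* r12, rbp pushed in the prologue */
    frame += num_reg_pushes * 8;

    frame += 0x10;                                /* rare path spill area */
    const int rare_path_spill_neg = frame;

    frame += 8;                                   /* HOST_NOTIFY_FLAG */
    const int host_notify_neg = frame;

    frame += SIZEOF_MAX_FP_ARG_SPILL;             /* xmm save area */
    if ((frame + 8) % 16 != 0)                    /* +8 for the caller-pushed return address */
        frame += 8;                               /* padding so the xmm area is 16-byte aligned */
    const int xmm_save_neg = frame;

    frame += SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES;  /* callee scratch area */

    const int frame_offset = SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES;
    printf("rare path spill   = rbp + %d\n", frame - frame_offset - rare_path_spill_neg);
    printf("HOST_NOTIFY_FLAG  = rbp + %d\n", frame - frame_offset - host_notify_neg);
    printf("xmm save area     = rbp + %d\n", frame - frame_offset - xmm_save_neg);
    printf("arg register homes= rbp + %d\n", frame + 8 - frame_offset);
    printf("alloc_stack size  = %d\n", frame - num_reg_pushes * 8);
    return 0;
}
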
| 155 | + push_nonvol_reg r12 |
| 156 | + push_nonvol_reg rbp ; stack_args |
| 157 | + alloc_stack UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE |
| 158 | + set_frame rbp, UMThunkStubAMD64_FRAME_OFFSET ; stack_args |
| 159 | + mov byte ptr [rbp + UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET], 0 ; hosted |
| 160 | + END_PROLOGUE |
| 161 | + |
| 162 | + ; |
| 163 | + ; Call GetThread() |
| 164 | + ; |
| 165 | + INLINE_GETTHREAD r12 ; will not trash r10 |
| 166 | + test r12, r12 |
| 167 | + jz DoThreadSetup |
| 168 | + |
| 169 | +HaveThread: |
| 170 | + |
| 171 | +        ;FailFast if a native callable method is invoked via ldftn and calli. |
| 172 | + cmp dword ptr [r12 + OFFSETOF__Thread__m_fPreemptiveGCDisabled], 1 |
| 173 | + jz InvalidTransition |
| 174 | + |
| 175 | + ; |
| 176 | + ; disable preemptive GC |
| 177 | + ; |
| 178 | + mov dword ptr [r12 + OFFSETOF__Thread__m_fPreemptiveGCDisabled], 1 |
| 179 | + |
| 180 | + ; |
| 181 | + ; catch returning thread here if a GC is in progress |
| 182 | + ; |
| 183 | + cmp [g_TrapReturningThreads], 0 |
| 184 | + jnz DoTrapReturningThreadsTHROW |
| 185 | + |
| 186 | +InCooperativeMode: |
| 187 | + |
| 188 | + mov r11, [METHODDESC_REGISTER + OFFSETOF__UMEntryThunk__m_pUMThunkMarshInfo] |
| 189 | + mov eax, [r11 + OFFSETOF__UMThunkMarshInfo__m_cbActualArgSize] ; stack_args |
| 190 | + test rax, rax ; stack_args |
| 191 | + jnz CopyStackArgs ; stack_args |
| 192 | + |
| 193 | +ArgumentsSetup: |
| 194 | + |
| 195 | + mov rax, [r11 + OFFSETOF__UMThunkMarshInfo__m_pILStub] ; rax <- Stub* |
| 196 | + call rax |
| 197 | + |
| 198 | +PostCall: |
| 199 | + ; |
| 200 | + ; enable preemptive GC |
| 201 | + ; |
| 202 | + mov dword ptr [r12 + OFFSETOF__Thread__m_fPreemptiveGCDisabled], 0 |
| 203 | + |
| 204 | + ; epilog |
| 205 | + lea rsp, [rbp - UMThunkStubAMD64_FRAME_OFFSET + UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE] |
| 206 | + pop rbp ; stack_args |
| 207 | + pop r12 |
| 208 | + ret |
| 209 | + |
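
The fast path above can be read as the following hedged C outline. The struct fields and helper prototypes are simplified stand-ins keyed to the labels in the assembly (DoThreadSetup, InvalidTransition, DoTrapReturningThreadsTHROW, ArgumentsSetup, PostCall), not the runtime's actual declarations.

typedef struct Thread {
    int m_fPreemptiveGCDisabled;
} Thread;

typedef struct UMThunkMarshInfo {
    void *(*m_pILStub)(void *args);   /* stand-in for the IL stub entry point */
    unsigned m_cbActualArgSize;       /* size of stack arguments, in bytes */
} UMThunkMarshInfo;

typedef struct UMEntryThunk {
    UMThunkMarshInfo *m_pUMThunkMarshInfo;
} UMEntryThunk;

extern int g_TrapReturningThreads;
extern Thread *GetThread(void);                        /* INLINE_GETTHREAD r12 */
extern Thread *CreateThreadBlockThrow(void);           /* DoThreadSetup path */
extern void UMThunkStubRareDisableWorker(Thread *, UMEntryThunk *);
extern void ReversePInvokeBadTransition(void);         /* failfasts, does not return */

void *UMThunkStub_outline(UMEntryThunk *entry, void *args)
{
    Thread *t = GetThread();
    if (t == NULL)
        t = CreateThreadBlockThrow();                  /* DoThreadSetup */

    if (t->m_fPreemptiveGCDisabled)                    /* native callable hit via ldftn/calli */
        ReversePInvokeBadTransition();                 /* InvalidTransition */

    t->m_fPreemptiveGCDisabled = 1;                    /* disable preemptive GC */
    if (g_TrapReturningThreads)                        /* GC in progress */
        UMThunkStubRareDisableWorker(t, entry);        /* DoTrapReturningThreadsTHROW */

    /* CopyStackArgs: if m_cbActualArgSize != 0, stack args are copied first */
    void *result = entry->m_pUMThunkMarshInfo->m_pILStub(args);  /* ArgumentsSetup / call rax */

    t->m_fPreemptiveGCDisabled = 0;                    /* PostCall: re-enable preemptive GC */
    return result;
}
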
| 210 | + |
| 211 | +DoThreadSetup: |
| 212 | + mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h], rcx |
| 213 | + mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h], rdx |
| 214 | + mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h], r8 |
| 215 | + mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h], r9 |
| 216 | + |
| 217 | + ; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls |
| 218 | + ; initial measurements indicate that this could be worth about a 5% savings in reverse |
| 219 | + ; pinvoke overhead. |
| 220 | + movdqa xmmword ptr[rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0h], xmm0 |
| 221 | + movdqa xmmword ptr[rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h], xmm1 |
| 222 | + movdqa xmmword ptr[rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h], xmm2 |
| 223 | + movdqa xmmword ptr[rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h], xmm3 |
| 224 | + |
| 225 | + mov [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET], METHODDESC_REGISTER |
| 226 | + call CreateThreadBlockThrow |
| 227 | + mov METHODDESC_REGISTER, [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET] |
| 228 | + |
| 229 | + mov rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h] |
| 230 | + mov rdx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h] |
| 231 | + mov r8, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h] |
| 232 | + mov r9, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h] |
| 233 | + |
| 234 | + ; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls |
| 235 | + movdqa xmm0, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0h] |
| 236 | + movdqa xmm1, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h] |
| 237 | + movdqa xmm2, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h] |
| 238 | + movdqa xmm3, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h] |
| 239 | + |
| 240 | + mov r12, rax |
| 241 | + |
| 242 | + jmp HaveThread |
| 243 | + |
| 244 | +InvalidTransition: |
| 245 | + ; ReversePInvokeBadTransition will failfast |
| 246 | + call ReversePInvokeBadTransition |
| 247 | + |
| 248 | +DoTrapReturningThreadsTHROW: |
| 249 | + |
| 250 | + mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h], rcx |
| 251 | + mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h], rdx |
| 252 | + mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h], r8 |
| 253 | + mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h], r9 |
| 254 | + |
| 255 | + ; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls |
| 256 | + ; initial measurements indidcate that this could be worth about a 5% savings in reverse |
| 257 | + ; pinvoke overhead. |
| 258 | + movdqa xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0h], xmm0 |
| 259 | + movdqa xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h], xmm1 |
| 260 | + movdqa xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h], xmm2 |
| 261 | + movdqa xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h], xmm3 |
| 262 | + |
| 263 | + mov [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET], METHODDESC_REGISTER |
| 264 | + mov rcx, r12 ; Thread* pThread |
| 265 | + mov rdx, METHODDESC_REGISTER ; UMEntryThunk* pUMEntry |
| 266 | + call UMThunkStubRareDisableWorker |
| 267 | + mov METHODDESC_REGISTER, [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET] |
| 268 | + |
| 269 | + mov rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h] |
| 270 | + mov rdx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h] |
| 271 | + mov r8, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h] |
| 272 | + mov r9, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h] |
| 273 | + |
| 274 | + ; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls |
| 275 | + movdqa xmm0, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0h] |
| 276 | + movdqa xmm1, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h] |
| 277 | + movdqa xmm2, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h] |
| 278 | + movdqa xmm3, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h] |
| 279 | + |
| 280 | + jmp InCooperativeMode |
| 281 | + |
| 282 | +CopyStackArgs: |
| 283 | + ; rax = cbStackArgs (with 20h for register args subtracted out already) |
| 284 | + |
| 285 | + sub rsp, rax |
| 286 | + and rsp, -16 |
| 287 | + |
| 288 | + mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h], rcx |
| 289 | + mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h], rdx |
| 290 | + mov [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h], r8 |
| 291 | + |
| 292 | + ; rax = number of bytes |
| 293 | + |
| 294 | + lea rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES] |
| 295 | + lea rdx, [rsp + UMThunkStubAMD64_CALLEE_SCRATCH_SIZE] |
| 296 | + |
| 297 | +CopyLoop: |
| 298 | + ; rax = number of bytes |
| 299 | + ; rcx = src |
| 300 | + ; rdx = dest |
| 301 | + ; r8 = scratch |
| 302 | + |
| 303 | + add rax, -8 |
| 304 | + mov r8, [rcx + rax] |
| 305 | + mov [rdx + rax], r8 |
| 306 | + jnz CopyLoop |
| 307 | + |
| 308 | + mov rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h] |
| 309 | + mov rdx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h] |
| 310 | + mov r8, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h] |
| 311 | + |
| 312 | + jmp ArgumentsSetup |
| 313 | + |
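
A hedged C sketch of CopyLoop: the stack arguments are copied backwards one 8-byte slot at a time, and in the assembly the jnz is driven by the flags set by add rax, -8, since the intervening mov instructions do not modify RFLAGS.

#include <stddef.h>
#include <stdint.h>

/* Copy `count` bytes of stack arguments from the caller's argument area to the
 * freshly aligned outgoing-argument area, walking from the last 8-byte slot
 * down to the first. `count` is assumed to be a nonzero multiple of 8, as the
 * stub assumes for m_cbActualArgSize. */
static void copy_stack_args(uint64_t *dest, const uint64_t *src, size_t count)
{
    while (count != 0) {
        count -= 8;                         /* add rax, -8 (sets ZF when it reaches zero) */
        dest[count / 8] = src[count / 8];   /* mov r8, [rcx+rax] / mov [rdx+rax], r8 */
    }                                       /* jnz CopyLoop */
}
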
| 314 | +NESTED_END UMThunkStub, _TEXT |
| 315 | + |
76 | 316 | end
| 317 | + |