
Commit 94c4b52

Revert "Switch reverse PInvoke to the NativeCallable plan (#34251)" (#34306)
This reverts commit 4e30ff0.
1 parent d9b1a28 · commit 94c4b52

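For context (not part of the diff): reverse P/Invoke is the native-to-managed call direction, in which native code invokes a managed method through a function pointer. The "NativeCallable plan" in the title appears to refer to handling those transitions the way methods marked with NativeCallableAttribute are handled (the JIT emits the native-to-managed transition in the method itself), instead of via the hand-written UMThunkStub assembly stubs that this revert restores. Below is a minimal C# sketch of the shape involved; NativeCallableAttribute was not public API at this point, so the attribute declaration is illustrative.

using System;
using System.Runtime.InteropServices;

// Illustrative stand-in for the non-public attribute the commit title refers to;
// tests in the repo declared an equivalent attribute themselves.
[AttributeUsage(AttributeTargets.Method)]
public sealed class NativeCallableAttribute : Attribute
{
    public string EntryPoint;
    public CallingConvention CallingConvention;
}

public static class Exports
{
    // A reverse-P/Invoke entry point: native code may call this method through a
    // raw function pointer (obtained via ldftn in IL), with no delegate involved.
    [NativeCallable(EntryPoint = "add_ints", CallingConvention = CallingConvention.Cdecl)]
    public static int AddInts(int a, int b) => a + b;
}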
20 files changed · +936 additions · -75 deletions


src/coreclr/src/tools/Common/JitInterface/CorInfoImpl.cs

Lines changed: 2 additions & 1 deletion
@@ -2921,7 +2921,8 @@ private uint getJitFlags(ref CORJIT_FLAGS flags, uint sizeInBytes)
             if (this.MethodBeingCompiled.IsNativeCallable)
             {
 #if READYTORUN
-                if (targetArchitecture == TargetArchitecture.X86)
+                if (targetArchitecture == TargetArchitecture.X86
+                    && _compilation.TypeSystemContext.Target.OperatingSystem == TargetOS.Windows)
                 {
                     throw new RequiresRuntimeJitException("ReadyToRun: Methods with NativeCallableAttribute not implemented");
                 }

src/coreclr/src/tools/crossgen2/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs

Lines changed: 3 additions & 1 deletion
@@ -1661,7 +1661,9 @@ private void getCallInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_RESO
             pResult->methodFlags = FilterNamedIntrinsicMethodAttribs(pResult->methodFlags, methodToCall);
 
             var targetDetails = _compilation.TypeSystemContext.Target;
-            if (targetDetails.Architecture == TargetArchitecture.X86 && targetMethod.IsNativeCallable)
+            if (targetDetails.Architecture == TargetArchitecture.X86
+                && targetDetails.OperatingSystem == TargetOS.Windows
+                && targetMethod.IsNativeCallable)
             {
                 throw new RequiresRuntimeJitException("ReadyToRun: References to methods with NativeCallableAttribute not implemented");
             }

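The two hunks above restore the same guard in both crossgen2 JIT-interface implementations: on Windows x86, compiling a NativeCallable method (first hunk) or merely referencing one from compiled code (second hunk) is deferred to the runtime JIT via RequiresRuntimeJitException. A hedged sketch of the kind of user code that produces such a reference, written with the later public replacements (UnmanagedCallersOnlyAttribute and C# 9 function pointers) purely for illustration; at the time of this commit the address was typically taken with ldftn in hand-written IL, and the library name below is a placeholder.

using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

public static unsafe class CallbackTable
{
    // Hypothetical native import that stores a callback; the library name is a
    // placeholder for illustration only.
    [DllImport("nativelib")]
    private static extern void RegisterCallback(delegate* unmanaged[Cdecl]<int, int, int> cb);

    // Successor of NativeCallableAttribute: marks a method native code may call directly.
    [UnmanagedCallersOnly(CallConvs = new[] { typeof(CallConvCdecl) })]
    private static int Add(int a, int b) => a + b;

    public static void Register()
    {
        // Taking the method's address creates exactly the kind of reference the
        // second guard above defers to the runtime JIT on Windows x86.
        RegisterCallback(&Add);
    }
}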
src/coreclr/src/vm/amd64/UMThunkStub.asm

Lines changed: 241 additions & 0 deletions
@@ -11,8 +11,13 @@
 include <AsmMacros.inc>
 include AsmConstants.inc
 
+extern CreateThreadBlockThrow:proc
 extern TheUMEntryPrestubWorker:proc
 extern UMEntryPrestubUnwindFrameChainHandler:proc
+extern UMThunkStubUnwindFrameChainHandler:proc
+extern g_TrapReturningThreads:dword
+extern UMThunkStubRareDisableWorker:proc
+extern ReversePInvokeBadTransition:proc
 
 ;
 ; METHODDESC_REGISTER: UMEntryThunk*
@@ -73,4 +78,240 @@ endif
 
 NESTED_END TheUMEntryPrestub, _TEXT
 
+
+;
+; METHODDESC_REGISTER: UMEntryThunk*
+;
+NESTED_ENTRY UMThunkStub, _TEXT, UMThunkStubUnwindFrameChainHandler
+
+UMThunkStubAMD64_STACK_FRAME_SIZE = 0
+
+; number of integer registers saved in prologue
+UMThunkStubAMD64_NUM_REG_PUSHES = 2
+UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + (UMThunkStubAMD64_NUM_REG_PUSHES * 8)
+
+; rare path spill area
+UMThunkStubAMD64_RARE_PATH_SPILL_SIZE = 10h
+UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + UMThunkStubAMD64_RARE_PATH_SPILL_SIZE
+UMThunkStubAMD64_RARE_PATH_SPILL_NEGOFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE
+
+
+
+; HOST_NOTIFY_FLAG
+UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + 8
+UMThunkStubAMD64_HOST_NOTIFY_FLAG_NEGOFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE
+
+; XMM save area
+UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + SIZEOF_MAX_FP_ARG_SPILL
+
+; Ensure that the offset of the XMM save area will be 16-byte aligned.
+if ((UMThunkStubAMD64_STACK_FRAME_SIZE + 8) MOD 16) ne 0        ; +8 for caller-pushed return address
+UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + 8
+endif
+
+UMThunkStubAMD64_XMM_SAVE_NEGOFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE
+
+; Add in the callee scratch area size.
+UMThunkStubAMD64_CALLEE_SCRATCH_SIZE = SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES
+UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + UMThunkStubAMD64_CALLEE_SCRATCH_SIZE
+
+; Now we have the full size of the stack frame.  The offsets have been computed relative to the
+; top, so negate them to make them relative to the post-prologue rsp.
+UMThunkStubAMD64_FRAME_OFFSET = UMThunkStubAMD64_CALLEE_SCRATCH_SIZE
+UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE - UMThunkStubAMD64_FRAME_OFFSET - UMThunkStubAMD64_RARE_PATH_SPILL_NEGOFFSET
+UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE - UMThunkStubAMD64_FRAME_OFFSET - UMThunkStubAMD64_HOST_NOTIFY_FLAG_NEGOFFSET
+UMThunkStubAMD64_XMM_SAVE_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE - UMThunkStubAMD64_FRAME_OFFSET - UMThunkStubAMD64_XMM_SAVE_NEGOFFSET
+UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE + 8 - UMThunkStubAMD64_FRAME_OFFSET    ; +8 for return address
+UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE - (UMThunkStubAMD64_NUM_REG_PUSHES * 8)
+
+.errnz UMTHUNKSTUB_HOST_NOTIFY_FLAG_RBPOFFSET - UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET, update UMTHUNKSTUB_HOST_NOTIFY_FLAG_RBPOFFSET
+
+
+;
+; [ callee scratch ]            <-- new RSP
+; [ callee scratch ]
+; [ callee scratch ]
+; [ callee scratch ]
+; {optional stack args passed to callee}
+; xmm0                          <-- RBP
+; xmm1
+; xmm2
+; xmm3
+; {optional padding to align xmm regs}
+; HOST_NOTIFY_FLAG (needs to make ReverseLeaveRuntime call flag)
+; [rare path spill area]
+; [rare path spill area]
+; rbp save
+; r12 save
+; return address                <-- entry RSP
+; [rcx home]
+; [rdx home]
+; [r8 home]
+; [r9 home]
+; stack arg 0
+; stack arg 1
+; ...
+
+        push_nonvol_reg r12
+        push_nonvol_reg rbp                                                             ; stack_args
+        alloc_stack     UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE
+        set_frame       rbp, UMThunkStubAMD64_FRAME_OFFSET                              ; stack_args
+        mov             byte ptr [rbp + UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET], 0    ; hosted
+        END_PROLOGUE
+
+        ;
+        ; Call GetThread()
+        ;
+        INLINE_GETTHREAD r12                    ; will not trash r10
+        test            r12, r12
+        jz              DoThreadSetup
+
+HaveThread:
+
+        ;FailFast if a native callable method invoked via ldftn and calli.
+        cmp             dword ptr [r12 + OFFSETOF__Thread__m_fPreemptiveGCDisabled], 1
+        jz              InvalidTransition
+
+        ;
+        ; disable preemptive GC
+        ;
+        mov             dword ptr [r12 + OFFSETOF__Thread__m_fPreemptiveGCDisabled], 1
+
+        ;
+        ; catch returning thread here if a GC is in progress
+        ;
+        cmp             [g_TrapReturningThreads], 0
+        jnz             DoTrapReturningThreadsTHROW
+
+InCooperativeMode:
+
+        mov             r11, [METHODDESC_REGISTER + OFFSETOF__UMEntryThunk__m_pUMThunkMarshInfo]
+        mov             eax, [r11 + OFFSETOF__UMThunkMarshInfo__m_cbActualArgSize]      ; stack_args
+        test            rax, rax                                                        ; stack_args
+        jnz             CopyStackArgs                                                   ; stack_args
+
+ArgumentsSetup:
+
+        mov             rax, [r11 + OFFSETOF__UMThunkMarshInfo__m_pILStub]              ; rax <- Stub*
+        call            rax
+
+PostCall:
+        ;
+        ; enable preemptive GC
+        ;
+        mov             dword ptr [r12 + OFFSETOF__Thread__m_fPreemptiveGCDisabled], 0
+
+        ; epilog
+        lea             rsp, [rbp - UMThunkStubAMD64_FRAME_OFFSET + UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE]
+        pop             rbp                                                             ; stack_args
+        pop             r12
+        ret
+
+
+DoThreadSetup:
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h],  rcx
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h],  rdx
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h], r8
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h], r9
+
+        ; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls
+        ;            initial measurements indidcate that this could be worth about a 5% savings in reverse
+        ;            pinvoke overhead.
+        movdqa          xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0h],  xmm0
+        movdqa          xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h], xmm1
+        movdqa          xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h], xmm2
+        movdqa          xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h], xmm3
+
+        mov             [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET], METHODDESC_REGISTER
+        call            CreateThreadBlockThrow
+        mov             METHODDESC_REGISTER, [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET]
+
+        mov             rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h]
+        mov             rdx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h]
+        mov             r8,  [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h]
+        mov             r9,  [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h]
+
+        ; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls
+        movdqa          xmm0, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0h]
+        movdqa          xmm1, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h]
+        movdqa          xmm2, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h]
+        movdqa          xmm3, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h]
+
+        mov             r12, rax
+
+        jmp             HaveThread
+
+InvalidTransition:
+        ; ReversePInvokeBadTransition will failfast
+        call            ReversePInvokeBadTransition
+
+DoTrapReturningThreadsTHROW:
+
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h],  rcx
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h],  rdx
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h], r8
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h], r9
+
+        ; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls
+        ;            initial measurements indidcate that this could be worth about a 5% savings in reverse
+        ;            pinvoke overhead.
+        movdqa          xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0h],  xmm0
+        movdqa          xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h], xmm1
+        movdqa          xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h], xmm2
+        movdqa          xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h], xmm3
+
+        mov             [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET], METHODDESC_REGISTER
+        mov             rcx, r12                                                        ; Thread* pThread
+        mov             rdx, METHODDESC_REGISTER                                        ; UMEntryThunk* pUMEntry
+        call            UMThunkStubRareDisableWorker
+        mov             METHODDESC_REGISTER, [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET]
+
+        mov             rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h]
+        mov             rdx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h]
+        mov             r8,  [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h]
+        mov             r9,  [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h]
+
+        ; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls
+        movdqa          xmm0, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 0h]
+        movdqa          xmm1, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h]
+        movdqa          xmm2, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h]
+        movdqa          xmm3, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h]
+
+        jmp             InCooperativeMode
+
+CopyStackArgs:
+        ; rax = cbStackArgs (with 20h for register args subtracted out already)
+
+        sub             rsp, rax
+        and             rsp, -16
+
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h],  rcx
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h],  rdx
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h], r8
+
+        ; rax = number of bytes
+
+        lea             rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES]
+        lea             rdx, [rsp + UMThunkStubAMD64_CALLEE_SCRATCH_SIZE]
+
+CopyLoop:
+        ; rax = number of bytes
+        ; rcx = src
+        ; rdx = dest
+        ; r8 = sratch
+
+        add             rax, -8
+        mov             r8, [rcx + rax]
+        mov             [rdx + rax], r8
+        jnz             CopyLoop
+
+        mov             rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h]
+        mov             rdx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 8h]
+        mov             r8,  [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h]
+
+        jmp             ArgumentsSetup
+
+NESTED_END UMThunkStub, _TEXT
+
 end
+

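For context (not part of the commit): UMThunkStub is the generic amd64 entry stub behind UMEntryThunk, the thunk the runtime hands to native code for a reverse P/Invoke. As the assembly above shows, it switches the thread to cooperative GC mode (creating a Thread if needed), then calls the per-signature IL stub from UMThunkMarshInfo.m_pILStub, which invokes the managed target. A hedged C# sketch of the most common way user code reaches this path, namely marshaling a delegate to a native function pointer:

using System;
using System.Runtime.InteropServices;

public static class CallbackDemo
{
    [UnmanagedFunctionPointer(CallingConvention.Cdecl)]
    public delegate int BinaryOp(int a, int b);

    // Keep the delegate alive for as long as native code may call back through
    // the returned pointer; the thunk is only valid while the delegate lives.
    private static readonly BinaryOp s_add = (a, b) => a + b;

    public static IntPtr GetNativeCallback()
    {
        // The returned pointer targets an UMEntryThunk; when native code calls it,
        // execution enters a stub such as UMThunkStub before the managed lambda runs.
        return Marshal.GetFunctionPointerForDelegate(s_add);
    }
}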
src/coreclr/src/vm/amd64/asmconstants.h

Lines changed: 17 additions & 0 deletions
@@ -98,6 +98,21 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__ComPrestubMethodFrame
 #define SIZEOF__ComMethodFrame 0x20
 ASMCONSTANTS_C_ASSERT(SIZEOF__ComMethodFrame
                       == sizeof(ComMethodFrame));
+#endif // FEATURE_COMINTEROP
+
+#define OFFSETOF__UMEntryThunk__m_pUMThunkMarshInfo 0x18
+ASMCONSTANTS_C_ASSERT(OFFSETOF__UMEntryThunk__m_pUMThunkMarshInfo
+                      == offsetof(UMEntryThunk, m_pUMThunkMarshInfo));
+
+#define OFFSETOF__UMThunkMarshInfo__m_pILStub 0x00
+ASMCONSTANTS_C_ASSERT(OFFSETOF__UMThunkMarshInfo__m_pILStub
+                      == offsetof(UMThunkMarshInfo, m_pILStub));
+
+#define OFFSETOF__UMThunkMarshInfo__m_cbActualArgSize 0x08
+ASMCONSTANTS_C_ASSERT(OFFSETOF__UMThunkMarshInfo__m_cbActualArgSize
+                      == offsetof(UMThunkMarshInfo, m_cbActualArgSize));
+
+#ifdef FEATURE_COMINTEROP
 
 #define OFFSETOF__ComPlusCallMethodDesc__m_pComPlusCallInfo DBG_FRE(0x30, 0x08)
 ASMCONSTANTS_C_ASSERT(OFFSETOF__ComPlusCallMethodDesc__m_pComPlusCallInfo
@@ -482,6 +497,8 @@ ASMCONSTANTS_C_ASSERT(OFFSET__TEB__ThreadLocalStoragePointer == offsetof(TEB, Th
 
 #define THROWSTUB_ESTABLISHER_OFFSET_FaultingExceptionFrame 0x30
 
+#define UMTHUNKSTUB_HOST_NOTIFY_FLAG_RBPOFFSET (0x40)    // xmm save size
+
 #define Thread__ObjectRefFlush  ?ObjectRefFlush@Thread@@SAXPEAV1@@Z
 
 