@@ -8,8 +8,9 @@ L_dyn_call_begin:
8
8
# At this point , the following registers are bound :
9
9
#
10
10
# rdi < - callee
11
- # rsi < - argv
12
- # rdx < - argc
11
+ # rsi < - process
12
+ # rdx < - argv
13
+ # rcx < - argc
13
14
#
14
15
# Save the parent base pointer for when control returns to this call frame.
15
16
# CFA directives will inform the unwinder to expect rbp at the bottom of the
@@ -20,14 +21,16 @@ L_dyn_call_begin:
20
21
mov rbp , rsp
21
22
.cfi_def_cfa_register rbp
22
23
23
- # Save our callee and argv pointers , and argc
24
+ # Pin callee pointer to r10
24
25
mov r10 , rdi
25
- mov r11 , rsi
26
- mov rax , rdx
26
+ # Pin the argv pointer to r11
27
+ mov r11 , rdx
28
+ # The process pointer needs to be in rdi
29
+ mov rdi , rsi
27
30
28
- # Determine if spills are needed
31
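+ # Determine if spills are needed (process + argc must fit in the 6 argument registers , i.e. argc + 1 <= 6 , when not needed) ; NOTE(review): the compare against 6 with `ja` below still admits argc == 6 , but the jump table only has entries 0..5 - confirm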
29
32
# In the common case in which they are not , we perform a tail call
30
- cmp rdx , 7
33
+ cmp rcx , 6
31
34
ja L_dyn_call_spill
32
35
33
36
L_dyn_call_no_spill:
@@ -39,70 +42,70 @@ L_dyn_call_no_spill:
39
42
# Calculate offset in jump table to block which handles the specific
40
43
# number of registers we have arguments for , then jump to that block
41
44
lea rcx , [ rip + L_dyn_call_jt ]
42
- mov rax , [ rcx + rax * 4 ]
45
+ movsxd rax , dword ptr [ rcx + 4 * rax ]
43
46
add rax , rcx
44
- jmp [ rax ]
47
+ jmp rax
45
48
46
49
# All of these basic blocks perform a tail call . As such ,
47
50
# the unwinder will skip over this frame should the callee
48
51
# throw an exception
49
52
L_dyn_call_regs0:
50
53
pop rbp
51
- jmp [ r10 ]
54
+ jmp r10
52
55
53
56
L_dyn_call_regs1:
54
- mov rdi , [ r11 ]
57
+ mov rsi , [ r11 ]
55
58
pop rbp
56
- jmp [ r10 ]
59
+ jmp r10
57
60
58
61
L_dyn_call_regs2:
59
- mov rdi , [ r11 ]
60
- mov rsi , [ r11 + 8 ]
62
+ mov rsi , [ r11 ]
63
+ mov rdx , [ r11 + 8 ]
61
64
pop rbp
62
- jmp [ r10 ]
65
+ jmp r10
63
66
64
67
L_dyn_call_regs3:
65
- mov rdi , [ r11 ]
66
- mov rsi , [ r11 + 8 ]
67
- mov rdx , [ r11 + 16 ]
68
+ mov rsi , [ r11 ]
69
+ mov rdx , [ r11 + 8 ]
70
+ mov rcx , [ r11 + 16 ]
68
71
pop rbp
69
- jmp [ r10 ]
72
+ jmp r10
70
73
71
74
L_dyn_call_regs4:
72
- mov rdi , [ r11 ]
73
- mov rsi , [ r11 + 8 ]
74
- mov rdx , [ r11 + 16 ]
75
- mov rcx , [ r11 + 24 ]
75
+ mov rsi , [ r11 ]
76
+ mov rdx , [ r11 + 8 ]
77
+ mov rcx , [ r11 + 16 ]
78
+ mov r8 , [ r11 + 24 ]
76
79
pop rbp
77
- jmp [ r10 ]
80
+ jmp r10
78
81
79
82
L_dyn_call_regs5:
80
- mov rdi , [ r11 ]
81
- mov rsi , [ r11 + 8 ]
82
- mov rdx , [ r11 + 16 ]
83
- mov rcx , [ r11 + 24 ]
84
- mov r8 , [ r11 + 32 ]
83
+ mov rsi , [ r11 ]
84
+ mov rdx , [ r11 + 8 ]
85
+ mov rcx , [ r11 + 16 ]
86
+ mov r8 , [ r11 + 24 ]
87
+ mov r9 , [ r11 + 32 ]
85
88
pop rbp
86
- jmp [ r10 ]
87
-
88
- L_dyn_call_regs6:
89
- mov rdi , [ r11 ]
90
- mov rsi , [ r11 + 8 ]
91
- mov rdx , [ r11 + 16 ]
92
- mov rcx , [ r11 + 24 ]
93
- mov r8 , [ r11 + 32 ]
94
- mov r9 , [ r11 + 40 ]
95
- pop rbp
96
- jmp [ r10 ]
89
+ jmp r10
97
90
98
91
L_dyn_call_spill:
99
92
# If we hit this block , we have identified that there are
100
93
# arguments to spill. We perform some setup for the actual
101
94
# spilling , which is a loop built on ` rep movsq `
95
+ #
96
+ # At this point , the following registers are occupied/hold these values:
97
+ #
98
+ # r10 < - callee
99
+ # rdi < - process
100
+ # r11 < - argv
101
+ # rcx < - argc
102
+
103
+ # rcx , rdi , and rsi are used by ` rep movsq ` , so stash the live values temporarily : argc ( rcx ) in r8 and process ( rdi ) in r9
104
+ mov r8 , rcx
105
+ mov r9 , rdi
102
106
103
- # Calculate spill count for later ( rep uses rcx for the iteration count ,
107
+ # Calculate spill count for later ( rep uses rcx for the iteration count `i` ,
104
108
# which in this case is the number of quadwords to copy)
105
- mov rcx , rdx
106
109
sub rcx , 6
107
110
108
111
# Calculate spill space , and ensure it is rounded up to the nearest 16 bytes.
@@ -113,21 +116,22 @@ L_dyn_call_spill:
113
116
sub rsp , rax
114
117
115
118
# load source pointer (last item of argv)
116
- lea rsi , [ r11 + rdx * 8 - 8 ]
119
+ lea rsi , [ r11 + r8 * 8 - 8 ]
117
120
# load destination pointer (top of spill region)
118
- lea rdi , [ rsp + rcx * 8 - 8 ]
119
- # copy rcx quadwords from rsi to rdi , in reverse
121
+ lea rdi , [ rsp + rcx * 8 - 8 ]
122
+ # copy `i` quadwords from source to destination , in reverse
120
123
std
121
124
rep movsq
122
125
cld
123
126
124
- # We've spilled arguments , so we have at least 6 args
125
- mov rdi , [ r11 ]
126
- mov rsi , [ r11 + 8 ]
127
- mov rdx , [ r11 + 16 ]
128
- mov rcx , [ r11 + 24 ]
129
- mov r8 , [ r11 + 32 ]
130
- mov r9 , [ r11 + 40 ]
127
+ # We've spilled arguments , so we have at least 6 args , move them into their
128
+ # final destination registers in preparation for the call
129
+ mov rdi , r9
130
+ mov rsi , [ r11 ]
131
+ mov rdx , [ r11 + 8 ]
132
+ mov rcx , [ r11 + 16 ]
133
+ mov r8 , [ r11 + 24 ]
134
+ mov r9 , [ r11 + 32 ]
131
135
132
136
L_dyn_call_exec:
133
137
# If we spill arguments to the stack , we can't perform
@@ -141,7 +145,7 @@ L_dyn_call_exec:
141
145
# This instruction will push the return address and jump ,
142
146
# and we can expect rbp to be the same as we left it upon
143
147
# return.
144
- call [ r10 ]
148
+ call r10
145
149
146
150
L_dyn_call_ret:
147
151
# Non - tail call completed successfully
@@ -156,21 +160,19 @@ L_dyn_call_end:
156
160
# a variable number of register - based arguments
157
161
.p2align 2
158
162
.data_region jt32
159
- .set L_dyn_call_jt_entry0 , L_dyn_call_exec - L_dyn_call_jt
163
+ .set L_dyn_call_jt_entry0 , L_dyn_call_regs0 - L_dyn_call_jt
160
164
.set L_dyn_call_jt_entry1 , L_dyn_call_regs1 - L_dyn_call_jt
161
165
.set L_dyn_call_jt_entry2 , L_dyn_call_regs2 - L_dyn_call_jt
162
166
.set L_dyn_call_jt_entry3 , L_dyn_call_regs3 - L_dyn_call_jt
163
167
.set L_dyn_call_jt_entry4 , L_dyn_call_regs4 - L_dyn_call_jt
164
168
.set L_dyn_call_jt_entry5 , L_dyn_call_regs5 - L_dyn_call_jt
165
- .set L_dyn_call_jt_entry6 , L_dyn_call_regs6 - L_dyn_call_jt
166
169
L_dyn_call_jt:
167
170
.long L_dyn_call_jt_entry0
168
171
.long L_dyn_call_jt_entry1
169
172
.long L_dyn_call_jt_entry2
170
173
.long L_dyn_call_jt_entry3
171
174
.long L_dyn_call_jt_entry4
172
175
.long L_dyn_call_jt_entry5
173
- .long L_dyn_call_jt_entry6
174
176
.end_data_region
175
177
176
178
# The following is the LSDA metadata for exception handling
0 commit comments