@@ -8,8 +8,9 @@ L_dyn_call_begin:
8
8
# At this point , the following registers are bound :
9
9
#
10
10
# rdi < - callee
11
- # rsi < - argv
12
- # rdx < - argc
11
+ # rsi < - process
12
+ # rdx < - argv
13
+ # rcx < - argc
13
14
#
14
15
# Save the parent base pointer for when control returns to this call frame.
15
16
# CFA directives will inform the unwinder to expect rbp at the bottom of the
@@ -20,14 +21,15 @@ L_dyn_call_begin:
20
21
mov rbp , rsp
21
22
.cfi_def_cfa_register rbp
22
23
23
- # Save our callee and argv pointers , and argc
24
+ # Save our callee , process and argv pointers , and argc
+ # After these moves: r10 = callee, r11 = process, rdi = argv, rax = argc
+ # (rcx also still holds argc and is what the spill check below tests)
24
25
mov r10 , rdi
25
26
mov r11 , rsi
26
- mov rax , rdx
27
+ mov rdi , rdx
28
+ mov rax , rcx
27
29
28
- # Determine if spills are needed
30
+ # Determine if spills are needed. The process pointer takes one of the six
+ # integer argument registers, which leaves five (rsi, rdx, rcx, r8, r9) for
+ # argv, and L_dyn_call_jt only has entries for argc 0 through 5. Anything
+ # above 5 must take the spill path; letting argc == 6 through would index
+ # the jump table one entry past its end.
29
31
# In the common case in which they are not , we perform a tail call
30
- cmp rdx , 7
32
+ cmp rcx , 5
31
33
ja L_dyn_call_spill
32
34
33
35
L_dyn_call_no_spill:
@@ -38,62 +40,52 @@ L_dyn_call_no_spill:
38
40
39
41
# Calculate offset in jump table to block which handles the specific
40
42
# number of registers we have arguments for , then jump to that block
# (rax holds argc and is used as the table index; each table entry is a
# 32-bit offset relative to L_dyn_call_jt)
41
- lea rcx , [ rip + L_dyn_call_jt ]
42
- mov rax , [ rcx + rax * 4 ]
43
- add rax , rcx
44
- jmp [ rax ]
43
+ lea rdx , [ rip + L_dyn_call_jt ] # rdx = table address; argv was already copied to rdi, so rdx is free
44
+ movsxd rax , dword ptr [ rdx + 4 * rax ] # sign-extend the entry: the handler blocks precede the table, so offsets are negative
45
+ add rax , rdx # rax = absolute address of the handler block
46
+ jmp rax # jump to the computed address itself, not through memory
45
47
46
48
# All of these basic blocks perform a tail call . As such ,
47
49
# the unwinder will skip over this frame should the callee
48
50
# throw an exception
49
51
L_dyn_call_regs0:
+ # argc == 0: the callee receives only the process pointer.
+ # rdi still holds argv at this point (set by `mov rdi , rdx` on entry) and
+ # process was saved in r11, so move it into the first argument register,
+ # as the spill path does before its call.
+ mov rdi , r11
50
52
pop rbp
51
- jmp [ r10 ]
53
+ jmp r10
52
54
53
55
L_dyn_call_regs1:
+ # argc == 1: callee receives (process, argv[0]).
54
- mov rdi , [ r11 ]
56
+ mov rsi , [ rdi ]
+ # argv (in rdi) is fully consumed; replace it with the process pointer saved
+ # in r11 so the callee sees process as its first argument, as on the spill path.
+ mov rdi , r11
55
57
pop rbp
56
- jmp [ r10 ]
58
+ jmp r10
57
59
58
60
L_dyn_call_regs2:
+ # argc == 2: callee receives (process, argv[0], argv[1]).
59
- mov rdi , [ r11 ]
60
- mov rsi , [ r11 + 8 ]
61
+ mov rsi , [ rdi ]
62
+ mov rdx , [ rdi + 8 ]
+ # argv (in rdi) is fully consumed; replace it with the process pointer saved
+ # in r11 so the callee sees process as its first argument, as on the spill path.
+ mov rdi , r11
61
63
pop rbp
62
- jmp [ r10 ]
64
+ jmp r10
63
65
64
66
L_dyn_call_regs3:
+ # argc == 3: callee receives (process, argv[0], argv[1], argv[2]).
65
- mov rdi , [ r11 ]
66
- mov rsi , [ r11 + 8 ]
67
- mov rdx , [ r11 + 16 ]
67
+ mov rsi , [ rdi ]
68
+ mov rdx , [ rdi + 8 ]
69
+ mov rcx , [ rdi + 16 ]
+ # argv (in rdi) is fully consumed; replace it with the process pointer saved
+ # in r11 so the callee sees process as its first argument, as on the spill path.
+ mov rdi , r11
68
70
pop rbp
69
- jmp [ r10 ]
71
+ jmp r10
70
72
71
73
L_dyn_call_regs4:
+ # argc == 4: callee receives (process, argv[0] .. argv[3]).
72
- mov rdi , [ r11 ]
73
- mov rsi , [ r11 + 8 ]
74
- mov rdx , [ r11 + 16 ]
75
- mov rcx , [ r11 + 24 ]
74
+ mov rsi , [ rdi ]
75
+ mov rdx , [ rdi + 8 ]
76
+ mov rcx , [ rdi + 16 ]
77
+ mov r8 , [ rdi + 24 ]
+ # argv (in rdi) is fully consumed; replace it with the process pointer saved
+ # in r11 so the callee sees process as its first argument, as on the spill path.
+ mov rdi , r11
76
78
pop rbp
77
- jmp [ r10 ]
79
+ jmp r10
78
80
79
81
L_dyn_call_regs5:
+ # argc == 5: callee receives (process, argv[0] .. argv[4]); this fills all
+ # six integer argument registers, so it is the last case that can tail call.
80
- mov rdi , [ r11 ]
81
- mov rsi , [ r11 + 8 ]
82
- mov rdx , [ r11 + 16 ]
83
- mov rcx , [ r11 + 24 ]
84
- mov r8 , [ r11 + 32 ]
82
+ mov rsi , [ rdi ]
83
+ mov rdx , [ rdi + 8 ]
84
+ mov rcx , [ rdi + 16 ]
85
+ mov r8 , [ rdi + 24 ]
86
+ mov r9 , [ rdi + 32 ]
+ # argv (in rdi) is fully consumed; replace it with the process pointer saved
+ # in r11 so the callee sees process as its first argument, as on the spill path.
+ mov rdi , r11
85
87
pop rbp
86
- jmp [ r10 ]
87
-
88
- L_dyn_call_regs6:
89
- mov rdi , [ r11 ]
90
- mov rsi , [ r11 + 8 ]
91
- mov rdx , [ r11 + 16 ]
92
- mov rcx , [ r11 + 24 ]
93
- mov r8 , [ r11 + 32 ]
94
- mov r9 , [ r11 + 40 ]
95
- pop rbp
96
- jmp [ r10 ]
88
+ jmp r10
97
89
98
90
L_dyn_call_spill:
99
91
# If we hit this block , we have identified that there are
@@ -102,7 +94,7 @@ L_dyn_call_spill:
102
94
103
95
# Calculate spill count for later ( rep uses rcx for the iteration count ,
104
96
# which in this case is the number of quadwords to copy)
+ # With the process pointer occupying rdi, only five argument registers
+ # (rsi, rdx, rcx, r8, r9) remain for argv, so argv[5 .. argc-1] spill:
+ # that is argc - 5 quadwords. argc itself is kept in r8 for the source
+ # pointer calculation below.
105
- mov rcx , rdx
97
+ mov r8 , rcx
106
98
- sub rcx , 6
+ sub rcx , 5
107
99
108
100
# Calculate spill space , and ensure it is rounded up to the nearest 16 bytes.
+ # NOTE(review): the size computation itself is in lines outside this hunk;
+ # it is assumed to derive rax from the spill count in rcx - confirm.
@@ -113,21 +105,24 @@ L_dyn_call_spill:
113
105
sub rsp , rax
114
106
115
107
# load source pointer (last item of argv)
116
- lea rsi , [ r11 + rdx * 8 - 8 ]
108
+ lea rsi , [ rdi + r8 * 8 - 8 ]
+ # rdi is about to become the copy destination and rep movsq will modify it,
+ # so keep the argv base in r8 (argc, previously in r8, is not needed after
+ # the lea above)
+ mov r8 , rdi
117
109
# load destination pointer (top of spill region)
118
- lea rdi , [ rsp + rcx * 8 - 8 ]
110
+ lea rdi , [ rsp + rcx * 8 - 8 ]
119
111
# copy rcx quadwords from rsi to rdi , in reverse
120
112
std
121
113
rep movsq
122
114
cld
123
115
124
116
- # We've spilled arguments , so we have at least 6 args
+ # We've spilled arguments , so argv holds at least 6 entries ; the first
+ # five go in registers , after the process pointer in rdi
125
- mov rdi , [ r11 ]
126
- mov rsi , [ r11 + 8 ]
127
- mov rdx , [ r11 + 16 ]
128
- mov rcx , [ r11 + 24 ]
129
- mov r8 , [ r11 + 32 ]
130
- mov r9 , [ r11 + 40 ]
117
+ mov rdi , r11 # Move process pointer to rdi
118
+ mov rsi , [ r8 ]
119
+ mov rdx , [ r8 + 8 ]
120
+ mov rcx , [ r8 + 16 ]
121
+ mov r9 , [ r8 + 32 ]
122
+ mov r8 , [ r8 + 24 ] # loaded last because it overwrites the argv base
131
126
132
127
L_dyn_call_exec:
133
128
# If we spill arguments to the stack , we can't perform
@@ -141,7 +136,7 @@ L_dyn_call_exec:
141
136
# This instruction will push the return address and jump ,
142
137
# and we can expect rbp to be the same as we left it upon
143
138
# return.
144
- call [ r10 ]
139
+ call r10 # r10 holds the callee address itself (copied from rdi on entry), so call the register directly rather than loading a target through it
145
140
146
141
L_dyn_call_ret:
147
142
# Non - tail call completed successfully
@@ -156,21 +151,19 @@ L_dyn_call_end:
156
151
# a variable number of register - based arguments
157
152
# Jump table used by the no-spill dispatch. Each entry is the 32-bit offset of
# a handler block relative to L_dyn_call_jt; the dispatcher sign-extends the
# entry and adds the table address back to obtain the jump target.
# The table is indexed by argc and has six entries, for argc 0 through 5.
.p2align 2
158
153
.data_region jt32
159
- .set L_dyn_call_jt_entry0 , L_dyn_call_exec - L_dyn_call_jt
154
+ .set L_dyn_call_jt_entry0 , L_dyn_call_regs0 - L_dyn_call_jt
160
155
.set L_dyn_call_jt_entry1 , L_dyn_call_regs1 - L_dyn_call_jt
161
156
.set L_dyn_call_jt_entry2 , L_dyn_call_regs2 - L_dyn_call_jt
162
157
.set L_dyn_call_jt_entry3 , L_dyn_call_regs3 - L_dyn_call_jt
163
158
.set L_dyn_call_jt_entry4 , L_dyn_call_regs4 - L_dyn_call_jt
164
159
.set L_dyn_call_jt_entry5 , L_dyn_call_regs5 - L_dyn_call_jt
165
- .set L_dyn_call_jt_entry6 , L_dyn_call_regs6 - L_dyn_call_jt
166
160
L_dyn_call_jt:
167
161
.long L_dyn_call_jt_entry0 # argc == 0
168
162
.long L_dyn_call_jt_entry1 # argc == 1
169
163
.long L_dyn_call_jt_entry2 # argc == 2
170
164
.long L_dyn_call_jt_entry3 # argc == 3
171
165
.long L_dyn_call_jt_entry4 # argc == 4
172
166
.long L_dyn_call_jt_entry5 # argc == 5
173
- .long L_dyn_call_jt_entry6
174
167
.end_data_region
175
168
176
169
# The following is the LSDA metadata for exception handling
0 commit comments