Skip to content

Commit 0760a3d

Browse files
konstantin-s-bogom authored and gvisor-bot committed
kvm: reduce stack usage
Functions in debug builds use more stack space than in normal builds, so the KVM nosplit function call chain no longer fits within the nosplit stack limit. This patch replaces calls to the unix.RawSyscall* functions with variants that do not grow the stack, and manually inlines some functions in ring0/pagetables in order to reduce stack usage. Additionally, seccompMmapHandler is no longer used in debug builds, so that the call chain fits within the nosplit stack size requirements.

PiperOrigin-RevId: 679774881
1 parent 5e8dd64 commit 0760a3d

18 files changed

+232
-82
lines changed

pkg/ring0/pagetables/pagetables.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ type mapVisitor struct {
114114
//
115115
//go:nosplit
116116
func (v *mapVisitor) visit(start uintptr, pte *PTE, align uintptr) bool {
117-
p := v.physical + (start - uintptr(v.target))
117+
p := v.physical + (start - v.target)
118118
if pte.Valid() && (pte.Address() != p || pte.Opts() != v.opts) {
119119
v.prev = true
120120
}

pkg/ring0/pagetables/pagetables_aarch64.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,14 +158,20 @@ func (p *PTE) IsSect() bool {
158158
//go:nosplit
159159
func (p *PTE) Set(addr uintptr, opts MapOpts) {
160160
v := (addr &^ optionMask) | nG | readOnly | protDefault
161-
if p.IsSect() {
161+
// Note: p.IsSect is manually inlined to reduce stack size for
162+
// nosplit-ness.
163+
isSect := atomic.LoadUintptr((*uintptr)(p))&pteTypeMask == typeSect
164+
if isSect {
162165
// Note that this is inherited from the previous instance. Set
163166
// does not change the value of Sect. See above.
164167
v |= typeSect
165168
} else {
166169
v |= typePage
167170
}
168-
if !opts.AccessType.Any() {
171+
// Note: AccessType.Any() is manually inlined to reduce stack size for
172+
// nosplit-ness.
173+
accessTypeAny := opts.AccessType.Read || opts.AccessType.Write || opts.AccessType.Execute
174+
if !accessTypeAny {
169175
// Leave as non-valid if no access is available.
170176
v &^= pteValid
171177
}

pkg/seccomp/seccomp_unsafe.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,8 @@ func isKillProcessAvailable() (bool, error) {
113113
//
114114
//go:nosplit
115115
func seccomp(op, flags uint32, ptr unsafe.Pointer) (uintptr, unix.Errno) {
116-
n, _, errno := unix.RawSyscall(SYS_SECCOMP, uintptr(op), uintptr(flags), uintptr(ptr))
116+
// Note: Usage of RawSyscall6 over RawSyscall is intentional in order to
117+
// reduce stack-growth.
118+
n, _, errno := unix.RawSyscall6(SYS_SECCOMP, uintptr(op), uintptr(flags), uintptr(ptr), 0, 0, 0)
117119
return n, errno
118120
}

pkg/sentry/platform/kvm/BUILD

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,30 @@ go_template_instance(
1717
},
1818
)
1919

20+
config_setting(
21+
name = "debug_build",
22+
values = {
23+
"compilation_mode": "dbg",
24+
},
25+
)
26+
27+
# @unused
28+
glaze_ignore = [
29+
"seccomp_mmap_dbg.go",
30+
"seccomp_mmap_real.go",
31+
]
32+
33+
# Use either seccomp_mmap_dbg.go or seccomp_mmap_real.go as seccomp_mmap.go.
34+
genrule(
35+
name = "seccomp_mmap",
36+
srcs = select({
37+
":debug_build": ["seccomp_mmap_dbg.go"],
38+
"//conditions:default": ["seccomp_mmap_real.go"],
39+
}),
40+
outs = ["seccomp_mmap_unsafe.go"],
41+
cmd = "cat < $(SRCS) > $(OUTS)",
42+
)
43+
2044
go_library(
2145
name = "kvm",
2246
srcs = [
@@ -57,6 +81,7 @@ go_library(
5781
"physical_map.go",
5882
"physical_map_amd64.go",
5983
"physical_map_arm64.go",
84+
"seccomp_mmap_unsafe.go",
6085
"virtual_map.go",
6186
],
6287
visibility = ["//pkg/sentry:internal"],

pkg/sentry/platform/kvm/bluepill.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ const _SYS_KVM_RETURN_TO_HOST = ^uintptr(0)
7070
//
7171
//go:nosplit
7272
func redpill() {
73-
unix.RawSyscall(_SYS_KVM_RETURN_TO_HOST, 0, 0, 0)
73+
kvmSyscallErrno(_SYS_KVM_RETURN_TO_HOST, 0, 0, 0)
7474
}
7575

7676
// dieHandler is called by dieTrampoline.

pkg/sentry/platform/kvm/bluepill_amd64.s

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,3 +91,38 @@ TEXT ·currentCPU(SB), $0-8
9191
MOVQ ENTRY_CPU_SELF(GS), AX
9292
MOVQ AX, ret+0(FP)
9393
RET
94+
95+
// func kvmSyscallErrno6(trap, a1, a2, a3, a4, a5, a6 uintptr) (ret unix.Errno)
96+
TEXT ·kvmSyscallErrno6(SB),NOSPLIT,$0-64
97+
MOVQ a1+8(FP), DI
98+
MOVQ a2+16(FP), SI
99+
MOVQ a3+24(FP), DX
100+
MOVQ a4+32(FP), R10
101+
MOVQ a5+40(FP), R8
102+
MOVQ a6+48(FP), R9
103+
MOVQ trap+0(FP), AX // syscall entry
104+
SYSCALL
105+
CMPQ AX, $0xfffffffffffff001
106+
JLS ok
107+
NEGQ AX
108+
MOVQ AX, ret+56(FP) // ret
109+
RET
110+
ok:
111+
MOVQ $0, ret+56(FP) // ret
112+
RET
113+
114+
// func kvmSyscallErrno(trap, a1, a2, a3 uintptr) (ret unix.Errno)
115+
TEXT ·kvmSyscallErrno(SB),NOSPLIT,$0-40
116+
MOVQ a1+8(FP), DI
117+
MOVQ a2+16(FP), SI
118+
MOVQ a3+24(FP), DX
119+
MOVQ trap+0(FP), AX // syscall entry
120+
SYSCALL
121+
CMPQ AX, $0xfffffffffffff001
122+
JLS ok
123+
NEGQ AX
124+
MOVQ AX, ret+32(FP) // ret
125+
RET
126+
ok:
127+
MOVQ $0, ret+32(FP) // ret
128+
RET

pkg/sentry/platform/kvm/bluepill_amd64_unsafe.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ func getHypercallID(addr uintptr) int {
7474
func bluepillStopGuest(c *vCPU) {
7575
// Interrupt: we must have requested an interrupt
7676
// window; set the interrupt line.
77-
if _, _, errno := unix.RawSyscall(
77+
if errno := kvmSyscallErrno(
7878
unix.SYS_IOCTL,
7979
uintptr(c.fd),
8080
KVM_INTERRUPT,
@@ -89,7 +89,7 @@ func bluepillStopGuest(c *vCPU) {
8989
//
9090
//go:nosplit
9191
func bluepillSigBus(c *vCPU) {
92-
if _, _, errno := unix.RawSyscall( // escapes: no.
92+
if errno := kvmSyscallErrno(
9393
unix.SYS_IOCTL,
9494
uintptr(c.fd),
9595
KVM_NMI, 0); errno != 0 {
@@ -188,7 +188,6 @@ func bluepillUserHandler(frame uintptr) {
188188
sigframe.Sigreturn(c.bluepillSigframe)
189189
}
190190

191-
//go:nosplit
192191
func (c *vCPU) initBluepillHandler() error {
193192
stackSize := uintptr(hostarch.PageSize)
194193

pkg/sentry/platform/kvm/bluepill_arm64.s

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,3 +145,38 @@ TEXT ·addrOfDieTrampoline(SB), $0-8
145145
MOVD $·dieTrampoline(SB), R0
146146
MOVD R0, ret+0(FP)
147147
RET
148+
149+
// func kvmSyscallErrno6(trap, a1, a2, a3, a4, a5, a6 uintptr) (ret unix.Errno)
150+
TEXT ·kvmSyscallErrno6(SB),NOSPLIT,$0-64
151+
MOVD trap+0(FP), R8 // syscall entry
152+
MOVD a1+8(FP), R0
153+
MOVD a2+16(FP), R1
154+
MOVD a3+24(FP), R2
155+
MOVD a4+32(FP), R3
156+
MOVD a5+40(FP), R4
157+
MOVD a6+48(FP), R5
158+
SVC
159+
CMN $4095, R0
160+
BCC ok
161+
NEG R0, R0
162+
MOVD R0, ret+56(FP)
163+
RET
164+
ok:
165+
MOVD $0, ret+56(FP)
166+
RET
167+
168+
// func kvmSyscallErrno(trap, a1, a2, a3 uintptr) (ret unix.Errno)
169+
TEXT ·kvmSyscallErrno(SB),NOSPLIT,$0-40
170+
MOVD trap+0(FP), R8 // syscall entry
171+
MOVD a1+8(FP), R0
172+
MOVD a2+16(FP), R1
173+
MOVD a3+24(FP), R2
174+
SVC
175+
CMN $4095, R0
176+
BCC ok
177+
NEG R0, R0
178+
MOVD R0, ret+32(FP)
179+
RET
180+
ok:
181+
MOVD ZR, ret+32(FP)
182+
RET

pkg/sentry/platform/kvm/bluepill_arm64_unsafe.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ func bluepillStopGuest(c *vCPU) {
8888
},
8989
}
9090

91-
if _, _, errno := unix.RawSyscall( // escapes: no.
91+
if errno := kvmSyscallErrno( // escapes: no.
9292
unix.SYS_IOCTL,
9393
uintptr(c.fd),
9494
KVM_SET_VCPU_EVENTS,
@@ -111,7 +111,7 @@ func bluepillSigBus(c *vCPU) {
111111
}
112112

113113
// Host must support ARM64_HAS_RAS_EXTN.
114-
if _, _, errno := unix.RawSyscall( // escapes: no.
114+
if errno := kvmSyscallErrno( // escapes: no.
115115
unix.SYS_IOCTL,
116116
uintptr(c.fd),
117117
KVM_SET_VCPU_EVENTS,
@@ -134,7 +134,7 @@ func bluepillExtDabt(c *vCPU) {
134134
},
135135
}
136136

137-
if _, _, errno := unix.RawSyscall( // escapes: no.
137+
if errno := kvmSyscallErrno( // escapes: no.
138138
unix.SYS_IOCTL,
139139
uintptr(c.fd),
140140
KVM_SET_VCPU_EVENTS,

pkg/sentry/platform/kvm/bluepill_fault.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ var (
4040
//
4141
//go:nosplit
4242
func yield() {
43-
unix.RawSyscall(unix.SYS_SCHED_YIELD, 0, 0, 0)
43+
kvmSyscallErrno(unix.SYS_SCHED_YIELD, 0, 0, 0)
4444
}
4545

4646
// calculateBluepillFault calculates the fault address range.

0 commit comments

Comments
 (0)