Skip to content

Commit 9014cf5

Browse files
Xu Kuohai authored and Alexei Starovoitov committed
bpf, arm64: Support up to 12 function arguments
Currently the ARM64 bpf trampoline supports up to 8 function arguments. According to the statistics from commit 473e315 ("bpf, x86: allow function arguments up to 12 for TRACING"), about 200 functions accept 9 to 12 arguments, so add support for up to 12 function arguments. Because bpf only supports function arguments of up to 16 bytes, AAPCS64 argument passing works as follows: starting from the first argument, each argument is first assigned to the 1 or 2 lowest-numbered available registers from x0-x7; if there are not enough registers left to hold the entire argument, then that argument and all remaining arguments are passed on the stack. There are some non-trivial cases in which it is not possible to correctly read arguments from, or write arguments to, the stack: for example, struct variables may have custom packing/alignment attributes that are invisible in BTF info. Such cases are denied for now, to make sure incorrect values are never read. Signed-off-by: Xu Kuohai <[email protected]> Co-developed-by: Alexis Lothoré (eBPF Foundation) <[email protected]> Signed-off-by: Alexis Lothoré (eBPF Foundation) <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent d496557 commit 9014cf5

File tree

1 file changed

+171
-54
lines changed

1 file changed

+171
-54
lines changed

arch/arm64/net/bpf_jit_comp.c

Lines changed: 171 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -2064,7 +2064,7 @@ bool bpf_jit_supports_subprog_tailcalls(void)
20642064
}
20652065

20662066
static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
2067-
int args_off, int retval_off, int run_ctx_off,
2067+
int bargs_off, int retval_off, int run_ctx_off,
20682068
bool save_ret)
20692069
{
20702070
__le32 *branch;
@@ -2106,7 +2106,7 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
21062106
branch = ctx->image + ctx->idx;
21072107
emit(A64_NOP, ctx);
21082108

2109-
emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx);
2109+
emit(A64_ADD_I(1, A64_R(0), A64_SP, bargs_off), ctx);
21102110
if (!p->jited)
21112111
emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx);
21122112

@@ -2131,7 +2131,7 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
21312131
}
21322132

21332133
static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
2134-
int args_off, int retval_off, int run_ctx_off,
2134+
int bargs_off, int retval_off, int run_ctx_off,
21352135
__le32 **branches)
21362136
{
21372137
int i;
@@ -2141,7 +2141,7 @@ static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
21412141
*/
21422142
emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx);
21432143
for (i = 0; i < tl->nr_links; i++) {
2144-
invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off,
2144+
invoke_bpf_prog(ctx, tl->links[i], bargs_off, retval_off,
21452145
run_ctx_off, true);
21462146
/* if (*(u64 *)(sp + retval_off) != 0)
21472147
* goto do_fexit;
@@ -2155,23 +2155,125 @@ static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
21552155
}
21562156
}
21572157

2158-
static void save_args(struct jit_ctx *ctx, int args_off, int nregs)
2158+
struct arg_aux {
2159+
/* how many args are passed through registers, the rest of the args are
2160+
* passed through stack
2161+
*/
2162+
int args_in_regs;
2163+
/* how many registers are used to pass arguments */
2164+
int regs_for_args;
2165+
/* how much stack is used for additional args passed to bpf program
2166+
* that did not fit in original function registers
2167+
*/
2168+
int bstack_for_args;
2169+
/* how much stack is used for additional args passed to the
2170+
* original function when called from trampoline (this one needs
2171+
* arguments to be properly aligned)
2172+
*/
2173+
int ostack_for_args;
2174+
};
2175+
2176+
static int calc_arg_aux(const struct btf_func_model *m,
2177+
struct arg_aux *a)
21592178
{
2160-
int i;
2179+
int stack_slots, nregs, slots, i;
2180+
2181+
/* verifier ensures m->nr_args <= MAX_BPF_FUNC_ARGS */
2182+
for (i = 0, nregs = 0; i < m->nr_args; i++) {
2183+
slots = (m->arg_size[i] + 7) / 8;
2184+
if (nregs + slots <= 8) /* passed through register ? */
2185+
nregs += slots;
2186+
else
2187+
break;
2188+
}
2189+
2190+
a->args_in_regs = i;
2191+
a->regs_for_args = nregs;
2192+
a->ostack_for_args = 0;
2193+
a->bstack_for_args = 0;
21612194

2162-
for (i = 0; i < nregs; i++) {
2163-
emit(A64_STR64I(i, A64_SP, args_off), ctx);
2164-
args_off += 8;
2195+
/* the remaining arguments are passed through the stack */
2196+
for (; i < m->nr_args; i++) {
2197+
/* We can not know for sure about exact alignment needs for
2198+
* struct passed on stack, so deny those
2199+
*/
2200+
if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
2201+
return -ENOTSUPP;
2202+
stack_slots = (m->arg_size[i] + 7) / 8;
2203+
a->bstack_for_args += stack_slots * 8;
2204+
a->ostack_for_args = a->ostack_for_args + stack_slots * 8;
2205+
}
2206+
2207+
return 0;
2208+
}
2209+
2210+
static void clear_garbage(struct jit_ctx *ctx, int reg, int effective_bytes)
2211+
{
2212+
if (effective_bytes) {
2213+
int garbage_bits = 64 - 8 * effective_bytes;
2214+
#ifdef CONFIG_CPU_BIG_ENDIAN
2215+
/* garbage bits are at the right end */
2216+
emit(A64_LSR(1, reg, reg, garbage_bits), ctx);
2217+
emit(A64_LSL(1, reg, reg, garbage_bits), ctx);
2218+
#else
2219+
/* garbage bits are at the left end */
2220+
emit(A64_LSL(1, reg, reg, garbage_bits), ctx);
2221+
emit(A64_LSR(1, reg, reg, garbage_bits), ctx);
2222+
#endif
21652223
}
21662224
}
21672225

2168-
static void restore_args(struct jit_ctx *ctx, int args_off, int nregs)
2226+
static void save_args(struct jit_ctx *ctx, int bargs_off, int oargs_off,
2227+
const struct btf_func_model *m,
2228+
const struct arg_aux *a,
2229+
bool for_call_origin)
21692230
{
21702231
int i;
2232+
int reg;
2233+
int doff;
2234+
int soff;
2235+
int slots;
2236+
u8 tmp = bpf2a64[TMP_REG_1];
2237+
2238+
/* store arguments to the stack for the bpf program, or restore
2239+
* arguments from stack for the original function
2240+
*/
2241+
for (reg = 0; reg < a->regs_for_args; reg++) {
2242+
emit(for_call_origin ?
2243+
A64_LDR64I(reg, A64_SP, bargs_off) :
2244+
A64_STR64I(reg, A64_SP, bargs_off),
2245+
ctx);
2246+
bargs_off += 8;
2247+
}
21712248

2172-
for (i = 0; i < nregs; i++) {
2173-
emit(A64_LDR64I(i, A64_SP, args_off), ctx);
2174-
args_off += 8;
2249+
soff = 32; /* on stack arguments start from FP + 32 */
2250+
doff = (for_call_origin ? oargs_off : bargs_off);
2251+
2252+
/* save on stack arguments */
2253+
for (i = a->args_in_regs; i < m->nr_args; i++) {
2254+
slots = (m->arg_size[i] + 7) / 8;
2255+
/* verifier ensures arg_size <= 16, so slots equals 1 or 2 */
2256+
while (slots-- > 0) {
2257+
emit(A64_LDR64I(tmp, A64_FP, soff), ctx);
2258+
/* if there is unused space in the last slot, clear
2259+
* the garbage contained in the space.
2260+
*/
2261+
if (slots == 0 && !for_call_origin)
2262+
clear_garbage(ctx, tmp, m->arg_size[i] % 8);
2263+
emit(A64_STR64I(tmp, A64_SP, doff), ctx);
2264+
soff += 8;
2265+
doff += 8;
2266+
}
2267+
}
2268+
}
2269+
2270+
static void restore_args(struct jit_ctx *ctx, int bargs_off, int nregs)
2271+
{
2272+
int reg;
2273+
2274+
for (reg = 0; reg < nregs; reg++) {
2275+
emit(A64_LDR64I(reg, A64_SP, bargs_off), ctx);
2276+
bargs_off += 8;
21752277
}
21762278
}
21772279

@@ -2194,17 +2296,21 @@ static bool is_struct_ops_tramp(const struct bpf_tramp_links *fentry_links)
21942296
*/
21952297
static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
21962298
struct bpf_tramp_links *tlinks, void *func_addr,
2197-
int nregs, u32 flags)
2299+
const struct btf_func_model *m,
2300+
const struct arg_aux *a,
2301+
u32 flags)
21982302
{
21992303
int i;
22002304
int stack_size;
22012305
int retaddr_off;
22022306
int regs_off;
22032307
int retval_off;
2204-
int args_off;
2205-
int nregs_off;
2308+
int bargs_off;
2309+
int nfuncargs_off;
22062310
int ip_off;
22072311
int run_ctx_off;
2312+
int oargs_off;
2313+
int nfuncargs;
22082314
struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
22092315
struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
22102316
struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
@@ -2213,31 +2319,38 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
22132319
bool is_struct_ops = is_struct_ops_tramp(fentry);
22142320

22152321
/* trampoline stack layout:
2216-
* [ parent ip ]
2217-
* [ FP ]
2218-
* SP + retaddr_off [ self ip ]
2219-
* [ FP ]
2322+
* [ parent ip ]
2323+
* [ FP ]
2324+
* SP + retaddr_off [ self ip ]
2325+
* [ FP ]
22202326
*
2221-
* [ padding ] align SP to multiples of 16
2327+
* [ padding ] align SP to multiples of 16
22222328
*
2223-
* [ x20 ] callee saved reg x20
2224-
* SP + regs_off [ x19 ] callee saved reg x19
2329+
* [ x20 ] callee saved reg x20
2330+
* SP + regs_off [ x19 ] callee saved reg x19
22252331
*
2226-
* SP + retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or
2227-
* BPF_TRAMP_F_RET_FENTRY_RET
2332+
* SP + retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or
2333+
* BPF_TRAMP_F_RET_FENTRY_RET
2334+
* [ arg reg N ]
2335+
* [ ... ]
2336+
* SP + bargs_off [ arg reg 1 ] for bpf
22282337
*
2229-
* [ arg reg N ]
2230-
* [ ... ]
2231-
* SP + args_off [ arg reg 1 ]
2338+
* SP + nfuncargs_off [ arg regs count ]
22322339
*
2233-
* SP + nregs_off [ arg regs count ]
2340+
* SP + ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag
22342341
*
2235-
* SP + ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag
2342+
* SP + run_ctx_off [ bpf_tramp_run_ctx ]
22362343
*
2237-
* SP + run_ctx_off [ bpf_tramp_run_ctx ]
2344+
* [ stack arg N ]
2345+
* [ ... ]
2346+
* SP + oargs_off [ stack arg 1 ] for original func
22382347
*/
22392348

22402349
stack_size = 0;
2350+
oargs_off = stack_size;
2351+
if (flags & BPF_TRAMP_F_CALL_ORIG)
2352+
stack_size += a->ostack_for_args;
2353+
22412354
run_ctx_off = stack_size;
22422355
/* room for bpf_tramp_run_ctx */
22432356
stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);
@@ -2247,13 +2360,14 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
22472360
if (flags & BPF_TRAMP_F_IP_ARG)
22482361
stack_size += 8;
22492362

2250-
nregs_off = stack_size;
2363+
nfuncargs_off = stack_size;
22512364
/* room for args count */
22522365
stack_size += 8;
22532366

2254-
args_off = stack_size;
2367+
bargs_off = stack_size;
22552368
/* room for args */
2256-
stack_size += nregs * 8;
2369+
nfuncargs = a->regs_for_args + a->bstack_for_args / 8;
2370+
stack_size += 8 * nfuncargs;
22572371

22582372
/* room for return value */
22592373
retval_off = stack_size;
@@ -2300,11 +2414,11 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
23002414
}
23012415

23022416
/* save args count (register slots plus stack slots) */
2303-
emit(A64_MOVZ(1, A64_R(10), nregs, 0), ctx);
2304-
emit(A64_STR64I(A64_R(10), A64_SP, nregs_off), ctx);
2417+
emit(A64_MOVZ(1, A64_R(10), nfuncargs, 0), ctx);
2418+
emit(A64_STR64I(A64_R(10), A64_SP, nfuncargs_off), ctx);
23052419

2306-
/* save arg regs */
2307-
save_args(ctx, args_off, nregs);
2420+
/* save args for bpf */
2421+
save_args(ctx, bargs_off, oargs_off, m, a, false);
23082422

23092423
/* save callee saved registers */
23102424
emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx);
@@ -2320,7 +2434,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
23202434
}
23212435

23222436
for (i = 0; i < fentry->nr_links; i++)
2323-
invoke_bpf_prog(ctx, fentry->links[i], args_off,
2437+
invoke_bpf_prog(ctx, fentry->links[i], bargs_off,
23242438
retval_off, run_ctx_off,
23252439
flags & BPF_TRAMP_F_RET_FENTRY_RET);
23262440

@@ -2330,12 +2444,13 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
23302444
if (!branches)
23312445
return -ENOMEM;
23322446

2333-
invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off,
2447+
invoke_bpf_mod_ret(ctx, fmod_ret, bargs_off, retval_off,
23342448
run_ctx_off, branches);
23352449
}
23362450

23372451
if (flags & BPF_TRAMP_F_CALL_ORIG) {
2338-
restore_args(ctx, args_off, nregs);
2452+
/* save args for original func */
2453+
save_args(ctx, bargs_off, oargs_off, m, a, true);
23392454
/* call original func */
23402455
emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx);
23412456
emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx);
@@ -2354,7 +2469,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
23542469
}
23552470

23562471
for (i = 0; i < fexit->nr_links; i++)
2357-
invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off,
2472+
invoke_bpf_prog(ctx, fexit->links[i], bargs_off, retval_off,
23582473
run_ctx_off, false);
23592474

23602475
if (flags & BPF_TRAMP_F_CALL_ORIG) {
@@ -2368,7 +2483,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
23682483
}
23692484

23702485
if (flags & BPF_TRAMP_F_RESTORE_REGS)
2371-
restore_args(ctx, args_off, nregs);
2486+
restore_args(ctx, bargs_off, a->regs_for_args);
23722487

23732488
/* restore callee saved register x19 and x20 */
23742489
emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx);
@@ -2428,14 +2543,16 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
24282543
.idx = 0,
24292544
};
24302545
struct bpf_tramp_image im;
2546+
struct arg_aux aaux;
24312547
int nregs, ret;
24322548

24332549
nregs = btf_func_model_nregs(m);
2434-
/* the first 8 registers are used for arguments */
2435-
if (nregs > 8)
2436-
return -ENOTSUPP;
24372550

2438-
ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, nregs, flags);
2551+
ret = calc_arg_aux(m, &aaux);
2552+
if (ret < 0)
2553+
return ret;
2554+
2555+
ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, m, &aaux, flags);
24392556
if (ret < 0)
24402557
return ret;
24412558

@@ -2462,9 +2579,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
24622579
u32 flags, struct bpf_tramp_links *tlinks,
24632580
void *func_addr)
24642581
{
2465-
int ret, nregs;
2466-
void *image, *tmp;
24672582
u32 size = ro_image_end - ro_image;
2583+
struct arg_aux aaux;
2584+
void *image, *tmp;
2585+
int ret;
24682586

24692587
/* image doesn't need to be in module memory range, so we can
24702588
* use kvmalloc.
@@ -2480,13 +2598,12 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
24802598
.write = true,
24812599
};
24822600

2483-
nregs = btf_func_model_nregs(m);
2484-
/* the first 8 registers are used for arguments */
2485-
if (nregs > 8)
2486-
return -ENOTSUPP;
24872601

24882602
jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image));
2489-
ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags);
2603+
ret = calc_arg_aux(m, &aaux);
2604+
if (ret)
2605+
goto out;
2606+
ret = prepare_trampoline(&ctx, im, tlinks, func_addr, m, &aaux, flags);
24902607

24912608
if (ret > 0 && validate_code(&ctx) < 0) {
24922609
ret = -EINVAL;

0 commit comments

Comments
 (0)