Skip to content

Commit 1bec43f

Browse files
namhyung authored and acmel committed
perf trace: Implement syscall summary in BPF
When -s/--summary option is used, it doesn't need (augmented) arguments
of syscalls. Let's skip the augmentation and load another small BPF
program to collect the statistics in the kernel instead of copying the
data to the ring-buffer to calculate the stats in userspace. This will
be much more light-weight than the existing approach and remove any lost
events.

Let's add a new option --bpf-summary to control this behavior. I cannot
make it default because there's no way to get e_machine in the BPF which
is needed for detecting different ABIs like 32-bit compat mode.

No functional changes intended except for no more LOST events. :)

  $ sudo ./perf trace -as --summary-mode=total --bpf-summary sleep 1

   Summary of events:

   total, 6194 events

     syscall            calls  errors  total       min       avg       max       stddev
                                       (msec)    (msec)    (msec)    (msec)        (%)
     --------------- --------  ------ -------- --------- --------- ---------     ------
     epoll_wait           561      0  4530.843     0.000     8.076   520.941     18.75%
     futex                693     45  4317.231     0.000     6.230   500.077     21.98%
     poll                 300      0  1040.109     0.000     3.467   120.928     17.02%
     clock_nanosleep        1      0  1000.172  1000.172  1000.172  1000.172      0.00%
     ppoll                360      0   872.386     0.001     2.423   253.275     41.91%
     epoll_pwait           14      0   384.349     0.001    27.453   380.002     98.79%
     pselect6              14      0   108.130     7.198     7.724     8.206      0.85%
     nanosleep             39      0    43.378     0.069     1.112    10.084     44.23%
     ...

Reviewed-by: Howard Chu <[email protected]>
Signed-off-by: Namhyung Kim <[email protected]>
Tested-by: Arnaldo Carvalho de Melo <[email protected]>
Cc: Adrian Hunter <[email protected]>
Cc: Howard Chu <[email protected]>
Cc: Ian Rogers <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Kan Liang <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Song Liu <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
[ Added fixup sent from Namhyung in response to my report to make it also dependent on CONFIG_TRACE ]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent 85447f6 commit 1bec43f

File tree

8 files changed

+580
-13
lines changed

8 files changed

+580
-13
lines changed

tools/perf/Documentation/perf-trace.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,12 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
251251
pretty-printing serves as a fallback to hand-crafted pretty printers, as the latter can
252252
better pretty-print integer flags and struct pointers.
253253

254+
--bpf-summary::
255+
Collect system call statistics in BPF. This is only for live mode and
256+
works well with -s/--summary option where no argument information is
257+
required.
258+
259+
254260
PAGEFAULTS
255261
----------
256262

tools/perf/Makefile.perf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1175,7 +1175,7 @@ SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h
11751175
SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
11761176
SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h
11771177
SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h $(SKEL_OUT)/sample_filter.skel.h
1178-
SKELETONS += $(SKEL_OUT)/kwork_top.skel.h
1178+
SKELETONS += $(SKEL_OUT)/kwork_top.skel.h $(SKEL_OUT)/syscall_summary.skel.h
11791179
SKELETONS += $(SKEL_OUT)/bench_uprobe.skel.h
11801180
SKELETONS += $(SKEL_OUT)/augmented_raw_syscalls.skel.h
11811181

tools/perf/builtin-trace.c

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
#include "util/thread_map.h"
5656
#include "util/stat.h"
5757
#include "util/tool.h"
58+
#include "util/trace.h"
5859
#include "util/util.h"
5960
#include "trace/beauty/beauty.h"
6061
#include "trace-event.h"
@@ -141,12 +142,6 @@ struct syscall_fmt {
141142
bool hexret;
142143
};
143144

144-
enum summary_mode {
145-
SUMMARY__NONE = 0,
146-
SUMMARY__BY_TOTAL,
147-
SUMMARY__BY_THREAD,
148-
};
149-
150145
struct trace {
151146
struct perf_tool tool;
152147
struct {
@@ -205,7 +200,7 @@ struct trace {
205200
} stats;
206201
unsigned int max_stack;
207202
unsigned int min_stack;
208-
enum summary_mode summary_mode;
203+
enum trace_summary_mode summary_mode;
209204
int raw_augmented_syscalls_args_size;
210205
bool raw_augmented_syscalls;
211206
bool fd_path_disabled;
@@ -234,6 +229,7 @@ struct trace {
234229
bool force;
235230
bool vfs_getname;
236231
bool force_btf;
232+
bool summary_bpf;
237233
int trace_pgfaults;
238234
char *perfconfig_events;
239235
struct {
@@ -2614,6 +2610,9 @@ static void thread__update_stats(struct thread *thread, struct thread_trace *ttr
26142610
struct syscall_stats *stats = NULL;
26152611
u64 duration = 0;
26162612

2613+
if (trace->summary_bpf)
2614+
return;
2615+
26172616
if (trace->summary_mode == SUMMARY__BY_TOTAL)
26182617
syscall_stats = trace->syscall_stats;
26192618

@@ -4377,6 +4376,14 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
43774376

43784377
trace->live = true;
43794378

4379+
if (trace->summary_bpf) {
4380+
if (trace_prepare_bpf_summary(trace->summary_mode) < 0)
4381+
goto out_delete_evlist;
4382+
4383+
if (trace->summary_only)
4384+
goto create_maps;
4385+
}
4386+
43804387
if (!trace->raw_augmented_syscalls) {
43814388
if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
43824389
goto out_error_raw_syscalls;
@@ -4435,6 +4442,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
44354442
if (trace->cgroup)
44364443
evlist__set_default_cgroup(trace->evlist, trace->cgroup);
44374444

4445+
create_maps:
44384446
err = evlist__create_maps(evlist, &trace->opts.target);
44394447
if (err < 0) {
44404448
fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
@@ -4447,7 +4455,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
44474455
goto out_delete_evlist;
44484456
}
44494457

4450-
if (trace->summary_mode == SUMMARY__BY_TOTAL) {
4458+
if (trace->summary_mode == SUMMARY__BY_TOTAL && !trace->summary_bpf) {
44514459
trace->syscall_stats = alloc_syscall_stats();
44524460
if (trace->syscall_stats == NULL)
44534461
goto out_delete_evlist;
@@ -4535,9 +4543,11 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
45354543
if (err < 0)
45364544
goto out_error_apply_filters;
45374545

4538-
err = evlist__mmap(evlist, trace->opts.mmap_pages);
4539-
if (err < 0)
4540-
goto out_error_mmap;
4546+
if (!trace->summary_only || !trace->summary_bpf) {
4547+
err = evlist__mmap(evlist, trace->opts.mmap_pages);
4548+
if (err < 0)
4549+
goto out_error_mmap;
4550+
}
45414551

45424552
if (!target__none(&trace->opts.target) && !trace->opts.target.initial_delay)
45434553
evlist__enable(evlist);
@@ -4550,6 +4560,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
45504560
evlist__enable(evlist);
45514561
}
45524562

4563+
if (trace->summary_bpf)
4564+
trace_start_bpf_summary();
4565+
45534566
trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 ||
45544567
perf_thread_map__nr(evlist->core.threads) > 1 ||
45554568
evlist__first(evlist)->core.attr.inherit;
@@ -4617,12 +4630,17 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
46174630

46184631
evlist__disable(evlist);
46194632

4633+
if (trace->summary_bpf)
4634+
trace_end_bpf_summary();
4635+
46204636
if (trace->sort_events)
46214637
ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL);
46224638

46234639
if (!err) {
46244640
if (trace->summary) {
4625-
if (trace->summary_mode == SUMMARY__BY_TOTAL)
4641+
if (trace->summary_bpf)
4642+
trace_print_bpf_summary(trace->output);
4643+
else if (trace->summary_mode == SUMMARY__BY_TOTAL)
46264644
trace__fprintf_total_summary(trace, trace->output);
46274645
else
46284646
trace__fprintf_thread_summary(trace, trace->output);
@@ -4638,6 +4656,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
46384656
}
46394657

46404658
out_delete_evlist:
4659+
trace_cleanup_bpf_summary();
46414660
delete_syscall_stats(trace->syscall_stats);
46424661
trace__symbols__exit(trace);
46434662
evlist__free_syscall_tp_fields(evlist);
@@ -5473,6 +5492,7 @@ int cmd_trace(int argc, const char **argv)
54735492
"start"),
54745493
OPT_BOOLEAN(0, "force-btf", &trace.force_btf, "Prefer btf_dump general pretty printer"
54755494
"to customized ones"),
5495+
OPT_BOOLEAN(0, "bpf-summary", &trace.summary_bpf, "Summary syscall stats in BPF"),
54765496
OPTS_EVSWITCH(&trace.evswitch),
54775497
OPT_END()
54785498
};
@@ -5564,6 +5584,16 @@ int cmd_trace(int argc, const char **argv)
55645584
goto skip_augmentation;
55655585
}
55665586

5587+
if (trace.summary_bpf) {
5588+
if (!trace.opts.target.system_wide) {
5589+
/* TODO: Add filters in the BPF to support other targets. */
5590+
pr_err("Error: --bpf-summary only works for system-wide mode.\n");
5591+
goto out;
5592+
}
5593+
if (trace.summary_only)
5594+
goto skip_augmentation;
5595+
}
5596+
55675597
trace.skel = augmented_raw_syscalls_bpf__open();
55685598
if (!trace.skel) {
55695599
pr_debug("Failed to open augmented syscalls BPF skeleton");

tools/perf/util/Build

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,10 @@ perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-flex.o
173173
perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-bison.o
174174
perf-util-$(CONFIG_PERF_BPF_SKEL) += btf.o
175175

176+
ifeq ($(CONFIG_TRACE),y)
177+
perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-trace-summary.o
178+
endif
179+
176180
ifeq ($(CONFIG_LIBTRACEEVENT),y)
177181
perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o
178182
endif

0 commit comments

Comments
 (0)