Skip to content

Commit daa5f57

Browse files
committed
Calculate the padding automatically
1 parent f89e006 commit daa5f57

File tree

1 file changed

+43
-23
lines changed

1 file changed

+43
-23
lines changed

Python/perf_jit_trampoline.c

Lines changed: 43 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -101,10 +101,9 @@
101101
* /tmp/jitted-PID-0.so: [headers][.text][unwind_info][padding]
102102
* /tmp/jitted-PID-1.so: [headers][.text][unwind_info][padding]
103103
*
104-
* The padding size (0x100) is chosen to accommodate typical unwind info sizes
105-
* while maintaining 16-byte alignment requirements.
104+
* The padding size is calculated automatically during initialization
105+
* based on the actual unwind information requirements.
106106
*/
107-
#define PERF_JIT_CODE_PADDING 0x100
108107

109108

110109
/* These constants are defined inside <elf.h>, which we can't use outside of linux. */
@@ -666,6 +665,8 @@ static void elfctx_append_uleb128(ELFObjectContext* ctx, uint32_t v) {
666665
// DWARF EH FRAME GENERATION
667666
// =============================================================================
668667

668+
static void elf_init_ehframe(ELFObjectContext* ctx);
669+
669670
/*
670671
* Initialize DWARF .eh_frame section for a code region
671672
*
@@ -680,6 +681,23 @@ static void elfctx_append_uleb128(ELFObjectContext* ctx, uint32_t v) {
680681
* Args:
681682
* ctx: ELF object context containing code size and buffer pointers
682683
*/
684+
static size_t calculate_eh_frame_size(void) {
685+
/* Calculate the EH frame size for the trampoline function */
686+
extern void *_Py_trampoline_func_start;
687+
extern void *_Py_trampoline_func_end;
688+
689+
size_t code_size = (char*)&_Py_trampoline_func_end - (char*)&_Py_trampoline_func_start;
690+
691+
ELFObjectContext ctx;
692+
char buffer[1024]; // Buffer for DWARF data (1KB should be sufficient)
693+
ctx.code_size = code_size;
694+
ctx.startp = ctx.p = (uint8_t*)buffer;
695+
ctx.fde_p = NULL;
696+
697+
elf_init_ehframe(&ctx);
698+
return ctx.p - ctx.startp;
699+
}
700+
683701
static void elf_init_ehframe(ELFObjectContext* ctx) {
684702
uint8_t* p = ctx->p;
685703
uint8_t* framep = p; // Remember start of frame data
@@ -876,7 +894,7 @@ static void elf_init_ehframe(ELFObjectContext* ctx) {
876894
*
877895
* The FDE describes unwinding information specific to this function.
878896
* It references the CIE and provides function-specific CFI instructions.
879-
*
897+
*
880898
* The PC-relative offset is calculated after the entire EH frame is built
881899
* to ensure accurate positioning relative to the synthesized DSO layout.
882900
*/
@@ -901,16 +919,16 @@ static void elf_init_ehframe(ELFObjectContext* ctx) {
901919
# endif
902920
DWRF_U8(DWRF_CFA_advance_loc | 1); // Advance past push %rbp (1 byte)
903921
DWRF_U8(DWRF_CFA_def_cfa_offset); // def_cfa_offset 16
904-
DWRF_UV(16);
922+
DWRF_UV(16); // New offset: SP + 16
905923
DWRF_U8(DWRF_CFA_offset | DWRF_REG_BP); // offset r6 at cfa-16
906-
DWRF_UV(2);
924+
DWRF_UV(2); // Offset factor: 2 * 8 = 16 bytes
907925
DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance past mov %rsp,%rbp (3 bytes)
908926
DWRF_U8(DWRF_CFA_def_cfa_register); // def_cfa_register r6
909-
DWRF_UV(DWRF_REG_BP);
927+
DWRF_UV(DWRF_REG_BP); // Use base pointer register
910928
DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance past call *%rcx (2 bytes) + pop %rbp (1 byte) = 3
911929
DWRF_U8(DWRF_CFA_def_cfa); // def_cfa r7 ofs 8
912-
DWRF_UV(DWRF_REG_SP);
913-
DWRF_UV(8);
930+
DWRF_UV(DWRF_REG_SP); // Use stack pointer register
931+
DWRF_UV(8); // New offset: SP + 8
914932
#elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__)
915933
/* AArch64 calling convention unwinding rules */
916934
DWRF_U8(DWRF_CFA_advance_loc | 1); // Advance location by 1 instruction (stp x29, x30)
@@ -934,11 +952,11 @@ static void elf_init_ehframe(ELFObjectContext* ctx) {
934952
)
935953

936954
ctx->p = p; // Update context pointer to end of generated data
937-
955+
938956
/* Calculate and update the PC-relative offset in the FDE
939-
*
957+
*
940958
* When perf processes the jitdump, it creates a synthesized DSO with this layout:
941-
*
959+
*
942960
* Synthesized DSO Memory Layout:
943961
* ┌─────────────────────────────────────────────────────────────┐ < code_start
944962
* │ Code Section │
@@ -956,33 +974,33 @@ static void elf_init_ehframe(ELFObjectContext* ctx) {
956974
* │ │ CFI Instructions... │ │
957975
* │ └─────────────────────────────────────────────────────┘ │
958976
* ├─────────────────────────────────────────────────────────────┤ < reference_point
959-
* │ EhFrameHeader │
977+
* │ EhFrameHeader │
960978
* │ (navigation metadata) │
961979
* └─────────────────────────────────────────────────────────────┘
962-
*
980+
*
963981
* The PC offset field in the FDE must contain the distance from itself to code_start:
964-
*
982+
*
965983
* distance = code_start - fde_pc_field
966-
*
984+
*
967985
* Where:
968986
* fde_pc_field_location = reference_point - eh_frame_size + fde_offset_in_frame
969987
* code_start_location = reference_point - eh_frame_size - round_up(code_size, 8)
970-
*
988+
*
971989
* Therefore:
972990
* distance = code_start_location - fde_pc_field_location
973991
* = (ref - eh_frame_size - rounded_code_size) - (ref - eh_frame_size + fde_offset_in_frame)
974992
* = -rounded_code_size - fde_offset_in_frame
975993
* = -(round_up(code_size, 8) + fde_offset_in_frame)
976994
*
977995
* Note: fde_offset_in_frame is the offset from EH frame start to the PC offset field.
978-
*
996+
*
979997
*/
980998
if (ctx->fde_p != NULL) {
981999
int32_t fde_offset_in_frame = (ctx->fde_p - ctx->startp);
9821000
int32_t rounded_code_size = round_up(ctx->code_size, 8);
9831001
int32_t pc_relative_offset = -(rounded_code_size + fde_offset_in_frame);
984-
985-
1002+
1003+
9861004
// Update the PC-relative offset in the FDE
9871005
*(int32_t*)ctx->fde_p = pc_relative_offset;
9881006
}
@@ -1091,8 +1109,10 @@ static void* perf_map_jit_init(void) {
10911109
/* Initialize code ID counter */
10921110
perf_jit_map_state.code_id = 0;
10931111

1094-
/* Configure trampoline API with padding information */
1095-
trampoline_api.code_padding = PERF_JIT_CODE_PADDING;
1112+
/* Calculate padding size based on actual unwind info requirements */
1113+
size_t eh_frame_size = calculate_eh_frame_size();
1114+
size_t unwind_data_size = sizeof(EhFrameHeader) + eh_frame_size;
1115+
trampoline_api.code_padding = round_up(unwind_data_size, 16);
10961116

10971117
return &perf_jit_map_state;
10981118
}
@@ -1200,7 +1220,7 @@ static void perf_map_jit_write_entry(void *state, const void *code_addr,
12001220
ev2.unwind_data_size = sizeof(EhFrameHeader) + eh_frame_size;
12011221

12021222
/* Verify we don't exceed our padding budget */
1203-
assert(ev2.unwind_data_size <= PERF_JIT_CODE_PADDING);
1223+
assert(ev2.unwind_data_size <= (uint64_t)trampoline_api.code_padding);
12041224

12051225
ev2.eh_frame_hdr_size = sizeof(EhFrameHeader);
12061226
ev2.mapped_size = round_up(ev2.unwind_data_size, 16); // 16-byte alignment

0 commit comments

Comments
 (0)