Skip to content

Commit e76c3af

Browse files
committed
Support dynamic linking
1 parent e95fccc commit e76c3af

File tree

15 files changed

+788
-168
lines changed

15 files changed

+788
-168
lines changed

Makefile

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,15 @@ STAGE0 := shecc
3636
STAGE1 := shecc-stage1.elf
3737
STAGE2 := shecc-stage2.elf
3838

39+
# Dynamic-linking configuration (opt in with `make DYNLINK=1`).
#   BUILTIN_LIBC - file under $(LIBDIR) that is normalized and inlined into
#                  $(OUT)/libc.inc (c.c by default, c.h when DYNLINK=1).
#   STAGE0_FLAGS - options passed to $(OUT)/$(STAGE0) when it builds stage 1.
#   STAGE1_FLAGS - options passed to $(OUT)/$(STAGE1) when it builds stage 2.
# The `?=` defaults can be pre-set from the environment or the command line.
BUILTIN_LIBC ?= c.c
STAGE0_FLAGS ?= --dump-ir
STAGE1_FLAGS ?=
ifeq ($(DYNLINK),1)
# `:=` replaces whatever BUILTIN_LIBC held so far (default or environment);
# a command-line `BUILTIN_LIBC=...` still takes precedence over it.
BUILTIN_LIBC := c.h
# Both compiler stages must be told to emit dynamically linked output.
STAGE0_FLAGS += --dynlink
STAGE1_FLAGS += --dynlink
endif
47+
3948
OUT ?= out
4049
ARCHS = arm riscv
4150
ARCH ?= $(firstword $(ARCHS))
@@ -122,9 +131,9 @@ $(OUT)/norm-lf: tools/norm-lf.c
122131
$(VECHO) " CC+LD\t$@\n"
123132
$(Q)$(CC) $(CFLAGS) -o $@ $^
124133

125-
$(OUT)/libc.inc: $(OUT)/inliner $(OUT)/norm-lf $(LIBDIR)/c.c
134+
$(OUT)/libc.inc: $(OUT)/inliner $(OUT)/norm-lf $(LIBDIR)/$(BUILTIN_LIBC)
126135
$(VECHO) " GEN\t$@\n"
127-
$(Q)$(OUT)/norm-lf $(LIBDIR)/c.c $(OUT)/c.normalized.c
136+
$(Q)$(OUT)/norm-lf $(LIBDIR)/$(BUILTIN_LIBC) $(OUT)/c.normalized.c
128137
$(Q)$(OUT)/inliner $(OUT)/c.normalized.c $@
129138
$(Q)$(RM) $(OUT)/c.normalized.c
130139

@@ -143,12 +152,12 @@ $(OUT)/$(STAGE0)-sanitizer: $(OUT)/libc.inc $(OBJS)
143152
$(OUT)/$(STAGE1): $(OUT)/$(STAGE0)
144153
$(Q)$(STAGE1_CHECK_CMD)
145154
$(VECHO) " SHECC\t$@\n"
146-
$(Q)$(OUT)/$(STAGE0) --dump-ir -o $@ $(SRCDIR)/main.c > $(OUT)/shecc-stage1.log
155+
$(Q)$(OUT)/$(STAGE0) $(STAGE0_FLAGS) -o $@ $(SRCDIR)/main.c > $(OUT)/shecc-stage1.log
147156
$(Q)chmod a+x $@
148157

149158
$(OUT)/$(STAGE2): $(OUT)/$(STAGE1)
150159
$(VECHO) " SHECC\t$@\n"
151-
$(Q)$(TARGET_EXEC) $(OUT)/$(STAGE1) -o $@ $(SRCDIR)/main.c
160+
$(Q)$(TARGET_EXEC) $(OUT)/$(STAGE1) $(STAGE1_FLAGS) -o $@ $(SRCDIR)/main.c
152161

153162
bootstrap: $(OUT)/$(STAGE2)
154163
$(Q)chmod 775 $(OUT)/$(STAGE2)

lib/c.h

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/*
2+
* shecc - Self-Hosting and Educational C Compiler.
3+
*
4+
* shecc is freely redistributable under the BSD 2 clause license. See the
5+
* file "LICENSE" for information on usage and redistribution of this file.
6+
*/
7+
8+
#pragma once
9+
/* Declarations of C standard library functions */
10+
11+
#define NULL 0
12+
13+
#define bool _Bool
14+
#define true 1
15+
#define false 0
16+
17+
/* File I/O */
18+
typedef int FILE;
19+
FILE *fopen(char *filename, char *mode);
20+
int fclose(FILE *stream);
21+
int fgetc(FILE *stream);
22+
char *fgets(char *str, int n, FILE *stream);
23+
int fputc(int c, FILE *stream);
24+
25+
/* string-related functions */
26+
int strlen(char *str);
27+
int strcmp(char *s1, char *s2);
28+
int strncmp(char *s1, char *s2, int len);
29+
char *strcpy(char *dest, char *src);
30+
char *strncpy(char *dest, char *src, int len);
31+
char *memcpy(char *dest, char *src, int count);
32+
int memcmp(void *s1, void *s2, int n);
33+
void *memset(void *s, int c, int n);
34+
35+
/* formatted output string */
36+
int printf(char *str, ...);
37+
int sprintf(char *buffer, char *str, ...);
38+
int snprintf(char *buffer, int n, char *str, ...);
39+
40+
/* Terminating program */
41+
void exit(int exit_code);
42+
void abort(void);
43+
44+
/* Dynamic memory allocation/deallocation functions */
45+
void *malloc(int size);
46+
void *calloc(int n, int size);
47+
void free(void *ptr);

mk/arm.mk

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,10 @@ ARCH_DEFS = \
66
\#define ARCH_PREDEFINED \"__arm__\" /* defined by GNU C and RealView */\n$\
77
\#define ELF_MACHINE 0x28 /* up to ARMv7/Aarch32 */\n$\
88
\#define ELF_FLAGS 0x5000200\n$\
9+
\#define DYN_LINKER \"/lib/ld-linux.so.3\"\n$\
10+
\#define LIBC_SO \"libc.so.6\"\n$\
11+
\#define PLT_FIXUP_SIZE 20\n$\
12+
\#define PLT_ENT_SIZE 12\n$\
13+
\#define R_ARCH_JUMP_SLOT 0x16\n$\
914
"
15+
RUNNER_LD_PREFIX=-L /usr/arm-linux-gnueabi/

mk/common.mk

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ ifneq ($(HOST_ARCH),$(ARCH_NAME))
3636

3737
# Generate the path to the architecture-specific qemu
3838
TARGET_EXEC = $(shell which $(ARCH_RUNNER))
39+
ifeq ($(DYNLINK),1)
40+
TARGET_EXEC += $(RUNNER_LD_PREFIX)
41+
endif
3942
endif
4043
export TARGET_EXEC
4144

mk/riscv.mk

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,12 @@ ARCH_DEFS = \
77
\#define ARCH_PREDEFINED \"__riscv\" /* Older versions of the GCC toolchain defined __riscv__ */\n$\
88
\#define ELF_MACHINE 0xf3\n$\
99
\#define ELF_FLAGS 0\n$\
10+
\#define DYN_LINKER \"/lib/ld-linux.so.3\"\n$\
11+
\#define LIBC_SO \"libc.so.6\"\n$\
12+
\#define PLT_FIXUP_SIZE 20\n$\
13+
\#define PLT_ENT_SIZE 12\n$\
14+
\#define R_ARCH_JUMP_SLOT 0x5\n$\
1015
"
16+
17+
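# TODO: Set this to the runner's sysroot option for RISC-V (mk/arm.mk uses
# `-L /usr/arm-linux-gnueabi/`); it is appended to TARGET_EXEC when DYNLINK=1.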
18+
RUNNER_LD_PREFIX=

src/arm-codegen.c

Lines changed: 103 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -136,10 +136,17 @@ void update_elf_offset(ph2_ir_t *ph2_ir)
136136

137137
void cfg_flatten(void)
138138
{
139-
func_t *func = find_func("__syscall");
140-
func->bbs->elf_offset = 48; /* offset of start + branch + exit in codegen */
139+
func_t *func;
140+
141+
if (dynlink)
142+
elf_offset = 88; /* offset of start + branch + exit in codegen */
143+
else {
144+
func = find_func("__syscall");
145+
func->bbs->elf_offset = 48; /* offset of start + exit in codegen */
146+
elf_offset =
147+
84; /* offset of start + branch + exit + syscall in codegen */
148+
}
141149

142-
elf_offset = 84; /* offset of start + branch + exit + syscall in codegen */
143150
GLOBAL_FUNC->bbs->elf_offset = elf_offset;
144151

145152
for (ph2_ir_t *ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir;
@@ -148,7 +155,10 @@ void cfg_flatten(void)
148155
}
149156

150157
/* prepare 'argc' and 'argv', then proceed to 'main' function */
151-
elf_offset += 32; /* 6 insns for main call + 2 for exit */
158+
if (dynlink)
159+
elf_offset += 20;
160+
else
161+
elf_offset += 32; /* 6 insns for main call + 2 for exit */
152162

153163
for (func = FUNC_LIST.head; func; func = func->next) {
154164
/* Skip function declarations without bodies */
@@ -287,7 +297,12 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
287297
return;
288298
case OP_call:
289299
func = find_func(ph2_ir->func_name);
290-
emit(__bl(__AL, func->bbs->elf_offset - elf_code->size));
300+
if (func->bbs)
301+
ofs = func->bbs->elf_offset - elf_code->size;
302+
else
303+
ofs = (elf_plt_start + func->plt_offset) -
304+
(elf_code_start + elf_code->size);
305+
emit(__bl(__AL, ofs));
291306
return;
292307
case OP_load_data_address:
293308
emit(__movw(__AL, rd, ph2_ir->src0 + elf_data_start));
@@ -299,7 +314,10 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
299314
return;
300315
case OP_address_of_func:
301316
func = find_func(ph2_ir->func_name);
302-
ofs = elf_code_start + func->bbs->elf_offset;
317+
if (func->bbs)
318+
ofs = elf_code_start + func->bbs->elf_offset;
319+
else
320+
ofs = elf_plt_start + func->plt_offset;
303321
emit(__movw(__AL, __r8, ofs));
304322
emit(__movt(__AL, __r8, ofs));
305323
emit(__sw(__AL, __r8, rn, 0));
@@ -456,13 +474,42 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
456474
}
457475
}
458476

477+
void plt_generate(void);
459478
void code_generate(void)
460479
{
461-
elf_data_start = elf_code_start + elf_offset;
462-
elf_rodata_start = elf_data_start + elf_data->size;
463-
elf_bss_start = elf_rodata_start + elf_rodata->size;
480+
if (dynlink) {
481+
plt_generate();
482+
/* Call __libc_start_main() */
483+
emit(__mov_i(__AL, __r11, 0));
484+
emit(__mov_i(__AL, __lr, 0));
485+
emit(__pop_word(__AL, __r1));
486+
emit(__mov_r(__AL, __r2, __sp));
487+
emit(__push_reg(__AL, __r2));
488+
emit(__push_reg(__AL, __r0));
489+
emit(__mov_i(__AL, __r12, 0));
490+
emit(__push_reg(__AL, __r12));
491+
492+
int main_wrapper_offset = elf_code->size + 24;
493+
emit(__movw(__AL, __r0, elf_code_start + main_wrapper_offset));
494+
emit(__movt(__AL, __r0, elf_code_start + main_wrapper_offset));
495+
emit(__mov_i(__AL, __r3, 0));
496+
emit(__bl(__AL, (elf_plt_start + PLT_FIXUP_SIZE) -
497+
(elf_code_start + elf_code->size)));
498+
/* Goto the 'exit' code snippet if __libc_start_main returns */
499+
emit(__mov_i(__AL, __r0, 127));
500+
emit(__bl(__AL, 28));
464501

465-
/* start */
502+
/* If the compiled program is dynamic linking, the starting
503+
* point of 'start' is located here.
504+
*
505+
* Preserve 'argc' and 'argv' for the 'main' function.
506+
* */
507+
emit(__mov_r(__AL, __r9, __r0));
508+
emit(__mov_r(__AL, __r10, __r1));
509+
}
510+
/* For both static and dynamic linking, we need to set up the stack
511+
* and call the main function.
512+
* */
466513
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
467514
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
468515
emit(__sub_r(__AL, __sp, __sp, __r8));
@@ -471,24 +518,26 @@ void code_generate(void)
471518
/* After global init, jump to main preparation */
472519
emit(__b(__AL, 56)); /* PC+8: skip exit (24) + syscall (36) + ret (4) - 8 */
473520

474-
/* exit */
475-
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
476-
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
477-
emit(__add_r(__AL, __sp, __sp, __r8));
478-
emit(__mov_r(__AL, __r0, __r0));
479-
emit(__mov_i(__AL, __r7, 1));
480-
emit(__svc());
521+
if (!dynlink) {
522+
/* exit - only for static linking */
523+
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
524+
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
525+
emit(__add_r(__AL, __sp, __sp, __r8));
526+
emit(__mov_r(__AL, __r0, __r0));
527+
emit(__mov_i(__AL, __r7, 1));
528+
emit(__svc());
481529

482-
/* syscall */
483-
emit(__mov_r(__AL, __r7, __r0));
484-
emit(__mov_r(__AL, __r0, __r1));
485-
emit(__mov_r(__AL, __r1, __r2));
486-
emit(__mov_r(__AL, __r2, __r3));
487-
emit(__mov_r(__AL, __r3, __r4));
488-
emit(__mov_r(__AL, __r4, __r5));
489-
emit(__mov_r(__AL, __r5, __r6));
490-
emit(__svc());
491-
emit(__bx(__AL, __lr));
530+
/* syscall */
531+
emit(__mov_r(__AL, __r7, __r0));
532+
emit(__mov_r(__AL, __r0, __r1));
533+
emit(__mov_r(__AL, __r1, __r2));
534+
emit(__mov_r(__AL, __r2, __r3));
535+
emit(__mov_r(__AL, __r3, __r4));
536+
emit(__mov_r(__AL, __r4, __r5));
537+
emit(__mov_r(__AL, __r5, __r6));
538+
emit(__svc());
539+
emit(__bx(__AL, __lr));
540+
}
492541

493542
ph2_ir_t *ph2_ir;
494543
for (ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir;
@@ -497,11 +546,16 @@ void code_generate(void)
497546

498547
/* prepare 'argc' and 'argv', then proceed to 'main' function */
499548
if (MAIN_BB) {
500-
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
501-
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
502-
emit(__add_r(__AL, __r8, __r12, __r8));
503-
emit(__lw(__AL, __r0, __r8, 0));
504-
emit(__add_i(__AL, __r1, __r8, 4));
549+
if (dynlink) {
550+
emit(__mov_r(__AL, __r0, __r9));
551+
emit(__mov_r(__AL, __r1, __r10));
552+
} else {
553+
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
554+
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
555+
emit(__add_r(__AL, __r8, __r12, __r8));
556+
emit(__lw(__AL, __r0, __r8, 0));
557+
emit(__add_i(__AL, __r1, __r8, 4));
558+
}
505559
emit(__bl(__AL, MAIN_BB->elf_offset - elf_code->size));
506560

507561
/* exit with main's return value - r0 already has the return value */
@@ -514,3 +568,20 @@ void code_generate(void)
514568
emit_ph2_ir(ph2_ir);
515569
}
516570
}
571+
572+
void plt_generate(void)
573+
{
574+
int addr_of_got = elf_got_start + PTR_SIZE * 2;
575+
int end = plt_size - PLT_FIXUP_SIZE;
576+
elf_write_int(elf_plt, __push_reg(__AL, __lr));
577+
elf_write_int(elf_plt, __movw(__AL, __r10, addr_of_got));
578+
elf_write_int(elf_plt, __movt(__AL, __r10, addr_of_got));
579+
elf_write_int(elf_plt, __mov_r(__AL, __lr, __r10));
580+
elf_write_int(elf_plt, __lw(__AL, __pc, __lr, 0));
581+
for (int i = 0; i * PLT_ENT_SIZE < end; i++) {
582+
addr_of_got = elf_got_start + PTR_SIZE * (i + 3);
583+
elf_write_int(elf_plt, __movw(__AL, __r12, addr_of_got));
584+
elf_write_int(elf_plt, __movt(__AL, __r12, addr_of_got));
585+
elf_write_int(elf_plt, __lw(__AL, __pc, __r12, 0));
586+
}
587+
}

src/arm.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,16 @@ int __ldm(arm_cond_t cond, int w, arm_reg rn, int reg_list)
312312
return arm_encode(cond, arm_ldm + (0x2 << 6) + (w << 1), rn, 0, reg_list);
313313
}
314314

315+
/* Build the instruction word that pushes a single register onto the stack.
 *
 * @cond: condition code under which the instruction executes
 * @rt:   register whose value is stored
 *
 * Returns the word produced by arm_encode() with opcode bits 0x52, base
 * register 0xd (register 13, i.e. sp) and an immediate of 4.
 *
 * NOTE(review): presumably this is the A32 encoding of
 * 'str rt, [sp, #-4]!' (push {rt}) - verify against arm_encode()'s field
 * layout.
 */
int __push_reg(arm_cond_t cond, arm_reg rt)
{
    return arm_encode(cond, (0x5 << 4) | 0x2, 0xd, rt, 0x4);
}
319+
320+
/* Build the instruction word that pops one word from the stack.
 *
 * @cond: condition code under which the instruction executes
 * @rt:   register that receives the loaded word
 *
 * Returns the word produced by arm_encode() with opcode bits 0x49, base
 * register 0xd (register 13, i.e. sp) and an immediate of 4.
 *
 * NOTE(review): presumably this is the A32 encoding of
 * 'ldr rt, [sp], #4' (pop {rt}) - verify against arm_encode()'s field
 * layout.
 */
int __pop_word(arm_cond_t cond, arm_reg rt)
{
    return arm_encode(cond, (0x4 << 4) | 0x9, 0xd, rt, 0x4);
}
324+
315325
int __b(arm_cond_t cond, int ofs)
316326
{
317327
int o = (ofs - 8) >> 2;

src/defs.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,18 @@
3131
#define MAX_SYMTAB 65536
3232
#define MAX_STRTAB 65536
3333
#define MAX_HEADER 1024
34+
#define MAX_PROGRAM_HEADER 1024
3435
#define MAX_SECTION 1024
3536
#define MAX_ALIASES 128
37+
#define MAX_SECTION_HEADER 1024
38+
#define MAX_SHSTR 1024
39+
#define MAX_INTERP 1024
40+
#define MAX_DYNAMIC 1024
41+
#define MAX_DYNSYM 1024
42+
#define MAX_DYNSTR 1024
43+
#define MAX_RELPLT 1024
44+
#define MAX_PLT 1024
45+
#define MAX_GOTPLT 1024
3646
#define MAX_CONSTANTS 1024
3747
#define MAX_CASES 128
3848
#define MAX_NESTING 128
@@ -581,6 +591,11 @@ struct func {
581591
int bb_cnt;
582592
int visited;
583593

594+
/* Information used for dynamic linking */
595+
bool is_used;
596+
int plt_offset;
597+
int got_offset;
598+
584599
struct func *next;
585600
};
586601

@@ -643,3 +658,26 @@ typedef struct {
643658
int sh_addralign;
644659
int sh_entsize;
645660
} elf32_shdr_t;
661+
662+
/* Structures for dynamic linked program */
663+
/* For .dynsym section.
 *
 * One symbol-table entry. Field names follow Elf32_Sym from the ELF
 * specification; the per-field notes below restate that specification.
 */
typedef struct {
    int st_name;      /* offset of the symbol name in the string table */
    int st_value;     /* symbol value (address) */
    int st_size;      /* size associated with the symbol */
    char st_info;     /* binding and type, packed into one byte */
    char st_other;    /* visibility */
    char st_shndx[2]; /* section index; kept as two chars here, the spec
                       * defines it as one 16-bit field
                       */
} elf32_sym_t;
672+
673+
/* For .rel.plt section.
 *
 * One relocation entry without addend. Field names follow Elf32_Rel from the
 * ELF specification; the per-field notes below restate that specification.
 */
typedef struct {
    int r_offset; /* location the relocation applies to */
    int r_info;   /* symbol index and relocation type, packed together */
} elf32_rel_t;
678+
679+
/* For .dynamic section.
 *
 * One tag/value pair. Field names follow Elf32_Dyn from the ELF
 * specification, except that the spec's d_un union is a plain int here.
 */
typedef struct {
    int d_tag; /* kind of entry */
    int d_un;  /* value or address; interpretation depends on d_tag */
} elf32_dyn_t;

0 commit comments

Comments
 (0)