Skip to content

Commit 8312e7f

Browse files
committed
Support dynamic linking
1 parent c38ee81 commit 8312e7f

File tree

15 files changed

+782
-122
lines changed

15 files changed

+782
-122
lines changed

Makefile

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,15 @@ STAGE0 := shecc
3636
STAGE1 := shecc-stage1.elf
3737
STAGE2 := shecc-stage2.elf
3838

39+
# Build knobs for the bootstrap stages.
#   BUILTIN_LIBC  - file under $(LIBDIR) that is normalized and inlined into
#                   $(OUT)/libc.inc.
#   STAGE0_FLAGS  - options passed to the stage-0 compiler when it builds
#                   stage 1.
#   STAGE1_FLAGS  - options passed to the stage-1 compiler when it builds
#                   stage 2.
# All three use ?= so they can be preset from the environment or command line.
BUILTIN_LIBC ?= c.c
40+
STAGE0_FLAGS ?= --dump-ir
41+
STAGE1_FLAGS ?=
42+
# DYNLINK=1 switches the built-in libc input to the declarations-only header
# c.h and adds --dynlink to both compiler stages.
# Note: a value given on the make command line takes precedence over the
# := and += assignments below, so overriding these variables there together
# with DYNLINK=1 suppresses the adjustments made here.
ifeq ($(DYNLINK),1)
43+
BUILTIN_LIBC := c.h
44+
STAGE0_FLAGS += --dynlink
45+
STAGE1_FLAGS += --dynlink
46+
endif
47+
3948
OUT ?= out
4049
ARCHS = arm riscv
4150
ARCH ?= $(firstword $(ARCHS))
@@ -122,9 +131,9 @@ $(OUT)/norm-lf: tools/norm-lf.c
122131
$(VECHO) " CC+LD\t$@\n"
123132
$(Q)$(CC) $(CFLAGS) -o $@ $^
124133

125-
$(OUT)/libc.inc: $(OUT)/inliner $(OUT)/norm-lf $(LIBDIR)/c.c
134+
$(OUT)/libc.inc: $(OUT)/inliner $(OUT)/norm-lf $(LIBDIR)/$(BUILTIN_LIBC)
126135
$(VECHO) " GEN\t$@\n"
127-
$(Q)$(OUT)/norm-lf $(LIBDIR)/c.c $(OUT)/c.normalized.c
136+
$(Q)$(OUT)/norm-lf $(LIBDIR)/$(BUILTIN_LIBC) $(OUT)/c.normalized.c
128137
$(Q)$(OUT)/inliner $(OUT)/c.normalized.c $@
129138
$(Q)$(RM) $(OUT)/c.normalized.c
130139

@@ -143,12 +152,12 @@ $(OUT)/$(STAGE0)-sanitizer: $(OUT)/libc.inc $(OBJS)
143152
$(OUT)/$(STAGE1): $(OUT)/$(STAGE0)
144153
$(Q)$(STAGE1_CHECK_CMD)
145154
$(VECHO) " SHECC\t$@\n"
146-
$(Q)$(OUT)/$(STAGE0) --dump-ir -o $@ $(SRCDIR)/main.c > $(OUT)/shecc-stage1.log
155+
$(Q)$(OUT)/$(STAGE0) $(STAGE0_FLAGS) -o $@ $(SRCDIR)/main.c > $(OUT)/shecc-stage1.log
147156
$(Q)chmod a+x $@
148157

149158
$(OUT)/$(STAGE2): $(OUT)/$(STAGE1)
150159
$(VECHO) " SHECC\t$@\n"
151-
$(Q)$(TARGET_EXEC) $(OUT)/$(STAGE1) -o $@ $(SRCDIR)/main.c
160+
$(Q)$(TARGET_EXEC) $(OUT)/$(STAGE1) $(STAGE1_FLAGS) -o $@ $(SRCDIR)/main.c
152161

153162
bootstrap: $(OUT)/$(STAGE2)
154163
$(Q)chmod 775 $(OUT)/$(STAGE2)

lib/c.h

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/*
2+
* shecc - Self-Hosting and Educational C Compiler.
3+
*
4+
* shecc is freely redistributable under the BSD 2 clause license. See the
5+
* file "LICENSE" for information on usage and redistribution of this file.
6+
*/
7+
8+
#pragma once
9+
/* Declarations of C standard library functions */
10+
11+
#define NULL 0
12+
13+
#define bool _Bool
14+
#define true 1
15+
#define false 0
16+
17+
/* File I/O */
18+
typedef int FILE;
19+
FILE *fopen(char *filename, char *mode);
20+
int fclose(FILE *stream);
21+
int fgetc(FILE *stream);
22+
char *fgets(char *str, int n, FILE *stream);
23+
int fputc(int c, FILE *stream);
24+
25+
/* String-related functions */
26+
int strlen(char *str);
27+
int strcmp(char *s1, char *s2);
28+
int strncmp(char *s1, char *s2, int len);
29+
char *strcpy(char *dest, char *src);
30+
char *strncpy(char *dest, char *src, int len);
31+
char *memcpy(char *dest, char *src, int count);
32+
void *memset(void *s, int c, int n);
33+
34+
/* Formatted output functions */
35+
int printf(char *str, ...);
36+
int sprintf(char *buffer, char *str, ...);
37+
int snprintf(char *buffer, int n, char *str, ...);
38+
39+
/* Terminating program */
40+
void exit(int exit_code);
41+
void abort(void);
42+
43+
/* Dynamic memory allocation/deallocation functions */
44+
void *malloc(int size);
45+
void *calloc(int n, int size);
46+
void free(void *ptr);

mk/arm.mk

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,10 @@ ARCH_DEFS = \
66
\#define ARCH_PREDEFINED \"__arm__\" /* defined by GNU C and RealView */\n$\
77
\#define ELF_MACHINE 0x28 /* up to ARMv7/Aarch32 */\n$\
88
\#define ELF_FLAGS 0x5000200\n$\
9+
\#define DYN_LINKER \"/lib/ld-linux.so.3\"\n$\
10+
\#define LIBC_SO \"libc.so.6\"\n$\
11+
\#define PLT_FIXUP_SIZE 20\n$\
12+
\#define PLT_ENT_SIZE 12\n$\
13+
\#define R_ARCH_JUMP_SLOT 0x16\n$\
914
"
15+
RUNNER_LD_PREFIX=-L /usr/arm-linux-gnueabi/

mk/common.mk

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ ifneq ($(HOST_ARCH),$(ARCH_NAME))
3636

3737
# Generate the path to the architecture-specific qemu
3838
TARGET_EXEC = $(shell which $(ARCH_RUNNER))
39+
ifeq ($(DYNLINK),1)
40+
TARGET_EXEC += $(RUNNER_LD_PREFIX)
41+
endif
3942
endif
4043
export TARGET_EXEC
4144

mk/riscv.mk

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,12 @@ ARCH_DEFS = \
77
\#define ARCH_PREDEFINED \"__riscv\" /* Older versions of the GCC toolchain defined __riscv__ */\n$\
88
\#define ELF_MACHINE 0xf3\n$\
99
\#define ELF_FLAGS 0\n$\
10+
\#define DYN_LINKER \"/lib/ld-linux.so.3\"\n$\
11+
\#define LIBC_SO \"libc.so.6\"\n$\
12+
\#define PLT_FIXUP_SIZE 20\n$\
13+
\#define PLT_ENT_SIZE 12\n$\
14+
\#define R_ARCH_JUMP_SLOT 0x5\n$\
1015
"
16+
17+
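# TODO: Set RUNNER_LD_PREFIX for the RISC-V architecture (mk/arm.mk sets it to `-L <sysroot path>`, which mk/common.mk appends to TARGET_EXEC when DYNLINK=1)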
18+
RUNNER_LD_PREFIX=

src/arm-codegen.c

Lines changed: 96 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -135,10 +135,16 @@ void update_elf_offset(ph2_ir_t *ph2_ir)
135135

136136
void cfg_flatten(void)
137137
{
138-
func_t *func = find_func("__syscall");
139-
func->bbs->elf_offset = 48; /* offset of start + branch + exit in codegen */
138+
func_t *func;
139+
140+
if (dynlink)
141+
elf_offset = 112; /* offset of start + branch + exit in codegen */
142+
else {
143+
func = find_func("__syscall");
144+
func->bbs->elf_offset = 48; /* offset of start + exit in codegen */
145+
elf_offset = 84; /* offset of start + branch + exit + syscall in codegen */
146+
}
140147

141-
elf_offset = 84; /* offset of start + branch + exit + syscall in codegen */
142148
GLOBAL_FUNC->bbs->elf_offset = elf_offset;
143149

144150
for (ph2_ir_t *ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir;
@@ -147,9 +153,15 @@ void cfg_flatten(void)
147153
}
148154

149155
/* prepare 'argc' and 'argv', then proceed to 'main' function */
150-
elf_offset += 32; /* 6 insns for main call + 2 for exit */
156+
if (dynlink)
157+
elf_offset += 20;
158+
else
159+
elf_offset += 32; /* 6 insns for main call + 2 for exit */
151160

152161
for (func = FUNC_LIST.head; func; func = func->next) {
162+
if (!func->bbs)
163+
continue;
164+
153165
/* reserve stack */
154166
ph2_ir_t *flatten_ir = add_ph2_ir(OP_define);
155167
flatten_ir->src0 = func->stack_size;
@@ -282,15 +294,23 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
282294
return;
283295
case OP_call:
284296
func = find_func(ph2_ir->func_name);
285-
emit(__bl(__AL, func->bbs->elf_offset - elf_code->size));
297+
if (func->bbs)
298+
ofs = func->bbs->elf_offset - elf_code->size;
299+
else
300+
ofs = (elf_plt_start + func->plt_offset) -
301+
(elf_code_start + elf_code->size);
302+
emit(__bl(__AL, ofs));
286303
return;
287304
case OP_load_data_address:
288305
emit(__movw(__AL, rd, ph2_ir->src0 + elf_data_start));
289306
emit(__movt(__AL, rd, ph2_ir->src0 + elf_data_start));
290307
return;
291308
case OP_address_of_func:
292309
func = find_func(ph2_ir->func_name);
293-
ofs = elf_code_start + func->bbs->elf_offset;
310+
if (func->bbs)
311+
ofs = elf_code_start + func->bbs->elf_offset;
312+
else
313+
ofs = elf_plt_start + func->plt_offset;
294314
emit(__movw(__AL, __r8, ofs));
295315
emit(__movt(__AL, __r8, ofs));
296316
emit(__sw(__AL, __r8, rn, 0));
@@ -447,11 +467,40 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
447467
}
448468
}
449469

470+
void plt_generate(void);
450471
void code_generate(void)
451472
{
452-
elf_data_start = elf_code_start + elf_offset;
473+
if (dynlink) {
474+
plt_generate();
475+
/* Call __libc_start_main() */
476+
emit(__mov_i(__AL, __r11, 0));
477+
emit(__mov_i(__AL, __lr, 0));
478+
emit(__pop_word(__AL, __r1));
479+
emit(__mov_r(__AL, __r2, __sp));
480+
emit(__push_reg(__AL, __r2));
481+
emit(__push_reg(__AL, __r0));
482+
emit(__mov_i(__AL, __r12, 0));
483+
emit(__push_reg(__AL, __r12));
484+
emit(__movw(__AL, __r0, elf_code_start + 56));
485+
emit(__movt(__AL, __r0, elf_code_start + 56));
486+
emit(__mov_i(__AL, __r3, 0));
487+
emit(__bl(__AL, (elf_plt_start + PLT_FIXUP_SIZE) -
488+
(elf_code_start + elf_code->size)));
489+
/* Go to the 'exit' code snippet if __libc_start_main returns */
490+
emit(__mov_i(__AL, __r0, 127));
491+
emit(__bl(__AL, 28));
453492

454-
/* start */
493+
/* If the compiled program is dynamically linked, the 'start'
494+
* entry point is located here.
495+
*
496+
* Preserve 'argc' and 'argv' for the 'main' function.
497+
*/
498+
emit(__mov_r(__AL, __r9, __r0));
499+
emit(__mov_r(__AL, __r10, __r1));
500+
}
501+
/* If the compiled program is statically linked, the 'start' entry
502+
* point is here.
503+
*/
455504
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
456505
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
457506
emit(__sub_r(__AL, __sp, __sp, __r8));
@@ -468,16 +517,18 @@ void code_generate(void)
468517
emit(__mov_i(__AL, __r7, 1));
469518
emit(__svc());
470519

471-
/* syscall */
472-
emit(__mov_r(__AL, __r7, __r0));
473-
emit(__mov_r(__AL, __r0, __r1));
474-
emit(__mov_r(__AL, __r1, __r2));
475-
emit(__mov_r(__AL, __r2, __r3));
476-
emit(__mov_r(__AL, __r3, __r4));
477-
emit(__mov_r(__AL, __r4, __r5));
478-
emit(__mov_r(__AL, __r5, __r6));
479-
emit(__svc());
480-
emit(__mov_r(__AL, __pc, __lr));
520+
if (!dynlink) {
521+
/* syscall */
522+
emit(__mov_r(__AL, __r7, __r0));
523+
emit(__mov_r(__AL, __r0, __r1));
524+
emit(__mov_r(__AL, __r1, __r2));
525+
emit(__mov_r(__AL, __r2, __r3));
526+
emit(__mov_r(__AL, __r3, __r4));
527+
emit(__mov_r(__AL, __r4, __r5));
528+
emit(__mov_r(__AL, __r5, __r6));
529+
emit(__svc());
530+
emit(__mov_r(__AL, __pc, __lr));
531+
}
481532

482533
ph2_ir_t *ph2_ir;
483534
for (ph2_ir = GLOBAL_FUNC->bbs->ph2_ir_list.head; ph2_ir;
@@ -486,11 +537,16 @@ void code_generate(void)
486537

487538
/* prepare 'argc' and 'argv', then proceed to 'main' function */
488539
if (MAIN_BB) {
489-
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
490-
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
491-
emit(__add_r(__AL, __r8, __r12, __r8));
492-
emit(__lw(__AL, __r0, __r8, 0));
493-
emit(__add_i(__AL, __r1, __r8, 4));
540+
if (dynlink) {
541+
emit(__mov_r(__AL, __r0, __r9));
542+
emit(__mov_r(__AL, __r1, __r10));
543+
} else {
544+
emit(__movw(__AL, __r8, GLOBAL_FUNC->stack_size));
545+
emit(__movt(__AL, __r8, GLOBAL_FUNC->stack_size));
546+
emit(__add_r(__AL, __r8, __r12, __r8));
547+
emit(__lw(__AL, __r0, __r8, 0));
548+
emit(__add_i(__AL, __r1, __r8, 4));
549+
}
494550
emit(__bl(__AL, MAIN_BB->elf_offset - elf_code->size));
495551

496552
/* exit with main's return value - r0 already has the return value */
@@ -503,3 +559,20 @@ void code_generate(void)
503559
emit_ph2_ir(ph2_ir);
504560
}
505561
}
562+
563+
/* Write the ARM procedure linkage table into 'elf_plt'.
 *
 * Layout produced (each elf_write_int call appends one encoded instruction;
 * assumes every instruction is 4 bytes - TODO confirm against elf_write_int):
 *   - a five-instruction fixup stub, matching PLT_FIXUP_SIZE (20) from
 *     mk/arm.mk, that jumps through the GOT slot at index 2;
 *   - one three-instruction entry per PLT_ENT_SIZE (12) bytes of the
 *     remaining 'plt_sz', where entry i jumps through GOT slot i + 3.
 *
 * NOTE(review): the stub overwrites r10, while code_generate() keeps 'argv'
 * in r10 for dynamically linked programs until 'main' is called. Verify that
 * the stub cannot run between those two points.
 */
void plt_generate(void)
564+
{
565+
/* Address of GOT slot 2, the target of the fixup stub's indirect jump. */
int addr_of_got = elf_got_start + PTR_SIZE * 2;
566+
/* Bytes of PLT left for per-function entries after the fixup stub. */
int end = plt_sz - PLT_FIXUP_SIZE;
567+
/* Fixup stub: save lr, build the GOT slot address in r10, move it to lr,
 * then load pc from that slot.
 */
elf_write_int(elf_plt, __push_reg(__AL, __lr));
568+
elf_write_int(elf_plt, __movw(__AL, __r10, addr_of_got));
569+
elf_write_int(elf_plt, __movt(__AL, __r10, addr_of_got));
570+
elf_write_int(elf_plt, __mov_r(__AL, __lr, __r10));
571+
elf_write_int(elf_plt, __lw(__AL, __pc, __lr, 0));
572+
/* Per-function entries: r12 receives the address of the entry's GOT slot
 * and pc is loaded from it.
 */
for (int i = 0; i * PLT_ENT_SIZE < end; i++) {
573+
addr_of_got = elf_got_start + PTR_SIZE * (i + 3);
574+
elf_write_int(elf_plt, __movw(__AL, __r12, addr_of_got));
575+
elf_write_int(elf_plt, __movt(__AL, __r12, addr_of_got));
576+
elf_write_int(elf_plt, __lw(__AL, __pc, __r12, 0));
577+
}
578+
}

src/arm.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,16 @@ int __ldm(arm_cond_t cond, int w, arm_reg rn, int reg_list)
312312
return arm_encode(cond, arm_ldm + (0x2 << 6) + (w << 1), rn, 0, reg_list);
313313
}
314314

315+
/* Return the encoding of an instruction that pushes register 'rt'.
 *
 * Calls arm_encode() with opcode field 0x52, base register 0xd and a low
 * operand of 0x4.
 * NOTE(review): presumably `str rt, [sp, #-4]!` (pre-decrement store with
 * write-back); confirm against arm_encode()'s bit layout, which is not
 * visible here.
 */
int __push_reg(arm_cond_t cond, arm_reg rt)
316+
{
317+
return arm_encode(cond, (0x5 << 4) | 0x2, 0xd, rt, 0x4);
318+
}
319+
320+
/* Return the encoding of an instruction that pops one word into 'rt'.
 *
 * Calls arm_encode() with opcode field 0x49, base register 0xd and a low
 * operand of 0x4.
 * NOTE(review): presumably `ldr rt, [sp], #4` (post-increment load); confirm
 * against arm_encode()'s bit layout, which is not visible here.
 */
int __pop_word(arm_cond_t cond, arm_reg rt)
321+
{
322+
return arm_encode(cond, (0x4 << 4) | 0x9, 0xd, rt, 0x4);
323+
}
324+
315325
int __b(arm_cond_t cond, int ofs)
316326
{
317327
int o = (ofs - 8) >> 2;

src/defs.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,18 @@
3131
#define MAX_SYMTAB 65536
3232
#define MAX_STRTAB 65536
3333
#define MAX_HEADER 1024
34+
#define MAX_PROGRAM_HEADER 1024
3435
#define MAX_SECTION 1024
3536
#define MAX_ALIASES 128
37+
#define MAX_SECTION_HEADER 1024
38+
#define MAX_SHSTR 1024
39+
#define MAX_INTERP 1024
40+
#define MAX_DYNAMIC 1024
41+
#define MAX_DYNSYM 1024
42+
#define MAX_DYNSTR 1024
43+
#define MAX_RELPLT 1024
44+
#define MAX_PLT 1024
45+
#define MAX_GOTPLT 1024
3646
#define MAX_CONSTANTS 1024
3747
#define MAX_CASES 128
3848
#define MAX_NESTING 128
@@ -578,6 +588,11 @@ struct func {
578588
int bb_cnt;
579589
int visited;
580590

591+
/* Information used for dynamic linking */
592+
bool is_used;
593+
int plt_offset;
594+
int got_offset;
595+
581596
struct func *next;
582597
};
583598

@@ -640,3 +655,26 @@ typedef struct {
640655
int sh_addralign;
641656
int sh_entsize;
642657
} elf32_shdr_t;
658+
659+
/* Structures for dynamic linked program */
660+
/* For .dynsym section.
 *
 * One symbol table entry. The field names, order and widths follow the
 * Elf32_Sym record of the ELF specification (16 bytes, assuming a 4-byte
 * int).
 */
661+
typedef struct {
662+
int st_name; /* symbol name; presumably an offset into .dynstr - confirm */
663+
int st_value; /* symbol value */
664+
int st_size; /* symbol size */
665+
char st_info; /* symbol binding and type, per the ELF specification */
666+
char st_other; /* symbol visibility, per the ELF specification */
667+
/* Section index stored as two bytes instead of one 16-bit integer.
 * NOTE(review): byte order used when filling it is not visible here.
 */
char st_shndx[2];
668+
} elf32_sym_t;
669+
670+
/* For .rel.plt section.
 *
 * One relocation entry without addend. The field names and order follow the
 * Elf32_Rel record of the ELF specification.
 */
671+
typedef struct {
672+
int r_offset; /* location the relocation applies to */
673+
/* Symbol index and relocation type packed together, per the ELF
 * specification. NOTE(review): presumably carries R_ARCH_JUMP_SLOT from the
 * mk/<arch>.mk files - confirm against the code that fills this struct.
 */
int r_info;
674+
} elf32_rel_t;
675+
676+
/* For .dynamic section.
 *
 * One tag/value pair. The ELF specification's Elf32_Dyn holds a union of an
 * integer value and an address in the second member; here it is a single
 * int.
 */
677+
typedef struct {
678+
int d_tag; /* entry kind */
679+
int d_un; /* value or address, interpreted according to d_tag */
680+
} elf32_dyn_t;

0 commit comments

Comments
 (0)