From d54b6d7990ed1f76ffebe732f4595d1497ccff9a Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau <16990250+laurenthuberdeau@users.noreply.github.com> Date: Sun, 19 Jan 2025 22:02:47 -0500 Subject: [PATCH 01/89] Add safe get_child functions (#133) * Add safe mode option to pnut Because the subset of C is relatively unsafe type-wise, the safe mode checks the get_child calls and errors when the parent and child nodes are not of the expected type. This will make it easier to catch errors when changes to the AST are made. * Factor get_child calls out of code walkers get_child was called in a bunch of places and we want to reduce the number of get_child calls as much as possible for the next commits. * Add msg when bootstrap-pnut fails to compile pnut * Use get_child_ in pnut.c where possible * Use get_child_ in sh.c where possible * Use get_child_ in exe.c where possible pnut.c, sh.c and exe.c are the only files that used get_child. * Fix bug where file path in error msg was incorrect * Add safe mode to tests and bootstrap scripts * Add bounds checks and safe get_val * Remove calls to unchecked get_val * Fix built-in macro test The built-in stubbed test assumes that INCLUDE_LINE_NUMBER_ON_ERROR was undefined so it needed some small modifications. 
* Add CI check for safe mode --- .github/workflows/main.yml | 31 +- bootstrap-pnut-exe.sh | 11 +- bootstrap-pnut-sh.sh | 20 +- exe.c | 485 +++++++++--------- pnut.c | 210 ++++++-- run-tests.sh | 24 + sh.c | 474 +++++++++-------- .../_all/preprocessor/macro/builtin-stubbed.c | 1 + tests/_all/preprocessor/macro/builtin.golden | 2 +- 9 files changed, 732 insertions(+), 526 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 36e0c034..4163e93b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -99,9 +99,6 @@ jobs: strategy: matrix: shell: ["bash", "dash", "ksh", "mksh", "yash", "zsh"] - include: - - shell: dash # Using dash because it's the fastest - pnut_opts: "'-DSH_SAVE_VARS_WITH_SET' '-DOPTIMIZE_CONSTANT_PARAM'" runs-on: ubuntu-latest steps: - name: Checkout code @@ -116,10 +113,11 @@ jobs: run: | set -e ./run-tests.sh sh --shell ${{ matrix.shell }} - for pnut_opts in ${{ matrix.pnut_opts }}; do - echo "Running tests with pnut options: $pnut_opts" - PNUT_OPTIONS="${pnut_opts}" ./run-tests.sh sh --shell ${{ matrix.shell }} - done + + - name: Run tests with ${{ matrix.shell }} (fast) + run: | + set -e + ./run-tests.sh sh --shell ${{ matrix.shell }} --fast bootstrap-pnut-sh: strategy: @@ -206,6 +204,25 @@ jobs: set -e ./bootstrap-pnut-sh-by-pnut-exe.sh --backend ${{ matrix.target }} + compile-in-safe-mode: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y coreutils time + + - name: Compile pnut-sh, pnut-exe and tests in safe mode + run: | + set -e + ./bootstrap-pnut-sh.sh --safe --compile-only + ./bootstrap-pnut-exe.sh --safe # No compile-only flag for pnut-exe since it's fast enough + ./run-tests.sh sh --safe --compile-only + ./run-tests.sh i386_linux --safe --compile-only + bootstrap-bash-2_05a: runs-on: ubuntu-latest steps: diff --git a/bootstrap-pnut-exe.sh b/bootstrap-pnut-exe.sh 
index 295eb176..ec1a2c91 100755 --- a/bootstrap-pnut-exe.sh +++ b/bootstrap-pnut-exe.sh @@ -27,7 +27,11 @@ bootstrap_with_gcc() { gcc -o $TEMP_DIR/pnut-x86-by-gcc.exe $PNUT_EXE_OPTIONS pnut.c # gcc -E -P -DPNUT_CC $PNUT_EXE_OPTIONS pnut.c > "$TEMP_DIR/pnut-after-cpp.c" - ./$TEMP_DIR/pnut-x86-by-gcc.exe $PNUT_EXE_OPTIONS pnut.c > $TEMP_DIR/pnut-x86-by-pnut-x86-by-gcc.exe + ./$TEMP_DIR/pnut-x86-by-gcc.exe $PNUT_EXE_OPTIONS pnut.c > $TEMP_DIR/pnut-x86-by-pnut-x86-by-gcc.exe || { + echo "Failed to compile pnut-x86-by-pnut-x86-by-gcc.exe" + tail -n 20 $TEMP_DIR/pnut-x86-by-pnut-x86-by-gcc.exe + exit 1 + } chmod +x $TEMP_DIR/pnut-x86-by-pnut-x86-by-gcc.exe @@ -121,12 +125,14 @@ bootstrap_with_shell() { # Parse the arguments backend="x86_64_linux" # Default to x86_64_linux shell= # Defined if doing the full bootstrap using pnut.sh on Posix shell. "all" to test with all shells (slow). +safe=0 # Whether to use safe mode when compiling pnut (adds checks at run time) while [ $# -gt 0 ]; do case $1 in --backend) backend="$2"; shift 2 ;; --shell) shell="$2"; shift 2 ;; --fast) PNUT_SH_OPTIONS="$PNUT_SH_OPTIONS_FAST"; shift 1 ;; + --safe) safe=1; shift 1 ;; *) echo "Unknown option: $1"; exit 1;; esac done @@ -141,6 +147,9 @@ case $backend in ;; esac +# Add safe mode if requested +if [ $safe -eq 1 ]; then PNUT_EXE_OPTIONS="$PNUT_EXE_OPTIONS -DSAFE_MODE"; fi + if [ -z "$shell" ]; then bootstrap_with_gcc else diff --git a/bootstrap-pnut-sh.sh b/bootstrap-pnut-sh.sh index 717de18a..60fa7b4e 100755 --- a/bootstrap-pnut-sh.sh +++ b/bootstrap-pnut-sh.sh @@ -21,20 +21,36 @@ bootstrap_with_shell() { # Parse the arguments shell="$SHELL" # Use current shell as the default. "all" to test all shells. 
+safe=0 +compile_only=0 while [ $# -gt 0 ]; do case $1 in - --shell) shell="$2"; shift 2 ;; + --shell) shell="$2"; shift 2 ;; --fast) PNUT_SH_OPTIONS="$PNUT_SH_OPTIONS_FAST"; shift 1 ;; + --safe) safe=1; shift 1 ;; + --compile-only) compile_only=1; shift 1 ;; *) echo "Unknown option: $1"; exit 1;; esac done if [ ! -d "$TEMP_DIR" ]; then mkdir "$TEMP_DIR"; fi +if [ $safe -eq 1 ]; then PNUT_SH_OPTIONS="$PNUT_SH_OPTIONS -DSAFE_MODE"; fi + gcc -o "$TEMP_DIR/pnut.exe" $PNUT_SH_OPTIONS pnut.c -./$TEMP_DIR/pnut.exe $PNUT_SH_OPTIONS "pnut.c" > "$TEMP_DIR/pnut-sh.sh" +./$TEMP_DIR/pnut.exe $PNUT_SH_OPTIONS "pnut.c" > "$TEMP_DIR/pnut-sh.sh" || { + echo "Failed to compile pnut" + tail -n 20 "$TEMP_DIR/pnut-sh.sh" + exit 1 +} + +# Exit now if we only want to compile +if [ $compile_only -eq 1 ]; then + echo "Compiled pnut.sh successfully" + exit 0; +fi if [ "$shell" = "all" ]; then set +e # Don't exit on error because we want to test all shells. diff --git a/exe.c b/exe.c index 58a583a6..dd440c8d 100644 --- a/exe.c +++ b/exe.c @@ -425,7 +425,7 @@ int struct_union_size(ast struct_type); // A pointer type is either an array type or a type with at least one star bool is_pointer_type(ast type) { bool op = get_op(type); - bool stars = get_val(type); + bool stars = get_stars(type); return op == '[' || stars > 0; } @@ -436,7 +436,7 @@ bool is_struct_or_union_type(ast type) { // An aggregate type is either an array type or a struct/union type (that's not a reference) bool is_aggregate_type(ast type) { - if ((is_struct_or_union_type(type) && get_val(type) == 0) || get_op(type) == '[') { + if ((is_struct_or_union_type(type) && get_stars(type) == 0) || get_op(type) == '[') { return true; } else { return false; @@ -474,7 +474,7 @@ int type_width(ast type, int stars, bool array_value, bool word_align) { // sizeof, in struct definitions, etc.) while in other contexts we care // about the pointer (i.e. when passing an array to a function, etc.) 
if (array_value) { - return round_up_to_word_size(get_val(get_child(type, 0)) * type_width_ast(get_child(type, 1), true, false)); + return round_up_to_word_size(get_val_(INTEGER, get_child_('[', type, 0)) * type_width_ast(get_child_('[', type, 1), true, false)); } else { return word_size; // Array is a pointer to the first element } @@ -498,7 +498,7 @@ int type_width(ast type, int stars, bool array_value, bool word_align) { } int type_width_ast(ast type, bool array_value, bool word_align) { - return type_width(type, get_val(type), array_value, word_align); + return type_width(type, get_stars(type), array_value, word_align); } // Structs, enums and unions types come in 2 variants: @@ -511,29 +511,29 @@ ast canonicalize_type(ast type) { ast res = type; int binding; - if (get_op(type) == STRUCT_KW && get_child(type, 2) == 0) { // struct with empty def => reference - binding = cgc_lookup_struct(get_val(get_child(type, 1)), cgc_globals); + if (get_op(type) == STRUCT_KW && get_child_opt_(STRUCT_KW, ',', type, 2) == 0) { // struct with empty def => reference + binding = cgc_lookup_struct(get_val_(IDENTIFIER, get_child__(STRUCT_KW, IDENTIFIER, type, 1)), cgc_globals); if (binding == 0) fatal_error("canonicalize_type: struct type not defined"); res = heap[binding+3]; - if (get_val(type) != 0) { // Copy stars + if (get_stars(type) != 0) { // Copy stars res = clone_ast(res); - set_child(res, 0, get_child(type, 0)); + set_child(res, 0, get_child_(STRUCT_KW, type, 0)); } - } else if (get_op(type) == UNION_KW && get_child(type, 2) == 0) { // union with empty def => reference - binding = cgc_lookup_union(get_val(get_child(type, 1)), cgc_globals); + } else if (get_op(type) == UNION_KW && get_child_opt_(UNION_KW, ',', type, 2) == 0) { // union with empty def => reference + binding = cgc_lookup_union(get_val_(IDENTIFIER, get_child__(UNION_KW, IDENTIFIER, type, 1)), cgc_globals); if (binding == 0) fatal_error("canonicalize_type: union type not defined"); res = heap[binding+3]; - if 
(get_val(type) != 0) { // Copy stars + if (get_stars(type) != 0) { // Copy stars res = clone_ast(res); - set_child(res, 0, get_child(type, 0)); + set_child(res, 0, get_child_(UNION_KW, type, 0)); } - } else if (get_op(type) == ENUM_KW && get_child(type, 1) == 0) { // enum with empty def => reference - binding = cgc_lookup_enum(get_val(get_child(type, 0)), cgc_globals); + } else if (get_op(type) == ENUM_KW && get_child_opt_(ENUM_KW, ',', type, 1) == 0) { // enum with empty def => reference + binding = cgc_lookup_enum(get_val_(IDENTIFIER, get_child__(ENUM_KW, IDENTIFIER, type, 0)), cgc_globals); if (binding == 0) fatal_error("canonicalize_type: enum type not defined"); res = heap[binding+3]; - if (get_val(type) != 0) { // Copy stars + if (get_stars(type) != 0) { // Copy stars res = clone_ast(res); - set_child(res, 0, get_child(type, 0)); + set_child(res, 0, get_child_(ENUM_KW, type, 0)); } } @@ -553,16 +553,16 @@ int struct_union_size(ast type) { switch (get_op(type)) { case STRUCT_KW: while (get_op(members) == ',') { - member_type = get_child(members, 1); - members = get_child(members, 2); + member_type = get_child_(',', members, 1); + members = get_child_opt_(',', ',', members, 2); member_size = type_width_ast(member_type, true, true); size += member_size; } break; case UNION_KW: while (get_op(members) == ',') { - member_type = get_child(members, 1); - members = get_child(members, 2); + member_type = get_child_(',', members, 1); + members = get_child_opt_(',', ',', members, 2); member_size = type_width_ast(member_type, true, true); // Union size is the max of its members if (member_size > size) size = member_size; @@ -583,20 +583,20 @@ int struct_member_offset_go(ast struct_type, ast member_ident) { ast ident; while (get_op(members) == ',') { - ident = get_val(get_child(members, 0)); + ident = get_child_opt_(',', IDENTIFIER, members, 0); if (ident == 0) { // Anonymous struct member, search that struct - sub_offset = struct_member_offset_go(get_child(members, 1), 
member_ident); + sub_offset = struct_member_offset_go(get_child_(',', members, 1), member_ident); if (sub_offset != -1) return offset + sub_offset; - } else if (get_val(member_ident) == get_val(get_child(members, 0))) { + } else if (get_val_(IDENTIFIER, member_ident) == get_val_(IDENTIFIER, ident)) { return offset; } if (get_op(struct_type) == STRUCT_KW) { // For unions, fields are always at offset 0. We must still iterate // because the field may be in an anonymous struct. - offset += round_up_to_word_size(type_width_ast(get_child(members, 1), true, true)); + offset += round_up_to_word_size(type_width_ast(get_child_(',', members, 1), true, true)); } - members = get_child(members, 2); + members = get_child_opt_(',', ',', members, 2); } return -1; @@ -614,15 +614,14 @@ ast struct_member_go(ast struct_type, ast member_ident) { ast ident; while (members != 0) { - ident = get_val(get_child(members, 0)); + ident = get_child_opt_(',', IDENTIFIER, members, 0); if (ident == 0) { // Anonymous struct member, search that struct - ident = struct_member_go(get_child(members, 1), member_ident); + ident = struct_member_go(get_child_(',', members, 1), member_ident); if (ident != 0) return ident; // Found member in the anonymous struct - } else if (get_val(member_ident) == ident) { + } else if (get_val_(IDENTIFIER, member_ident) == get_val_(IDENTIFIER, ident)) { return members; } - - members = get_child(members, 2); + members = get_child_opt_(',', ',', members, 2); } return -1; @@ -637,8 +636,8 @@ ast struct_member(ast struct_type, ast member_ident) { // Width of an object pointed to by a reference type. 
int ref_type_width(ast type) { if (get_op(type) == '[') { - return type_width_ast(get_child(type, 1), false, false); // size of inner type - } else if (get_val(type) == 1) { // pointer * + return type_width_ast(get_child_('[', type, 1), false, false); // size of inner type + } else if (get_stars(type) == 1) { // pointer * return type_width(type, 0, false, false); // size of inner type } else { return word_size; @@ -657,9 +656,11 @@ ast value_type(ast node) { int nb_children = get_nb_children(node); int binding; int ident; + ast left_type, right_type; + ast child0, child1; - ast left_type; - ast right_type; + if (nb_children >= 1) child0 = get_child(node, 0); + if (nb_children >= 2) child1 = get_child(node, 1); if (nb_children == 0) { if (op == INTEGER) { @@ -669,7 +670,7 @@ ast value_type(ast node) { } else if (op == STRING) { return string_type; } else if (op == IDENTIFIER) { - ident = get_val(node); + ident = get_val_(IDENTIFIER, node); binding = cgc_lookup_var(ident, cgc_locals); if (binding != 0) { return heap[binding+5]; @@ -683,7 +684,7 @@ ast value_type(ast node) { return int_type; // Enums are always integers } else { putstr("ident = "); - putstr(string_pool+get_val(ident)); + putstr(string_pool + probe_string(ident)); putchar('\n'); fatal_error("value_type: identifier not found"); return -1; @@ -699,12 +700,12 @@ ast value_type(ast node) { } else if (nb_children == 1) { if (op == '*') { - left_type = value_type(get_child(node, 0)); + left_type = value_type(child0); if (get_op(left_type) == '[') { // Array type - return get_child(left_type, 1); - } else if (get_val(left_type) != 0) { // Pointer type + return get_child_('[', left_type, 1); + } else if (get_stars(left_type) != 0) { // Pointer type left_type = clone_ast(left_type); - set_val(left_type, get_val(left_type) - 1); // one less indirection + set_stars(left_type, get_stars(left_type) - 1); // one less indirection return left_type; } else { putstr("left_type="); putint(left_type); putchar('\n'); @@ 
-712,18 +713,18 @@ ast value_type(ast node) { return -1; } } else if (op == '&') { - left_type = value_type(get_child(node, 0)); + left_type = value_type(child0); if (get_op(left_type) == '[') { - left_type = clone_ast(get_child(left_type, 1)); // Inner type - set_val(left_type, get_val(left_type) + 1); // Increment star by 2, to account for the [ we just removed + left_type = clone_ast(get_child_('[', left_type, 1)); // Inner type + set_stars(left_type, get_stars(left_type) + 1); // Increment star by 2, to account for the [ we just removed } else { left_type = clone_ast(left_type); - set_val(left_type, get_val(left_type) + 1); // Increment star by 1 + set_stars(left_type, get_stars(left_type) + 1); // Increment star by 1 } return left_type; } else if (op == '+' || op == '-' || op == '~' || op == '!' || op == MINUS_MINUS || op == PLUS_PLUS || op == MINUS_MINUS_POST || op == PLUS_PLUS_POST || op == PLUS_PLUS_PRE || op == MINUS_MINUS_PRE || op == PARENS) { // Unary operation don't change the type - return value_type(get_child(node, 0)); + return value_type(child0); } else if (op == SIZEOF_KW) { return int_type; // sizeof always returns an integer } else { @@ -736,8 +737,8 @@ ast value_type(ast node) { if (op == '+' || op == '-' || op == '*' || op == '/' || op == '%' || op == '&' || op == '|' || op == '^' || op == LSHIFT || op == RSHIFT || op == '<' || op == '>' || op == EQ_EQ || op == EXCL_EQ || op == LT_EQ || op == GT_EQ) { - left_type = value_type(get_child(node, 0)); - right_type = value_type(get_child(node, 1)); + left_type = value_type(child0); + right_type = value_type(child1); if (is_pointer_type(left_type) && is_pointer_type(right_type) && op == '-') { return int_type; // Pointer - Pointer = Integer } else if (is_pointer_type(left_type)) { @@ -748,22 +749,22 @@ ast value_type(ast node) { return right_type; } } else if (op == ',') { - return value_type(get_child(node, 1)); // The type of the right operand + return value_type(child1); // The type of the right 
operand } else if (op == '[') { - left_type = value_type(get_child(node, 0)); - right_type = value_type(get_child(node, 1)); + left_type = value_type(child0); + right_type = value_type(child1); if (get_op(left_type) == '[') { // Array - return get_child(left_type, 1); // array inner type - } else if (get_val(left_type) != 0) { // Pointer + return get_child_('[', left_type, 1); // array inner type + } else if (get_stars(left_type) != 0) { // Pointer left_type = clone_ast(left_type); - set_val(left_type, get_val(left_type) - 1); // one less indirection + set_stars(left_type, get_stars(left_type) - 1); // one less indirection return left_type; } else if (get_op(right_type) == '[') { // Array, but with the operands flipped (i.e. 0[arr] instead of arr[0]) - return get_child(right_type, 1); // array inner type - } else if (get_val(right_type) != 0) { + return get_child_('[', right_type, 1); // array inner type + } else if (get_stars(right_type) != 0) { right_type = clone_ast(right_type); - set_val(right_type, get_val(right_type) - 1); // one less indirection + set_stars(right_type, get_stars(right_type) - 1); // one less indirection return right_type; } else { putstr("left_type="); putint(left_type); putchar('\n'); @@ -771,40 +772,40 @@ ast value_type(ast node) { return -1; } } else if (op == '=' || op == AMP_EQ || op == BAR_EQ || op == CARET_EQ || op == LSHIFT_EQ || op == MINUS_EQ || op == PERCENT_EQ || op == PLUS_EQ || op == RSHIFT_EQ || op == SLASH_EQ || op == STAR_EQ) { - return value_type(get_child(node, 0)); // Only the left side is relevant here + return value_type(child0); // Only the left side is relevant here } else if (op == AMP_AMP || op == BAR_BAR) { // TODO: Check that the operands have compatible types? 
- return value_type(get_child(node, 0)); + return value_type(child0); } else if (op == '(') { - binding = cgc_lookup_fun(get_val(get_child(node, 0)), cgc_globals); + binding = cgc_lookup_fun(get_val_(IDENTIFIER, child0), cgc_globals); if (binding != 0) { return heap[binding+5]; } else { putstr("ident = "); - putstr(string_pool + get_val(get_val(get_child(node, 0)))); + putstr(string_pool + probe_string(get_val_(IDENTIFIER, child0))); putchar('\n'); fatal_error("value_type: function not found"); return -1; } } else if (op == '.') { - left_type = value_type(get_child(node, 0)); - if (is_struct_or_union_type(left_type) && get_val(left_type) == 0) { - return get_child(struct_member(left_type, get_child(node, 1)), 1); // child 1 of member is the type + left_type = value_type(child0); + if (is_struct_or_union_type(left_type) && get_stars(left_type) == 0) { + return get_child_(',', struct_member(left_type, child1), 1); // child 1 of member is the type } else { fatal_error("value_type: . operator on non-struct pointer type"); return -1; } } else if (op == ARROW) { // Same as '.', but left_type must be a pointer - left_type = value_type(get_child(node, 0)); - if (is_struct_or_union_type(left_type) && get_val(left_type) == 1) { - return get_child(struct_member(left_type, get_child(node, 1)), 1); // child 1 of member is the type + left_type = value_type(child0); + if (is_struct_or_union_type(left_type) && get_stars(left_type) == 1) { + return get_child_(',', struct_member(left_type, child1), 1); // child 1 of member is the type } else { fatal_error("value_type: -> operator on non-struct pointer type"); return -1; } } else if (op == CAST) { - return get_child(node, 0); + return child0; } else { fatal_error("value_type: unknown expression with 2 children"); return -1; @@ -814,7 +815,7 @@ ast value_type(ast node) { if (op == '?') { // We assume that the 2 cases have the same type. 
- return value_type(get_child(node, 1)); + return value_type(child1); } else { putstr("op="); putint(op); putchar('\n'); fatal_error("value_type: unknown expression with 3 children"); @@ -932,7 +933,7 @@ int codegen_param(ast param) { int type = value_type(param); int left_width; - if (is_struct_or_union_type(type) && get_val(type) == 0) { + if (is_struct_or_union_type(type) && get_stars(type) == 0) { left_width = codegen_lvalue(param); pop_reg(reg_X); grow_fs(-1); @@ -950,10 +951,14 @@ int codegen_params(ast params) { int fs = 0; + // Function params are comma expressions that aren't exactly like comma lists. + // Comma lists end with a new_ast2(',', last, 0) node, while function params + // end with a new_ast2(',', second_last, last) if there are more than one param + // and are just the last param if there is only one. if (params != 0) { if (get_op(params) == ',') { - fs = codegen_params(get_child(params, 1)); - fs += codegen_param(get_child(params, 0)); + fs = codegen_params(get_child_(',', params, 1)); + fs += codegen_param(get_child_(',', params, 0)); } else { fs = codegen_param(params); } @@ -963,18 +968,17 @@ int codegen_params(ast params) { } void codegen_call(ast node) { - - ast fun = get_child(node, 0); - ast name = get_val(fun); + ast fun_ident = get_child__('(', IDENTIFIER, node, 0); + ast ident_probe = get_val_(IDENTIFIER, fun_ident); ast params = get_child(node, 1); ast nb_params = codegen_params(params); - int binding = cgc_lookup_fun(name, cgc_globals); + int binding = cgc_lookup_fun(ident_probe, cgc_globals); int lbl; if (binding == 0) { lbl = alloc_label(); - cgc_add_global_fun(name, lbl, 0); + cgc_add_global_fun(ident_probe, lbl, 0); binding = cgc_globals; } @@ -987,8 +991,7 @@ void codegen_call(ast node) { } void codegen_goto(ast node) { - - ast label_ident = get_val(node); + ast label_ident = get_val_(GOTO_KW, node); int binding = cgc_lookup_goto_label(label_ident, cgc_locals_fun); int goto_lbl; @@ -1004,22 +1007,25 @@ void codegen_goto(ast 
node) { // Return the width of the lvalue int codegen_lvalue(ast node) { - int op = get_op(node); int nb_children = get_nb_children(node); int binding; int lvalue_width = 0; ast type; + ast child0, child1; + + if (nb_children >= 1) child0 = get_child(node, 0); + if (nb_children >= 2) child1 = get_child(node, 1); if (nb_children == 0) { if (op == IDENTIFIER) { - binding = cgc_lookup_var(get_val(node), cgc_locals); + binding = cgc_lookup_var(get_val_(IDENTIFIER, node), cgc_locals); if (binding != 0) { mov_reg_imm(reg_X, (cgc_fs - heap[binding+4]) * word_size); add_reg_reg(reg_X, reg_SP); push_reg(reg_X); } else { - binding = cgc_lookup_var(get_val(node), cgc_globals); + binding = cgc_lookup_var(get_val_(IDENTIFIER, node), cgc_globals); if (binding != 0) { mov_reg_imm(reg_X, heap[binding+4]); add_reg_reg(reg_X, reg_glo); @@ -1037,11 +1043,11 @@ int codegen_lvalue(ast node) { } else if (nb_children == 1) { if (op == '*') { - codegen_rvalue(get_child(node, 0)); + codegen_rvalue(child0); grow_fs(-1); - lvalue_width = ref_type_width(value_type(get_child(node, 0))); + lvalue_width = ref_type_width(value_type(child0)); } else if (op == PARENS) { - lvalue_width = codegen_lvalue(get_child(node, 0)); + lvalue_width = codegen_lvalue(child0); grow_fs(-1); } else { putstr("op="); putint(op); putchar('\n'); @@ -1051,46 +1057,46 @@ int codegen_lvalue(ast node) { } else if (nb_children == 2) { if (op == '[') { - type = value_type(get_child(node, 0)); - codegen_rvalue(get_child(node, 0)); - codegen_rvalue(get_child(node, 1)); - codegen_binop('+', get_child(node, 0), get_child(node, 1)); + type = value_type(child0); + codegen_rvalue(child0); + codegen_rvalue(child1); + codegen_binop('+', child0, child1); grow_fs(-2); lvalue_width = ref_type_width(type); } else if (op == '.') { - type = value_type(get_child(node, 0)); - if (is_struct_or_union_type(type) && get_val(type) == 0) { - codegen_lvalue(get_child(node, 0)); + type = value_type(child0); + if (is_struct_or_union_type(type) && 
get_stars(type) == 0) { + codegen_lvalue(child0); pop_reg(reg_X); // union members are at the same offset: 0 if (get_op(type) == STRUCT_KW) { - add_reg_imm(reg_X, struct_member_offset(type, get_child(node, 1))); + add_reg_imm(reg_X, struct_member_offset(type, child1)); } push_reg(reg_X); grow_fs(-1); - lvalue_width = type_width_ast(get_child(struct_member(type, get_child(node, 1)), 1), true, true); // child 1 of member is the type + lvalue_width = type_width_ast(get_child_(',', struct_member(type, child1), 1), true, true); // child 1 of member is the type } else { fatal_error("codegen_lvalue: . operator on non-struct type"); } } else if (op == ARROW) { // Same as '.', but type must be a pointer - type = value_type(get_child(node, 0)); - if (is_struct_or_union_type(type) && get_val(type) == 1) { - codegen_rvalue(get_child(node, 0)); + type = value_type(child0); + if (is_struct_or_union_type(type) && get_stars(type) == 1) { + codegen_rvalue(child0); pop_reg(reg_X); // union members are at the same offset: 0 if (get_op(type) == STRUCT_KW) { - add_reg_imm(reg_X, struct_member_offset(type, get_child(node, 1))); + add_reg_imm(reg_X, struct_member_offset(type, child1)); } push_reg(reg_X); grow_fs(-1); - lvalue_width = type_width_ast(get_child(struct_member(type, get_child(node, 1)), 1), true, true); // child 1 of member is the type + lvalue_width = type_width_ast(get_child_(',', struct_member(type, child1), 1), true, true); // child 1 of member is the type } else { fatal_error("codegen_lvalue: -> operator on non-struct pointer type"); } } else if (op == CAST) { - codegen_lvalue(get_child(node, 1)); - lvalue_width = type_width_ast(get_child(node, 0), true, true); + codegen_lvalue(child1); + lvalue_width = type_width_ast(child0, true, true); grow_fs(-1); // grow_fs is called at the end of the function, so we need to decrement it here } else { fatal_error("codegen_lvalue: unknown lvalue with 2 children"); @@ -1110,7 +1116,6 @@ int codegen_lvalue(ast node) { } void 
codegen_string(int string_probe) { - int lbl = alloc_label(); char *string_start = string_pool + heap[string_probe + 1]; char *string_end = string_start + heap[string_probe + 4]; @@ -1141,21 +1146,23 @@ void codegen_rvalue(ast node) { int nb_children = get_nb_children(node); int binding; int ident; - int lbl1; - int lbl2; + int lbl1, lbl2; int left_width; - ast type1; - ast type2; + ast type1, type2; + ast child0, child1; + + if (nb_children >= 1) child0 = get_child(node, 0); + if (nb_children >= 2) child1 = get_child(node, 1); if (nb_children == 0) { if (op == INTEGER) { - mov_reg_imm(reg_X, -get_val(node)); + mov_reg_imm(reg_X, -get_val_(INTEGER, node)); push_reg(reg_X); } else if (op == CHARACTER) { - mov_reg_imm(reg_X, get_val(node)); + mov_reg_imm(reg_X, get_val_(CHARACTER, node)); push_reg(reg_X); } else if (op == IDENTIFIER) { - ident = get_val(node); + ident = get_val_(IDENTIFIER, node); binding = cgc_lookup_var(ident, cgc_locals); if (binding != 0) { mov_reg_imm(reg_X, (cgc_fs - heap[binding+4]) * word_size); @@ -1163,8 +1170,8 @@ void codegen_rvalue(ast node) { // local arrays are allocated on the stack, so no need to dereference // same thing for non-pointer structs and unions. if (get_op(heap[binding+5]) != '[' - && (get_op(heap[binding+5]) != STRUCT_KW || get_val(heap[binding+5]) != 0) - && (get_op(heap[binding+5]) != UNION_KW || get_val(heap[binding+5]) != 0)) { + && (get_op(heap[binding+5]) != STRUCT_KW || get_stars(heap[binding+5]) != 0) + && (get_op(heap[binding+5]) != UNION_KW || get_stars(heap[binding+5]) != 0)) { mov_reg_mem(reg_X, reg_X, 0); } push_reg(reg_X); @@ -1176,24 +1183,24 @@ void codegen_rvalue(ast node) { // global arrays are allocated on the stack, so no need to dereference // same thing for non-pointer structs and unions. 
if (get_op(heap[binding+5]) != '[' - && (get_op(heap[binding+5]) != STRUCT_KW || get_val(heap[binding+5]) != 0) - && (get_op(heap[binding+5]) != UNION_KW || get_val(heap[binding+5]) != 0)) { + && (get_op(heap[binding+5]) != STRUCT_KW || get_stars(heap[binding+5]) != 0) + && (get_op(heap[binding+5]) != UNION_KW || get_stars(heap[binding+5]) != 0)) { mov_reg_mem(reg_X, reg_X, 0); } push_reg(reg_X); } else { binding = cgc_lookup_enum_value(ident, cgc_globals); if (binding != 0) { - mov_reg_imm(reg_X, -get_val(heap[binding+3])); + mov_reg_imm(reg_X, -get_val_(INTEGER, heap[binding+3])); push_reg(reg_X); } else { - putstr("ident = "); putstr(string_pool+get_val(ident)); putchar('\n'); + putstr("ident = "); putstr(string_pool + probe_string(ident)); putchar('\n'); fatal_error("codegen_rvalue: identifier not found"); } } } } else if (op == STRING) { - codegen_string(get_val(node)); + codegen_string(get_val_(STRING, node)); } else { putstr("op="); putint(op); putchar('\n'); fatal_error("codegen_rvalue: unknown rvalue with nb_children == 0"); @@ -1201,27 +1208,27 @@ void codegen_rvalue(ast node) { } else if (nb_children == 1) { if (op == '*') { - codegen_rvalue(get_child(node, 0)); + codegen_rvalue(child0); pop_reg(reg_Y); grow_fs(-1); - if (is_pointer_type(value_type(get_child(node, 0)))) { - load_mem_location(reg_X, reg_Y, 0, ref_type_width(value_type(get_child(node, 0)))); + if (is_pointer_type(value_type(child0))) { + load_mem_location(reg_X, reg_Y, 0, ref_type_width(value_type(child0))); } else { fatal_error("codegen_rvalue: non-pointer is being dereferenced with *"); } push_reg(reg_X); } else if (op == '+' || op == PARENS) { - codegen_rvalue(get_child(node, 0)); + codegen_rvalue(child0); grow_fs(-1); } else if (op == '-') { - codegen_rvalue(get_child(node, 0)); + codegen_rvalue(child0); pop_reg(reg_Y); grow_fs(-1); xor_reg_reg(reg_X, reg_X); sub_reg_reg(reg_X, reg_Y); push_reg(reg_X); } else if (op == '~') { - codegen_rvalue(get_child(node, 0)); + 
codegen_rvalue(child0); pop_reg(reg_Y); grow_fs(-1); mov_reg_imm(reg_X, -1); @@ -1231,11 +1238,11 @@ void codegen_rvalue(ast node) { xor_reg_reg(reg_X, reg_X); push_reg(reg_X); grow_fs(1); - codegen_rvalue(get_child(node, 0)); - codegen_binop(EQ_EQ, new_ast0(INTEGER, 0), get_child(node, 0)); + codegen_rvalue(child0); + codegen_binop(EQ_EQ, new_ast0(INTEGER, 0), child0); grow_fs(-2); } else if (op == MINUS_MINUS_POST || op == PLUS_PLUS_POST){ - codegen_lvalue(get_child(node, 0)); + codegen_lvalue(child0); pop_reg(reg_Y); mov_reg_mem(reg_X, reg_Y, 0); push_reg(reg_X); // saves the original value of lvalue @@ -1243,13 +1250,13 @@ void codegen_rvalue(ast node) { push_reg(reg_X); // saves the value of lvalue to be modified mov_reg_imm(reg_X, 1); // Equivalent to calling codegen rvalue with INTEGER 1 (subtraction or addition handled in codegen_binop) push_reg(reg_X); - codegen_binop(op, get_child(node, 0), new_ast0(INTEGER, 0)); // Pops two values off the stack and pushes the result + codegen_binop(op, child0, new_ast0(INTEGER, 0)); // Pops two values off the stack and pushes the result pop_reg(reg_X); // result pop_reg(reg_Y); // address grow_fs(-1); mov_mem_reg(reg_Y, 0, reg_X); // Store the result in the address } else if (op == MINUS_MINUS_PRE || op == PLUS_PLUS_PRE) { - codegen_lvalue(get_child(node, 0)); + codegen_lvalue(child0); pop_reg(reg_Y); push_reg(reg_Y); mov_reg_mem(reg_X, reg_Y, 0); @@ -1258,20 +1265,20 @@ void codegen_rvalue(ast node) { mov_reg_imm(reg_X, 1); // equivalent to calling codegen rvalue with INTEGER 1 (subtraction or addition handled in codegen_binop) push_reg(reg_X); grow_fs(1); - codegen_binop(op, get_child(node, 0), new_ast0(INTEGER, 0)); // Pops two values off the stack and pushes the result + codegen_binop(op, child0, new_ast0(INTEGER, 0)); // Pops two values off the stack and pushes the result pop_reg(reg_X); // result pop_reg(reg_Y); // address grow_fs(-3); mov_mem_reg(reg_Y, 0, reg_X); //store the result in the address push_reg(reg_X); 
} else if (op == '&') { - codegen_lvalue(get_child(node, 0)); + codegen_lvalue(child0); grow_fs(-1); } else if (op == SIZEOF_KW) { - if (is_type(get_child(node, 0))) { - mov_reg_imm(reg_X, type_width_ast(get_child(node, 0), true, false)); + if (is_type(child0)) { + mov_reg_imm(reg_X, type_width_ast(child0, true, false)); } else { - mov_reg_imm(reg_X, type_width_ast(value_type(get_child(node, 0)), true, false)); + mov_reg_imm(reg_X, type_width_ast(value_type(child0), true, false)); } push_reg(reg_X); } else { @@ -1281,22 +1288,22 @@ void codegen_rvalue(ast node) { } else if (nb_children == 2) { if (op == '+' || op == '-' || op == '*' || op == '/' || op == '%' || op == '&' || op == '|' || op == '^' || op == LSHIFT || op == RSHIFT || op == '<' || op == '>' || op == EQ_EQ || op == EXCL_EQ || op == LT_EQ || op == GT_EQ || op == '[' || op == ',') { - codegen_rvalue(get_child(node, 0)); - codegen_rvalue(get_child(node, 1)); - codegen_binop(op, get_child(node, 0), get_child(node, 1)); + codegen_rvalue(child0); + codegen_rvalue(child1); + codegen_binop(op, child0, child1); grow_fs(-2); } else if (op == '=') { - type1 = value_type(get_child(node, 0)); - left_width = codegen_lvalue(get_child(node, 0)); - if (is_struct_or_union_type(type1) && get_val(type1) == 0) { + type1 = value_type(child0); + left_width = codegen_lvalue(child0); + if (is_struct_or_union_type(type1) && get_stars(type1) == 0) { // Struct assignment, we copy the struct. 
- codegen_lvalue(get_child(node, 1)); + codegen_lvalue(child1); pop_reg(reg_X); pop_reg(reg_Y); grow_fs(-2); copy_obj(reg_Y, 0, reg_X, 0, left_width); } else { - codegen_rvalue(get_child(node, 1)); + codegen_rvalue(child1); pop_reg(reg_X); pop_reg(reg_Y); grow_fs(-2); @@ -1304,14 +1311,14 @@ void codegen_rvalue(ast node) { } push_reg(reg_X); } else if (op == AMP_EQ || op == BAR_EQ || op == CARET_EQ || op == LSHIFT_EQ || op == MINUS_EQ || op == PERCENT_EQ || op == PLUS_EQ || op == RSHIFT_EQ || op == SLASH_EQ || op == STAR_EQ) { - left_width = codegen_lvalue(get_child(node, 0)); + left_width = codegen_lvalue(child0); pop_reg(reg_Y); push_reg(reg_Y); load_mem_location(reg_X, reg_Y, 0, left_width); push_reg(reg_X); grow_fs(1); - codegen_rvalue(get_child(node, 1)); - codegen_binop(op, get_child(node, 0), get_child(node, 1)); + codegen_rvalue(child1); + codegen_binop(op, child0, child1); pop_reg(reg_X); pop_reg(reg_Y); grow_fs(-3); @@ -1319,7 +1326,7 @@ void codegen_rvalue(ast node) { push_reg(reg_X); } else if (op == AMP_AMP || op == BAR_BAR) { lbl1 = alloc_label(); - codegen_rvalue(get_child(node, 0)); + codegen_rvalue(child0); pop_reg(reg_X); push_reg(reg_X); xor_reg_reg(reg_Y, reg_Y); @@ -1329,21 +1336,21 @@ void codegen_rvalue(ast node) { jump_cond_reg_reg(NE, lbl1, reg_X, reg_Y); } pop_reg(reg_X); grow_fs(-1); - codegen_rvalue(get_child(node, 1)); + codegen_rvalue(child1); grow_fs(-1); def_label(lbl1); } else if (op == '(') { codegen_call(node); } else if (op == '.') { - type1 = value_type(get_child(node, 0)); - if (is_struct_or_union_type(type1) && get_val(type1) == 0) { - type2 = get_child(struct_member(type1, get_child(node, 1)), 1); - codegen_lvalue(get_child(node, 0)); + type1 = value_type(child0); + if (is_struct_or_union_type(type1) && get_stars(type1) == 0) { + type2 = get_child_(',', struct_member(type1, child1), 1); + codegen_lvalue(child0); pop_reg(reg_Y); grow_fs(-1); // union members are at the same offset: 0 if (get_op(type1) == STRUCT_KW) { - 
add_reg_imm(reg_Y, struct_member_offset(type1, get_child(node, 1))); + add_reg_imm(reg_Y, struct_member_offset(type1, child1)); } if (!is_aggregate_type(type2)) { load_mem_location(reg_Y, reg_Y, 0, type_width_ast(type2, false, false)); @@ -1353,15 +1360,15 @@ void codegen_rvalue(ast node) { fatal_error("codegen_rvalue: . operator on non-struct type"); } } else if (op == ARROW) { - type1 = value_type(get_child(node, 0)); - if (is_struct_or_union_type(type1) && get_val(type1) == 1) { - type2 = get_child(struct_member(type1, get_child(node, 1)), 1); - codegen_rvalue(get_child(node, 0)); + type1 = value_type(child0); + if (is_struct_or_union_type(type1) && get_stars(type1) == 1) { + type2 = get_child_(',', struct_member(type1, child1), 1); + codegen_rvalue(child0); pop_reg(reg_Y); grow_fs(-1); // union members are at the same offset: 0 if (get_op(type1) == STRUCT_KW) { - add_reg_imm(reg_Y, struct_member_offset(type1, get_child(node, 1))); + add_reg_imm(reg_Y, struct_member_offset(type1, child1)); } if (!is_aggregate_type(type2)) { load_mem_location(reg_Y, reg_Y, 0, word_size); @@ -1371,7 +1378,7 @@ void codegen_rvalue(ast node) { fatal_error("codegen_rvalue: -> operator on non-struct pointer type"); } } else if (op == CAST) { - codegen_rvalue(get_child(node, 1)); + codegen_rvalue(child1); grow_fs(-1); // grow_fs(1) is called by codegen_rvalue and at the end of the function } else { fatal_error("codegen_rvalue: unknown rvalue with 2 children"); @@ -1382,12 +1389,12 @@ void codegen_rvalue(ast node) { if (op == '?') { lbl1 = alloc_label(); // false label lbl2 = alloc_label(); // end label - codegen_rvalue(get_child(node, 0)); + codegen_rvalue(child0); pop_reg(reg_X); grow_fs(-1); xor_reg_reg(reg_Y, reg_Y); jump_cond_reg_reg(EQ, lbl1, reg_X, reg_Y); - codegen_rvalue(get_child(node, 1)); // value when true + codegen_rvalue(child1); // value when true jump(lbl2); def_label(lbl1); grow_fs(-1); // here, the child#1 is not on the stack, so we adjust it @@ -1472,19 +1479,19 @@ 
void codegen_begin() { void handle_enum_struct_union_type_decl(ast type); void codegen_enum(ast node) { - ast name = get_child(node, 1); - ast cases = get_child(node, 2); + ast name = get_child_opt_(ENUM_KW, IDENTIFIER, node, 1); + ast cases = get_child_opt_(ENUM_KW, ',', node, 2); int binding; - while (get_op(cases) == ',') { - cgc_add_enum(get_val(get_child(cases, 0)), get_child(cases, 1)); - cases = get_child(cases, 2); + if (name != 0 && cases != 0) { // if enum has a name and members (not a reference to an existing type) + binding = cgc_lookup_enum(get_val_(IDENTIFIER, name), cgc_globals); + if (binding != 0) { fatal_error("codegen_enum: enum already declared"); } + cgc_add_typedef(get_val_(IDENTIFIER, name), BINDING_TYPE_ENUM, node); } - if (name != 0 && get_child(node, 2) != 0) { // if enum has a name and members (not a reference to an existing type) - binding = cgc_lookup_enum(get_val(name), cgc_globals); - if (binding != 0) { fatal_error("codegen_enum: enum already declared"); } - cgc_add_typedef(get_val(name), BINDING_TYPE_ENUM, node); + while (get_op(cases) == ',') { + cgc_add_enum(get_val_(IDENTIFIER, get_child__(',', IDENTIFIER, cases, 0)), get_child__(',', INTEGER, cases, 1)); + cases = get_child_opt_(',', ',', cases, 2); } } @@ -1493,18 +1500,18 @@ void codegen_struct_or_union(ast node, enum BINDING kind) { ast members = get_child(node, 2); int binding; - if (name != 0 && get_child(node, 2) != 0) { // if struct has a name and members (not a reference to an existing type) - binding = cgc_lookup_binding_ident(kind, get_val(name), cgc_globals); + if (name != 0 && members != 0) { // if struct has a name and members (not a reference to an existing type) + binding = cgc_lookup_binding_ident(kind, get_val_(IDENTIFIER, name), cgc_globals); if (binding != 0 && heap[binding + 3] != node) { fatal_error("codegen_struct_or_union: struct/union/enum already declared"); } - cgc_add_typedef(get_val(name), kind, node); + cgc_add_typedef(get_val_(IDENTIFIER, name), 
kind, node); } // Traverse the structure to find any other declarations. // This is not the right semantic because inner declarations are scoped to // this declaration, but it's probably good enough for TCC. while (members != 0 && get_op(members) == ',') { - handle_enum_struct_union_type_decl(get_child(members, 1)); - members = get_child(members, 2); + handle_enum_struct_union_type_decl(get_child_(',', members, 1)); + members = get_child_opt_(',', ',', members, 2); } } @@ -1521,15 +1528,14 @@ void handle_enum_struct_union_type_decl(ast type) { } void codegen_glo_var_decl(ast node) { - - ast name = get_child(node, 0); - ast type = get_child(node, 1); - ast init = get_child(node, 2); + ast name = get_child_(VAR_DECL, node, 0); + ast type = get_child_(VAR_DECL, node, 1); + ast init = get_child_(VAR_DECL, node, 2); int size; int binding = cgc_lookup_var(name, cgc_globals); if (get_op(type) == '[') { // Array declaration - size = get_val(get_child(type, 0)); + size = get_val_(INTEGER, get_child_('[', type, 0)); } else { // All non-array types have size 1 size = 1; @@ -1565,33 +1571,17 @@ void codegen_glo_var_decl(ast node) { } } -void codegen_body(ast node) { - ast decls; - ast variable; - ast x; - int save_fs = cgc_fs; - int save_locals = cgc_locals; - ast name; - ast type; - ast init; +void codegen_local_var_decl(ast node) { + ast name = get_child_(VAR_DECL, node, 0); + ast type = get_child_(VAR_DECL, node, 1); + ast init = get_child_(VAR_DECL, node, 2); int size; - if (node != 0) { - while (get_op(node) == '{') { - x = get_child(node, 0); - if (get_op(x) == VAR_DECLS) { // Variable declaration - decls = get_child(x, 0); // Declaration list - while(decls != 0) { // Multiple variable declarations - variable = get_child(decls, 0); // Single variable declaration - name = get_child(variable, 0); - type = get_child(variable, 1); - init = get_child(variable, 2); - if (get_op(type) == '[') { // Array declaration size = type_width_ast(type, true, true); // size in bytes (word 
aligned) grow_stack_bytes(size); size /= word_size; // size in words - } else if (is_struct_or_union_type(type) && get_val(type) == 0) { + } else if (is_struct_or_union_type(type) && get_stars(type) == 0) { size = struct_union_size(type); // size in bytes (word aligned) grow_stack_bytes(size); size /= word_size; // size in words @@ -1608,20 +1598,31 @@ void codegen_body(ast node) { size = 1; } cgc_add_local_var(name, size, type); - decls = get_child(decls, 1); // Move to the next declaration in the list - } +} +void codegen_body(ast node) { + int save_fs = cgc_fs; + int save_locals = cgc_locals; + ast stmt; + ast decls; + + while (node != 0) { + stmt = get_child_('{', node, 0); + if (get_op(stmt) == VAR_DECLS) { // Variable declaration + decls = get_child__(VAR_DECLS, ',', stmt, 0); // Declaration list + while(decls != 0) { // Multiple variable declarations + codegen_local_var_decl(get_child__(',', VAR_DECL, decls, 0)); + decls = get_child_opt_(',', ',', decls, 1); // Move to the next declaration in the list + } } else { - codegen_statement(x); + codegen_statement(stmt); } - node = get_child(node, 1); + node = get_child_opt_('{', '{', node, 1); } - grow_stack(save_fs - cgc_fs); cgc_fs = save_fs; cgc_locals = save_locals; - } } void codegen_statement(ast node) { @@ -1639,15 +1640,15 @@ void codegen_statement(ast node) { lbl1 = alloc_label(); // else statement lbl2 = alloc_label(); // join point after if - codegen_rvalue(get_child(node, 0)); + codegen_rvalue(get_child_(IF_KW, node, 0)); pop_reg(reg_X); grow_fs(-1); xor_reg_reg(reg_Y, reg_Y); jump_cond_reg_reg(EQ, lbl1, reg_X, reg_Y); - codegen_statement(get_child(node, 1)); + codegen_statement(get_child_(IF_KW, node, 1)); jump(lbl2); def_label(lbl1); - codegen_statement(get_child(node, 2)); + codegen_statement(get_child_(IF_KW, node, 2)); def_label(lbl2); } else if (op == WHILE_KW) { @@ -1661,12 +1662,12 @@ void codegen_statement(ast node) { cgc_add_enclosing_loop(cgc_fs, lbl2, lbl1); def_label(lbl1); - 
codegen_rvalue(get_child(node, 0)); + codegen_rvalue(get_child_(WHILE_KW, node, 0)); pop_reg(reg_X); grow_fs(-1); xor_reg_reg(reg_Y, reg_Y); jump_cond_reg_reg(EQ, lbl2, reg_X, reg_Y); - codegen_statement(get_child(node, 1)); + codegen_statement(get_child_(WHILE_KW, node, 1)); jump(lbl1); def_label(lbl2); @@ -1684,17 +1685,17 @@ void codegen_statement(ast node) { cgc_add_enclosing_loop(cgc_fs, lbl2, lbl1); - codegen_statement(get_child(node, 0)); // init + codegen_statement(get_child_(FOR_KW, node, 0)); // init jump(lbl3); // skip post loop action def_label(lbl1); - codegen_statement(get_child(node, 2)); // post loop action + codegen_statement(get_child_(FOR_KW, node, 2)); // post loop action def_label(lbl3); - codegen_rvalue(get_child(node, 1)); // test + codegen_rvalue(get_child_(FOR_KW, node, 1)); // test pop_reg(reg_X); grow_fs(-1); xor_reg_reg(reg_Y, reg_Y); jump_cond_reg_reg(EQ, lbl2, reg_X, reg_Y); - codegen_statement(get_child(node, 3)); + codegen_statement(get_child_(FOR_KW, node, 3)); jump(lbl1); def_label(lbl2); @@ -1711,8 +1712,8 @@ void codegen_statement(ast node) { cgc_add_enclosing_loop(cgc_fs, lbl2, lbl1); def_label(lbl1); - codegen_statement(get_child(node, 0)); - codegen_rvalue(get_child(node, 1)); + codegen_statement(get_child_(DO_KW, node, 0)); + codegen_rvalue(get_child_(DO_KW, node, 1)); pop_reg(reg_X); grow_fs(-1); xor_reg_reg(reg_Y, reg_Y); @@ -1733,9 +1734,9 @@ void codegen_statement(ast node) { cgc_add_enclosing_switch(cgc_fs, lbl1, lbl2); - codegen_rvalue(get_child(node, 0)); // switch operand + codegen_rvalue(get_child_(SWITCH_KW, node, 0)); // switch operand jump(lbl2); // Jump to first case - codegen_statement(get_child(node, 1)); // switch body + codegen_statement(get_child_(SWITCH_KW, node, 1)); // switch body if (heap[lbl2 + 1] >= 0) { def_label(lbl2); // No case statement => jump to end of switch @@ -1761,12 +1762,12 @@ void codegen_statement(ast node) { def_label(heap[binding + 4]); // false jump location of previous case 
heap[binding + 4] = alloc_label(); // create false jump location for current case dup(reg_X); // duplicate switch operand for the comparison - codegen_rvalue(get_child(node, 0)); // evaluate case expression and compare it + codegen_rvalue(get_child_(CASE_KW, node, 0)); // evaluate case expression and compare it pop_reg(reg_Y); pop_reg(reg_X); grow_fs(-2); jump_cond_reg_reg(EQ, lbl1, reg_X, reg_Y); jump(heap[binding + 4]); // condition is false => jump to next case def_label(lbl1); // start of case conditional block - codegen_statement(get_child(node, 1)); // case statement + codegen_statement(get_child_(CASE_KW, node, 1)); // case statement } else { fatal_error("case outside of switch"); } @@ -1778,7 +1779,7 @@ void codegen_statement(ast node) { if (binding != 0) { def_label(heap[binding + 4]); // false jump location of previous case heap[binding + 4] = alloc_label(); // create label for next case (even if default catches all cases) - codegen_statement(get_child(node, 0)); // default statement + codegen_statement(get_child_(DEFAULT_KW, node, 0)); // default statement } else { fatal_error("default outside of switch"); } @@ -1805,8 +1806,8 @@ void codegen_statement(ast node) { } else if (op == RETURN_KW) { - if (get_child(node, 0) != 0) { - codegen_rvalue(get_child(node, 0)); + if (get_child_(RETURN_KW, node, 0) != 0) { + codegen_rvalue(get_child_(RETURN_KW, node, 0)); pop_reg(reg_X); grow_fs(-1); } @@ -1821,15 +1822,15 @@ void codegen_statement(ast node) { } else if (op == ':') { - binding = cgc_lookup_goto_label(get_val(get_child(node, 0)), cgc_locals_fun); + binding = cgc_lookup_goto_label(get_val_(IDENTIFIER, get_child_(':', node, 0)), cgc_locals_fun); if (binding == 0) { - cgc_add_goto_label(get_val(get_child(node, 0)), alloc_goto_label()); + cgc_add_goto_label(get_val_(IDENTIFIER, get_child_(':', node, 0)), alloc_goto_label()); binding = cgc_locals_fun; } def_goto_label(heap[binding + 3]); - codegen_statement(get_child(node, 1)); // labelled statement + 
codegen_statement(get_child_(':', node, 1)); // labelled statement } else if (op == GOTO_KW) { @@ -1850,32 +1851,28 @@ void add_params(ast params) { int ident; ast type; - if (params != 0) { - decl = get_child(params, 0); - ident = get_child(decl, 0); // TODO: ident is not really a child - type = get_child(decl, 1); + while (params != 0) { + decl = get_child__(',', VAR_DECL, params, 0); + ident = get_child_(VAR_DECL, decl, 0); + type = get_child_(VAR_DECL, decl, 1); - if (cgc_lookup_var(ident, cgc_locals) != 0) { - fatal_error("add_params: duplicate parameter"); - } + if (cgc_lookup_var(ident, cgc_locals) != 0) fatal_error("add_params: duplicate parameter"); cgc_add_local_param(ident, type_width_ast(type, false, true) / word_size, type); - - add_params(get_child(params, 1)); + params = get_child_opt_(',', ',', params, 1); } } void codegen_glo_fun_decl(ast node) { - - ast name = get_child(node, 0); - ast fun_type = get_child(node, 1); - ast params = get_child(node, 2); - ast body = get_child(node, 3); + ast name = get_child_(FUN_DECL, node, 0); + ast fun_type = get_child_(FUN_DECL, node, 1); + ast params = get_child_(FUN_DECL, node, 2); + ast body = get_child_opt_(FUN_DECL, '{', node, 3); int lbl; int binding; int save_locals_fun = cgc_locals_fun; - if (is_struct_or_union_type(fun_type) && get_val(fun_type) == 0) { + if (is_struct_or_union_type(fun_type) && get_stars(fun_type) == 0) { fatal_error("add_params: returning structs from function not supported"); } else if (get_op(fun_type) == '[') { fatal_error("add_params: returning arrays from function not supported"); @@ -1919,20 +1916,18 @@ void codegen_glo_fun_decl(ast node) { void codegen_glo_decl(ast node) { ast decls; - ast variable; int op = get_op(node); if (op == VAR_DECLS) { - decls = get_child(node, 0); // Declaration list + decls = get_child__(VAR_DECLS, ',', node, 0); // Declaration list while (decls != 0) { // Multiple variable declarations - variable = get_child(decls, 0); // Single variable declaration 
- codegen_glo_var_decl(variable); // Process each variable declaration - decls = get_child(decls, 1); // Move to the next variable declaration in the list + codegen_glo_var_decl(get_child__(',', VAR_DECL, decls, 0)); + decls = get_child_opt_(',', ',', decls, 1); // Next variable declaration } } else if (op == FUN_DECL) { codegen_glo_fun_decl(node); } else if (op == TYPEDEF_KW) { - handle_enum_struct_union_type_decl(get_child(node, 1)); + handle_enum_struct_union_type_decl(get_child_(TYPEDEF_KW, node, 1)); } else if (op == ENUM_KW || op == STRUCT_KW || op == UNION_KW) { handle_enum_struct_union_type_decl(node); } else { diff --git a/pnut.c b/pnut.c index 2ebe14cc..c98055b8 100644 --- a/pnut.c +++ b/pnut.c @@ -11,6 +11,11 @@ #define false 0 #define EOF (-1) +#ifdef SAFE_MODE +#define INCLUDE_LINE_NUMBER_ON_ERROR +#define NICE_ERR_MSG +#endif + #ifdef RELEASE_PNUT_SH #define sh #define RT_NO_INIT_GLOBALS @@ -313,6 +318,129 @@ ast get_nb_children(ast node) { return heap[node] >> 10; } +// Because everything is an int in pnut, it's easy to make mistakes and pass the +// wrong node type to a function. These versions of get_child take the input +// and/or output node type and checks that the node has the expected type before +// returning the child node. +// It also checks that the index is within bounds. 
+#ifdef SAFE_MODE +int get_val_checked(char* file, int line, ast node) { + if (get_nb_children(node) != 0) { + printf("%s:%d: get_val called on node %d with %d children\n", file, line, get_op(node), get_nb_children(node)); + exit(1); + } + return heap[node+1]; +} + +int get_val_go(char* file, int line, int expected_node, ast node) { + if (get_op(node) != expected_node) { + printf("%s:%d: Expected node %d, got %d\n", file, line, expected_node, get_op(node)); + exit(1); + } + return get_val_checked(file, line, node); +} + +void set_val_checked(char* file, int line, ast node, int val) { + if (get_nb_children(node) != 0) { + printf("%s:%d: set_val called on node %d with %d children\n", file, line, get_op(node), get_nb_children(node)); + exit(1); + } + heap[node+1] = val; +} + +ast get_child_checked(char* file, int line, ast node, int i) { + if (i != 0 && i >= get_nb_children(node)) { + printf("%s:%d: Index %d out of bounds for node %d\n", file, line, i, get_op(node)); + exit(1); + } + return heap[node+i+1]; +} + +void set_child_checked(char* file, int line, ast node, int i, ast child) { + if (i != 0 && i >= get_nb_children(node)) { + printf("%s:%d: Index %d out of bounds for node %d\n", file, line, i, get_op(node)); + exit(1); + } + heap[node+i+1] = child; +} + +// This function checks that the parent node has the expected operator before +// returning the child node. +ast get_child_go(char* file, int line, int expected_parent_node, ast node, int i) { + if (get_op(node) != expected_parent_node) { + printf("%s:%d: Expected node %d, got %d\n", file, line, expected_parent_node, get_op(node)); + exit(1); + } + return get_child_checked(file, line, node, i); +} + +// This function checks that the parent node has the expected operator and that +// the child node has the expected operator before returning the child node. 
+ast get_child__go(char* file, int line, int expected_parent_node, int expected_node, ast node, int i) { + if (get_op(node) != expected_parent_node) { + printf("%s:%d: Expected node %d, got %d\n", file, line, expected_parent_node, get_op(node)); + exit(1); + } + if (get_op(heap[node+i+1]) != expected_node) { + printf("%s:%d: Expected child node %d, got %d\n", file, line, expected_node, get_op(heap[node+i+1])); + exit(1); + } + return get_child_checked(file, line, node, i); +} + +// This function checks that the parent node has the expected operator and that +// the child node has the expected operator (if child node is not 0) before +// returning the child node. +ast get_child_opt_go(char* file, int line, int expected_parent_node, int expected_node, ast node, int i) { + if (get_op(node) != expected_parent_node) { + printf("%s:%d: Expected node %d, got %d\n", file, line, expected_parent_node, get_op(node)); + exit(1); + } + if (heap[node+i+1] > 0 && get_op(heap[node+i+1]) != expected_node) { + printf("%s:%d: Expected child node %d, got %d\n", file, line, expected_node, get_op(heap[node+i+1])); + exit(1); + } + return get_child_checked(file, line, node, i); +} + +#define get_val(node) get_val_checked(__FILE__, __LINE__, node) +#define get_val_(expected_node, node) get_val_go(__FILE__, __LINE__, expected_node, node) +#define set_val(node, val) set_val_checked(__FILE__, __LINE__, node, val) +#define set_child(node, i, child) set_child_checked(__FILE__, __LINE__, node, i, child) +#define get_child(node, i) get_child_checked(__FILE__, __LINE__, node, i) +#define get_child_(expected_parent_node, node, i) get_child_go(__FILE__, __LINE__, expected_parent_node, node, i) +#define get_child__(expected_parent_node, expected_node, node, i) get_child__go(__FILE__, __LINE__, expected_parent_node, expected_node, node, i) +#define get_child_opt_(expected_parent_node, expected_node, node, i) get_child_opt_go(__FILE__, __LINE__, expected_parent_node, expected_node, node, i) + +int 
get_stars(ast type) { + switch (get_op(type)) { + case INT_KW: + return get_child_(INT_KW, type, 0); + case CHAR_KW: + return get_child_(CHAR_KW, type, 0); + case VOID_KW: + return get_child_(VOID_KW, type, 0); + case ENUM_KW: + return get_child_(ENUM_KW, type, 0); + case STRUCT_KW: + return get_child_(STRUCT_KW, type, 0); + case UNION_KW: + return get_child_(UNION_KW, type, 0); + case '[': + return get_child_('[', type, 0); + default: + printf("get_stars: unexpected type: %d\n", get_op(type)); + exit(1); + return 0; + } +} + +void set_stars(ast type, int stars) { + set_child(type, 0, stars); +} + +#else + int get_val(ast node) { return heap[node+1]; } @@ -329,6 +457,21 @@ void set_child(ast node, int i, ast child) { heap[node+i+1] = child; } +#define get_val_(expected_node, node) get_val(node) +#define get_child_(expected_parent_node, node, i) get_child(node, i) +#define get_child__(expected_parent_node, expected_node, node, i) get_child(node, i) +#define get_child_opt_(expected_parent_node, expected_node, node, i) get_child(node, i) + +int get_stars(ast type) { + return get_child(type, 0); +} + +void set_stars(ast type, int stars) { + set_child(type, 0, stars); +} + +#endif + ast ast_result; ast new_ast0(int op, int val) { @@ -503,6 +646,10 @@ int end_ident() { return probe; } +int probe_string(int probe) { + return heap[probe+1]; // return the start of the string +} + void get_tok(); void get_ident(); void expect_tok(int expected); @@ -616,6 +763,7 @@ void get_ch() { include_stack = include_stack->next; fp = include_stack->fp; #ifdef INCLUDE_LINE_NUMBER_ON_ERROR + fp_filepath = include_stack->filepath; line_number = include_stack->line_number; column_number = include_stack->column_number; #endif @@ -995,27 +1143,31 @@ int eval_constant(ast expr, bool if_macro) { int op = get_op(expr); int op1; int op2; + ast child0, child1; + + if (get_nb_children(expr) >= 1) child0 = get_child(expr, 0); + if (get_nb_children(expr) >= 2) child1 = get_child(expr, 1); switch (op) 
{ - case PARENS: return eval_constant(get_child(expr, 0), if_macro); - case INTEGER: return -get_val(expr); - case CHARACTER: return get_val(expr); - case '~': return ~eval_constant(get_child(expr, 0), if_macro); - case '!': return !eval_constant(get_child(expr, 0), if_macro); + case PARENS: return eval_constant(child0, if_macro); + case INTEGER: return -get_val_(INTEGER, expr); + case CHARACTER: return get_val_(CHARACTER, expr); + case '~': return ~eval_constant(child0, if_macro); + case '!': return !eval_constant(child0, if_macro); case '-': case '+': - op1 = eval_constant(get_child(expr, 0), if_macro); + op1 = eval_constant(child0, if_macro); if (get_nb_children(expr) == 1) { return op == '-' ? -op1 : op1; } else { - op2 = eval_constant(get_child(expr, 1), if_macro); + op2 = eval_constant(child1, if_macro); return op == '-' ? op1 - op2 : op1 + op2; } case '?': - op1 = eval_constant(get_child(expr, 0), if_macro); + op1 = eval_constant(child0, if_macro); if (op1) { - return eval_constant(get_child(expr, 1), if_macro); + return eval_constant(child1, if_macro); } else { return eval_constant(get_child(expr, 2), if_macro); } @@ -1034,8 +1186,8 @@ int eval_constant(ast expr, bool if_macro) { case GT_EQ: case '<': case '>': - op1 = eval_constant(get_child(expr, 0), if_macro); - op2 = eval_constant(get_child(expr, 1), if_macro); + op1 = eval_constant(child0, if_macro); + op2 = eval_constant(child1, if_macro); switch (op) { case '*': return op1 * op2; case '/': return op1 / op2; @@ -1055,18 +1207,18 @@ int eval_constant(ast expr, bool if_macro) { return 0; // Should never reach here case AMP_AMP: - op1 = eval_constant(get_child(expr, 0), if_macro); + op1 = eval_constant(child0, if_macro); if (!op1) return 0; - else return eval_constant(get_child(expr, 1), if_macro); + else return eval_constant(child1, if_macro); case BAR_BAR: - op1 = eval_constant(get_child(expr, 0), if_macro); + op1 = eval_constant(child0, if_macro); if (op1) return 1; - else return 
eval_constant(get_child(expr, 1), if_macro); + else return eval_constant(child1, if_macro); case '(': // defined operators are represented as fun calls - if (if_macro && get_val(get_child(expr, 0)) == DEFINED_ID) { - return get_child(expr, 1) == MACRO; + if (if_macro && get_val_(IDENTIFIER, child0) == DEFINED_ID) { + return child1 == MACRO; } else { fatal_error("unknown function call in constant expressions"); return 0; @@ -2196,7 +2348,7 @@ int parse_stars_for_type(int type) { // We don't want to mutate types that are typedef'ed, so making a copy of the type obj if (stars != 0) { type = clone_ast(type); - set_val(type, stars); + set_child(type, 0, stars); } return type; @@ -2272,7 +2424,7 @@ ast parse_enum() { tail = result; } else { set_child(tail, 2, new_ast3(',', ident, value, 0)); - tail = get_child(tail, 2); + tail = get_child_(',', tail, 2); } if (tok == ',') { @@ -2318,7 +2470,7 @@ ast parse_struct_or_union(int struct_or_union_tok) { type = parse_type_with_stars(); - if (get_val(type) == 0 && get_op(type) == VOID_KW) + if (get_op(type) == VOID_KW && get_stars(type) == 0) parse_error("variable with void type", tok); ident = 0; // Anonymous struct @@ -2335,9 +2487,9 @@ ast parse_struct_or_union(int struct_or_union_tok) { type = new_ast2('[', new_ast0(INTEGER, 0), type); get_tok(); } else if (tok == INTEGER) { - type = new_ast2('[', new_ast0(INTEGER, -val), type); - get_tok(); - expect_tok(']'); + type = new_ast2('[', new_ast0(INTEGER, -val), type); + get_tok(); + expect_tok(']'); } else { parse_error("array size must be an integer constant", tok); } @@ -2354,7 +2506,7 @@ ast parse_struct_or_union(int struct_or_union_tok) { tail = result; } else { set_child(tail, 2, new_ast3(',', ident, type, 0)); - tail = get_child(tail, 2); + tail = get_child_(',', tail, 2); } } @@ -2375,7 +2527,7 @@ ast parse_param_decl() { type = parse_type_with_stars(); name = val; expect_tok(IDENTIFIER); - if (get_val(type) == 0 && get_op(type) == VOID_KW) parse_error("variable with void 
type", tok); + if (get_op(type) == VOID_KW && get_stars(type) == 0) parse_error("variable with void type", tok); result = new_ast3(VAR_DECL, name, type, 0); } else if (tok == IDENTIFIER) { // Support K&R param syntax in function definition @@ -2476,7 +2628,7 @@ ast parse_definition(int local) { } else { - if (get_val(this_type) == 0 && get_op(this_type) == VOID_KW) { + if (get_op(this_type) == VOID_KW && get_stars(this_type) == 0) { parse_error("variable with void type", tok); } @@ -2508,7 +2660,7 @@ ast parse_definition(int local) { tail = result; // Keep track of the last declaration } else { set_child(tail, 1, new_ast2(',', current_declaration, 0)); // Link the new declaration to the last one - tail = get_child(tail, 1); // Update the last declaration + tail = get_child_(',', tail, 1); // Update the last declaration } if (tok == ';') { @@ -2543,7 +2695,7 @@ ast parse_definition(int local) { // need the name of a struct/union/enum to compile sizeof and typedef'ed structures // don't always have a name. 
if (get_op(type) == STRUCT_KW || get_op(type) == UNION_KW || get_op(type) == ENUM_KW) { - if (get_child(type, 1) != 0 && get_val(get_child(type, 1)) != val) { + if (get_child(type, 1) != 0 && get_val_(IDENTIFIER, get_child(type, 1)) != val) { syntax_error("typedef name must match struct/union/enum name"); } set_child(type, 1, new_ast0(IDENTIFIER, val)); @@ -2600,7 +2752,7 @@ ast parse_primary_expression() { get_tok(); if (tok == STRING) { // Contiguous strings - result = cons(get_val(result), 0); // Result is now a list of string values + result = cons(get_val_(STRING, result), 0); // Result is now a list of string values tail = result; while (tok == STRING) { set_cdr(tail, cons(val, 0)); diff --git a/run-tests.sh b/run-tests.sh index 22d4c67f..9800ce23 100755 --- a/run-tests.sh +++ b/run-tests.sh @@ -19,6 +19,9 @@ fi : ${PNUT_OPTIONS:=} # Default to empty options backend=$1; shift bootstrap=0 +safe=0 +fast=0 +compile_only=0 shell="$SHELL" # Use current shell as the default pattern=".*" while [ $# -gt 0 ]; do @@ -26,6 +29,9 @@ while [ $# -gt 0 ]; do --shell) shell="$2"; shift 2;; --match) pattern="$2"; shift 2;; --bootstrap) bootstrap=1; shift 1;; + --safe) safe=1; shift 1;; + --fast) fast=1; shift 1;; + --compile-only) compile_only=1; shift 1;; *) echo "Unknown option: $1"; exit 1;; esac done @@ -46,6 +52,19 @@ case "$backend" in ;; esac +if [ "$safe" -eq 1 ]; then + # Enable safe mode which checks get_child accesses + PNUT_EXE_OPTIONS="$PNUT_EXE_OPTIONS -DSAFE_MODE" +fi + +if [ "$fast" -eq 1 ]; then + if [ "$backend" != "sh" ]; then + fail "Fast mode is only supported for the sh backend" + fi + # Enable fast mode (defines SH_SAVE_VARS_WITH_SET) + PNUT_EXE_OPTIONS="$PNUT_EXE_OPTIONS -DSH_SAVE_VARS_WITH_SET" +fi + # Compile pnut, either using gcc or with pnut itself.
Set pnut_comp to the compiled pnut executable # The compiled pnut executable is cached in the tests folder to speed up the process compile_pnut() { # extra pnut compilation options: $1 @@ -204,6 +223,11 @@ run_test() { # file_to_test: $1 compile_test "$file" > "$dir/$filename.$ext" 2> "$dir/$filename.err" if [ $? -eq 0 ]; then # If compilation was successful + if [ "$compile_only" -eq 1 ]; then + echo "✅ Compiled $file" + return 0 + fi + chmod +x "$dir/$filename.$ext" execute_test "$dir/$filename.$ext" "$(test_timeout $file)" "$(test_args $file)" > "$dir/$filename.output" 2> "$dir/$filename.err" if [ $? -eq 0 ]; then # If the executable ran successfully diff --git a/sh.c b/sh.c index 5a582340..b9531afd 100644 --- a/sh.c +++ b/sh.c @@ -422,21 +422,21 @@ void print_glo_decls() { text format_special_var(ast ident, ast prefixed_with_dollar) { int op = get_op(ident); if (op == IDENTIFIER_INTERNAL) { - return string_concat(wrap_str_lit("__t"), get_val(ident)); + return string_concat(wrap_str_lit("__t"), get_val_(IDENTIFIER_INTERNAL, ident)); } else if (op == IDENTIFIER_STRING) { - return string_concat(wrap_str_lit("__str_"), get_val(ident)); + return string_concat(wrap_str_lit("__str_"), get_val_(IDENTIFIER_STRING, ident)); } else if (op == IDENTIFIER_DOLLAR) { if (prefixed_with_dollar) { - if (get_val(ident) <= 9) { - return wrap_int(get_val(ident)); + if (get_val_(IDENTIFIER_DOLLAR, ident) <= 9) { + return wrap_int(get_val_(IDENTIFIER_DOLLAR, ident)); } else { - return string_concat3(wrap_char('{'), wrap_int(get_val(ident)), wrap_char('}')); + return string_concat3(wrap_char('{'), wrap_int(get_val_(IDENTIFIER_DOLLAR, ident)), wrap_char('}')); } } else { - if (get_val(ident) <= 9) { - return string_concat(wrap_char('$'), wrap_int(get_val(ident))); + if (get_val_(IDENTIFIER_DOLLAR, ident) <= 9) { + return string_concat(wrap_char('$'), wrap_int(get_val_(IDENTIFIER_DOLLAR, ident))); } else { - return string_concat3(wrap_str_lit("${"), wrap_int(get_val(ident)), 
wrap_char('}')); + return string_concat3(wrap_str_lit("${"), wrap_int(get_val_(IDENTIFIER_DOLLAR, ident)), wrap_char('}')); } } } else if (op == IDENTIFIER_EMPTY) { @@ -449,33 +449,28 @@ text format_special_var(ast ident, ast prefixed_with_dollar) { } text struct_member_var(ast member_name_ident) { - return string_concat(wrap_str_lit("__"), wrap_str_pool(get_val(get_val(member_name_ident)))); + return string_concat(wrap_str_lit("__"), wrap_str_pool(probe_string(get_val_(IDENTIFIER, member_name_ident)))); } text struct_sizeof_var(ast struct_name_ident) { - return string_concat(wrap_str_lit("__sizeof__"), wrap_str_pool(get_val(get_val(struct_name_ident)))); + return string_concat(wrap_str_lit("__sizeof__"), wrap_str_pool(probe_string(get_val_(IDENTIFIER, struct_name_ident)))); } -text global_var(ast ident_tok) { - return string_concat(wrap_char('_'), wrap_str_pool(get_val(ident_tok))); +text global_var(ast ident) { + return string_concat(wrap_char('_'), wrap_str_pool(probe_string(ident))); } text env_var_with_prefix(ast ident, ast prefixed_with_dollar) { if (get_op(ident) == IDENTIFIER) { - if (cgc_lookup_var(get_val(ident), cgc_locals)) { + if (cgc_lookup_var(get_val_(IDENTIFIER, ident), cgc_locals)) { // TODO: Constant param optimization - // if (get_child(var, 2) == KIND_PARAM) { - // res = wrap_int(get_child(var, 1)); - // if (!prefixed_with_dollar) res = string_concat(wrap_char('$'), res); - // } else { - if (get_val(ident) == ARGV_ID) { - return wrap_str_lit("argv_"); - } else { - return wrap_str_pool(get_val(get_val(ident))); - } - // } + if (get_val_(IDENTIFIER, ident) == ARGV_ID) { + return wrap_str_lit("argv_"); + } else { + return wrap_str_pool(probe_string(get_val_(IDENTIFIER, ident))); + } } else { - return global_var(get_val(ident)); + return global_var(get_val_(IDENTIFIER, ident)); } } else { return format_special_var(ident, prefixed_with_dollar); @@ -487,7 +482,7 @@ text env_var(ast ident) { } text function_name(int ident_tok) { - return 
string_concat(wrap_char('_'), wrap_str_pool(get_val(ident_tok))); + return string_concat(wrap_char('_'), wrap_str_pool(probe_string(ident_tok))); } ast fresh_ident() { @@ -513,24 +508,22 @@ ast fresh_string_ident(int string_probe) { } void add_var_to_local_env(ast decl, enum BINDING kind) { - int ident_tok = get_child(decl, 0); + int ident_probe = get_child_(VAR_DECL, decl, 0); // Make sure we're not shadowing an existing local variable - if (cgc_lookup_var(ident_tok, cgc_locals)) { - putstr("var="); putstr(string_pool + get_val(ident_tok)); putchar('\n'); + if (cgc_lookup_var(ident_probe, cgc_locals)) { + putstr("var="); putstr(string_pool + probe_string(ident_probe)); putchar('\n'); fatal_error("Variable is already in local environment"); } // The var is not part of the environment, so we add it. - cgc_add_local_var(kind, ident_tok, get_child(decl, 1)); + cgc_add_local_var(kind, ident_probe, get_child_(VAR_DECL, decl, 1)); } void add_fun_params_to_local_env(ast lst) { - ast decl; while (lst != 0) { - decl = get_child(lst, 0); - add_var_to_local_env(decl, BINDING_PARAM_LOCAL); - lst = get_child(lst, 1); + add_var_to_local_env(get_child__(',', VAR_DECL, lst, 0), BINDING_PARAM_LOCAL); + lst = get_child_opt_(',', ',', lst, 1); } } @@ -540,9 +533,9 @@ void add_fun_params_to_local_env(ast lst) { // // Also, the shell backend doesn't support variables with aggregate types. 
void assert_var_decl_is_safe(ast variable, bool local) { // Helper function for assert_idents_are_safe - ast ident_tok = get_child(variable, 0); - char* name = string_pool + get_val(ident_tok); - ast type = get_child(variable, 1); + ast ident_probe = get_child_(VAR_DECL, variable, 0); + char* name = string_pool + probe_string(ident_probe); + ast type = get_child_(VAR_DECL, variable, 1); if (name[0] == '_' || (name[0] != '\0' && name[1] == '_' && name[2] == '\0')) { // Check for a_ variables that could conflict with character constants printf("%s ", name); @@ -552,14 +545,14 @@ void assert_var_decl_is_safe(ast variable, bool local) { // Helper function for // IFS is a special shell variable that's overwritten by certain. // In zsh, writing to argv assigns to $@, so we map argv to argv_, and forbid argv_. // This check only applies to local variables because globals are prefixed with _. - if (local && (ident_tok == ARGV__ID || ident_tok == IFS_ID)) { + if (local && (ident_probe == ARGV__ID || ident_probe == IFS_ID)) { printf("%s ", name); fatal_error("variable name is invalid. It can't be 'IFS' or 'argv_'."); } // Local variables don't correspond to memory locations, and can't store // more than 1 number/pointer. - if (local && (get_op(type) == '[' || (get_op(type) == STRUCT_KW && get_val(type) == 0))) { + if (local && (get_op(type) == '[' || (get_op(type) == STRUCT_KW && get_stars(type) == 0))) { printf("%s ", name); fatal_error("array/struct value type is not supported for shell backend. 
Use a reference type instead."); } @@ -567,8 +560,8 @@ void assert_var_decl_is_safe(ast variable, bool local) { // Helper function for void check_param_decls(ast lst) { while (lst != 0) { - assert_var_decl_is_safe(get_child(lst, 0), true); - lst = get_child(lst, 1); + assert_var_decl_is_safe(get_child__(',', VAR_DECL, lst, 0), true); + lst = get_child_(',', lst, 1); } } @@ -660,12 +653,9 @@ text let_params(int params) { while (params != 0) { // TODO: Constant param optimization - // local_var = find_var_in_local_env(get_child(get_child(params, 0), 0), local_env); - // if (!variable_is_constant_param(local_var)) { - ident = new_ast0(IDENTIFIER, get_child(get_child(params, 0), 0)); + ident = new_ast0(IDENTIFIER, get_child_(VAR_DECL, get_child__(',', VAR_DECL, params, 0), 0)); res = concatenate_strings_with(res, string_concat4(wrap_str_lit("let "), env_var_with_prefix(ident, false), wrap_char(' '), format_special_var(new_ast0(IDENTIFIER_DOLLAR, params_ix), false)), wrap_str_lit("; ")); - // } - params = get_child(params, 1); + params = get_child_opt_(',', ',', params, 1); params_ix += 1; } @@ -849,8 +839,7 @@ bool contains_side_effects = 0; ast handle_fun_call_side_effect(ast node, ast assign_to, bool executes_conditionally) { int start_gensym_ix = gensym_ix; - ast sub1; - ast sub2; + ast sub1, sub2; if (assign_to == 0) { assign_to = fresh_ident(); // Unique identifier for the function call @@ -862,16 +851,16 @@ ast handle_fun_call_side_effect(ast node, ast assign_to, bool executes_condition gensym_ix -= 1; } - // Traverse the arguments and replace them with the result of handle_side_effects_go - // sub1 is the parent node of the current argument - sub2 = get_child(node, 1); + // Traverse the arguments and replace them with the result of + // handle_side_effects_go sub is the parent node of the current argument + sub2 = get_child_('(', node, 1); if (sub2 != 0) { // Check if not an empty list sub1 = node; // For 1 param, the parent node is the fun call node // If there 
are 2 or more params, we traverse the ',' nodes ... while (get_op(sub2) == ',') { - sub1 = sub2; // ... and the parent node is the ',' node - set_child(sub1, 0, handle_side_effects_go(get_child(sub2, 0), executes_conditionally)); - sub2 = get_child(sub2, 1); + sub1 = sub2;; // .. and the parent node is the ',' node + set_child(sub1, 0, handle_side_effects_go(get_child_(',', sub2, 0), executes_conditionally)); + sub2 = get_child_(',', sub2, 1); } // Handle the last argument set_child(sub1, 1, handle_side_effects_go(sub2, executes_conditionally)); @@ -881,7 +870,7 @@ ast handle_fun_call_side_effect(ast node, ast assign_to, bool executes_condition // reused after the function call, so resetting the gensym counter. gensym_ix = start_gensym_ix; - sub1 = new_ast2(',', assign_to, node); + sub1 = new_ast2('=', assign_to, node); sub1 = new_ast2(',', sub1, 0); if (executes_conditionally) { if (conditional_fun_calls == 0) { conditional_fun_calls = sub1; } @@ -904,20 +893,23 @@ ast handle_fun_call_side_effect(ast node, ast assign_to, bool executes_condition ast handle_side_effects_go(ast node, bool executes_conditionally) { int op = get_op(node); int nb_children = get_nb_children(node); - ast sub1; - ast sub2; + ast sub1, sub2; ast previous_conditional_fun_calls; - ast left_conditional_fun_calls; - ast right_conditional_fun_calls; + ast left_conditional_fun_calls, right_conditional_fun_calls; int start_gensym_ix = gensym_ix; + ast child0, child1, child2; + + if (nb_children >= 1) { child0 = get_child(node, 0); } + if (nb_children >= 2) { child1 = get_child(node, 1); } + if (nb_children >= 3) { child2 = get_child(node, 2); } if (nb_children == 0) { if (op == IDENTIFIER || op == IDENTIFIER_INTERNAL || op == IDENTIFIER_STRING || op == IDENTIFIER_DOLLAR || op == INTEGER || op == CHARACTER) { return node; } else if (op == STRING) { /* We must initialize strings before the expression */ - sub1 = fresh_string_ident(get_val(node)); - literals_inits = new_ast2(',', new_ast2(',', sub1, 
get_val(node)), literals_inits); + sub1 = fresh_string_ident(get_val_(STRING, node)); + literals_inits = new_ast2(',', new_ast2('=', sub1, get_val_(STRING, node)), literals_inits); return sub1; } else { printf("handle_side_effects_go: op=%d %c", op, op); @@ -927,10 +919,10 @@ ast handle_side_effects_go(ast node, bool executes_conditionally) { } else if (nb_children == 1) { if (op == '&' || op == '*' || op == '+' || op == '-' || op == '~' || op == '!' || op == PARENS) { // TODO: Reuse ast node? - return new_ast1(op, handle_side_effects_go(get_child(node, 0), executes_conditionally)); + return new_ast1(op, handle_side_effects_go(child0, executes_conditionally)); } else if (op == PLUS_PLUS_PRE || op == MINUS_MINUS_PRE || op == PLUS_PLUS_POST || op == MINUS_MINUS_POST) { contains_side_effects = true; - return new_ast1(op, handle_side_effects_go(get_child(node, 0), executes_conditionally)); + return new_ast1(op, handle_side_effects_go(child0, executes_conditionally)); } else if (op == SIZEOF_KW) { return node; // sizeof is a compile-time operator } else { @@ -942,42 +934,42 @@ ast handle_side_effects_go(ast node, bool executes_conditionally) { if (op == '(') { // Function call return handle_fun_call_side_effect(node, 0, executes_conditionally); } else if (op == '=') { - if (get_op(get_child(node, 1)) == '(') { // Function call + if (get_op(child1) == '(') { // Function call // In that case, we reuse the left hand side of the assignment as the result location - return handle_fun_call_side_effect(get_child(node, 1), get_child(node, 0), executes_conditionally); + return handle_fun_call_side_effect(child1, child0, executes_conditionally); } else { - sub1 = handle_side_effects_go(get_child(node, 0), executes_conditionally); - sub2 = handle_side_effects_go(get_child(node, 1), executes_conditionally); // We could inline that one since the assignment to the global variable is done after the last handle_side_effects_go call + sub1 = handle_side_effects_go(child0, 
executes_conditionally); + sub2 = handle_side_effects_go(child1, executes_conditionally); // We could inline that one since the assignment to the global variable is done after the last handle_side_effects_go call return new_ast2(op, sub1, sub2); } } else if (op == '&' || op == '|' || op == '<' || op == '>' || op == '+' || op == '-' || op == '*' || op == '/' || op == '%' || op == '^' || op == ',' || op == EQ_EQ || op == EXCL_EQ || op == LT_EQ || op == GT_EQ || op == LSHIFT || op == RSHIFT || op == '[' || op == '.' || op == ARROW ) { - sub1 = handle_side_effects_go(get_child(node, 0), executes_conditionally); - sub2 = handle_side_effects_go(get_child(node, 1), executes_conditionally); // We could inline that one since the assignment to the global variable is done after the last handle_side_effects_go call + sub1 = handle_side_effects_go(child0, executes_conditionally); + sub2 = handle_side_effects_go(child1, executes_conditionally); // We could inline that one since the assignment to the global variable is done after the last handle_side_effects_go call return new_ast2(op, sub1, sub2); } else if (op == AMP_EQ || op == BAR_EQ || op == CARET_EQ || op == LSHIFT_EQ || op == MINUS_EQ || op == PERCENT_EQ || op == PLUS_EQ || op == RSHIFT_EQ || op == SLASH_EQ || op == STAR_EQ) { // Just like previous case, except that we update contains_side_effects contains_side_effects = true; - sub1 = handle_side_effects_go(get_child(node, 0), executes_conditionally); - sub2 = handle_side_effects_go(get_child(node, 1), executes_conditionally); // We could inline that one since the assignment to the global variable is done after the last handle_side_effects_go call + sub1 = handle_side_effects_go(child0, executes_conditionally); + sub2 = handle_side_effects_go(child1, executes_conditionally); // We could inline that one since the assignment to the global variable is done after the last handle_side_effects_go call return new_ast2(op, sub1, sub2); } else if (op == AMP_AMP || op == BAR_BAR) { 
previous_conditional_fun_calls = conditional_fun_calls; conditional_fun_calls = 0; // The left side is always executed, unless the whole expression is executed conditionally. // We could compile it as always executed, but it makes the Shell code less regular so we compile it conditionally. - sub1 = handle_side_effects_go(get_child(node, 0), true); + sub1 = handle_side_effects_go(child0, true); gensym_ix = start_gensym_ix; // Reset gensym counter because the 2 sides are independent left_conditional_fun_calls = conditional_fun_calls; conditional_fun_calls = 0; - sub2 = handle_side_effects_go(get_child(node, 1), true); + sub2 = handle_side_effects_go(child1, true); gensym_ix = start_gensym_ix; // Reset gensym counter because the 2 sides are independent right_conditional_fun_calls = conditional_fun_calls; conditional_fun_calls = previous_conditional_fun_calls; return new_ast4(op, sub1, sub2, left_conditional_fun_calls, right_conditional_fun_calls); } else if (op == CAST) { - return new_ast2(CAST, get_child(node, 0), handle_side_effects_go(get_child(node, 1), executes_conditionally)); + return new_ast2(CAST, child0, handle_side_effects_go(child1, executes_conditionally)); } else { printf("2: op=%d %c", op, op); fatal_error("unexpected operator"); @@ -987,16 +979,16 @@ ast handle_side_effects_go(ast node, bool executes_conditionally) { if (op == '?') { previous_conditional_fun_calls = conditional_fun_calls; conditional_fun_calls = 0; - sub1 = handle_side_effects_go(get_child(node, 1), true); + sub1 = handle_side_effects_go(child1, true); left_conditional_fun_calls = conditional_fun_calls; conditional_fun_calls = 0; - sub2 = handle_side_effects_go(get_child(node, 2), true); + sub2 = handle_side_effects_go(child2, true); right_conditional_fun_calls = conditional_fun_calls; if (left_conditional_fun_calls != 0 || right_conditional_fun_calls != 0) { fatal_error("Conditional function calls in ternary operator not allowed"); } - return new_ast3('?', 
handle_side_effects_go(get_child(node, 0), executes_conditionally), sub1, sub2); + return new_ast3('?', handle_side_effects_go(child0, executes_conditionally), sub1, sub2); } else { printf("3: op=%d %c\n", op, op); fatal_error("unexpected operator"); @@ -1047,15 +1039,16 @@ enum VALUE_CTX { }; text with_prefixed_side_effects(ast test_side_effects, text code) { - text test_side_effects_code = 0; + ast side_effect; while (test_side_effects != 0) { + side_effect = get_child__(',', '=', test_side_effects, 0); test_side_effects_code = string_concat3(test_side_effects_code, - comp_fun_call_code(get_child(get_child(test_side_effects, 0), 1), get_child(get_child(test_side_effects, 0), 0)), + comp_fun_call_code(get_child_('=', side_effect, 1), get_child_('=', side_effect, 0)), wrap_str_lit("; ")); - test_side_effects = get_child(test_side_effects, 1); + test_side_effects = get_child_(',', test_side_effects, 1); } if (test_side_effects_code != 0) { return string_concat4(wrap_str_lit("{ "), test_side_effects_code, code, wrap_str_lit("; }")); @@ -1097,7 +1090,7 @@ text wrap_if_needed(int parens_otherwise, int context, ast test_side_effects, te int non_parenthesized_operand(ast node) { while (get_op(node) == PARENS) { - node = get_child(node, 0); + node = get_child_(PARENS, node, 0); } return node; @@ -1116,21 +1109,25 @@ text wrap_in_condition_if_needed(int context, ast test_side_effects, text code) text comp_rvalue_go(ast node, int context, ast test_side_effects, int outer_op) { int op = get_op(node); int nb_children = get_nb_children(node); - text sub1; - text sub2; - text sub3; + text sub1, sub2, sub3; + ast child0, child1, child2, child3; + + if (nb_children >= 1) { child0 = get_child(node, 0); } + if (nb_children >= 2) { child1 = get_child(node, 1); } + if (nb_children >= 3) { child2 = get_child(node, 2); } + if (nb_children >= 4) { child3 = get_child(node, 3); } if (nb_children == 0) { if (op == INTEGER) { - return wrap_in_condition_if_needed(context, test_side_effects, 
wrap_int(-get_val(node))); + return wrap_in_condition_if_needed(context, test_side_effects, wrap_int(-get_val_(INTEGER, node))); } else if (op == CHARACTER) { #ifdef SH_INLINE_CHAR_LITERAL - return wrap_in_condition_if_needed(context, test_side_effects, wrap_int(get_val(node))); + return wrap_in_condition_if_needed(context, test_side_effects, wrap_int(get_val_(CHARACTER, node))); #else if (context == RVALUE_CTX_ARITH_EXPANSION) { - return character_ident(get_val(node)); + return character_ident(get_val_(CHARACTER, node)); } else { - return wrap_in_condition_if_needed(context, test_side_effects, string_concat(wrap_char('$'), character_ident(get_val(node)))); + return wrap_in_condition_if_needed(context, test_side_effects, string_concat(wrap_char('$'), character_ident(get_val_(CHARACTER, node)))); } #endif } else if (op == IDENTIFIER || op == IDENTIFIER_INTERNAL || op == IDENTIFIER_STRING || op == IDENTIFIER_DOLLAR) { @@ -1149,54 +1146,54 @@ text comp_rvalue_go(ast node, int context, ast test_side_effects, int outer_op) // Setting context to RVALUE_CTX_BASE even if it's wrapped in $(( ... )) because we // need another layer of wrapping if it's a complex expression, i.e. not a // literal or a variable. - sub1 = comp_rvalue_go(get_child(node, 0), RVALUE_CTX_BASE, 0, op); + sub1 = comp_rvalue_go(child0, RVALUE_CTX_BASE, 0, op); return wrap_if_needed(false, context, test_side_effects, string_concat(wrap_char('_'), sub1), outer_op, op); } else if (op == '+' || op == PARENS) { // +x is equivalent to x - return comp_rvalue_go(get_child(node, 0), context, test_side_effects, outer_op); + return comp_rvalue_go(child0, context, test_side_effects, outer_op); } else if (op == '-') { // Check if the rest of ast is a literal, if so directly return the negated value. // Note: I think this can be simplified by not wrapped in () in the else case. 
- if (get_op(get_child(node, 0)) == INTEGER) { - return wrap_in_condition_if_needed(context, test_side_effects, wrap_int(get_val(get_child(node, 0)))); + if (get_op(child0) == INTEGER) { + return wrap_in_condition_if_needed(context, test_side_effects, wrap_int(get_val_(INTEGER, child0))); } else { - sub1 = comp_rvalue_go(get_child(node, 0), RVALUE_CTX_ARITH_EXPANSION, 0, op); + sub1 = comp_rvalue_go(child0, RVALUE_CTX_ARITH_EXPANSION, 0, op); return wrap_if_needed(false, context, test_side_effects, string_concat3(wrap_str_lit("-("), sub1, wrap_char(')')), outer_op, op); } } else if (op == '~') { - sub1 = comp_rvalue_go(get_child(node, 0), RVALUE_CTX_ARITH_EXPANSION, 0, op); + sub1 = comp_rvalue_go(child0, RVALUE_CTX_ARITH_EXPANSION, 0, op); return wrap_if_needed(false, context, test_side_effects, string_concat3(wrap_str_lit("~("), sub1, wrap_char(')')), outer_op, op); } else if (op == '!') { - sub1 = comp_rvalue_go(get_child(node, 0), RVALUE_CTX_ARITH_EXPANSION, 0, op); + sub1 = comp_rvalue_go(child0, RVALUE_CTX_ARITH_EXPANSION, 0, op); return wrap_if_needed(true, context, test_side_effects, string_concat(wrap_char('!'), sub1), outer_op, op); } else if (op == MINUS_MINUS_PRE) { - sub1 = comp_lvalue(get_child(node, 0)); + sub1 = comp_lvalue(child0); return wrap_if_needed(true, context, test_side_effects, string_concat(sub1, wrap_str_lit(" -= 1")), outer_op, op); } else if (op == PLUS_PLUS_PRE) { - sub1 = comp_lvalue(get_child(node, 0)); + sub1 = comp_lvalue(child0); return wrap_if_needed(true, context, test_side_effects, string_concat(sub1, wrap_str_lit(" += 1")), outer_op, op); } else if (op == MINUS_MINUS_POST) { - sub1 = comp_lvalue(get_child(node, 0)); + sub1 = comp_lvalue(child0); return wrap_if_needed(false, context, test_side_effects,string_concat4(wrap_char('('), sub1, wrap_str_lit(" -= 1)"), wrap_str_lit(" + 1")), outer_op, '+'); } else if (op == PLUS_PLUS_POST) { - sub1 = comp_lvalue(get_child(node, 0)); + sub1 = comp_lvalue(child0); return 
wrap_if_needed(false, context, test_side_effects, string_concat4(wrap_char('('), sub1, wrap_str_lit(" += 1)"), wrap_str_lit(" - 1")), outer_op, '-'); } else if (op == SIZEOF_KW) { - if (get_op(get_child(node, 0)) == INT_KW - || get_op(get_child(node, 0)) == CHAR_KW - || get_op(get_child(node, 0)) == VOID_KW - || get_op(get_child(node, 0)) == ENUM_KW - || (( get_op(get_child(node, 0)) == STRUCT_KW || get_op(get_child(node, 0)) == UNION_KW) - && get_child(get_child(node, 0), 0) >= 1)) { // If it's a pointer + if (get_op(child0) == INT_KW + || get_op(child0) == CHAR_KW + || get_op(child0) == VOID_KW + || get_op(child0) == ENUM_KW + || (( get_op(child0) == STRUCT_KW || get_op(child0) == UNION_KW) + && get_child(child0, 0) >= 1)) { // If it's a pointer return wrap_in_condition_if_needed(context, test_side_effects, wrap_int(1)); - } else if (get_op(get_child(node, 0)) == STRUCT_KW) { - return wrap_if_needed(false, context, test_side_effects, struct_sizeof_var(get_child(get_child(node, 0), 1)), outer_op, op); + } else if (get_op(child0) == STRUCT_KW) { + return wrap_if_needed(false, context, test_side_effects, struct_sizeof_var(get_child__(STRUCT_KW, IDENTIFIER, child0, 1)), outer_op, op); } else { fatal_error("comp_rvalue_go: sizeof is not supported for this type or expression"); return 0; } } else if (op == '&') { - return wrap_if_needed(false, context, test_side_effects, comp_lvalue_address(get_child(node, 0)), outer_op, op); + return wrap_if_needed(false, context, test_side_effects, comp_lvalue_address(child0), outer_op, op); } else { printf("1: op=%d %c", op, op); fatal_error("comp_rvalue_go: unexpected operator"); @@ -1204,33 +1201,33 @@ text comp_rvalue_go(ast node, int context, ast test_side_effects, int outer_op) } } else if (nb_children == 2) { if (op == '+' || op == '-' || op == '*' || op == '/' || op == '%' || op == '&' || op == '|' || op == '^' || op == LSHIFT || op == RSHIFT || op == ',') { - sub1 = comp_rvalue_go(get_child(node, 0), 
RVALUE_CTX_ARITH_EXPANSION, 0, op); - sub2 = comp_rvalue_go(get_child(node, 1), RVALUE_CTX_ARITH_EXPANSION, 0, op); + sub1 = comp_rvalue_go(child0, RVALUE_CTX_ARITH_EXPANSION, 0, op); + sub2 = comp_rvalue_go(child1, RVALUE_CTX_ARITH_EXPANSION, 0, op); return wrap_if_needed(true, context, test_side_effects, string_concat3(sub1, op_to_str(op), sub2), outer_op, op); } else if (op == '=' || op == AMP_EQ || op == BAR_EQ || op == CARET_EQ || op == LSHIFT_EQ || op == MINUS_EQ || op == PERCENT_EQ || op == PLUS_EQ || op == RSHIFT_EQ || op == SLASH_EQ || op == STAR_EQ) { - sub1 = comp_lvalue(get_child(node, 0)); - sub2 = comp_rvalue_go(get_child(node, 1), RVALUE_CTX_ARITH_EXPANSION, 0, op); + sub1 = comp_lvalue(child0); + sub2 = comp_rvalue_go(child1, RVALUE_CTX_ARITH_EXPANSION, 0, op); return wrap_if_needed(true, context, test_side_effects, string_concat3(sub1, op_to_str(op), sub2), outer_op, op); } else if (op == '[') { // array indexing - sub1 = comp_rvalue_go(get_child(node, 0), RVALUE_CTX_ARITH_EXPANSION, 0, '+'); - sub2 = comp_rvalue_go(get_child(node, 1), RVALUE_CTX_ARITH_EXPANSION, 0, '+'); + sub1 = comp_rvalue_go(child0, RVALUE_CTX_ARITH_EXPANSION, 0, '+'); + sub2 = comp_rvalue_go(child1, RVALUE_CTX_ARITH_EXPANSION, 0, '+'); return wrap_if_needed(false, context, test_side_effects, string_concat5(wrap_str_lit("_$(("), sub1, wrap_str_lit(" + "), sub2, wrap_str_lit("))")), outer_op, op); } else if (op == ARROW) { // member access is implemented like array access - sub1 = comp_rvalue_go(get_child(node, 0), RVALUE_CTX_ARITH_EXPANSION, 0, op); - sub2 = struct_member_var(get_child(node, 1)); + sub1 = comp_rvalue_go(child0, RVALUE_CTX_ARITH_EXPANSION, 0, op); + sub2 = struct_member_var(child1); return wrap_if_needed(false, context, test_side_effects, string_concat5(wrap_str_lit("_$(("), sub1, wrap_str_lit(" + "), sub2, wrap_str_lit("))")), outer_op, op); } else if (op == EQ_EQ || op == EXCL_EQ || op == LT_EQ || op == GT_EQ || op == '<' || op == '>') { if (context == 
RVALUE_CTX_TEST) { - sub1 = comp_rvalue_go(get_child(node, 0), RVALUE_CTX_BASE, 0, op); - sub2 = comp_rvalue_go(get_child(node, 1), RVALUE_CTX_BASE, 0, op); + sub1 = comp_rvalue_go(child0, RVALUE_CTX_BASE, 0, op); + sub2 = comp_rvalue_go(child1, RVALUE_CTX_BASE, 0, op); return with_prefixed_side_effects(test_side_effects, string_concat5(wrap_str_lit("[ "), sub1, test_op_to_str(op), sub2, wrap_str_lit(" ]"))); } else { - sub1 = comp_rvalue_go(get_child(node, 0), RVALUE_CTX_ARITH_EXPANSION, 0, op); - sub2 = comp_rvalue_go(get_child(node, 1), RVALUE_CTX_ARITH_EXPANSION, 0, op); + sub1 = comp_rvalue_go(child0, RVALUE_CTX_ARITH_EXPANSION, 0, op); + sub2 = comp_rvalue_go(child1, RVALUE_CTX_ARITH_EXPANSION, 0, op); return wrap_if_needed(true, context, test_side_effects, string_concat3(sub1, op_to_str(op), sub2), outer_op, op); } } else if (op == CAST) { // Casts are no-op - return comp_rvalue_go(get_child(node, 1), context, 0, op); + return comp_rvalue_go(child1, context, 0, op); } else if (op == AMP_AMP || op == BAR_BAR) { fatal_error("comp_rvalue_go: && and || should have 4 children by that point"); return 0; @@ -1240,9 +1237,9 @@ text comp_rvalue_go(ast node, int context, ast test_side_effects, int outer_op) } } else if (nb_children == 3) { if (op == '?') { - sub1 = comp_rvalue_go(get_child(node, 0), RVALUE_CTX_ARITH_EXPANSION, 0, op); - sub2 = comp_rvalue_go(get_child(node, 1), RVALUE_CTX_ARITH_EXPANSION, 0, op); - sub3 = comp_rvalue_go(get_child(node, 2), RVALUE_CTX_ARITH_EXPANSION, 0, op); + sub1 = comp_rvalue_go(child0, RVALUE_CTX_ARITH_EXPANSION, 0, op); + sub2 = comp_rvalue_go(child1, RVALUE_CTX_ARITH_EXPANSION, 0, op); + sub3 = comp_rvalue_go(child2, RVALUE_CTX_ARITH_EXPANSION, 0, op); return wrap_if_needed(true, context, test_side_effects, string_concat5(sub1, op_to_str(op), sub2, wrap_str_lit(": "), sub3), outer_op, op); return 0; } else { @@ -1265,31 +1262,31 @@ text comp_rvalue_go(ast node, int context, ast test_side_effects, int outer_op) // // As a 
heuristic, we add parenthesis whenever the left or right side of // the operator is a different comparison operator. - sub1 = non_parenthesized_operand(get_child(node, 0)); // un-parenthesized lhs - sub2 = non_parenthesized_operand(get_child(node, 1)); // un-parenthesized rhs + sub1 = non_parenthesized_operand(child0); // un-parenthesized lhs + sub2 = non_parenthesized_operand(child1); // un-parenthesized rhs // if lhs is && or ||, and different from the current operator if ((get_op(sub1) == AMP_AMP || get_op(sub1) == BAR_BAR) && get_op(sub1) != op) { - sub1 = comp_rvalue_go(sub1, RVALUE_CTX_TEST, get_child(node, 2), op); + sub1 = comp_rvalue_go(sub1, RVALUE_CTX_TEST, child2, op); sub1 = string_concat3(wrap_str_lit("{ "), sub1, wrap_str_lit("; }")); } else { - sub1 = comp_rvalue_go(sub1, RVALUE_CTX_TEST, get_child(node, 2), op); + sub1 = comp_rvalue_go(sub1, RVALUE_CTX_TEST, child2, op); } // if rhs is && or ||, and different from the current operator if ((get_op(sub2) == AMP_AMP || get_op(sub2) == BAR_BAR) && get_op(sub2) != op) { - sub2 = comp_rvalue_go(sub2, RVALUE_CTX_TEST, get_child(node, 3), op); + sub2 = comp_rvalue_go(sub2, RVALUE_CTX_TEST, child3, op); sub2 = string_concat3(wrap_str_lit("{ "), sub2, wrap_str_lit("; }")); } else { - sub2 = comp_rvalue_go(sub2, RVALUE_CTX_TEST, get_child(node, 3), op); + sub2 = comp_rvalue_go(sub2, RVALUE_CTX_TEST, child3, op); } return string_concat3(sub1, op_to_str(op), sub2); } else { - if (test_side_effects != 0 || get_child(node, 2) != 0 || get_child(node, 3) != 0) { + if (test_side_effects != 0 || child2 != 0 || child3 != 0) { fatal_error("comp_rvalue_go: && and || with function calls can only be used in tests"); } - sub1 = comp_rvalue_go(get_child(node, 0), RVALUE_CTX_ARITH_EXPANSION, 0, op); - sub2 = comp_rvalue_go(get_child(node, 1), RVALUE_CTX_ARITH_EXPANSION, 0, op); + sub1 = comp_rvalue_go(child0, RVALUE_CTX_ARITH_EXPANSION, 0, op); + sub2 = comp_rvalue_go(child1, RVALUE_CTX_ARITH_EXPANSION, 0, op); return 
wrap_if_needed(true, context, test_side_effects, string_concat3(sub1, op_to_str(op), sub2), outer_op, op); } } else { @@ -1311,13 +1308,15 @@ text comp_rvalue(ast node, int context) { int contains_side_effects2 = contains_side_effects; int fun_call_decl_start; text result; + ast side_effect; // Capture the start of the side effects to be able to undo them if needed fun_call_decl_start = glo_decl_ix; while (literals_inits != 0) { - comp_defstr(get_child(get_child(literals_inits, 0), 0), get_child(get_child(literals_inits, 0), 1)); - literals_inits = get_child(literals_inits, 1); + side_effect = get_child__(',', '=', literals_inits, 0); + comp_defstr(get_child_('=', side_effect, 0), get_child_('=', side_effect, 1)); + literals_inits = get_child_opt_(',', ',', literals_inits, 1); } // We don't want to call defstr on every iteration, so we only capture fun @@ -1328,8 +1327,9 @@ text comp_rvalue(ast node, int context) { fun_call_decl_start = glo_decl_ix; while (replaced_fun_calls2 != 0) { - comp_fun_call(get_child(get_child(replaced_fun_calls2, 0), 1), get_child(get_child(replaced_fun_calls2, 0), 0)); - replaced_fun_calls2 = get_child(replaced_fun_calls2, 1); + side_effect = get_child__(',', '=', replaced_fun_calls2, 0); + comp_fun_call(get_child_('=', side_effect, 1), get_child_('=', side_effect, 0)); + replaced_fun_calls2 = get_child_opt_(',', ',', replaced_fun_calls2, 1); } // When compiling a test, we place the function side effects inline with the condition. 
@@ -1372,17 +1372,17 @@ text comp_lvalue_address(ast node) { fatal_error("comp_rvalue_go: can't take the address of a local variable"); return 0; } else if (op == '[') { - sub1 = comp_rvalue(get_child(node, 0), RVALUE_CTX_ARITH_EXPANSION); - sub2 = comp_rvalue(get_child(node, 1), RVALUE_CTX_ARITH_EXPANSION); + sub1 = comp_rvalue(get_child_('[', node, 0), RVALUE_CTX_ARITH_EXPANSION); + sub2 = comp_rvalue(get_child_('[', node, 1), RVALUE_CTX_ARITH_EXPANSION); return string_concat3(sub1, wrap_str_lit(" + "), sub2); } else if (op == '*') { - return comp_rvalue(get_child(node, 0), RVALUE_CTX_BASE); + return comp_rvalue(get_child_('*', node, 0), RVALUE_CTX_BASE); } else if (op == ARROW) { - sub1 = comp_rvalue(get_child(node, 0), RVALUE_CTX_ARITH_EXPANSION); - sub2 = struct_member_var(get_child(node, 1)); + sub1 = comp_rvalue(get_child_(ARROW, node, 0), RVALUE_CTX_ARITH_EXPANSION); + sub2 = struct_member_var(get_child_(ARROW, node, 1)); return string_concat3(sub1, wrap_str_lit(" + "), sub2); } else if (op == CAST) { - return comp_lvalue_address(get_child(node, 1)); + return comp_lvalue_address(get_child_(CAST, node, 1)); } else { printf("op=%d %c\n", op, op); fatal_error("comp_lvalue_address: unknown lvalue"); @@ -1398,18 +1398,18 @@ text comp_lvalue(ast node) { if (op == IDENTIFIER || op == IDENTIFIER_INTERNAL || op == IDENTIFIER_STRING || op == IDENTIFIER_EMPTY || op == IDENTIFIER_DOLLAR) { return env_var(node); } else if (op == '[') { - sub1 = comp_rvalue(get_child(node, 0), RVALUE_CTX_ARITH_EXPANSION); - sub2 = comp_rvalue(get_child(node, 1), RVALUE_CTX_ARITH_EXPANSION); + sub1 = comp_rvalue(get_child_('[', node, 0), RVALUE_CTX_ARITH_EXPANSION); + sub2 = comp_rvalue(get_child_('[', node, 1), RVALUE_CTX_ARITH_EXPANSION); return string_concat5(wrap_str_lit("_$(("), sub1, wrap_str_lit(" + "), sub2, wrap_str_lit("))")); } else if (op == '*') { - sub1 = comp_rvalue(get_child(node, 0), RVALUE_CTX_BASE); + sub1 = comp_rvalue(get_child_('*', node, 0), RVALUE_CTX_BASE); return 
string_concat(wrap_char('_'), sub1); } else if (op == ARROW) { - sub1 = comp_rvalue(get_child(node, 0), RVALUE_CTX_ARITH_EXPANSION); - sub2 = struct_member_var(get_child(node, 1)); + sub1 = comp_rvalue(get_child_(ARROW, node, 0), RVALUE_CTX_ARITH_EXPANSION); + sub2 = struct_member_var(get_child_(ARROW, node, 1)); return string_concat5(wrap_str_lit("_$(("), sub1, wrap_str_lit(" + "), sub2, wrap_str_lit("))")); } else if (op == CAST) { - return comp_lvalue(get_child(node, 1)); + return comp_lvalue(get_child_(CAST, node, 1)); } else { printf("op=%d %c\n", op, op); fatal_error("comp_lvalue: unknown lvalue"); @@ -1423,9 +1423,9 @@ text fun_call_params(ast params) { if (params != 0) { // Check if not an empty list while (get_op(params) == ',') { - param = comp_rvalue(get_child(params, 0), RVALUE_CTX_BASE); + param = comp_rvalue(get_child_(',', params, 0), RVALUE_CTX_BASE); code_params = concatenate_strings_with(code_params, param, wrap_char(' ')); - params = get_child(params, 1); + params = get_child_(',', params, 1); } param = comp_rvalue(params, RVALUE_CTX_BASE); // Last parameter code_params = concatenate_strings_with(code_params, param, wrap_char(' ')); @@ -1448,8 +1448,8 @@ text comp_putchar_inline(ast param) { text res; ast ident; - if (get_op(param) == CHARACTER && get_val(param) >= 32 && get_val(param) <= 126) { // Printable ASCII characters - return string_concat3(wrap_str_lit("printf \""), escape_text(wrap_char(get_val(param)), true), wrap_char('\"')); + if (get_op(param) == CHARACTER && get_val_(CHARACTER, param) >= 32 && get_val_(CHARACTER, param) <= 126) { // Printable ASCII characters + return string_concat3(wrap_str_lit("printf \""), escape_text(wrap_char(get_val_(CHARACTER, param)), true), wrap_char('\"')); } res = comp_rvalue(param, RVALUE_CTX_ARITH_EXPANSION); @@ -1512,12 +1512,10 @@ void handle_printf_call(char *format_str, ast params) { while (*format_str != '\0') { // Param is consumed, get the next one - // printf("param=%d, params=%d\n", 
get_op(param), get_op(params)); - // printf("param=%d %d, params=%d %d\n", get_op(param), param, get_op(params), params); if (param == 0 && params != 0) { if (get_op(params) == ',') { - param = get_child(params, 0); - params = get_child(params, 1); + param = get_child_(',', params, 0); + params = get_child_(',', params, 1); } else { param = params; params = 0; @@ -1643,18 +1641,18 @@ void handle_printf_call(char *format_str, ast params) { #endif text comp_fun_call_code(ast node, ast assign_to) { - ast name = get_child(node, 0); - ast params = get_child(node, 1); - int name_id = get_val(name); + ast name = get_child__('(', IDENTIFIER, node, 0); + ast params = get_child_('(', node, 1); + int name_id = get_val_(IDENTIFIER, name); text res; #ifdef SH_AVOID_PRINTF_USE if (get_op(assign_to) == IDENTIFIER_EMPTY) { if (((name_id == PUTS_ID || name_id == PUTSTR_ID || name_id == PRINTF_ID) && params != 0 && get_op(params) == STRING)) { // puts("..."), putstr("..."), printf("...") - return printf_call(STRING_BUF(get_val(params)), 0, 0, true); + return printf_call(STRING_BUF(get_val_(STRING, params)), 0, 0, true); } else if (name_id == PRINTF_ID && get_op(get_child(params, 0)) == STRING) { - handle_printf_call(STRING_BUF(get_val(get_child(params, 0))), get_child(params, 1)); + handle_printf_call(STRING_BUF(get_val_(STRING, get_child(params, 0))), get_child(params, 1)); return 0; } #ifdef SH_INLINE_PUTCHAR @@ -1686,7 +1684,7 @@ text comp_fun_call_code(ast node, ast assign_to) { else if (name_id == CLOSE_ID) { runtime_use_close = true; } return string_concat3( - function_name(get_val(name)), + function_name(get_val_(IDENTIFIER, name)), wrap_char(' '), concatenate_strings_with(comp_lvalue(assign_to), fun_call_params(params), wrap_char(' ')) ); @@ -1694,8 +1692,7 @@ text comp_fun_call_code(ast node, ast assign_to) { void comp_fun_call(ast node, ast assign_to) { text res = comp_fun_call_code(node, assign_to); - if (res) - append_glo_decl(res); + if (res) append_glo_decl(res); } 
void comp_assignment(ast lhs, ast rhs) { @@ -1731,9 +1728,9 @@ bool comp_body(ast node, STMT_CTX stmt_ctx) { while (node != 0) { // Last statement of body is in tail position if the body itself is in tail position - if (get_op(get_child(node, 1)) != '{') in_tail_position = start_in_tail_position; - if (comp_statement(get_child(node, 0), stmt_ctx)) break; // Statement always returns => block is terminated - node = get_child(node, 1); + if (get_op(get_child_('{', node, 1)) != '{') in_tail_position = start_in_tail_position; + if (comp_statement(get_child_('{', node, 0), stmt_ctx)) break; // Statement always returns => block is terminated + node = get_child_('{', node, 1); } cgc_locals = start_cgc_locals; @@ -1754,7 +1751,7 @@ text make_switch_pattern(ast statement) { switch (get_op(statement)) { case DEFAULT_KW: str = wrap_char('*'); - statement = get_child(statement, 0); + statement = get_child_(DEFAULT_KW, statement, 0); break; case CASE_KW: @@ -1762,8 +1759,8 @@ text make_switch_pattern(ast statement) { // but Shell allows matching on arbitrary expression in case // patterns so it's fine. If we wanted to do this right, we'd check // that the pattern is a numeric literal or an enum identifier. 
- str = concatenate_strings_with(str, comp_rvalue(get_child(statement, 0), RVALUE_CTX_BASE), wrap_char('|')); - statement = get_child(statement, 1); + str = concatenate_strings_with(str, comp_rvalue(get_child_(CASE_KW, statement, 0), RVALUE_CTX_BASE), wrap_char('|')); + statement = get_child_(CASE_KW, statement, 1); break; default: @@ -1780,20 +1777,20 @@ bool comp_switch(ast node) { append_glo_decl(string_concat3( wrap_str_lit("case "), - comp_rvalue(get_child(node, 0), RVALUE_CTX_BASE), + comp_rvalue(get_child_(SWITCH_KW, node, 0), RVALUE_CTX_BASE), wrap_str_lit(" in") )); cgc_add_enclosing_switch(in_tail_position); nest_level += 1; - node = get_child(node, 1); + node = get_child_(SWITCH_KW, node, 1); if (node == 0 || get_op(node) != '{') fatal_error("comp_statement: switch without body"); while (get_op(node) == '{') { - statement = get_child(node, 0); - node = get_child(node, 1); + statement = get_child_('{', node, 0); + node = get_child_('{', node, 1); append_glo_decl(make_switch_pattern(statement)); statement = last_stmt; // last_stmt is set by make_switch_pattern @@ -1808,8 +1805,8 @@ bool comp_switch(ast node) { // Case and default nodes contain the first statement of the block so we process that one first. if (!comp_statement(statement, STMT_CTX_SWITCH)) { while (get_op(node) == '{') { - statement = get_child(node, 0); - node = get_child(node, 1); + statement = get_child_('{', node, 0); + node = get_child_('{', node, 1); if (comp_statement(statement, STMT_CTX_SWITCH)) break; } } @@ -1848,26 +1845,26 @@ bool comp_if(ast node, STMT_CTX stmt_ctx) { append_glo_decl(string_concat3( wrap_str_lit(else_if ? "elif " : "if "), - comp_rvalue(get_child(node, 0), else_if ? RVALUE_CTX_TEST_ELSEIF : RVALUE_CTX_TEST), + comp_rvalue(get_child_(IF_KW, node, 0), else_if ? 
RVALUE_CTX_TEST_ELSEIF : RVALUE_CTX_TEST), wrap_str_lit(" ; then") )); nest_level += 1; start_glo_decl_idx = glo_decl_ix; - termination_lhs = comp_statement(get_child(node, 1), stmt_ctx); + termination_lhs = comp_statement(get_child_(IF_KW, node, 1), stmt_ctx); // ifs cannot be empty so we insert ':' if it's empty if (!any_active_glo_decls(start_glo_decl_idx)) append_glo_decl(wrap_char(':')); nest_level -= 1; - if (get_child(node, 2) != 0) { + if (get_child_(IF_KW, node, 2) != 0) { // Compile sequence of if else if using elif - if (get_op(get_child(node, 2)) == IF_KW) { - termination_rhs = comp_if(get_child(node, 2), stmt_ctx | STMT_CTX_ELSE_IF); // STMT_CTX_ELSE_IF => next if stmt will use elif + if (get_op(get_child_(IF_KW, node, 2)) == IF_KW) { + termination_rhs = comp_if(get_child_(IF_KW, node, 2), stmt_ctx | STMT_CTX_ELSE_IF); // STMT_CTX_ELSE_IF => next if stmt will use elif } else { append_glo_decl(wrap_str_lit("else")); nest_level += 1; start_glo_decl_idx = glo_decl_ix; - termination_rhs = comp_statement(get_child(node, 2), stmt_ctx & ~STMT_CTX_ELSE_IF); // Clear STMT_CTX_ELSE_IF bit + termination_rhs = comp_statement(get_child_(IF_KW, node, 2), stmt_ctx & ~STMT_CTX_ELSE_IF); // Clear STMT_CTX_ELSE_IF bit if (!any_active_glo_decls(start_glo_decl_idx)) append_glo_decl(wrap_char(':')); nest_level -= 1; } @@ -1994,22 +1991,20 @@ bool comp_return(ast return_value) { void comp_var_decls(ast node) { ast var_decl; - node = get_child(node, 0); + node = get_child_opt_(VAR_DECLS, ',', node, 0); while (node != 0) { // Add to local env and cummulative env, then initialize - var_decl = get_child(node, 0); - // printf("Adding var %s\n", string_pool + get_val(get_child(var_decl, 0))); + var_decl = get_child__(',', VAR_DECL, node, 0); add_var_to_local_env(var_decl, BINDING_VAR_LOCAL); - if (get_child(var_decl, 2) != 0) { - comp_assignment(new_ast0(IDENTIFIER, get_child(var_decl, 0)), get_child(var_decl, 2)); + if (get_child_(VAR_DECL, var_decl, 2) != 0) { // Initializer + 
comp_assignment(new_ast0(IDENTIFIER, get_child_(VAR_DECL, var_decl, 0)), get_child_(VAR_DECL, var_decl, 2)); } #ifdef INITIALIZE_LOCAL_VARS_WITH_ZERO else { - comp_assignment(new_ast0(IDENTIFIER, get_child(var, 0)), new_ast0(INTEGER, 0)); + comp_assignment(new_ast0(IDENTIFIER, get_child_(VAR_DECL, var, 0)), new_ast0(INTEGER, 0)); } #endif - // TODO: Cummulative env - node = get_child(node, 1); // Next variable + node = get_child_opt_(',', ',', node, 1); // Next variable } } @@ -2028,30 +2023,30 @@ bool comp_statement(ast node, STMT_CTX stmt_ctx) { if (op == IF_KW) { return comp_if(node, stmt_ctx); } else if (op == WHILE_KW) { - return comp_loop(comp_rvalue(get_child(node, 0), RVALUE_CTX_TEST), - get_child(node, 1), + return comp_loop(comp_rvalue(get_child_(WHILE_KW, node, 0), RVALUE_CTX_TEST), + get_child_(WHILE_KW, node, 1), 0, // No loop end statement 0, // No last line stmt_ctx ); } else if (op == DO_KW) { return comp_loop(wrap_str_lit(":"), - get_child(node, 0), + get_child_(DO_KW, node, 0), 0, // No loop end statement - string_concat(comp_rvalue(get_child(node, 1), RVALUE_CTX_TEST), wrap_str_lit(" || break")), + string_concat(comp_rvalue(get_child_(DO_KW, node, 1), RVALUE_CTX_TEST), wrap_str_lit(" || break")), stmt_ctx ); } else if (op == FOR_KW) { - comp_statement(get_child(node, 0), STMT_CTX_DEFAULT); // Assuming this statement never returns... + comp_statement(get_child_(FOR_KW, node, 0), STMT_CTX_DEFAULT); // Assuming this statement never returns... 
str = wrap_char(':'); // Empty statement - if (get_child(node, 1)) { - str = comp_rvalue(get_child(node, 1), RVALUE_CTX_TEST); + if (get_child_(FOR_KW, node, 1)) { + str = comp_rvalue(get_child_(FOR_KW, node, 1), RVALUE_CTX_TEST); } return comp_loop(str, - get_child(node, 3), // Body - get_child(node, 2), // End of loop statement + get_child_(FOR_KW, node, 3), // Body + get_child_(FOR_KW, node, 2), // End of loop statement 0, // No last line stmt_ctx ); @@ -2062,20 +2057,20 @@ bool comp_statement(ast node, STMT_CTX stmt_ctx) { } else if (op == CONTINUE_KW) { return comp_continue(); // Continue to next iteration of loop } else if (op == RETURN_KW) { - return comp_return(get_child(node, 0)); + return comp_return(get_child_(RETURN_KW, node, 0)); } else if (op == '(') { // six.call comp_fun_call(node, new_ast0(IDENTIFIER_EMPTY, 0)); // Reuse IDENTIFIER_EMPTY ast? return false; } else if (op == '{') { // six.compound return comp_body(node, stmt_ctx); } else if (op == '=') { // six.x=y - comp_assignment(get_child(node, 0), get_child(node, 1)); + comp_assignment(get_child_('=', node, 0), get_child_('=', node, 1)); return false; } else if (op == ':') { // Labelled statement are not very useful as gotos are not supported in the // Shell backend, but we still emit a label comment for readability. 
- append_glo_decl(string_concat3(wrap_str_lit("# "), wrap_str_pool(get_val(get_val(get_child(node, 0)))), wrap_char(':'))); - return comp_statement(get_child(node, 1), stmt_ctx); + append_glo_decl(string_concat3(wrap_str_lit("# "), wrap_str_pool(probe_string(get_val_(IDENTIFIER, get_child_(':', node, 0)))), wrap_char(':'))); + return comp_statement(get_child_(':', node, 1), stmt_ctx); } else if (op == GOTO_KW) { fatal_error("goto statements not supported"); return false; @@ -2095,10 +2090,10 @@ bool comp_statement(ast node, STMT_CTX stmt_ctx) { } void comp_glo_fun_decl(ast node) { - ast name = get_child(node, 0); - ast fun_type = get_child(node, 1); - ast params = get_child(node, 2); - ast body = get_child(node, 3); + ast name = get_child_(FUN_DECL, node, 0); + ast fun_type = get_child_(FUN_DECL, node, 1); + ast params = get_child_opt_(FUN_DECL, ',', node, 2); + ast body = get_child_opt_(FUN_DECL, '{', node, 3); text trailing_txt = 0; int params_ix = 2; // Start at 2 because $1 is assigned to the return location ast var; @@ -2129,9 +2124,9 @@ void comp_glo_fun_decl(ast node) { if (trailing_txt == 0) { // Show the mapping between the function parameters and $1, $2, etc. 
while (params != 0) { - var = get_child(params, 0); - trailing_txt = concatenate_strings_with(trailing_txt, string_concat3(wrap_str_pool(get_val(get_val(var))), wrap_str_lit(": $"), wrap_int(params_ix)), wrap_str_lit(", ")); - params = get_child(params, 1); + var = get_child__(',', VAR_DECL, params, 0); + trailing_txt = concatenate_strings_with(trailing_txt, string_concat3(wrap_str_pool(probe_string(get_child_(VAR_DECL, var, 0))), wrap_str_lit(": $"), wrap_int(params_ix)), wrap_str_lit(", ")); + params = get_child_(',', params, 1); params_ix += 1; } if (trailing_txt != 0) trailing_txt = string_concat(wrap_str_lit(" # "), trailing_txt); @@ -2151,18 +2146,15 @@ void comp_glo_fun_decl(ast node) { #ifndef SH_INITIALIZE_PARAMS_WITH_LET // Initialize parameters - params = get_child(node, 2); // Reload params because params is now = 0 + params = get_child_opt_(FUN_DECL, ',', node, 2); // Reload params because params is now = 0 params_ix = 2; while (params != 0) { - var = get_child(params, 0); + var = get_child__(',', VAR_DECL, params, 0); // TODO: Constant param optimization // Constant parameters don't need to be initialized - // if (!variable_is_constant_param(find_var_in_local_env(get_val(var)), local_env)) { - comp_assignment(new_ast0(IDENTIFIER, get_child(var, 0)), new_ast0(IDENTIFIER_DOLLAR, params_ix)); - // } - - params = get_child(params, 1); + comp_assignment(new_ast0(IDENTIFIER, get_child_(VAR_DECL, var, 0)), new_ast0(IDENTIFIER_DOLLAR, params_ix)); + params = get_child_opt_(',', ',', params, 1); params_ix += 1; } #endif @@ -2180,8 +2172,8 @@ void comp_glo_fun_decl(ast node) { // So we fixup the calls to save_vars and unsave_vars at the end. 
fixup_glo_decl(save_loc_vars_fixup, save_local_vars()); while (rest_loc_var_fixups != 0) { - fixup_glo_decl(get_child(rest_loc_var_fixups, 0), restore_local_vars(params_ix - 1)); - rest_loc_var_fixups = get_child(rest_loc_var_fixups, 1); + fixup_glo_decl(get_child_(',', rest_loc_var_fixups, 0), restore_local_vars(params_ix - 1)); + rest_loc_var_fixups = get_child_opt_(',', ',', rest_loc_var_fixups, 1); } nest_level -= 1; @@ -2204,9 +2196,11 @@ void comp_glo_var_decl(ast node) { // Arrays of structs and struct value types are not supported for now. // When we have type information on the local and global variables, we'll // be able to generate the correct code for these cases. - if ((get_op(type) == '[' && get_op(get_child(type, 1)) == STRUCT_KW && get_val(get_child(type, 1)) == 0) - || (get_op(type) == STRUCT_KW && get_val(type) == 0)) { - printf("%s ", string_pool + get_val(name)); + if ((get_op(type) == '[' + && get_op(get_child_('[', type, 1)) == STRUCT_KW + && get_stars(get_child_('[', type, 1)) == 0) + || (get_op(type) == STRUCT_KW && get_stars(type) == 0)) { + printf("%s ", string_pool + probe_string(name)); fatal_error("array of struct and struct value type are not supported in shell backend. Use a reference type instead."); } @@ -2217,7 +2211,7 @@ void comp_glo_var_decl(ast node) { wrap_str_lit("defarr "), env_var(new_ast0(IDENTIFIER, name)), wrap_char(' '), - wrap_int(get_val(get_child(type, 0))) + wrap_int(get_val_(INTEGER, get_child__('[', INTEGER, type, 0))) ) ); } else { @@ -2246,13 +2240,13 @@ void comp_assignment_constant(text constant_name, ast rhs) { // it easy to implement enums. 
void comp_enum_cases(ast ident, ast cases) { if (ident != 0) { - append_glo_decl(string_concat3(wrap_str_lit("# "), wrap_str_pool(get_val(get_val(ident))), wrap_str_lit(" enum declaration"))); + append_glo_decl(string_concat3(wrap_str_lit("# "), wrap_str_pool(probe_string(get_val_(IDENTIFIER, ident))), wrap_str_lit(" enum declaration"))); } else { append_glo_decl(wrap_str_lit("# Enum declaration")); } while (get_op(cases) == ',') { - comp_assignment_constant(env_var(get_child(cases, 0)), get_child(cases, 1)); - cases = get_child(cases, 2); + comp_assignment_constant(env_var(get_child__(',', IDENTIFIER, cases, 0)), get_child_(',', cases, 1)); + cases = get_child_opt_(',', ',', cases, 2); } } @@ -2287,22 +2281,22 @@ void comp_struct(ast ident, ast members) { int offset = new_ast0(INTEGER, 0); int field_type; if (ident != 0) { - append_glo_decl(string_concat3(wrap_str_lit("# "), wrap_str_pool(get_val(get_val(ident))), wrap_str_lit(" struct member declarations"))); + append_glo_decl(string_concat3(wrap_str_lit("# "), wrap_str_pool(probe_string(get_val_(IDENTIFIER, ident))), wrap_str_lit(" struct member declarations"))); } else { append_glo_decl(wrap_str_lit("# Struct member declarations")); } while (get_op(members) == ',') { - field_type = get_child(members, 1); - comp_assignment_constant(struct_member_var(get_child(members, 0)), offset); - members = get_child(members, 2); + field_type = get_child_(',', members, 1); + comp_assignment_constant(struct_member_var(get_child_opt_(',', IDENTIFIER, members, 0)), offset); + members = get_child_opt_(',', ',', members, 2); // Arrays and struct value types are not supported for now. // When we have type information on the local and global variables, we'll // be able to generate the correct code for these cases. 
- if (get_op(field_type) == '[' || (get_op(field_type) == STRUCT_KW && get_val(field_type) == 0)) { + if (get_op(field_type) == '[' || (get_op(field_type) == STRUCT_KW && get_stars(field_type) == 0)) { fatal_error("Nested structures not supported by shell backend. Use a reference type instead."); } else { - set_val(offset, get_val(offset) - 1); + set_val(offset, get_val_(INTEGER, offset) - 1); } } @@ -2315,9 +2309,9 @@ void comp_struct(ast ident, ast members) { void handle_enum_struct_union_type_decl(ast type) { if (get_op(type) == ENUM_KW) { - comp_enum_cases(get_child(type, 1), get_child(type, 2)); + comp_enum_cases(get_child_opt_(ENUM_KW, IDENTIFIER, type, 1), get_child_(ENUM_KW, type, 2)); } else if (get_op(type) == STRUCT_KW) { - comp_struct(get_child(type, 1), get_child(type, 2)); + comp_struct(get_child_opt_(STRUCT_KW, IDENTIFIER, type, 1), get_child_(STRUCT_KW, type, 2)); } else if (get_op(type) == UNION_KW) { fatal_error("handle_enum_struct_union_type_decl: union not supported"); } @@ -2334,25 +2328,23 @@ void handle_enum_struct_union_type_decl(ast type) { // - struct declarations void comp_glo_decl(ast node) { ast declarations; - ast variable; int op = get_op(node); fun_gensym_ix = 0; top_level_stmt = true; if (op == '=') { // Assignments - comp_assignment(get_child(node, 0), get_child(node, 1)); + comp_assignment(get_child_('=', node, 0), get_child_('=', node, 1)); } else if (op == VAR_DECLS) { // Variable declarations - declarations = get_child(node, 0); + declarations = get_child__(VAR_DECLS, ',', node, 0); while (declarations != 0) { // Multiple variable declarations - variable = get_child(declarations, 0); // Single variable declaration - comp_glo_var_decl(variable); // Compile variable declaration - declarations = get_child(declarations, 1); // Next variable declaration + comp_glo_var_decl(get_child__(',', VAR_DECL, declarations, 0)); + declarations = get_child_opt_(',', ',', declarations, 1); } } else if (op == FUN_DECL) { comp_glo_fun_decl(node); 
} else if (op == TYPEDEF_KW) { - handle_enum_struct_union_type_decl(get_child(node, 1)); + handle_enum_struct_union_type_decl(get_child_(TYPEDEF_KW, node, 1)); } else if (op == ENUM_KW || op == STRUCT_KW || op == UNION_KW) { handle_enum_struct_union_type_decl(node); } else { diff --git a/tests/_all/preprocessor/macro/builtin-stubbed.c b/tests/_all/preprocessor/macro/builtin-stubbed.c index eb12a3e5..752eb86f 100644 --- a/tests/_all/preprocessor/macro/builtin-stubbed.c +++ b/tests/_all/preprocessor/macro/builtin-stubbed.c @@ -1,4 +1,5 @@ // tests for __FILE__, __LINE__, __DATE__, __TIME__, __TIMESTAMP__ built-in macros +// comp_pnut_opt: -USAFE_MODE #include #ifndef __FILE__ diff --git a/tests/_all/preprocessor/macro/builtin.golden b/tests/_all/preprocessor/macro/builtin.golden index 0f1a8a20..2c42a473 100644 --- a/tests/_all/preprocessor/macro/builtin.golden +++ b/tests/_all/preprocessor/macro/builtin.golden @@ -1,5 +1,5 @@ tests/_all/preprocessor/macro/builtin-stubbed.c -41 +42 Jan 1 1970 00:00:00 Jan 1 1970 00:00:00 From a93c1f070c1ece58d7e89f988f7c43b06eb48b08 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau <16990250+laurenthuberdeau@users.noreply.github.com> Date: Mon, 20 Jan 2025 22:44:53 -0500 Subject: [PATCH 02/89] Call get_child_checked before cmping expected type (#137) Otherwise, invalid object accesses complain about the node type not being the expected type while the actual problem is that get_child received an out-of-bound index. --- pnut.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pnut.c b/pnut.c index c98055b8..bc554061 100644 --- a/pnut.c +++ b/pnut.c @@ -367,40 +367,43 @@ void set_child_checked(char* file, int line, ast node, int i, ast child) { // This function checks that the parent node has the expected operator before // returning the child node. 
ast get_child_go(char* file, int line, int expected_parent_node, ast node, int i) { + ast res = get_child_checked(file, line, node, i); if (get_op(node) != expected_parent_node) { printf("%s:%d: Expected node %d, got %d\n", file, line, expected_parent_node, get_op(node)); exit(1); } - return get_child_checked(file, line, node, i); + return res; } // This function checks that the parent node has the expected operator and that // the child node has the expected operator before returning the child node. ast get_child__go(char* file, int line, int expected_parent_node, int expected_node, ast node, int i) { + ast res = get_child_checked(file, line, node, i); if (get_op(node) != expected_parent_node) { printf("%s:%d: Expected node %d, got %d\n", file, line, expected_parent_node, get_op(node)); exit(1); } - if (get_op(heap[node+i+1]) != expected_node) { - printf("%s:%d: Expected child node %d, got %d\n", file, line, expected_node, get_op(heap[node+i+1])); + if (get_op(res) != expected_node) { + printf("%s:%d: Expected child node %d, got %d\n", file, line, expected_node, get_op(res)); exit(1); } - return get_child_checked(file, line, node, i); + return res; } // This function checks that the parent node has the expected operator and that // the child node has the expected operator (if child node is not 0) before // returning the child node. 
ast get_child_opt_go(char* file, int line, int expected_parent_node, int expected_node, ast node, int i) { + ast res = get_child_checked(file, line, node, i); if (get_op(node) != expected_parent_node) { printf("%s:%d: Expected node %d, got %d\n", file, line, expected_parent_node, get_op(node)); exit(1); } - if (heap[node+i+1] > 0 && get_op(heap[node+i+1]) != expected_node) { - printf("%s:%d: Expected child node %d, got %d\n", file, line, expected_node, get_op(heap[node+i+1])); + if (res > 0 && get_op(res) != expected_node) { + printf("%s:%d: Expected child node %d, got %d\n", file, line, expected_node, get_op(res)); exit(1); } - return get_child_checked(file, line, node, i); + return res; } #define get_val(node) get_val_checked(__FILE__, __LINE__, node) From 7f8346f30138c34b2a5f63e310c5e9de7c403688 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau <16990250+laurenthuberdeau@users.noreply.github.com> Date: Wed, 22 Jan 2025 13:03:11 -0500 Subject: [PATCH 03/89] Allow constant expressions in enum values (#138) * Allow constant expressions in enum values * Add test --- pnut.c | 9 ++++----- tests/_all/enum-tests/enum-suite.c | 12 +++++++++--- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/pnut.c b/pnut.c index bc554061..e0476b26 100644 --- a/pnut.c +++ b/pnut.c @@ -2412,11 +2412,10 @@ ast parse_enum() { if (tok == '=') { get_tok(); - - if (tok != INTEGER) parse_error("integer expected", tok); - value = new_ast0(INTEGER, val); - next_value = val - 1; // Next value is the current value + 1, but val is negative - get_tok(); // skip + value = parse_assignment_expression(); + if (value == 0) parse_error("Enum value must be a constant expression", tok); + value = new_ast0(INTEGER, -eval_constant(value, false)); + next_value = get_val_(INTEGER, value) - 1; // Next value is the current value + 1, but val is negative } else { value = new_ast0(INTEGER, next_value); next_value -= 1; diff --git a/tests/_all/enum-tests/enum-suite.c b/tests/_all/enum-tests/enum-suite.c 
index b3afdac7..5d3446de 100644 --- a/tests/_all/enum-tests/enum-suite.c +++ b/tests/_all/enum-tests/enum-suite.c @@ -31,10 +31,16 @@ enum Boolean { TRUE }; +#define FIRST_VALUE 0 +#define SECOND_VALUE 1000000 +#define THIRD_VALUE 2000000 +#define FORTH_VALUE FIRST_VALUE + SECOND_VALUE + THIRD_VALUE + enum LargeEnum { - FIRST = 0, - SECOND = 1000000, - THIRD = 2000000 + FIRST = FIRST_VALUE, + SECOND = SECOND_VALUE, + THIRD = THIRD_VALUE, + FORTH = FORTH_VALUE, }; int main(){ From 3eba51d4da39b776e34abefe3603f0f8663ce044 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Thu, 23 Jan 2025 12:21:24 -0500 Subject: [PATCH 04/89] Extend declaration parser to support most cases The previous parser only handled relatively basic types (int, pointer to int, arrays of ints) as well as function declarations. This new parser handles all type specifiers, storage-class specifiers and type qualifiers, and is recursive, meaning it is now possible to have declarations such as `void test(int (*)(int, int))`. This will allow us to add support for unsigned and floating-point types, as well as function pointers. The only part of declarations that is left untouched by this commit is the initializer parser. This will be done in a follow-up PR.
--- pnut.c | 656 ++++++++++++++++++++++++++++++++------------------------- 1 file changed, 373 insertions(+), 283 deletions(-) diff --git a/pnut.c b/pnut.c index e0476b26..aa6afe8b 100644 --- a/pnut.c +++ b/pnut.c @@ -153,8 +153,8 @@ enum { VOID_KW, VOLATILE_KW, WHILE_KW, - VAR_DECL, - VAR_DECLS, + DECL, + DECLS, FUN_DECL, CAST, @@ -2267,7 +2267,6 @@ void parse_error_internal(char * msg, int token, char * file, int line) { exit(1); } - void expect_tok(int expected_tok) { if (tok != expected_tok) { #ifdef NICE_ERR_MSG @@ -2288,96 +2287,23 @@ ast parse_compound_statement(); ast parse_conditional_expression(); ast parse_enum(); ast parse_struct_or_union(int struct_or_union_tok); +ast parse_declarator(bool abstract_decl, ast parent_type); +ast parse_declaration_specifiers(); -ast parse_type() { - - int type_kw = 0; - - while (1) { - if (tok == INT_KW || tok == SHORT_KW || tok == LONG_KW || tok == SIGNED_KW) { - if (type_kw != 0 && type_kw != INT_KW) parse_error("inconsistent type", tok); - type_kw = INT_KW; - get_tok(); - } else if (tok == CHAR_KW) { - if (type_kw != 0) parse_error("inconsistent type", tok); - type_kw = CHAR_KW; - get_tok(); - } else if ((tok == UNSIGNED_KW) || (tok == FLOAT_KW) || (tok == DOUBLE_KW)) { - parse_error("unsupported type", tok); - } else if (tok == VOID_KW) { - if (type_kw != 0) parse_error("inconsistent type", tok); - type_kw = VOID_KW; - get_tok(); - } else if (tok == CONST_KW) { - get_tok(); // ignore const - } else if (tok == ENUM_KW) { - if (type_kw != 0) parse_error("inconsistent type", tok); - return parse_enum(); - } else if (tok == STRUCT_KW || tok == UNION_KW) { - if (type_kw != 0) parse_error("inconsistent type", tok); - return parse_struct_or_union(tok); - } else if (tok == TYPE) { - // Look in types table. 
It's a type, not a type_kw, but we reuse the variable - type_kw = heap[val + 3]; // For TYPE tokens, the tag is the type - get_tok(); - return type_kw; - } else { - break; - } - } - - if (type_kw == 0) { - parse_error("type expected", tok); - } - - return new_ast0(type_kw, 0); -} - -int parse_stars() { - - int stars = 0; - - while (tok == '*') { - stars += 1; - get_tok(); - } - - return stars; -} - -int parse_stars_for_type(int type) { - int stars = parse_stars(); - - // We don't want to mutate types that are typedef'ed, so making a copy of the type obj - if (stars != 0) { - type = clone_ast(type); - set_child(type, 0, stars); - } - - return type; -} - -//defining a const after the * is valid c, ie -// const int * const foo; -void ignore_optional_const() { - if(tok == CONST_KW) { - //skip the const - get_tok(); - } -} - -int parse_type_with_stars() { - int type = parse_stars_for_type(parse_type()); - ignore_optional_const(); - return type; -} +// The storage class specifier and type qualifier tokens are all between 300 (AUTO_KW) and 326 (VOLATILE_KW) so we store them as bits in an int. 
+#define MK_TYPE_SPECIFIER(tok) (1 << (tok - AUTO_KW)) int is_type_starter(int tok) { - return tok == INT_KW || tok == CHAR_KW || tok == SHORT_KW || tok == LONG_KW || tok == SIGNED_KW // Supported types - || tok == UNSIGNED_KW || tok == FLOAT_KW || tok == DOUBLE_KW || tok == VOID_KW // Unsupported types - || tok == TYPE // User defined types - || tok == CONST_KW // Type attributes - || tok == ENUM_KW || tok == STRUCT_KW || tok == UNION_KW; // Enum, struct, union + return tok == INT_KW || tok == CHAR_KW || tok == SHORT_KW || tok == LONG_KW // Numeric types + || tok == VOID_KW + || tok == FLOAT_KW || tok == DOUBLE_KW // Floating point types + || tok == SIGNED_KW || tok == UNSIGNED_KW // Signedness + || tok == TYPE // User defined types + || tok == CONST_KW // Type attributes + || tok == ENUM_KW || tok == STRUCT_KW || tok == UNION_KW // Enum, struct, union + // Typedef is not a valid type starter in all contexts + // || tok == TYPEDEF_KW // Typedef + ; } ast parse_enum() { @@ -2440,13 +2366,12 @@ ast parse_enum() { } - return new_ast3(ENUM_KW, 0, name, result); // 0 is number of stars + return new_ast3(ENUM_KW, 0, name, result); // child#0 is the storage-class specifiers and type qualifiers } ast parse_struct_or_union(int struct_or_union_tok) { ast name; - ast ident; - ast type; + ast type_specifier, decl; ast result = 0; ast tail; bool ends_in_flex_array = false; @@ -2469,251 +2394,416 @@ ast parse_struct_or_union(int struct_or_union_tok) { while (tok != '}') { if (!is_type_starter(tok)) parse_error("type expected in struct declaration", tok); if (ends_in_flex_array) parse_error("flexible array member must be last", tok); + type_specifier = parse_declaration_specifiers(); - type = parse_type_with_stars(); - - if (get_op(type) == VOID_KW && get_stars(type) == 0) - parse_error("variable with void type", tok); - - ident = 0; // Anonymous struct - if (tok == IDENTIFIER) { - ident = new_ast0(IDENTIFIER, val); - get_tok(); + // If the decl has no name, it's an anonymous 
struct/union member + // and there can only be 1 declarator so not looping. + if (tok == ';') { + if (get_op(type_specifier) != ENUM_KW && get_op(type_specifier) != STRUCT_KW && get_op(type_specifier) != UNION_KW) { + parse_error("Anonymous struct/union member must be a struct or union type", tok); + } + decl = new_ast3(DECL, 0, type_specifier, 0); - if (tok == '[') { // Array - get_tok(); - if (tok == ']') { - if (struct_or_union_tok != STRUCT_KW) parse_error("flexible array member must be in a struct", tok); - ends_in_flex_array = true; - val = 0; // Flex array are arrays with no size, using 0 for now - type = new_ast2('[', new_ast0(INTEGER, 0), type); - get_tok(); - } else if (tok == INTEGER) { - type = new_ast2('[', new_ast0(INTEGER, -val), type); - get_tok(); - expect_tok(']'); + if (result == 0) { + tail = result = new_ast2(',', decl, 0); + } else { + set_child(tail, 1, new_ast2(',', decl, 0)); + tail = get_child_(',', tail, 1); + } + } else { + while (1) { + decl = parse_declarator(false, type_specifier); + if (result == 0) { + tail = result = new_ast2(',', decl, 0); } else { - parse_error("array size must be an integer constant", tok); + set_child(tail, 1, new_ast2(',', decl, 0)); + tail = get_child_(',', tail, 1); } + if (get_child_(DECL, decl, 1) == VOID_KW) parse_error("member with void type not allowed in struct/union", tok); + if (get_child_(DECL, decl, 1) == '[' && get_child_('[', get_child_(DECL, decl, 1), 1) == 0) { + // Set ends_in_flex_array if the type is an array with no size + ends_in_flex_array = true; + break; + } + if (tok == ',') get_tok(); + else break; } - } else if (get_op(type) != STRUCT_KW && get_op(type) != UNION_KW) { - parse_error("Anonymous struct/union member must have be a struct or union type", tok); } expect_tok(';'); - - if (result == 0) { - result = new_ast3(',', ident, type, 0); - tail = result; - } else { - set_child(tail, 2, new_ast3(',', ident, type, 0)); - tail = get_child_(',', tail, 2); - } } expect_tok('}'); - } - 
return new_ast3(struct_or_union_tok, 0, name, result); // 0 is number of stars + return new_ast3(struct_or_union_tok, 0, name, result); // child#0 is the storage-class specifiers and type qualifiers } -ast parse_param_decl() { +ast parse_type_specifier() { + ast type_specifier = 0; + switch (tok) { + case CHAR_KW: + case INT_KW: + case VOID_KW: +#ifdef DEBUG_PARSER + case FLOAT_KW: + case DOUBLE_KW: +#endif + type_specifier = new_ast0(tok, 0); + get_tok(); + return type_specifier; - ast type; - int name; - ast result = 0; + case SHORT_KW: + get_tok(); + if (tok == INT_KW) get_tok(); // Just "short" is equivalent to "short int" + return new_ast0(SHORT_KW, 0); - if (is_type_starter(tok)) { - type = parse_type_with_stars(); - name = val; - expect_tok(IDENTIFIER); - if (get_op(type) == VOID_KW && get_stars(type) == 0) parse_error("variable with void type", tok); - result = new_ast3(VAR_DECL, name, type, 0); - } else if (tok == IDENTIFIER) { - // Support K&R param syntax in function definition - name = val; - expect_tok(IDENTIFIER); - type = new_ast0(INT_KW, 0); - result = new_ast3(VAR_DECL, name, type, 0); - } else if (tok == ELLIPSIS) { - // ignore ELLIPSIS nodes for now - get_tok(); + case SIGNED_KW: + get_tok(); + type_specifier = parse_type_specifier(); + // Just "signed" is equivalent to "signed int" + if (type_specifier == 0) type_specifier = new_ast0(INT_KW, 0); + return type_specifier; + +#ifdef DEBUG_PARSER + case UNSIGNED_KW: + get_tok(); + type_specifier = parse_type_specifier(); + // Just "unsigned" is equivalent to "unsigned int" + if (type_specifier == 0) type_specifier = new_ast0(INT_KW, MK_TYPE_SPECIFIER(UNSIGNED_KW)); + return type_specifier; +#endif + + case LONG_KW: + get_tok(); +#ifdef DEBUG_PARSER + if (tok == DOUBLE_KW) { + get_tok(); + return new_ast0(DOUBLE_KW, 0); + } else +#endif + { + if (tok == LONG_KW) { + get_tok(); + if (tok == INT_KW) get_tok(); // Just "long long" is equivalent to "long long int" + // FIXME: For now, "long long int" is 
the same as "long int" which is ok + // if the code generators assign at least 64 bits to long int + } else if (tok == INT_KW) { + get_tok(); // Just "long" is equivalent to "long int" + } + return new_ast0(LONG_KW, 0); + } + + default: + return 0; } +} - return result; +// A declaration is split in 2 parts: +// 1. specifiers and qualifiers +// 2. declarators and initializers +// This function parses the first part +ast parse_declaration_specifiers() { + ast type_specifier = 0; + int type_storage_class = 0; + int type_qualifier = 0; + bool loop = true; + + while (loop) { + switch (tok) { + case AUTO_KW: + case REGISTER_KW: + case STATIC_KW: + case EXTERN_KW: + case TYPEDEF_KW: + type_storage_class |= MK_TYPE_SPECIFIER(tok); + get_tok(); + break; + + case CONST_KW: + case VOLATILE_KW: + type_qualifier |= MK_TYPE_SPECIFIER(tok); + get_tok(); + break; + + case CHAR_KW: + case INT_KW: + case VOID_KW: + case SHORT_KW: + case SIGNED_KW: + case UNSIGNED_KW: + case LONG_KW: + case FLOAT_KW: + case DOUBLE_KW: + if (type_specifier != 0) parse_error("Unexpected C type specifier", tok); + type_specifier = parse_type_specifier(); + if (type_specifier == 0) parse_error("Failed to parse type specifier", tok); + break; + + case STRUCT_KW: + case UNION_KW: + if (type_specifier != 0) parse_error("Multiple types not supported", tok); + type_specifier = parse_struct_or_union(tok); + break; + + case ENUM_KW: + if (type_specifier != 0) parse_error("Multiple types not supported", tok); + type_specifier = parse_enum(); + break; + + case TYPE: + // Look in types table. 
It's a type, not a type_kw, but we reuse the variable + type_specifier = heap[val + 3]; // For TYPE tokens, the tag is the type + type_specifier = clone_ast(type_specifier); // Clone the type so it can be modified + get_tok(); + break; + + default: + loop = false; // Break out of loop + break; + } + } + + // Note: Remove to support K&R C syntax + if (type_specifier == 0) parse_error("Type expected", tok); + + set_child(type_specifier, 0, type_storage_class | type_qualifier); // Set the storage class and type qualifier + + return type_specifier; } int parse_param_list() { - ast decl = parse_param_decl(); ast result = 0; ast tail; - if (decl != 0) { - result = new_ast2(',', decl, 0); - tail = result; + ast decl; + + expect_tok('('); - while (tok == ',') { + while (tok != ')' && tok != EOF) { + if (is_type_starter(tok)) { + decl = parse_declarator(true, parse_declaration_specifiers()); + if (get_op(decl) == VOID_KW) { + if (tok != ')' || result != 0) parse_error("void must be the only parameter", tok); + break; + } + } else if (tok == IDENTIFIER) { + // Support K&R param syntax in function definition + decl = new_ast3(DECL, new_ast0(IDENTIFIER, val), new_ast0(INT_KW, 0), 0); + get_tok(); + } else if (tok == ELLIPSIS) { + // ignore ELLIPSIS nodes for now, but it should be the last parameter get_tok(); - decl = parse_param_decl(); - if (decl == 0) { break; } + break; + } else { + parse_error("Parameter declaration expected", tok); + } + + if (tok == ',') get_tok(); - decl = new_ast2(',', decl, 0); - set_child(tail, 1, decl); - tail = decl; + if (result == 0) { + tail = result = new_ast2(',', decl, 0); + } else { + set_child(tail, 1, new_ast2(',', decl, 0)); + tail = get_child_(',', tail, 1); } } + expect_tok(')'); + return result; } -// Note: Uses a simplified syntax for definitions -ast parse_definition(int local) { - - ast type; - ast init; - int name; - ast params; - ast body; - ast this_type; +// abstract_decl: true if the declarator may omit the identifier +ast 
parse_declarator(bool abstract_decl, ast parent_type) { + bool first_tok = tok; // Indicates if the declarator is a noptr-declarator ast result = 0; - ast tail = 0; - ast current_declaration; - - //static can be skipped for global definitions without affecting semantics - if(!local && tok == STATIC_KW) { - get_tok(); - } + ast decl; + ast arr_size_expr; - if (is_type_starter(tok)) { - type = parse_type(); + switch (tok) { + case IDENTIFIER: + result = new_ast3(DECL, new_ast0(IDENTIFIER, val), parent_type, 0); // child#2 is the initializer + get_tok(); + break; - // global enum/struct/union declaration - if (tok == ';') { - if (get_op(type) != ENUM_KW && get_op(type) != STRUCT_KW && get_op(type) != UNION_KW) { - parse_error("enum/struct/union declaration expected", tok); - } + case '*': get_tok(); - return type; - } + // Pointers may be const-qualified + if (tok == CONST_KW) { + get_tok(); + result = new_ast2('*', MK_TYPE_SPECIFIER(CONST_KW), parent_type); + } else { + result = new_ast2('*', 0, parent_type); + } - while (1) { + result = parse_declarator(abstract_decl, result); + break; - this_type = parse_stars_for_type(type); - ignore_optional_const(); + // Parenthesis delimit the specifier-and-qualifier part of the declaration from the declarator + case '(': + get_tok(); + result = parse_declarator(abstract_decl, parent_type); + expect_tok(')'); + break; + } - name = val; + if (result == 0) { + // Abstract declarators don't need names, meaning the previous switch may + // not have set result. In that case, we create a DECL node with no + // identifier. + if (abstract_decl) { + return new_ast3(DECL, 0, parent_type, 0); // child#0 is the identifier, child#2 is the initializer + } else { + parse_error("Invalid declarator, expected an identifier but declarator doesn't have one", tok); + } + } - expect_tok(IDENTIFIER); + // At this point, the only non-recursive declarator is an identifier + // so we know that get_op(result) == DECL. 
+ // Because we want the DECL to stay as the outermost node, we temporarily + // unwrap the DECL parent_type. + decl = result; + result = get_child_(DECL, result, 1); // child#1 is the type - if (tok == '(') { + while (first_tok != '*') { + // noptr-declarator may be followed by [ constant-expression ] to declare an + // array or by ( parameter-type-list ) to declare a function. We loop since + // both may be present. + if (tok == '[') { + // Check if not a void array + if (get_op(result) == VOID_KW) parse_error("void array not allowed", tok); + get_tok(); + if (tok == ']') { + val = 0; + } else { + arr_size_expr = parse_assignment_expression(); + if (arr_size_expr == 0) parse_error("Array size must be an integer constant", tok); + val = eval_constant(arr_size_expr, false); + } + result = new_ast2('[', result, val); // 0 is used to represent an unsized array + expect_tok(']'); + } else if (tok == '(') { + result = new_ast2('(', result, parse_param_list()); + } else { + break; + } + } - if (local) { - parse_error("function declaration only allowed at global level", tok); - } + // And now we wrap the DECL back around the result. + // Maybe we could try reusing the DECL node? + set_child(decl, 1, result); // child#1 is the type + return decl; +} - get_tok(); +ast parse_declarator_and_initializer(ast type_specifier) { + ast declarator = parse_declarator(false, type_specifier); - params = parse_param_list(); + if ((get_child(type_specifier, 0) & MK_TYPE_SPECIFIER(TYPEDEF_KW)) == 0) { + if (tok == '=') { + get_tok(); + // parse_declarator returns a DECL node where the initializer is child#2 + set_child(declarator, 2, parse_conditional_expression()); + } + } - expect_tok(')'); + return declarator; +} - if (tok == ';') { - // forward declaration. 
Body == -1 - body = -1; - get_tok(); - } else { - body = parse_compound_statement(); - } +void add_typedef(ast declarator) { + int decl_ident = get_val_(IDENTIFIER, get_child__(DECL, IDENTIFIER, declarator, 0)); + ast decl_type = get_child_(DECL, declarator, 1); // child#1 is the type - return new_ast4(FUN_DECL, name, this_type, params, body); +#ifdef sh + // If the struct/union/enum doesn't have a name, we give it the name of the typedef. + // This is not correct, but it's a limitation of the current shell backend where we + // need the name of a struct/union/enum to compile sizeof and typedef'ed structures + // don't always have a name. + if (get_op(decl_type) == STRUCT_KW || get_op(decl_type) == UNION_KW || get_op(decl_type) == ENUM_KW) { + if (get_child(decl_type, 1) != 0 && get_val_(IDENTIFIER, get_child(decl_type, 1)) != decl_ident) { + syntax_error("typedef name must match struct/union/enum name"); + } + set_child(decl_type, 1, new_ast0(IDENTIFIER, decl_ident)); + } +#endif - } else { + heap[decl_ident + 2] = TYPE; + heap[decl_ident + 3] = decl_type; +} - if (get_op(this_type) == VOID_KW && get_stars(this_type) == 0) { - parse_error("variable with void type", tok); - } +ast parse_fun_def(ast declarator) { + ast params = get_child_(DECL, declarator, 1); - if (tok == '[') { - // if (local) { - // syntax_error("array declaration only allowed at global level"); - // } - get_tok(); - if (tok == INTEGER) { - this_type = new_ast2('[', new_ast0(INTEGER, -val), this_type); - get_tok(); - } else { - parse_error("array size must be an integer constant", tok); - } + // Check that the parameters are all named since declarator may be abstract + while (get_op(params) == ',') { + if (get_child_(DECL, get_child__(',', DECL, params, 0), 0) == 0) { + parse_error("Parameter name expected", tok); + } + params = get_child_(',', params, 1); + } + if (get_child_(DECL, declarator, 2) != 0) parse_error("Initializer not allowed in function definition", tok); + return 
new_ast2(FUN_DECL, declarator, parse_compound_statement()); +} - expect_tok(']'); - } +ast parse_declaration(bool local) { + ast declarator; + ast declarators; + ast tail; + // First we parse the specifiers: + ast type_specifier = parse_declaration_specifiers(); + ast result; + + // From cppreference: + // > The enum, struct, and union declarations may omit declarators, in which + // > case they only introduce the enumeration constants and/or tags. + if (tok == ';') { + if (get_op(type_specifier) != ENUM_KW && get_op(type_specifier) != STRUCT_KW && get_op(type_specifier) != UNION_KW) { + parse_error("enum/struct/union declaration expected", tok); + } + get_tok(); // Skip the ; + return type_specifier; + } - init = 0; + // Then we parse the declarators and initializers + declarator = parse_declarator_and_initializer(type_specifier); - if (tok == '=') { - get_tok(); - init = parse_conditional_expression(); - } - current_declaration = new_ast3(VAR_DECL, name, this_type, init); // Create a new declaration + // The declarator may be a function definition, in which case we parse the function body + if (get_op(get_child_(DECL, declarator, 1)) == '(' && tok == '{') { + if (local) parse_error("Function definition not allowed in local scope", tok); + return parse_fun_def(declarator); + } - if(result == 0) { // First declaration - result = new_ast2(',', current_declaration, 0); - tail = result; // Keep track of the last declaration - } else { - set_child(tail, 1, new_ast2(',', current_declaration, 0)); // Link the new declaration to the last one - tail = get_child_(',', tail, 1); // Update the last declaration - } + declarators = new_ast2(',', declarator, 0); // Wrap the declarators in a list + tail = declarators; - if (tok == ';') { - get_tok(); - break; - } else if (tok == ',') { - get_tok(); - continue; // Continue to the next declaration - } else { - parse_error("';' or ',' expected", tok); - } - } + // Otherwise, this is a variable or declaration + while (tok != ';') { + 
if (tok == ',') { + get_tok(); + set_child(tail, 1, new_ast2(',', parse_declarator_and_initializer(type_specifier), 0)); + tail = get_child__(',', ',', tail, 1); + } else { + parse_error("';' or ',' expected", tok); } - return new_ast1(VAR_DECLS, result); - } else if (tok == TYPEDEF_KW) { - // When parsing a typedef, the type is added to the types table. - // This is so the parser can determine if an identifier is a type or not. - // This implementation is not completely correct, as an identifier that was - // typedef'ed can also be used as a variable name, but TCC doesn't do that so - // it should be fine for now. - // - // When we want to implement typedef correctly, we'll want to tag - // identifiers as typedef'ed and have the typedef be scoped to the block - // it was defined in (global or in function). - get_tok(); - type = parse_type_with_stars(); - if (tok != IDENTIFIER) { parse_error("identifier expected", tok); } + } -#ifdef sh - // If the struct/union/enum doesn't have a name, we give it the name of the typedef. - // This is not correct, but it's a limitation of the current shell backend where we - // need the name of a struct/union/enum to compile sizeof and typedef'ed structures - // don't always have a name. - if (get_op(type) == STRUCT_KW || get_op(type) == UNION_KW || get_op(type) == ENUM_KW) { - if (get_child(type, 1) != 0 && get_val_(IDENTIFIER, get_child(type, 1)) != val) { - syntax_error("typedef name must match struct/union/enum name"); - } - set_child(type, 1, new_ast0(IDENTIFIER, val)); + // The type_specifier may be a typedef, in that case, it's not a variable or + // function declaration, and we instead want to add the typedef'ed type to the + // type table. 
+ if (get_child(type_specifier, 0) & MK_TYPE_SPECIFIER(TYPEDEF_KW)) { + type_specifier = declarators; // Save declarators in type_specifier + while (get_op(declarators) == ',') { + add_typedef(get_child__(',', DECL, declarators, 0)); + declarators = get_child_opt_(',', ',', declarators, 1); } -#endif - - heap[val + 2] = TYPE; - heap[val + 3] = type; - result = new_ast2(TYPEDEF_KW, val, type); - get_tok(); - expect_tok(';'); - return result; + result = new_ast1(TYPEDEF_KW, type_specifier); } else { - parse_error("unknown decl: type expected", tok); - return result; + result = new_ast1(DECLS, declarators); } + + expect_tok(';'); + + return result; } ast parse_parenthesized_expression() { @@ -2876,7 +2966,7 @@ ast parse_unary_expression() { get_tok(); if (tok == '(') { get_tok(); - result = parse_type_with_stars(); + result = parse_declarator(true, parse_declaration_specifiers()); expect_tok(')'); } else { result = parse_unary_expression(); @@ -2928,7 +3018,7 @@ ast parse_cast_expression() { get_tok(); if (is_type_starter(tok)) { - type = parse_type_with_stars(); + type = parse_declarator(true, parse_declaration_specifiers()); expect_tok(')'); result = new_ast2(CAST, type, parse_cast_expression()); @@ -3337,7 +3427,7 @@ ast parse_compound_statement() { // TODO: Simplify this if (tok != '}' && tok != EOF) { if (is_type_starter(tok)) { - child1 = parse_definition(1); + child1 = parse_declaration(true); } else { child1 = parse_statement(); } @@ -3345,7 +3435,7 @@ ast parse_compound_statement() { tail = result; while (tok != '}' && tok != EOF) { if (is_type_starter(tok)) { - child1 = parse_definition(1); + child1 = parse_declaration(true); } else { child1 = parse_statement(); } @@ -3449,15 +3539,15 @@ int main(int argc, char **argv) { #elif defined DEBUG_PARSER // Parse input, output nothing get_tok(); while (tok != EOF) { - decl = parse_definition(0); + decl = parse_declaration(false); } #else codegen_begin(); get_tok(); while (tok != EOF) { - decl = parse_definition(0); 
+ decl = parse_declaration(false); #ifdef SH_INCLUDE_C_CODE - output_declaration_c_code(get_op(decl) == '=' | get_op(decl) == VAR_DECLS); + output_declaration_c_code(get_op(decl) == '=' | get_op(decl) == DECLS); #endif codegen_glo_decl(decl); } From 2a3bc63b2266c4148738a042191c94878bb288e1 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Thu, 23 Jan 2025 13:15:38 -0500 Subject: [PATCH 05/89] Support new declaration AST in pnut-sh This change shouldn't affect the shell code generated. --- sh.c | 175 ++++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 107 insertions(+), 68 deletions(-) diff --git a/sh.c b/sh.c index b9531afd..30a1a1ba 100644 --- a/sh.c +++ b/sh.c @@ -508,7 +508,7 @@ ast fresh_string_ident(int string_probe) { } void add_var_to_local_env(ast decl, enum BINDING kind) { - int ident_probe = get_child_(VAR_DECL, decl, 0); + int ident_probe = get_val_(IDENTIFIER, get_child__(DECL, IDENTIFIER, decl, 0)); // Make sure we're not shadowing an existing local variable if (cgc_lookup_var(ident_probe, cgc_locals)) { @@ -517,12 +517,12 @@ void add_var_to_local_env(ast decl, enum BINDING kind) { } // The var is not part of the environment, so we add it. - cgc_add_local_var(kind, ident_probe, get_child_(VAR_DECL, decl, 1)); + cgc_add_local_var(kind, ident_probe, get_child_(DECL, decl, 1)); } void add_fun_params_to_local_env(ast lst) { while (lst != 0) { - add_var_to_local_env(get_child__(',', VAR_DECL, lst, 0), BINDING_PARAM_LOCAL); + add_var_to_local_env(get_child__(',', DECL, lst, 0), BINDING_PARAM_LOCAL); lst = get_child_opt_(',', ',', lst, 1); } } @@ -533,9 +533,9 @@ void add_fun_params_to_local_env(ast lst) { // // Also, the shell backend doesn't support variables with aggregate types. 
void assert_var_decl_is_safe(ast variable, bool local) { // Helper function for assert_idents_are_safe - ast ident_probe = get_child_(VAR_DECL, variable, 0); + ast ident_probe = get_val_(IDENTIFIER, get_child__(DECL, IDENTIFIER, variable, 0)); char* name = string_pool + probe_string(ident_probe); - ast type = get_child_(VAR_DECL, variable, 1); + ast type = get_child_(DECL, variable, 1); if (name[0] == '_' || (name[0] != '\0' && name[1] == '_' && name[2] == '\0')) { // Check for a_ variables that could conflict with character constants printf("%s ", name); @@ -550,17 +550,29 @@ void assert_var_decl_is_safe(ast variable, bool local) { // Helper function for fatal_error("variable name is invalid. It can't be 'IFS' or 'argv_'."); } - // Local variables don't correspond to memory locations, and can't store - // more than 1 number/pointer. - if (local && (get_op(type) == '[' || (get_op(type) == STRUCT_KW && get_stars(type) == 0))) { - printf("%s ", name); - fatal_error("array/struct value type is not supported for shell backend. Use a reference type instead."); + if (local) { + // Local variables don't correspond to memory locations, and can't store + // more than 1 number/pointer. + if (get_op(type) == '[' || get_op(type) == STRUCT_KW) { + printf("%s ", name); + fatal_error("array/struct value type is not supported for shell backend. Use a reference type instead."); + } + } else { + // Arrays of structs and struct value types are not supported for now. + // When we have type information on the local and global variables, we'll + // be able to generate the correct code for these cases. + if ( (get_op(type) == '[' && get_op(get_child_('[', type, 0)) == STRUCT_KW) // Array of structs + || (get_op(type) == '[' && get_op(get_child_('[', type, 0)) == '[') // Array of arrays + || get_op(type) == STRUCT_KW) { // Struct value type + printf("%s ", name); + fatal_error("array of struct and struct value type are not supported in shell backend. 
Use a reference type instead."); + } } } void check_param_decls(ast lst) { while (lst != 0) { - assert_var_decl_is_safe(get_child__(',', VAR_DECL, lst, 0), true); + assert_var_decl_is_safe(get_child__(',', DECL, lst, 0), true); lst = get_child_(',', lst, 1); } } @@ -653,7 +665,7 @@ text let_params(int params) { while (params != 0) { // TODO: Constant param optimization - ident = new_ast0(IDENTIFIER, get_child_(VAR_DECL, get_child__(',', VAR_DECL, params, 0), 0)); + ident = get_child__(DECL, IDENTIFIER, get_child__(',', DECL, params, 0), 0); res = concatenate_strings_with(res, string_concat4(wrap_str_lit("let "), env_var_with_prefix(ident, false), wrap_char(' '), format_special_var(new_ast0(IDENTIFIER_DOLLAR, params_ix), false)), wrap_str_lit("; ")); params = get_child_opt_(',', ',', params, 1); params_ix += 1; @@ -1179,16 +1191,27 @@ text comp_rvalue_go(ast node, int context, ast test_side_effects, int outer_op) sub1 = comp_lvalue(child0); return wrap_if_needed(false, context, test_side_effects, string_concat4(wrap_char('('), sub1, wrap_str_lit(" += 1)"), wrap_str_lit(" - 1")), outer_op, '-'); } else if (op == SIZEOF_KW) { - if (get_op(child0) == INT_KW - || get_op(child0) == CHAR_KW - || get_op(child0) == VOID_KW - || get_op(child0) == ENUM_KW - || (( get_op(child0) == STRUCT_KW || get_op(child0) == UNION_KW) - && get_child(child0, 0) >= 1)) { // If it's a pointer - return wrap_in_condition_if_needed(context, test_side_effects, wrap_int(1)); - } else if (get_op(child0) == STRUCT_KW) { - return wrap_if_needed(false, context, test_side_effects, struct_sizeof_var(get_child__(STRUCT_KW, IDENTIFIER, child0, 1)), outer_op, op); + // child0 is either an abstract declaration or an expression + if (get_op(child0) == DECL) { + child0 = get_child_(DECL, child0, 1); // Get the type + if ( get_op(child0) == INT_KW + || get_op(child0) == SHORT_KW + || get_op(child0) == LONG_KW + || get_op(child0) == CHAR_KW + || get_op(child0) == VOID_KW + || get_op(child0) == ENUM_KW + || 
get_op(child0) == '*') { // If it's a pointer + return wrap_in_condition_if_needed(context, test_side_effects, wrap_int(1)); + } else if (get_op(child0) == STRUCT_KW) { + return wrap_if_needed(false, context, test_side_effects, struct_sizeof_var(get_child__(STRUCT_KW, IDENTIFIER, child0, 1)), outer_op, op); + } else { + printf("op=%d %c", get_op(child0), get_op(child0)); + printf("op=%d %c", get_op(get_child(child0, 1)), get_op(get_child(child0, 1))); + fatal_error("comp_rvalue_go: sizeof is not supported for this type or expression"); + return 0; + } } else { + printf("op=%d %c", get_op(child0), get_op(child0)); fatal_error("comp_rvalue_go: sizeof is not supported for this type or expression"); return 0; } @@ -1991,17 +2014,17 @@ bool comp_return(ast return_value) { void comp_var_decls(ast node) { ast var_decl; - node = get_child_opt_(VAR_DECLS, ',', node, 0); + node = get_child_opt_(DECLS, ',', node, 0); while (node != 0) { // Add to local env and cummulative env, then initialize - var_decl = get_child__(',', VAR_DECL, node, 0); + var_decl = get_child__(',', DECL, node, 0); add_var_to_local_env(var_decl, BINDING_VAR_LOCAL); - if (get_child_(VAR_DECL, var_decl, 2) != 0) { // Initializer - comp_assignment(new_ast0(IDENTIFIER, get_child_(VAR_DECL, var_decl, 0)), get_child_(VAR_DECL, var_decl, 2)); + if (get_child_(DECL, var_decl, 2) != 0) { // Initializer + comp_assignment(get_child__(DECL, IDENTIFIER, var_decl, 0), get_child_(DECL, var_decl, 2)); } #ifdef INITIALIZE_LOCAL_VARS_WITH_ZERO else { - comp_assignment(new_ast0(IDENTIFIER, get_child_(VAR_DECL, var, 0)), new_ast0(INTEGER, 0)); + comp_assignment(new_ast0(IDENTIFIER, get_child__(DECL, IDENTIFIER, var, 0)), new_ast0(INTEGER, 0)); } #endif node = get_child_opt_(',', ',', node, 1); // Next variable @@ -2077,7 +2100,7 @@ bool comp_statement(ast node, STMT_CTX stmt_ctx) { } else if (get_op(node) == CASE_KW || get_op(node) == DEFAULT_KW) { fatal_error("case/default must be at the beginning of a switch conditional 
block"); return false; - } else if (op == VAR_DECLS) { + } else if (op == DECLS) { comp_var_decls(node); return false; } else { @@ -2090,10 +2113,11 @@ bool comp_statement(ast node, STMT_CTX stmt_ctx) { } void comp_glo_fun_decl(ast node) { - ast name = get_child_(FUN_DECL, node, 0); - ast fun_type = get_child_(FUN_DECL, node, 1); - ast params = get_child_opt_(FUN_DECL, ',', node, 2); - ast body = get_child_opt_(FUN_DECL, '{', node, 3); + ast decl = get_child__(FUN_DECL, DECL, node, 0); + ast body = get_child_opt_(FUN_DECL, '{', node, 1); + ast name_probe = get_val_(IDENTIFIER, get_child__(DECL, IDENTIFIER, decl, 0)); + ast fun_type = get_child__(DECL, '(', decl, 1); + ast params = get_child_opt_('(', ',', fun_type, 1); text trailing_txt = 0; int params_ix = 2; // Start at 2 because $1 is assigned to the return location ast var; @@ -2108,12 +2132,12 @@ void comp_glo_fun_decl(ast node) { add_fun_params_to_local_env(params); // If the function is main - if (name == MAIN_ID) { + if (name_probe == MAIN_ID) { main_defined = true; // If main has parameters. If so, we'll prepare the argc/argv values in the prologue. if (params != 0) runtime_use_make_argv = true; // Check if main returns an exit code. - if (get_op(fun_type) != VOID_KW) main_returns = true; + if (get_op(get_child_('(', fun_type, 0)) != VOID_KW) main_returns = true; // TODO: test this } #ifdef SH_INITIALIZE_PARAMS_WITH_LET @@ -2124,8 +2148,8 @@ void comp_glo_fun_decl(ast node) { if (trailing_txt == 0) { // Show the mapping between the function parameters and $1, $2, etc. 
while (params != 0) { - var = get_child__(',', VAR_DECL, params, 0); - trailing_txt = concatenate_strings_with(trailing_txt, string_concat3(wrap_str_pool(probe_string(get_child_(VAR_DECL, var, 0))), wrap_str_lit(": $"), wrap_int(params_ix)), wrap_str_lit(", ")); + var = get_child__(',', DECL, params, 0); + trailing_txt = concatenate_strings_with(trailing_txt, string_concat3(wrap_str_pool(probe_string(get_val_(IDENTIFIER, get_child_(DECL, var, 0)))), wrap_str_lit(": $"), wrap_int(params_ix)), wrap_str_lit(", ")); params = get_child_(',', params, 1); params_ix += 1; } @@ -2133,7 +2157,7 @@ void comp_glo_fun_decl(ast node) { } append_glo_decl(string_concat3( - function_name(name), + function_name(name_probe), wrap_str_lit("() {"), trailing_txt )); @@ -2146,14 +2170,14 @@ void comp_glo_fun_decl(ast node) { #ifndef SH_INITIALIZE_PARAMS_WITH_LET // Initialize parameters - params = get_child_opt_(FUN_DECL, ',', node, 2); // Reload params because params is now = 0 + params = get_child_opt_('(', ',', fun_type, 1); // Reload params because params is now = 0 params_ix = 2; while (params != 0) { - var = get_child__(',', VAR_DECL, params, 0); + var = get_child__(',', DECL, params, 0); // TODO: Constant param optimization // Constant parameters don't need to be initialized - comp_assignment(new_ast0(IDENTIFIER, get_child_(VAR_DECL, var, 0)), new_ast0(IDENTIFIER_DOLLAR, params_ix)); + comp_assignment(get_child_(DECL, var, 0), new_ast0(IDENTIFIER_DOLLAR, params_ix)); params = get_child_opt_(',', ',', params, 1); params_ix += 1; } @@ -2182,9 +2206,11 @@ void comp_glo_fun_decl(ast node) { } void comp_glo_var_decl(ast node) { - ast name = get_child(node, 0); - ast type = get_child(node, 1); - ast init = get_child(node, 2); + ast name = get_child__(DECL, IDENTIFIER, node, 0); + ast type = get_child_(DECL, node, 1); + ast init = get_child_(DECL, node, 2); + + if (get_op(type) == '(') return; // Ignore function declarations if (init == 0) init = new_ast0(INTEGER, 0); @@ -2193,25 
+2219,14 @@ void comp_glo_var_decl(ast node) { assert_var_decl_is_safe(node, false); - // Arrays of structs and struct value types are not supported for now. - // When we have type information on the local and global variables, we'll - // be able to generate the correct code for these cases. - if ((get_op(type) == '[' - && get_op(get_child_('[', type, 1)) == STRUCT_KW - && get_stars(get_child_('[', type, 1)) == 0) - || (get_op(type) == STRUCT_KW && get_stars(type) == 0)) { - printf("%s ", string_pool + probe_string(name)); - fatal_error("array of struct and struct value type are not supported in shell backend. Use a reference type instead."); - } - if (get_op(type) == '[') { // Array declaration runtime_defarr(); append_glo_decl( string_concat4( wrap_str_lit("defarr "), - env_var(new_ast0(IDENTIFIER, name)), + env_var(name), wrap_char(' '), - wrap_int(get_val_(INTEGER, get_child__('[', INTEGER, type, 0))) + wrap_int(get_child_('[', type, 1)) ) ); } else { @@ -2220,13 +2235,13 @@ void comp_glo_var_decl(ast node) { append_glo_decl( string_concat4( wrap_str_lit("defglo "), - env_var(new_ast0(IDENTIFIER, name)), + env_var(name), wrap_char(' '), comp_rvalue(init, VALUE_CTX_BASE) ) ); #else - comp_assignment(new_ast0(IDENTIFIER, name), init); + comp_assignment(name, init); #endif } } @@ -2278,6 +2293,7 @@ void comp_enum_cases(ast ident, ast cases) { // Because the member offset variables are declared as readonly, name conflicts // will result in a runtime error when the shell program initializes. 
void comp_struct(ast ident, ast members) { + ast decl; int offset = new_ast0(INTEGER, 0); int field_type; if (ident != 0) { @@ -2286,18 +2302,18 @@ void comp_struct(ast ident, ast members) { append_glo_decl(wrap_str_lit("# Struct member declarations")); } while (get_op(members) == ',') { - field_type = get_child_(',', members, 1); - comp_assignment_constant(struct_member_var(get_child_opt_(',', IDENTIFIER, members, 0)), offset); - members = get_child_opt_(',', ',', members, 2); - + decl = get_child__(',', DECL, members, 0); + field_type = get_child_(DECL, decl, 1); // Arrays and struct value types are not supported for now. // When we have type information on the local and global variables, we'll // be able to generate the correct code for these cases. - if (get_op(field_type) == '[' || (get_op(field_type) == STRUCT_KW && get_stars(field_type) == 0)) { + if (get_op(field_type) == '[' || get_op(field_type) == STRUCT_KW) { fatal_error("Nested structures not supported by shell backend. Use a reference type instead."); - } else { - set_val(offset, get_val_(INTEGER, offset) - 1); } + + comp_assignment_constant(struct_member_var(get_child_opt_(DECL, IDENTIFIER, decl, 0)), offset); + members = get_child_opt_(',', ',', members, 1); + set_val(offset, get_val_(INTEGER, offset) - 1); } if (ident != 0) { @@ -2319,6 +2335,29 @@ void handle_enum_struct_union_type_decl(ast type) { // If not an enum, struct, or union, do nothing } +// For now, we don't do anything with the declarations in a typedef. +// The only thing we need to do is to call handle_enum_struct_union_type_decl +// on the type specifier. 
+void handle_typedef(ast node) { + ast decls = get_child__(TYPEDEF_KW, ',', node, 0); + ast decl = get_child__(',', DECL, decls, 0); + ast type = get_child_(DECL, decl, 1); + + while (1) { + switch (get_op(type)) { + case '[': + type = get_child_('[', type, 0); + break; + case '*': + type = get_child_('*', type, 0); + break; + default: + handle_enum_struct_union_type_decl(type); + return; + } + } +} + // This function compiles 1 top level declaration at the time. // The supported top level declarations are: // - global variable declarations @@ -2335,16 +2374,16 @@ void comp_glo_decl(ast node) { if (op == '=') { // Assignments comp_assignment(get_child_('=', node, 0), get_child_('=', node, 1)); - } else if (op == VAR_DECLS) { // Variable declarations - declarations = get_child__(VAR_DECLS, ',', node, 0); + } else if (op == DECLS) { // Variable declarations + declarations = get_child__(DECLS, ',', node, 0); while (declarations != 0) { // Multiple variable declarations - comp_glo_var_decl(get_child__(',', VAR_DECL, declarations, 0)); + comp_glo_var_decl(get_child__(',', DECL, declarations, 0)); declarations = get_child_opt_(',', ',', declarations, 1); } } else if (op == FUN_DECL) { comp_glo_fun_decl(node); } else if (op == TYPEDEF_KW) { - handle_enum_struct_union_type_decl(get_child_(TYPEDEF_KW, node, 1)); + handle_typedef(node); } else if (op == ENUM_KW || op == STRUCT_KW || op == UNION_KW) { handle_enum_struct_union_type_decl(node); } else { From 7bfd4bb6460fa6fb5a51e57e1c0b6eaac40bc014 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Thu, 23 Jan 2025 13:16:47 -0500 Subject: [PATCH 06/89] Add timeout to prepare.sh script and use safe mode --- examples/prepare.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/prepare.sh b/examples/prepare.sh index a74e7176..b919bbff 100755 --- a/examples/prepare.sh +++ b/examples/prepare.sh @@ -12,7 +12,7 @@ mkdir -p build echo "Compiling examples" -PNUT_SH_OPTIONS="-DRELEASE_PNUT_SH 
-DRT_COMPACT" +PNUT_SH_OPTIONS="-DRELEASE_PNUT_SH -DRT_COMPACT -DSAFE_MODE" # Compile pnut.exe gcc -o build/pnut-sh-base.exe $PNUT_SH_OPTIONS pnut.c 2> /dev/null || fail "Error: Failed to compile pnut" @@ -34,9 +34,11 @@ generate_executable_with() { # $1 = executable, $2 = options opt=$2 fi - if ./build/$1 $file $opt > $COMP_DIR/$filename.sh; then + if timeout 3 ./build/$1 $file $opt > $COMP_DIR/$filename.sh; then chmod +x $COMP_DIR/$filename.sh printf "✅\n" + elif [ $? -eq 124 ]; then + printf "Timeout ❌\n" else printf "Failed to compile ❌\n" failed=1 From c9d6dfc99e03331d925534476686e6d6cc8c7c87 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sat, 25 Jan 2025 20:28:05 -0500 Subject: [PATCH 07/89] Simplify parse_declarator --- pnut.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/pnut.c b/pnut.c index aa6afe8b..18399cad 100644 --- a/pnut.c +++ b/pnut.c @@ -2293,6 +2293,12 @@ ast parse_declaration_specifiers(); // The storage class specifier and type qualifier tokens are all between 300 (AUTO_KW) and 326 (VOLATILE_KW) so we store them as bits in an int. #define MK_TYPE_SPECIFIER(tok) (1 << (tok - AUTO_KW)) + +ast pointer_type(ast parent_type, bool is_const) { + return new_ast2('*', is_const ? 
MK_TYPE_SPECIFIER(CONST_KW) : 0, parent_type); +} + +// Type and declaration parser int is_type_starter(int tok) { return tok == INT_KW || tok == CHAR_KW || tok == SHORT_KW || tok == LONG_KW // Numeric types || tok == VOID_KW @@ -2630,12 +2636,8 @@ ast parse_declarator(bool abstract_decl, ast parent_type) { case '*': get_tok(); // Pointers may be const-qualified - if (tok == CONST_KW) { - get_tok(); - result = new_ast2('*', MK_TYPE_SPECIFIER(CONST_KW), parent_type); - } else { - result = new_ast2('*', 0, parent_type); - } + result = pointer_type(parent_type, tok == CONST_KW); + if (tok == CONST_KW) get_tok(); result = parse_declarator(abstract_decl, result); break; @@ -2646,17 +2648,17 @@ ast parse_declarator(bool abstract_decl, ast parent_type) { result = parse_declarator(abstract_decl, parent_type); expect_tok(')'); break; - } - if (result == 0) { - // Abstract declarators don't need names, meaning the previous switch may - // not have set result. In that case, we create a DECL node with no - // identifier. - if (abstract_decl) { - return new_ast3(DECL, 0, parent_type, 0); // child#0 is the identifier, child#2 is the initializer - } else { - parse_error("Invalid declarator, expected an identifier but declarator doesn't have one", tok); - } + default: + // Abstract declarators don't need names, and so in the base declarator, + // we don't require an identifier. This is useful for function pointers. + // In that case, we create a DECL node with no identifier. + if (abstract_decl) { + result = new_ast3(DECL, 0, parent_type, 0); // child#0 is the identifier, child#2 is the initializer + } else { + parse_error("Invalid declarator, expected an identifier but declarator doesn't have one", tok); + } + return result; } // At this point, the only non-recursive declarator is an identifier @@ -2691,7 +2693,6 @@ ast parse_declarator(bool abstract_decl, ast parent_type) { } // And now we wrap the DECL back around the result. - // Maybe we could try reusing the DECL node? 
set_child(decl, 1, result); // child#1 is the type return decl; } From f741374ab2a0e71b6b2af3e1e3ec71fb9f98de2f Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sat, 25 Jan 2025 20:30:01 -0500 Subject: [PATCH 08/89] Add helper function to get type specifier --- pnut.c | 17 +++++++++++++++++ sh.c | 14 +------------- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/pnut.c b/pnut.c index 18399cad..234396e0 100644 --- a/pnut.c +++ b/pnut.c @@ -2293,6 +2293,23 @@ ast parse_declaration_specifiers(); // The storage class specifier and type qualifier tokens are all between 300 (AUTO_KW) and 326 (VOLATILE_KW) so we store them as bits in an int. #define MK_TYPE_SPECIFIER(tok) (1 << (tok - AUTO_KW)) +ast get_type_specifier(ast type_or_decl) { + while (1) { + switch (get_op(type_or_decl)) { + case DECL: + type_or_decl = get_child_(DECL, type_or_decl, 1); + break; + case '[': + type_or_decl = get_child_('[', type_or_decl, 0); + break; + case '*': + type_or_decl = get_child_('*', type_or_decl, 0); + break; + default: + return type_or_decl; + } + } +} ast pointer_type(ast parent_type, bool is_const) { return new_ast2('*', is_const ? MK_TYPE_SPECIFIER(CONST_KW) : 0, parent_type); diff --git a/sh.c b/sh.c index 30a1a1ba..28468390 100644 --- a/sh.c +++ b/sh.c @@ -2343,19 +2343,7 @@ void handle_typedef(ast node) { ast decl = get_child__(',', DECL, decls, 0); ast type = get_child_(DECL, decl, 1); - while (1) { - switch (get_op(type)) { - case '[': - type = get_child_('[', type, 0); - break; - case '*': - type = get_child_('*', type, 0); - break; - default: - handle_enum_struct_union_type_decl(type); - return; - } - } + handle_enum_struct_union_type_decl(get_type_specifier(type)); } // This function compiles 1 top level declaration at the time. 
From b7cd8bcd498ba3b0e751fd7f7469613826f5dd4f Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sat, 25 Jan 2025 20:50:42 -0500 Subject: [PATCH 09/89] Support new declaration AST in pnut-exe --- exe.c | 501 ++++++++++++++++++++++++++------------------------------- pnut.c | 35 ---- 2 files changed, 227 insertions(+), 309 deletions(-) diff --git a/exe.c b/exe.c index dd440c8d..3ce9c390 100644 --- a/exe.c +++ b/exe.c @@ -167,30 +167,30 @@ int power_of_2_log(int n) { return i; } -void mul_for_pointer_arith(int reg, int type_width) { +void mul_for_pointer_arith(int reg, int width) { int other_reg = reg == reg_X ? reg_Y : reg_X; - if (type_width == 1) return; + if (width == 1) return; - if (is_power_of_2(type_width)) { - while (type_width > 1) { - type_width /= 2; + if (is_power_of_2(width)) { + while (width > 1) { + width /= 2; add_reg_reg(reg, reg); } } else { push_reg(other_reg); - mov_reg_imm(other_reg, type_width); + mov_reg_imm(other_reg, width); mul_reg_reg(reg, other_reg); pop_reg(other_reg); } } -void div_for_pointer_arith(int reg, int type_width) { +void div_for_pointer_arith(int reg, int width) { int reg_start = reg; - if (type_width == 1) return; + if (width == 1) return; - if (is_power_of_2(type_width)) { + if (is_power_of_2(width)) { // sar_reg_reg does not work with reg_Y, so we need to shift the value to reg_X if (reg_start != reg_X) { push_reg(reg_X); // Save reg_X @@ -201,7 +201,7 @@ void div_for_pointer_arith(int reg, int type_width) { } // At this point, reg is always reg_X, and reg_Y is free - mov_reg_imm(reg_Y, power_of_2_log(type_width)); + mov_reg_imm(reg_Y, power_of_2_log(width)); sar_reg_reg(reg_X, reg_Y); // Now reg_X contains the result, and we move it back in reg_start if needed @@ -220,7 +220,7 @@ void div_for_pointer_arith(int reg, int type_width) { push_reg(reg_Y); } - mov_reg_imm(reg_Y, type_width); + mov_reg_imm(reg_Y, width); div_reg_reg(reg_X, reg_Y); if (reg_start != reg_X) { @@ -419,14 +419,12 @@ void def_goto_label(int lbl) { } 
// Type, structure and union handling -int type_width_ast(ast type, bool array_value, bool word_align); int struct_union_size(ast struct_type); // A pointer type is either an array type or a type with at least one star bool is_pointer_type(ast type) { bool op = get_op(type); - bool stars = get_stars(type); - return op == '[' || stars > 0; + return op == '[' || op == '*'; } bool is_struct_or_union_type(ast type) { @@ -436,26 +434,8 @@ bool is_struct_or_union_type(ast type) { // An aggregate type is either an array type or a struct/union type (that's not a reference) bool is_aggregate_type(ast type) { - if ((is_struct_or_union_type(type) && get_stars(type) == 0) || get_op(type) == '[') { - return true; - } else { - return false; - } -} - -bool is_type(ast type) { - switch (get_op(type)) { - case INT_KW: - case CHAR_KW: - case VOID_KW: - case STRUCT_KW: - case UNION_KW: - case ENUM_KW: - case '[': - return true; - default: - return false; - } + int op = get_op(type); + return op == '[' || op == STRUCT_KW || op == UNION_KW; } bool is_not_pointer_type(ast type) { @@ -466,41 +446,33 @@ bool is_not_pointer_type(ast type) { // If array_value is true, the size of the array is returned, otherwise the // size of the pointer is returned. // If word_align is true, the size is rounded up to the word size. -int type_width(ast type, int stars, bool array_value, bool word_align) { - // All types have the same shape (kw, stars, ...) except for arrays so we - // handle array types separately. - if (get_op(type) == '[') { - // In certain contexts, we want to know the static size of the array (i.e. - // sizeof, in struct definitions, etc.) while in other contexts we care - // about the pointer (i.e. when passing an array to a function, etc.) 
- if (array_value) { - return round_up_to_word_size(get_val_(INTEGER, get_child_('[', type, 0)) * type_width_ast(get_child_('[', type, 1), true, false)); - } else { - return word_size; // Array is a pointer to the first element - } - } else if (stars) { - return word_size; // Pointer - } - +int type_width(ast type, bool array_value, bool word_align) { // Basic type kw switch (get_op(type)) { + case '[': + // In certain contexts, we want to know the static size of the array (i.e. + // sizeof, in struct definitions, etc.) while in other contexts we care + // about the pointer (i.e. when passing an array to a function, etc.) + if (array_value) { + return round_up_to_word_size(get_child_('[', type, 1) * type_width(get_child_('[', type, 0), true, false)); + } else { + return word_size; // Array is a pointer to the first element + } + case '*': + return word_size; case CHAR_KW: return word_align ? word_size : char_width; case STRUCT_KW: case UNION_KW: return struct_union_size(type); case VOID_KW: - fatal_error("type_width_ast: void type"); + fatal_error("type_width: void type"); return 0; default: return word_size; } } -int type_width_ast(ast type, bool array_value, bool word_align) { - return type_width(type, get_stars(type), array_value, word_align); -} - // Structs, enums and unions types come in 2 variants: // - definition: the type contains the members of the struct/enum/union // - reference: the type reference an already declared struct/enum/union and doesn't contain the members. 
@@ -513,29 +485,19 @@ ast canonicalize_type(ast type) { if (get_op(type) == STRUCT_KW && get_child_opt_(STRUCT_KW, ',', type, 2) == 0) { // struct with empty def => reference binding = cgc_lookup_struct(get_val_(IDENTIFIER, get_child__(STRUCT_KW, IDENTIFIER, type, 1)), cgc_globals); - if (binding == 0) fatal_error("canonicalize_type: struct type not defined"); - res = heap[binding+3]; - if (get_stars(type) != 0) { // Copy stars - res = clone_ast(res); - set_child(res, 0, get_child_(STRUCT_KW, type, 0)); - } } else if (get_op(type) == UNION_KW && get_child_opt_(UNION_KW, ',', type, 2) == 0) { // union with empty def => reference binding = cgc_lookup_union(get_val_(IDENTIFIER, get_child__(UNION_KW, IDENTIFIER, type, 1)), cgc_globals); - if (binding == 0) fatal_error("canonicalize_type: union type not defined"); - res = heap[binding+3]; - if (get_stars(type) != 0) { // Copy stars - res = clone_ast(res); - set_child(res, 0, get_child_(UNION_KW, type, 0)); - } - } else if (get_op(type) == ENUM_KW && get_child_opt_(ENUM_KW, ',', type, 1) == 0) { // enum with empty def => reference - binding = cgc_lookup_enum(get_val_(IDENTIFIER, get_child__(ENUM_KW, IDENTIFIER, type, 0)), cgc_globals); - if (binding == 0) fatal_error("canonicalize_type: enum type not defined"); - res = heap[binding+3]; - if (get_stars(type) != 0) { // Copy stars - res = clone_ast(res); - set_child(res, 0, get_child_(ENUM_KW, type, 0)); - } + } else if (get_op(type) == ENUM_KW && get_child_opt_(ENUM_KW, ',', type, 2) == 0) { // enum with empty def => reference + binding = cgc_lookup_enum(get_val_(IDENTIFIER, get_child__(ENUM_KW, IDENTIFIER, type, 1)), cgc_globals); + } else { + return res; + } + + if (binding == 0) { + putstr("type="); putstr(STRING_BUF(get_val_(IDENTIFIER, get_child(type, 1)))); putchar('\n'); + fatal_error("canonicalize_type: Type is not defined"); } + res = heap[binding+3]; return res; } @@ -545,34 +507,21 @@ int struct_union_size(ast type) { ast members; ast member_type; int 
member_size; - int size = 0; + int sum_size = 0, max_size = 0; type = canonicalize_type(type); members = get_child(type, 2); - switch (get_op(type)) { - case STRUCT_KW: while (get_op(members) == ',') { - member_type = get_child_(',', members, 1); - members = get_child_opt_(',', ',', members, 2); - member_size = type_width_ast(member_type, true, true); - size += member_size; - } - break; - case UNION_KW: - while (get_op(members) == ',') { - member_type = get_child_(',', members, 1); - members = get_child_opt_(',', ',', members, 2); - member_size = type_width_ast(member_type, true, true); - // Union size is the max of its members - if (member_size > size) size = member_size; - } - break; - default: - fatal_error("struct_union_size: not a struct or union type"); + member_type = get_child_(DECL, get_child__(',', DECL, members, 0), 1); + members = get_child_opt_(',', ',', members, 1); + member_size = type_width(member_type, true, true); + sum_size += member_size; // Struct size is the sum of its members + if (member_size > max_size) max_size = member_size; // Union size is the max of its members } - return round_up_to_word_size(size); + // Don't need to round the size of a union to the word size since type_width already did + return get_op(type) == STRUCT_KW ? 
sum_size : max_size; } // Find offset of struct member @@ -580,12 +529,13 @@ int struct_member_offset_go(ast struct_type, ast member_ident) { ast members = get_child(canonicalize_type(struct_type), 2); int offset = 0; int sub_offset; - ast ident; + ast decl, ident; - while (get_op(members) == ',') { - ident = get_child_opt_(',', IDENTIFIER, members, 0); + while (members != 0) { + decl = get_child_opt_(',', DECL, members, 0); + ident = get_child_opt_(DECL, IDENTIFIER, decl, 0); if (ident == 0) { // Anonymous struct member, search that struct - sub_offset = struct_member_offset_go(get_child_(',', members, 1), member_ident); + sub_offset = struct_member_offset_go(get_child_(DECL, decl, 1), member_ident); if (sub_offset != -1) return offset + sub_offset; } else if (get_val_(IDENTIFIER, member_ident) == get_val_(IDENTIFIER, ident)) { return offset; @@ -593,10 +543,11 @@ int struct_member_offset_go(ast struct_type, ast member_ident) { if (get_op(struct_type) == STRUCT_KW) { // For unions, fields are always at offset 0. We must still iterate - // because the field may be in an anonymous struct. - offset += round_up_to_word_size(type_width_ast(get_child_(',', members, 1), true, true)); + // because the field may be in an anonymous struct, in which case the + // final offset is not 0. 
+ offset += round_up_to_word_size(type_width(get_child_(DECL, decl, 1), true, true)); } - members = get_child_opt_(',', ',', members, 2); + members = get_child_opt_(',', ',', members, 1); } return -1; @@ -611,17 +562,18 @@ int struct_member_offset(ast struct_type, ast member_ident) { // Find a struct member ast struct_member_go(ast struct_type, ast member_ident) { ast members = get_child(canonicalize_type(struct_type), 2); - ast ident; + ast decl, ident; while (members != 0) { - ident = get_child_opt_(',', IDENTIFIER, members, 0); + decl = get_child_opt_(',', DECL, members, 0); + ident = get_child_opt_(DECL, IDENTIFIER, decl, 0); if (ident == 0) { // Anonymous struct member, search that struct - ident = struct_member_go(get_child_(',', members, 1), member_ident); + ident = struct_member_go(get_child_(DECL, decl, 1), member_ident); if (ident != 0) return ident; // Found member in the anonymous struct } else if (get_val_(IDENTIFIER, member_ident) == get_val_(IDENTIFIER, ident)) { - return members; + return decl; } - members = get_child_opt_(',', ',', members, 2); + members = get_child_opt_(',', ',', members, 1); } return -1; @@ -635,12 +587,13 @@ ast struct_member(ast struct_type, ast member_ident) { // Width of an object pointed to by a reference type. 
int ref_type_width(ast type) { - if (get_op(type) == '[') { - return type_width_ast(get_child_('[', type, 1), false, false); // size of inner type - } else if (get_stars(type) == 1) { // pointer * - return type_width(type, 0, false, false); // size of inner type - } else { - return word_size; + switch (get_op(type)) { + case '[': + return type_width(get_child_('[', type, 0), false, false); // size of inner type + case '*': + return type_width(get_child_('*', type, 1), false, false); // size of inner type; + default: + return word_size; } } @@ -650,6 +603,19 @@ ast string_type; ast void_type; ast void_star_type; +ast dereference_type(ast type) { + switch (get_op(type)) { + case '[': // Array type + return get_child_('[', type, 0); + case '*': // Pointer type + return get_child_('*', type, 1); + default: + putstr("type="); putint(get_op(type)); putchar('\n'); + fatal_error("dereference_type: non pointer is being dereferenced with *"); + return -1; + } +} + // Compute the type of an expression ast value_type(ast node) { int op = get_op(node); @@ -701,27 +667,10 @@ ast value_type(ast node) { if (op == '*') { left_type = value_type(child0); - if (get_op(left_type) == '[') { // Array type - return get_child_('[', left_type, 1); - } else if (get_stars(left_type) != 0) { // Pointer type - left_type = clone_ast(left_type); - set_stars(left_type, get_stars(left_type) - 1); // one less indirection - return left_type; - } else { - putstr("left_type="); putint(left_type); putchar('\n'); - fatal_error("pointer_width: non pointer is being dereferenced with *"); - return -1; - } + return dereference_type(left_type); } else if (op == '&') { left_type = value_type(child0); - if (get_op(left_type) == '[') { - left_type = clone_ast(get_child_('[', left_type, 1)); // Inner type - set_stars(left_type, get_stars(left_type) + 1); // Increment star by 2, to account for the [ we just removed - } else { - left_type = clone_ast(left_type); - set_stars(left_type, get_stars(left_type) + 1); // 
Increment star by 1 - } - return left_type; + return pointer_type(left_type, false); } else if (op == '+' || op == '-' || op == '~' || op == '!' || op == MINUS_MINUS || op == PLUS_PLUS || op == MINUS_MINUS_POST || op == PLUS_PLUS_POST || op == PLUS_PLUS_PRE || op == MINUS_MINUS_PRE || op == PARENS) { // Unary operation don't change the type return value_type(child0); @@ -754,21 +703,14 @@ ast value_type(ast node) { left_type = value_type(child0); right_type = value_type(child1); - if (get_op(left_type) == '[') { // Array - return get_child_('[', left_type, 1); // array inner type - } else if (get_stars(left_type) != 0) { // Pointer - left_type = clone_ast(left_type); - set_stars(left_type, get_stars(left_type) - 1); // one less indirection - return left_type; - } else if (get_op(right_type) == '[') { // Array, but with the operands flipped (i.e. 0[arr] instead of arr[0]) - return get_child_('[', right_type, 1); // array inner type - } else if (get_stars(right_type) != 0) { - right_type = clone_ast(right_type); - set_stars(right_type, get_stars(right_type) - 1); // one less indirection - return right_type; + if (get_op(left_type) == '[' || get_op(left_type) == '*') { + return dereference_type(left_type); + } else if (get_op(right_type) == '[' || get_op(right_type) == '*') { + return dereference_type(right_type); } else { - putstr("left_type="); putint(left_type); putchar('\n'); - fatal_error("value_type: non pointer is being dereferenced with *"); + putstr("left_type="); putint(get_op(left_type)); putchar('\n'); + putstr("right_type="); putint(get_op(right_type)); putchar('\n'); + fatal_error("value_type: non pointer is being dereferenced as array"); return -1; } } else if (op == '=' || op == AMP_EQ || op == BAR_EQ || op == CARET_EQ || op == LSHIFT_EQ || op == MINUS_EQ || op == PERCENT_EQ || op == PLUS_EQ || op == RSHIFT_EQ || op == SLASH_EQ || op == STAR_EQ) { @@ -789,8 +731,8 @@ ast value_type(ast node) { } } else if (op == '.') { left_type = value_type(child0); - 
if (is_struct_or_union_type(left_type) && get_stars(left_type) == 0) { - return get_child_(',', struct_member(left_type, child1), 1); // child 1 of member is the type + if (is_struct_or_union_type(left_type)) { + return get_child_(DECL, struct_member(left_type, child1), 1); // child 1 of member is the type } else { fatal_error("value_type: . operator on non-struct pointer type"); return -1; @@ -798,14 +740,14 @@ ast value_type(ast node) { } else if (op == ARROW) { // Same as '.', but left_type must be a pointer left_type = value_type(child0); - if (is_struct_or_union_type(left_type) && get_stars(left_type) == 1) { - return get_child_(',', struct_member(left_type, child1), 1); // child 1 of member is the type + if (get_op(left_type) == '*' && is_struct_or_union_type(get_child_('*', left_type, 1))) { + return get_child_(DECL, struct_member(get_child_('*', left_type, 1), child1), 1); // child 1 of member is the type } else { fatal_error("value_type: -> operator on non-struct pointer type"); return -1; } } else if (op == CAST) { - return child0; + return get_child_(DECL, child0, 1); } else { fatal_error("value_type: unknown expression with 2 children"); return -1; @@ -933,7 +875,7 @@ int codegen_param(ast param) { int type = value_type(param); int left_width; - if (is_struct_or_union_type(type) && get_stars(type) == 0) { + if (is_struct_or_union_type(type)) { left_width = codegen_lvalue(param); pop_reg(reg_X); grow_fs(-1); @@ -944,7 +886,7 @@ int codegen_param(ast param) { codegen_rvalue(param); } - return type_width_ast(type, false, true) / word_size; + return type_width(type, false, true) / word_size; } int codegen_params(ast params) { @@ -1034,7 +976,7 @@ int codegen_lvalue(ast node) { fatal_error("codegen_lvalue: identifier not found"); } } - lvalue_width = type_width_ast(heap[binding+5], true, true); + lvalue_width = type_width(heap[binding+5], true, true); } else { putstr("op="); putint(op); putchar('\n'); fatal_error("codegen_lvalue: unknown lvalue with 
nb_children == 0"); @@ -1065,38 +1007,39 @@ int codegen_lvalue(ast node) { lvalue_width = ref_type_width(type); } else if (op == '.') { type = value_type(child0); - if (is_struct_or_union_type(type) && get_stars(type) == 0) { + if (is_struct_or_union_type(type)) { codegen_lvalue(child0); pop_reg(reg_X); // union members are at the same offset: 0 if (get_op(type) == STRUCT_KW) { - add_reg_imm(reg_X, struct_member_offset(type, child1)); + add_reg_imm(reg_X, struct_member_offset(type, child1)); } push_reg(reg_X); grow_fs(-1); - lvalue_width = type_width_ast(get_child_(',', struct_member(type, child1), 1), true, true); // child 1 of member is the type + lvalue_width = type_width(get_child_(DECL, struct_member(type, child1), 1), true, true); // child 1 of member is the type } else { fatal_error("codegen_lvalue: . operator on non-struct type"); } } else if (op == ARROW) { // Same as '.', but type must be a pointer type = value_type(child0); - if (is_struct_or_union_type(type) && get_stars(type) == 1) { + if (get_op(type) == '*' && is_struct_or_union_type(get_child_('*', type, 1))) { + type = get_child_('*', type, 1); codegen_rvalue(child0); pop_reg(reg_X); // union members are at the same offset: 0 if (get_op(type) == STRUCT_KW) { - add_reg_imm(reg_X, struct_member_offset(type, child1)); + add_reg_imm(reg_X, struct_member_offset(type, child1)); } push_reg(reg_X); grow_fs(-1); - lvalue_width = type_width_ast(get_child_(',', struct_member(type, child1), 1), true, true); // child 1 of member is the type + lvalue_width = type_width(get_child_(DECL, struct_member(type, child1), 1), true, true); // child 1 of member is the type } else { fatal_error("codegen_lvalue: -> operator on non-struct pointer type"); } } else if (op == CAST) { codegen_lvalue(child1); - lvalue_width = type_width_ast(child0, true, true); + lvalue_width = type_width(child0, true, true); grow_fs(-1); // grow_fs is called at the end of the function, so we need to decrement it here } else { 
fatal_error("codegen_lvalue: unknown lvalue with 2 children"); @@ -1167,11 +1110,8 @@ void codegen_rvalue(ast node) { if (binding != 0) { mov_reg_imm(reg_X, (cgc_fs - heap[binding+4]) * word_size); add_reg_reg(reg_X, reg_SP); - // local arrays are allocated on the stack, so no need to dereference - // same thing for non-pointer structs and unions. - if (get_op(heap[binding+5]) != '[' - && (get_op(heap[binding+5]) != STRUCT_KW || get_stars(heap[binding+5]) != 0) - && (get_op(heap[binding+5]) != UNION_KW || get_stars(heap[binding+5]) != 0)) { + // local arrays/structs/unions are allocated on the stack, so no need to dereference + if (get_op(heap[binding+5]) != '[' && get_op(heap[binding+5]) != STRUCT_KW && get_op(heap[binding+5]) != UNION_KW) { mov_reg_mem(reg_X, reg_X, 0); } push_reg(reg_X); @@ -1180,11 +1120,8 @@ void codegen_rvalue(ast node) { if (binding != 0) { mov_reg_imm(reg_X, heap[binding+4]); add_reg_reg(reg_X, reg_glo); - // global arrays are allocated on the stack, so no need to dereference - // same thing for non-pointer structs and unions. 
- if (get_op(heap[binding+5]) != '[' - && (get_op(heap[binding+5]) != STRUCT_KW || get_stars(heap[binding+5]) != 0) - && (get_op(heap[binding+5]) != UNION_KW || get_stars(heap[binding+5]) != 0)) { + // global arrays/structs/unions are also allocated on the stack, so no need to dereference + if (get_op(heap[binding+5]) != '[' && get_op(heap[binding+5]) != STRUCT_KW && get_op(heap[binding+5]) != UNION_KW) { mov_reg_mem(reg_X, reg_X, 0); } push_reg(reg_X); @@ -1275,10 +1212,10 @@ void codegen_rvalue(ast node) { codegen_lvalue(child0); grow_fs(-1); } else if (op == SIZEOF_KW) { - if (is_type(child0)) { - mov_reg_imm(reg_X, type_width_ast(child0, true, false)); + if (get_op(child0) == DECL) { + mov_reg_imm(reg_X, type_width(get_child_(DECL, child0, 1), true, false)); } else { - mov_reg_imm(reg_X, type_width_ast(value_type(child0), true, false)); + mov_reg_imm(reg_X, type_width(value_type(child0), true, false)); } push_reg(reg_X); } else { @@ -1295,7 +1232,7 @@ void codegen_rvalue(ast node) { } else if (op == '=') { type1 = value_type(child0); left_width = codegen_lvalue(child0); - if (is_struct_or_union_type(type1) && get_stars(type1) == 0) { + if (is_struct_or_union_type(type1)) { // Struct assignment, we copy the struct. 
codegen_lvalue(child1); pop_reg(reg_X); @@ -1343,8 +1280,8 @@ void codegen_rvalue(ast node) { codegen_call(node); } else if (op == '.') { type1 = value_type(child0); - if (is_struct_or_union_type(type1) && get_stars(type1) == 0) { - type2 = get_child_(',', struct_member(type1, child1), 1); + if (is_struct_or_union_type(type1)) { + type2 = get_child_(DECL, struct_member(type1, child1), 1); codegen_lvalue(child0); pop_reg(reg_Y); grow_fs(-1); @@ -1353,7 +1290,7 @@ void codegen_rvalue(ast node) { add_reg_imm(reg_Y, struct_member_offset(type1, child1)); } if (!is_aggregate_type(type2)) { - load_mem_location(reg_Y, reg_Y, 0, type_width_ast(type2, false, false)); + load_mem_location(reg_Y, reg_Y, 0, type_width(type2, false, false)); } push_reg(reg_Y); } else { @@ -1361,8 +1298,9 @@ void codegen_rvalue(ast node) { } } else if (op == ARROW) { type1 = value_type(child0); - if (is_struct_or_union_type(type1) && get_stars(type1) == 1) { - type2 = get_child_(',', struct_member(type1, child1), 1); + if (get_op(type1) == '*' && is_struct_or_union_type(get_child_('*', type1, 1))) { + type1 = get_child_('*', type1, 1); + type2 = get_child_(DECL, struct_member(type1, child1), 1); codegen_rvalue(child0); pop_reg(reg_Y); grow_fs(-1); @@ -1427,9 +1365,9 @@ void codegen_begin() { int_type = new_ast0(INT_KW, 0); char_type = new_ast0(CHAR_KW, 0); - string_type = new_ast0(CHAR_KW, 1); + string_type = pointer_type(new_ast0(CHAR_KW, 0), false); void_type = new_ast0(VOID_KW, 0); - void_star_type = new_ast0(VOID_KW, 1); + void_star_type = pointer_type(new_ast0(VOID_KW, 0), false); main_lbl = alloc_label(); cgc_add_global_fun(init_ident(IDENTIFIER, "main"), main_lbl, void_type); @@ -1500,18 +1438,21 @@ void codegen_struct_or_union(ast node, enum BINDING kind) { ast members = get_child(node, 2); int binding; - if (name != 0 && members != 0) { // if struct has a name and members (not a reference to an existing type) + // if struct has a name and members (not a reference to an existing type) + if 
(name != 0 && members != 0) { binding = cgc_lookup_binding_ident(kind, get_val_(IDENTIFIER, name), cgc_globals); - if (binding != 0 && heap[binding + 3] != node) { fatal_error("codegen_struct_or_union: struct/union/enum already declared"); } + if (binding != 0 && heap[binding + 3] != node && get_child(heap[binding + 3], 2) != members) { + fatal_error("codegen_struct_or_union: struct/union already declared"); + } cgc_add_typedef(get_val_(IDENTIFIER, name), kind, node); } // Traverse the structure to find any other declarations. // This is not the right semantic because inner declarations are scoped to // this declaration, but it's probably good enough for TCC. - while (members != 0 && get_op(members) == ',') { - handle_enum_struct_union_type_decl(get_child_(',', members, 1)); - members = get_child_opt_(',', ',', members, 2); + while (members != 0) { + handle_enum_struct_union_type_decl(get_child_(DECL, get_child__(',', DECL, members, 0), 1)); + members = get_child_opt_(',', ',', members, 1); } } @@ -1528,101 +1469,104 @@ void handle_enum_struct_union_type_decl(ast type) { } void codegen_glo_var_decl(ast node) { - ast name = get_child_(VAR_DECL, node, 0); - ast type = get_child_(VAR_DECL, node, 1); - ast init = get_child_(VAR_DECL, node, 2); + ast name = get_child__(DECL, IDENTIFIER, node, 0); + ast type = get_child_(DECL, node, 1); + ast init = get_child_(DECL, node, 2); + int name_probe = get_val_(IDENTIFIER, name); int size; - int binding = cgc_lookup_var(name, cgc_globals); + int binding = cgc_lookup_var(name_probe, cgc_globals); if (get_op(type) == '[') { // Array declaration - size = get_val_(INTEGER, get_child_('[', type, 0)); + size = get_child_('[', type, 0); } else { // All non-array types have size 1 size = 1; } + if (get_op(type) == '(') { + // Forward declaration + binding = cgc_lookup_fun(name_probe, cgc_globals); + if (binding == 0) cgc_add_global_fun(name_probe, alloc_label(), type); - handle_enum_struct_union_type_decl(type); - - if (binding == 0) { 
- cgc_add_global(name, size, type_width_ast(type, true, true), type); - binding = cgc_globals; - } + } else { + handle_enum_struct_union_type_decl(type); - if (get_op(type) != '[') { // not array declaration + if (binding == 0) { + cgc_add_global(name_probe, size, type_width(type, true, true), type); + binding = cgc_globals; + } - def_label(init_next_lbl); - init_next_lbl = alloc_label(); + if (get_op(type) != '[') { // not array declaration - if (init != 0) { + def_label(init_next_lbl); + init_next_lbl = alloc_label(); - codegen_rvalue(init); - } else { - xor_reg_reg(reg_X, reg_X); - push_reg(reg_X); - grow_fs(1); - } + if (init != 0) { + codegen_rvalue(init); + } else { + xor_reg_reg(reg_X, reg_X); + push_reg(reg_X); + grow_fs(1); + } - pop_reg(reg_X); - grow_fs(-1); + pop_reg(reg_X); + grow_fs(-1); - mov_mem_reg(reg_glo, heap[binding+4], reg_X); + mov_mem_reg(reg_glo, heap[binding+4], reg_X); - jump(init_next_lbl); + jump(init_next_lbl); + } } } void codegen_local_var_decl(ast node) { - ast name = get_child_(VAR_DECL, node, 0); - ast type = get_child_(VAR_DECL, node, 1); - ast init = get_child_(VAR_DECL, node, 2); + ast name = get_child__(DECL, IDENTIFIER, node, 0); + ast type = get_child_(DECL, node, 1); + ast init = get_child_(DECL, node, 2); int size; - if (get_op(type) == '[') { // Array declaration - size = type_width_ast(type, true, true); // size in bytes (word aligned) - grow_stack_bytes(size); - size /= word_size; // size in words - } else if (is_struct_or_union_type(type) && get_stars(type) == 0) { - size = struct_union_size(type); // size in bytes (word aligned) - grow_stack_bytes(size); - size /= word_size; // size in words - } else { - // All non-array types are represented as a word, even if they are smaller - if (init != 0) { - codegen_rvalue(init); - grow_fs(-1); - } else { - xor_reg_reg(reg_X, reg_X); - push_reg(reg_X); - } - - size = 1; - } - cgc_add_local_var(name, size, type); + if (is_aggregate_type(type)) { // Array/struct/union declaration 
+ size = type_width(type, true, true); // size in bytes (word aligned) + grow_stack_bytes(size); + size /= word_size; // size in words + } else { + // All non-array types are represented as a word, even if they are smaller + if (init != 0) { + codegen_rvalue(init); + grow_fs(-1); + } else { + xor_reg_reg(reg_X, reg_X); + push_reg(reg_X); + } + size = 1; + } + + cgc_add_local_var(get_val_(IDENTIFIER, name), size, type); } void codegen_body(ast node) { int save_fs = cgc_fs; int save_locals = cgc_locals; ast stmt; - ast decls; + ast declarations; while (node != 0) { stmt = get_child_('{', node, 0); - if (get_op(stmt) == VAR_DECLS) { // Variable declaration - decls = get_child__(VAR_DECLS, ',', stmt, 0); // Declaration list - while(decls != 0) { // Multiple variable declarations - codegen_local_var_decl(get_child__(',', VAR_DECL, decls, 0)); - decls = get_child_opt_(',', ',', decls, 1); // Move to the next declaration in the list - } - } else { - codegen_statement(stmt); + if (get_op(stmt) == DECLS) { // Variable declaration + declarations = get_child__(DECLS, ',', stmt, 0); + while (declarations != 0) { // Multiple variable declarations + codegen_local_var_decl(get_child__(',', DECL, declarations, 0)); + declarations = get_child_opt_(',', ',', declarations, 1); } - node = get_child_opt_('{', '{', node, 1); + } else { + codegen_statement(stmt); } - grow_stack(save_fs - cgc_fs); + node = get_child_opt_('{', '{', node, 1); + } - cgc_fs = save_fs; - cgc_locals = save_locals; + grow_stack(save_fs - cgc_fs); + + cgc_fs = save_fs; + cgc_locals = save_locals; } void codegen_statement(ast node) { @@ -1846,53 +1790,51 @@ void codegen_statement(ast node) { } void add_params(ast params) { - - ast decl; + ast decl, type; int ident; - ast type; while (params != 0) { - decl = get_child__(',', VAR_DECL, params, 0); - ident = get_child_(VAR_DECL, decl, 0); - type = get_child_(VAR_DECL, decl, 1); + decl = get_child__(',', DECL, params, 0); + ident = get_val_(IDENTIFIER, 
get_child__(DECL, IDENTIFIER, decl, 0)); + type = get_child_(DECL, decl, 1); if (cgc_lookup_var(ident, cgc_locals) != 0) fatal_error("add_params: duplicate parameter"); - cgc_add_local_param(ident, type_width_ast(type, false, true) / word_size, type); + cgc_add_local_param(ident, type_width(type, false, true) / word_size, type); params = get_child_opt_(',', ',', params, 1); } } void codegen_glo_fun_decl(ast node) { - ast name = get_child_(FUN_DECL, node, 0); - ast fun_type = get_child_(FUN_DECL, node, 1); - ast params = get_child_(FUN_DECL, node, 2); - ast body = get_child_opt_(FUN_DECL, '{', node, 3); + ast decl = get_child__(FUN_DECL, DECL, node, 0); + ast body = get_child_opt_(FUN_DECL, '{', node, 1); + ast name_probe = get_val_(IDENTIFIER, get_child__(DECL, IDENTIFIER, decl, 0)); + ast fun_type = get_child__(DECL, '(', decl, 1); + ast params = get_child_opt_('(', ',', fun_type, 1); + ast fun_return_type = get_child_('(', fun_type, 0); int lbl; int binding; int save_locals_fun = cgc_locals_fun; - if (is_struct_or_union_type(fun_type) && get_stars(fun_type) == 0) { - fatal_error("add_params: returning structs from function not supported"); - } else if (get_op(fun_type) == '[') { - fatal_error("add_params: returning arrays from function not supported"); + if (is_aggregate_type(fun_return_type)) { + fatal_error("Returning arrays or structs from function not supported"); } // If the function is main - if (name == MAIN_ID) { + if (name_probe == MAIN_ID) { // Check if main returns an exit code. 
- if (get_op(fun_type) != VOID_KW) main_returns = true; + if (get_op(fun_return_type) != VOID_KW) main_returns = true; } - binding = cgc_lookup_fun(name, cgc_globals); + binding = cgc_lookup_fun(name_probe, cgc_globals); if (binding == 0) { lbl = alloc_label(); - cgc_add_global_fun(name, lbl, fun_type); + cgc_add_global_fun(name_probe, lbl, fun_type); binding = cgc_globals; } - if (body > 0) { // 0 is empty body, -1 is forward declaration + if (body != 0) { // 0 is empty body lbl = heap[binding+4]; @@ -1914,20 +1856,31 @@ void codegen_glo_fun_decl(ast node) { cgc_locals_fun = save_locals_fun; } +// For now, we don't do anything with the declarations in a typedef. +// The only thing we need to do is to call handle_enum_struct_union_type_decl +// on the type specifier, which is the same for all declarations. +void handle_typedef(ast node) { + ast decls = get_child__(TYPEDEF_KW, ',', node, 0); + ast decl = get_child__(',', DECL, decls, 0); + ast type = get_child_(DECL, decl, 1); + + handle_enum_struct_union_type_decl(get_type_specifier(type)); +} + void codegen_glo_decl(ast node) { ast decls; int op = get_op(node); - if (op == VAR_DECLS) { - decls = get_child__(VAR_DECLS, ',', node, 0); // Declaration list + if (op == DECLS) { + decls = get_child__(DECLS, ',', node, 0); // Declaration list while (decls != 0) { // Multiple variable declarations - codegen_glo_var_decl(get_child__(',', VAR_DECL, decls, 0)); + codegen_glo_var_decl(get_child__(',', DECL, decls, 0)); decls = get_child_opt_(',', ',', decls, 1); // Next variable declaration } } else if (op == FUN_DECL) { codegen_glo_fun_decl(node); } else if (op == TYPEDEF_KW) { - handle_enum_struct_union_type_decl(get_child_(TYPEDEF_KW, node, 1)); + handle_typedef(node); } else if (op == ENUM_KW || op == STRUCT_KW || op == UNION_KW) { handle_enum_struct_union_type_decl(node); } else { diff --git a/pnut.c b/pnut.c index 234396e0..0c654307 100644 --- a/pnut.c +++ b/pnut.c @@ -415,33 +415,6 @@ ast get_child_opt_go(char* file, 
int line, int expected_parent_node, int expecte #define get_child__(expected_parent_node, expected_node, node, i) get_child__go(__FILE__, __LINE__, expected_parent_node, expected_node, node, i) #define get_child_opt_(expected_parent_node, expected_node, node, i) get_child_opt_go(__FILE__, __LINE__, expected_parent_node, expected_node, node, i) -int get_stars(ast type) { - switch (get_op(type)) { - case INT_KW: - return get_child_(INT_KW, type, 0); - case CHAR_KW: - return get_child_(CHAR_KW, type, 0); - case VOID_KW: - return get_child_(VOID_KW, type, 0); - case ENUM_KW: - return get_child_(ENUM_KW, type, 0); - case STRUCT_KW: - return get_child_(STRUCT_KW, type, 0); - case UNION_KW: - return get_child_(UNION_KW, type, 0); - case '[': - return get_child_('[', type, 0); - default: - printf("get_stars: unexpected type: %d\n", get_op(type)); - exit(1); - return 0; - } -} - -void set_stars(ast type, int stars) { - set_child(type, 0, stars); -} - #else int get_val(ast node) { @@ -465,14 +438,6 @@ void set_child(ast node, int i, ast child) { #define get_child__(expected_parent_node, expected_node, node, i) get_child(node, i) #define get_child_opt_(expected_parent_node, expected_node, node, i) get_child(node, i) -int get_stars(ast type) { - return get_child(type, 0); -} - -void set_stars(ast type, int stars) { - set_child(type, 0, stars); -} - #endif ast ast_result; From 531ee6ae6d6d00a493c4e5e0878bb5b4d1e47fbc Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sat, 25 Jan 2025 21:27:46 -0500 Subject: [PATCH 10/89] Add test showing last switch case runs as default When a switch statement had no matching case, the last conditional block executed, which caused a very hard to find bug in the pnut-exe bootstrap. 
--- tests/_exe/switch.c | 107 +++++++++++++++++++++------------------ tests/_exe/switch.golden | 6 +-- 2 files changed, 61 insertions(+), 52 deletions(-) diff --git a/tests/_exe/switch.c b/tests/_exe/switch.c index 4daf5cd8..851b4409 100644 --- a/tests/_exe/switch.c +++ b/tests/_exe/switch.c @@ -14,61 +14,70 @@ void no_case_switch() { } } -void basic_switch(){ - int a = 2; +void basic_switch() { + int a = 0; - switch (a) { - case 1: - putchar('A'); - break; - case 2: - putchar('B'); - break; - case 3: - putchar('C'); - break; - default: - putchar('D'); - break; + while (a < 5) { + switch (a) { + case 1: + putchar('A'); + break; + case 2: + putchar('B'); + break; + case 3: + putchar('C'); + break; + default: + putchar('D'); + break; + } + a++; } } -void no_default_break_switch(){ - int a = 2; +void no_default_break_switch() { + int a = 0; - switch (a) { - case 1: - putchar('A'); - break; - case 2: - putchar('B'); - break; - case 3: - putchar('C'); - break; - default: - putchar('D'); - // break; + while (a < 5) { + switch (a) { + case 1: + putchar('A'); + break; + case 2: + putchar('B'); + break; + case 3: + putchar('C'); + break; + default: + putchar('D'); + // break; + } + a++; } } -void no_default_switch(){ - int a = 2; +void no_default_switch() { + int a = 0; - switch (a) { - case 1: - putchar('A'); - break; - case 2: - putchar('B'); - break; - case 3: - putchar('C'); - break; + while (a < 5) { + switch (a) { + case 1: + putchar('A'); + break; + case 2: + putchar('B'); + break; + case 3: + putchar('C'); + break; + } + a++; } } -void goto_switch(int a){ +void goto_switch(int a) { switch (a) { case 1: @@ -91,7 +100,7 @@ void goto_switch(int a){ putchar('E'); } -void gotos_switch(){ +void gotos_switch() { int a = 0; start: @@ -118,7 +127,7 @@ void gotos_switch(){ putchar('E'); } -void switch_while(){ +void switch_while() { int i = 0; while (i < 5) { @@ -198,10 +207,10 @@ void state_machine_switch() { switch (state) { case 0: for (i = 0; i < 10; i++) { - state = 1; 
// Next call will start at case 1 - putchar('A'); - return; - case 1: putchar('B'); + state = 1; // Next call will start at case 1 + putchar('A'); + return; + case 1: putchar('B'); } } } diff --git a/tests/_exe/switch.golden b/tests/_exe/switch.golden index e46c6432..1fcef192 100644 --- a/tests/_exe/switch.golden +++ b/tests/_exe/switch.golden @@ -1,8 +1,8 @@ -B -B -B +DABCD +DABCD +ABC A CE ABCE From 6de24504c3d0fe319bd70910215c1800d9718925 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sat, 25 Jan 2025 21:34:51 -0500 Subject: [PATCH 11/89] Fix the bug Turns out it was just an uninitialized label... --- exe.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/exe.c b/exe.c index dd440c8d..544418a3 100644 --- a/exe.c +++ b/exe.c @@ -1733,11 +1733,15 @@ void codegen_statement(ast node) { lbl2 = alloc_label(); // lbl2: next case cgc_add_enclosing_switch(cgc_fs, lbl1, lbl2); + binding = cgc_locals; codegen_rvalue(get_child_(SWITCH_KW, node, 0)); // switch operand - jump(lbl2); // Jump to first case + jump(lbl2); // Jump to first case codegen_statement(get_child_(SWITCH_KW, node, 1)); // switch body + // false jump location of last case + // Reload because the label is updated when a new case is added + lbl2 = heap[binding + 4]; if (heap[lbl2 + 1] >= 0) { def_label(lbl2); // No case statement => jump to end of switch } From d5938d4bd7ff503a7b824c0fb764cab89ffc8499 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sat, 25 Jan 2025 21:37:29 -0500 Subject: [PATCH 12/89] Remove char_width that's no longer useful char_width was used to parameterize the size of characters in strings, but I don't see why we'd want to change this. 
--- exe.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/exe.c b/exe.c index dd440c8d..e19c0beb 100644 --- a/exe.c +++ b/exe.c @@ -76,8 +76,6 @@ void grow_fs(int words) { cgc_fs += words; } -const int char_width = 1; - const int reg_X; const int reg_Y; const int reg_Z; @@ -485,7 +483,7 @@ int type_width(ast type, int stars, bool array_value, bool word_align) { // Basic type kw switch (get_op(type)) { case CHAR_KW: - return word_align ? word_size : char_width; + return word_align ? word_size : 1; case STRUCT_KW: case UNION_KW: return struct_union_size(type); @@ -1123,20 +1121,11 @@ void codegen_string(int string_probe) { call(lbl); while (string_start != string_end) { - if (char_width == 1) { - emit_i8(*string_start); - } else { - emit_word_le(*string_start); - } + emit_i8(*string_start); string_start += 1; } - - if (char_width == 1) { - emit_i8(0); - } else { - emit_word_le(0); - } + emit_i8(0); def_label(lbl); } From 05f52d75e2354b4dc5f3014337504a45b76dc3cc Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau <16990250+laurenthuberdeau@users.noreply.github.com> Date: Sat, 25 Jan 2025 21:41:55 -0500 Subject: [PATCH 13/89] Pass error location to expect_tok (#140) That way, the location of the expect_tok call in the pnut source code can easily be found. 
--- pnut.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pnut.c b/pnut.c index e0476b26..2226cea3 100644 --- a/pnut.c +++ b/pnut.c @@ -2267,8 +2267,7 @@ void parse_error_internal(char * msg, int token, char * file, int line) { exit(1); } - -void expect_tok(int expected_tok) { +void expect_tok_(int expected_tok, char* file, int line) { if (tok != expected_tok) { #ifdef NICE_ERR_MSG putstr("expected tok="); print_tok_type(expected_tok); @@ -2277,11 +2276,13 @@ void expect_tok(int expected_tok) { putstr("expected tok="); putint(expected_tok); putstr("\ncurrent tok="); putint(tok); putchar('\n'); #endif - parse_error("unexpected token", tok); + parse_error_internal("unexpected token", tok, file, line); } get_tok(); } +#define expect_tok(expected_tok) expect_tok_(expected_tok, __FILE__, __LINE__) + ast parse_comma_expression(); ast parse_cast_expression(); ast parse_compound_statement(); From b3014e2e33e4833f97c87ae735af13491baa5e20 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 27 Jan 2025 11:09:18 -0500 Subject: [PATCH 14/89] Fail when calling unknown function Before, we'd create a new binding for the function, which was defined when the function was declared. That was required when pnut didn't support forward declaration, but forward declarations are now supported so this can be removed. 
--- exe.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/exe.c b/exe.c index 544418a3..03718563 100644 --- a/exe.c +++ b/exe.c @@ -977,9 +977,10 @@ void codegen_call(ast node) { int lbl; if (binding == 0) { - lbl = alloc_label(); - cgc_add_global_fun(ident_probe, lbl, 0); - binding = cgc_globals; + putstr("ident = "); + putstr(string_pool + probe_string(ident_probe)); + putchar('\n'); + fatal_error("codegen_call: function not found"); } call(heap[binding+4]); From a739fe5fb32e127b3cbd982007f12753a9ba905f Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 27 Jan 2025 11:12:00 -0500 Subject: [PATCH 15/89] exe backend: add check that all labels are defined To make sure we don't pay the cost of checking addresses when bootstrapping, the check is gated by the SAFE_MODE option. --- exe.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/exe.c b/exe.c index 03718563..f61f9d77 100644 --- a/exe.c +++ b/exe.c @@ -299,10 +299,53 @@ enum { GOTO_LABEL, }; +#ifdef SAFE_MODE +int labels[100000]; +int labels_ix = 0; + +void assert_all_labels_defined() { + int i = 0; + int lbl; + // Check that all labels are defined + for (; i < labels_ix; i++) { + lbl = labels[i]; + if (heap[lbl + 1] > 0) { + putstr("Label "); + if (heap[lbl] == GENERIC_LABEL && heap[lbl + 2] != 0) { + putstr((char*) heap[lbl + 2]); + } else { + putint(lbl); + } + putstr(" is not defined\n"); + exit(1); + } + } +} + +void add_label(int lbl) { + labels[labels_ix++] = lbl; +} + +int alloc_label(char* name) { + int lbl = alloc_obj(3); + heap[lbl] = GENERIC_LABEL; + heap[lbl + 1] = 0; // Address of label + heap[lbl + 2] = (intptr_t) name; // Name of label + add_label(lbl); + return lbl; +} +#else + +#define assert_all_labels_defined() // No-op +#define add_label(lbl) // No-op + +#endif + int alloc_label() { int lbl = alloc_obj(2); heap[lbl] = GENERIC_LABEL; heap[lbl + 1] = 0; // Address of label + add_label(lbl); return lbl; } @@ -311,6 
+354,7 @@ int alloc_goto_label() { heap[lbl] = GOTO_LABEL; heap[lbl + 1] = 0; // Address of label heap[lbl + 2] = 0; // cgc-fs of label + add_label(lbl); return lbl; } @@ -2096,5 +2140,7 @@ void codegen_end() { rt_crash("printf is not supported yet."); ret(); + assert_all_labels_defined(); + generate_exe(); } From b3be3acadcf730bf17c2661575f4cadaf2df7584 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 27 Jan 2025 11:14:28 -0500 Subject: [PATCH 16/89] Add name to labels to make error more useful --- exe.c | 87 +++++++++++++++++++++++++++++----------------------------- pnut.c | 7 ++++- sh.c | 4 --- x86.c | 8 +++--- 4 files changed, 53 insertions(+), 53 deletions(-) diff --git a/exe.c b/exe.c index f61f9d77..c0e0b6a4 100644 --- a/exe.c +++ b/exe.c @@ -338,16 +338,16 @@ int alloc_label(char* name) { #define assert_all_labels_defined() // No-op #define add_label(lbl) // No-op +#define alloc_label(name) alloc_label_() -#endif - -int alloc_label() { +int alloc_label_() { int lbl = alloc_obj(2); heap[lbl] = GENERIC_LABEL; heap[lbl + 1] = 0; // Address of label add_label(lbl); return lbl; } +#endif int alloc_goto_label() { int lbl = alloc_obj(3); @@ -894,8 +894,8 @@ void codegen_binop(int op, ast lhs, ast rhs) { if (cond != -1) { - lbl1 = alloc_label(); - lbl2 = alloc_label(); + lbl1 = alloc_label(0); + lbl2 = alloc_label(0); jump_cond_reg_reg(cond, lbl1, reg_X, reg_Y); xor_reg_reg(reg_X, reg_X); jump(lbl2); @@ -1161,7 +1161,7 @@ int codegen_lvalue(ast node) { } void codegen_string(int string_probe) { - int lbl = alloc_label(); + int lbl = alloc_label(0); char *string_start = string_pool + heap[string_probe + 1]; char *string_end = string_start + heap[string_probe + 4]; @@ -1370,7 +1370,7 @@ void codegen_rvalue(ast node) { write_mem_location(reg_Y, 0, reg_X, left_width); push_reg(reg_X); } else if (op == AMP_AMP || op == BAR_BAR) { - lbl1 = alloc_label(); + lbl1 = alloc_label(0); codegen_rvalue(child0); pop_reg(reg_X); push_reg(reg_X); @@ -1432,8 +1432,8 @@ void 
codegen_rvalue(ast node) { } else if (nb_children == 3) { if (op == '?') { - lbl1 = alloc_label(); // false label - lbl2 = alloc_label(); // end label + lbl1 = alloc_label(0); // false label + lbl2 = alloc_label(0); // end label codegen_rvalue(child0); pop_reg(reg_X); grow_fs(-1); @@ -1461,8 +1461,8 @@ void codegen_rvalue(ast node) { void codegen_begin() { - setup_lbl = alloc_label(); - init_start_lbl = alloc_label(); + setup_lbl = alloc_label("setup"); + init_start_lbl = alloc_label("init_start"); init_next_lbl = init_start_lbl; // Make room for heap start and malloc bump pointer. @@ -1476,46 +1476,46 @@ void codegen_begin() { void_type = new_ast0(VOID_KW, 0); void_star_type = new_ast0(VOID_KW, 1); - main_lbl = alloc_label(); + main_lbl = alloc_label("main"); cgc_add_global_fun(init_ident(IDENTIFIER, "main"), main_lbl, void_type); - exit_lbl = alloc_label(); + exit_lbl = alloc_label("exit"); cgc_add_global_fun(init_ident(IDENTIFIER, "exit"), exit_lbl, void_type); - getchar_lbl = alloc_label(); + getchar_lbl = alloc_label("getchar"); cgc_add_global_fun(init_ident(IDENTIFIER, "getchar"), getchar_lbl, char_type); - putchar_lbl = alloc_label(); + putchar_lbl = alloc_label("putchar"); cgc_add_global_fun(init_ident(IDENTIFIER, "putchar"), putchar_lbl, void_type); - fopen_lbl = alloc_label(); + fopen_lbl = alloc_label("fopen"); cgc_add_global_fun(init_ident(IDENTIFIER, "fopen"), fopen_lbl, int_type); - fclose_lbl = alloc_label(); + fclose_lbl = alloc_label("fclose"); cgc_add_global_fun(init_ident(IDENTIFIER, "fclose"), fclose_lbl, void_type); - fgetc_lbl = alloc_label(); + fgetc_lbl = alloc_label("fgetc"); cgc_add_global_fun(init_ident(IDENTIFIER, "fgetc"), fgetc_lbl, char_type); - malloc_lbl = alloc_label(); + malloc_lbl = alloc_label("malloc"); cgc_add_global_fun(init_ident(IDENTIFIER, "malloc"), malloc_lbl, void_star_type); - free_lbl = alloc_label(); + free_lbl = alloc_label("free"); cgc_add_global_fun(init_ident(IDENTIFIER, "free"), free_lbl, char_type); - read_lbl 
= alloc_label(); + read_lbl = alloc_label("read"); cgc_add_global_fun(init_ident(IDENTIFIER, "read"), read_lbl, int_type); - write_lbl = alloc_label(); + write_lbl = alloc_label("write"); cgc_add_global_fun(init_ident(IDENTIFIER, "write"), write_lbl, int_type); - open_lbl = alloc_label(); + open_lbl = alloc_label("open"); cgc_add_global_fun(init_ident(IDENTIFIER, "open"), open_lbl, int_type); - close_lbl = alloc_label(); + close_lbl = alloc_label("close"); cgc_add_global_fun(init_ident(IDENTIFIER, "close"), close_lbl, int_type); - printf_lbl = alloc_label(); + printf_lbl = alloc_label("printf"); cgc_add_global_fun(init_ident(IDENTIFIER, "printf"), printf_lbl, void_type); jump(setup_lbl); @@ -1596,10 +1596,9 @@ void codegen_glo_var_decl(ast node) { if (get_op(type) != '[') { // not array declaration def_label(init_next_lbl); - init_next_lbl = alloc_label(); + init_next_lbl = alloc_label("init_next"); if (init != 0) { - codegen_rvalue(init); } else { xor_reg_reg(reg_X, reg_X); @@ -1683,8 +1682,8 @@ void codegen_statement(ast node) { if (op == IF_KW) { - lbl1 = alloc_label(); // else statement - lbl2 = alloc_label(); // join point after if + lbl1 = alloc_label(0); // else statement + lbl2 = alloc_label(0); // join point after if codegen_rvalue(get_child_(IF_KW, node, 0)); pop_reg(reg_X); grow_fs(-1); @@ -1698,8 +1697,8 @@ void codegen_statement(ast node) { } else if (op == WHILE_KW) { - lbl1 = alloc_label(); // while statement start - lbl2 = alloc_label(); // join point after while + lbl1 = alloc_label(0); // while statement start + lbl2 = alloc_label(0); // join point after while save_fs = cgc_fs; save_locals = cgc_locals; @@ -1721,9 +1720,9 @@ void codegen_statement(ast node) { } else if (op == FOR_KW) { - lbl1 = alloc_label(); // while statement start - lbl2 = alloc_label(); // join point after while - lbl3 = alloc_label(); // initial loop starting point + lbl1 = alloc_label(0); // while statement start + lbl2 = alloc_label(0); // join point after while + lbl3 = 
alloc_label(0); // initial loop starting point save_fs = cgc_fs; save_locals = cgc_locals; @@ -1749,8 +1748,8 @@ void codegen_statement(ast node) { } else if (op == DO_KW) { - lbl1 = alloc_label(); // do statement start - lbl2 = alloc_label(); // break point + lbl1 = alloc_label(0); // do statement start + lbl2 = alloc_label(0); // break point save_fs = cgc_fs; save_locals = cgc_locals; @@ -1774,8 +1773,8 @@ void codegen_statement(ast node) { save_fs = cgc_fs; save_locals = cgc_locals; - lbl1 = alloc_label(); // lbl1: end of switch - lbl2 = alloc_label(); // lbl2: next case + lbl1 = alloc_label(0); // lbl1: end of switch + lbl2 = alloc_label(0); // lbl2: next case cgc_add_enclosing_switch(cgc_fs, lbl1, lbl2); binding = cgc_locals; @@ -1806,10 +1805,10 @@ void codegen_statement(ast node) { binding = cgc_lookup_enclosing_switch(cgc_locals); if (binding != 0) { - lbl1 = alloc_label(); // skip case when falling through + lbl1 = alloc_label(0); // skip case when falling through jump(lbl1); def_label(heap[binding + 4]); // false jump location of previous case - heap[binding + 4] = alloc_label(); // create false jump location for current case + heap[binding + 4] = alloc_label(0); // create false jump location for current case dup(reg_X); // duplicate switch operand for the comparison codegen_rvalue(get_child_(CASE_KW, node, 0)); // evaluate case expression and compare it pop_reg(reg_Y); pop_reg(reg_X); grow_fs(-2); @@ -1827,7 +1826,7 @@ void codegen_statement(ast node) { if (binding != 0) { def_label(heap[binding + 4]); // false jump location of previous case - heap[binding + 4] = alloc_label(); // create label for next case (even if default catches all cases) + heap[binding + 4] = alloc_label(0); // create label for next case (even if default catches all cases) codegen_statement(get_child_(DEFAULT_KW, node, 0)); // default statement } else { fatal_error("default outside of switch"); @@ -1936,7 +1935,7 @@ void codegen_glo_fun_decl(ast node) { binding = 
cgc_lookup_fun(name, cgc_globals); if (binding == 0) { - lbl = alloc_label(); + lbl = alloc_label(STRING_BUF(name)); cgc_add_global_fun(name, lbl, fun_type); binding = cgc_globals; } @@ -2002,7 +2001,7 @@ void rt_crash(char* msg) { } void rt_malloc() { - int end_lbl = alloc_label(); + int end_lbl = alloc_label("rt_malloc_success"); mov_reg_mem(reg_Y, reg_glo, word_size); // Bump pointer add_reg_reg(reg_X, reg_Y); // New bump pointer @@ -2029,7 +2028,7 @@ void rt_free() { void codegen_end() { - int glo_setup_loop_lbl = alloc_label(); + int glo_setup_loop_lbl = alloc_label("glo_setup_loop"); def_label(setup_lbl); diff --git a/pnut.c b/pnut.c index e0476b26..cb4b6e2a 100644 --- a/pnut.c +++ b/pnut.c @@ -6,6 +6,11 @@ #include #include // for intptr_t +#ifdef PNUT_CC +// On pnut, intptr_t is not defined +#define intptr_t int +#endif + #define ast int #define true 1 #define false 0 @@ -264,7 +269,7 @@ int hash; #define HASH_PARAM 1026 #define HASH_PRIME 1009 #define HEAP_SIZE 200000 -int heap[HEAP_SIZE]; +intptr_t heap[HEAP_SIZE]; int heap_alloc = HASH_PRIME; int alloc_result; diff --git a/sh.c b/sh.c index b9531afd..eacbe968 100644 --- a/sh.c +++ b/sh.c @@ -28,10 +28,6 @@ void handle_shell_include() { #define text int #define TEXT_POOL_SIZE 1000000 -#ifdef PNUT_CC -// On pnut, intptr_t is not defined -#define intptr_t int -#endif intptr_t text_pool[TEXT_POOL_SIZE]; int text_alloc = 1; // Start at 1 because 0 is the empty text diff --git a/x86.c b/x86.c index 0c9cdefa..ae0b3d5f 100644 --- a/x86.c +++ b/x86.c @@ -445,7 +445,7 @@ void setup_proc_args(int global_vars_size) { #ifdef target_i386_linux void os_getchar() { - int lbl = alloc_label(); + int lbl = alloc_label("get_char_eof"); push_reg(BX); // save address of global variables table mov_reg_imm(AX, 0); // mov eax, 0 push_reg(AX); // push eax # buffer to read byte @@ -494,7 +494,7 @@ void os_fclose() { } void os_fgetc() { - int lbl = alloc_label(); // label for EOF + int lbl = alloc_label("fgetc_eof"); // label for 
EOF push_reg(BX); // save address of global variables table mov_reg_reg(BX, reg_X); // mov ebx, file descriptor mov_reg_imm(AX, 3); // mov eax, 3 == SYS_READ @@ -600,7 +600,7 @@ void os_close() { #ifdef SYSTEM_V_ABI void os_getchar() { - int lbl = alloc_label(); + int lbl = alloc_label("get_char_eof"); mov_reg_imm(AX, 0); // mov eax, 0 push_reg(AX); // push eax # buffer to read byte mov_reg_imm(DI, 0); // mov edi, 0 # edi = 0 = STDIN @@ -641,7 +641,7 @@ void os_fclose() { } void os_fgetc() { - int lbl = alloc_label(); // label for EOF + int lbl = alloc_label("fgetc_eof"); // label for EOF mov_reg_reg(DI, reg_X); // mov edi, file descriptor mov_reg_imm(AX, 0); // mov eax, 0 push_reg(AX); // push eax # buffer to read byte From 402e050e2d5069da82b92899b53f4dc47336f131 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 27 Jan 2025 11:32:27 -0500 Subject: [PATCH 17/89] Fix tests --- tests/_all/six-cc-tests/even-odd.c | 67 +++++++++++----------------- tests/_all/six-cc-tests/winter-pi2.c | 11 +---- 2 files changed, 29 insertions(+), 49 deletions(-) diff --git a/tests/_all/six-cc-tests/even-odd.c b/tests/_all/six-cc-tests/even-odd.c index fc65390b..eafbb618 100644 --- a/tests/_all/six-cc-tests/even-odd.c +++ b/tests/_all/six-cc-tests/even-odd.c @@ -1,70 +1,57 @@ -void putnumber(int n) { - int acc = 0; - int i = 0; - int *digits = malloc(10 * sizeof(int)); // Dynamically allocate memory for digits +#include - if (digits == 0) { - putstring("Memory allocation failed\n"); - return; +void putstr(char *str) { + while (*str) { + putchar(*str); + str += 1; } +} - if (n == 0) { - putchar(48); - free(digits); // Free allocated memory - return; - } +void putint_aux(int n) { + if (n <= -10) putint_aux(n / 10); + putchar('0' - (n % 10)); +} - while (n > 0) { - digits[i] = n % 10; - n = n / 10; - i++; +void putint(int n) { + if (n < 0) { + putchar('-'); + putint_aux(n); + } else { + putint_aux(-n); } - i--; - while (i >= 0) { - putchar(digits[i] + 48); - i--; - } - - 
free(digits); // Free allocated memory } -int abs(int number) -{ +int abs(int number); +int even(int number); +int odd(int number); + +int abs(int number) { if(number < 0) return -number; return number; } -int even(int number) -{ +int even(int number) { int a; /* Local variable so that the function is not simple */ if(number == 0) return 1; return odd(abs(number)-1); } -int odd(int number) -{ +int odd(int number) { int a; /* Local variable so that the function is not simple */ if( number == 0 ) return 0; return even(abs(number)-1); } -void putstring(char *s) { - while (*s) { - putchar(*s); - s = s + 1; - } -} - int main() { int n1; int n2; n1 = even(10); n2 = odd(10); - putstring("n1 = "); - putnumber(n1); + putstr("n1 = "); + putint(n1); putchar('\n'); - putstring("n2 = "); - putnumber(n2); + putstr("n2 = "); + putint(n2); putchar('\n'); return 0; } diff --git a/tests/_all/six-cc-tests/winter-pi2.c b/tests/_all/six-cc-tests/winter-pi2.c index 8ba7331f..206191a9 100644 --- a/tests/_all/six-cc-tests/winter-pi2.c +++ b/tests/_all/six-cc-tests/winter-pi2.c @@ -18,10 +18,7 @@ int d; int c = 0; int main() { - int newline; - int newline2; int i = 0; - newline = identity(10, 2, 3); while (i < 2800) { r[i] = 2000; @@ -54,11 +51,7 @@ int main() { k = k - 14; } - putchar(newline); + putchar('\n'); return 0; -} - -int identity(int x, int y, int z) { - return x; -} +} \ No newline at end of file From 45d10f88fc75be15c64319bc46cca11d605939c7 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 27 Jan 2025 11:34:35 -0500 Subject: [PATCH 18/89] Fix debug.c --- debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debug.c b/debug.c index 1b965d0e..a0586dbd 100644 --- a/debug.c +++ b/debug.c @@ -196,7 +196,7 @@ void show_ast(char* name, ast obj) { int nb_children = get_nb_children(obj); if (nb_children == 0) nb_children = 1; // Account for value of ast nodes with no child for (i = 0; i < nb_children + 1; i++) { - printf("%s[%d] = %d\n", name, i, heap[obj 
+ i]); + printf("%s[%d] = %d\n", name, i, (int) heap[obj + i]); } } From 633d4994fd45b30cffc341cc6d7b363dade5e85e Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 27 Jan 2025 11:43:47 -0500 Subject: [PATCH 19/89] Remove unused var --- exe.c | 1 - 1 file changed, 1 deletion(-) diff --git a/exe.c b/exe.c index a066738b..67c33a1b 100644 --- a/exe.c +++ b/exe.c @@ -1016,7 +1016,6 @@ void codegen_call(ast node) { ast nb_params = codegen_params(params); int binding = cgc_lookup_fun(ident_probe, cgc_globals); - int lbl; if (binding == 0) { putstr("ident = "); From 60200dc4ea3194810d56e0fb70caa16c1e569443 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Tue, 28 Jan 2025 13:25:13 -0500 Subject: [PATCH 20/89] Check local variable declaration name and type --- sh.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sh.c b/sh.c index 9bc9f89f..da1cb322 100644 --- a/sh.c +++ b/sh.c @@ -566,7 +566,7 @@ void assert_var_decl_is_safe(ast variable, bool local) { // Helper function for } } -void check_param_decls(ast lst) { +void check_decls(ast lst) { while (lst != 0) { assert_var_decl_is_safe(get_child__(',', DECL, lst, 0), true); lst = get_child_(',', lst, 1); @@ -2014,6 +2014,7 @@ void comp_var_decls(ast node) { while (node != 0) { // Add to local env and cummulative env, then initialize var_decl = get_child__(',', DECL, node, 0); + assert_var_decl_is_safe(var_decl, true); add_var_to_local_env(var_decl, BINDING_VAR_LOCAL); if (get_child_(DECL, var_decl, 2) != 0) { // Initializer comp_assignment(get_child__(DECL, IDENTIFIER, var_decl, 0), get_child_(DECL, var_decl, 2)); @@ -2124,7 +2125,7 @@ void comp_glo_fun_decl(ast node) { top_level_stmt = false; - check_param_decls(params); + check_decls(params); add_fun_params_to_local_env(params); // If the function is main From cd23d1759cb1c8fccb5a6a0ec38a28bfaf6d76c5 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Tue, 28 Jan 2025 13:26:33 -0500 Subject: [PATCH 21/89] Shorten pnut-sh error 
handling --- sh.c | 161 ++++++++++++++++++++++++----------------------------------- 1 file changed, 65 insertions(+), 96 deletions(-) diff --git a/sh.c b/sh.c index da1cb322..249e443d 100644 --- a/sh.c +++ b/sh.c @@ -239,7 +239,6 @@ void print_escaped_text(text t, bool for_printf) { } else if (text_pool[t] == TEXT_FROM_INT(TEXT_ESCAPED)) { fatal_error("Cannot escape a string that is already escaped"); } else { - printf("\nt=%d %d\n", t, TEXT_TO_INT(text_pool[t])); fatal_error("print_escaped_text: unexpected string tree node"); } } @@ -276,7 +275,6 @@ void print_text(text t) { } else if (text_pool[t] == TEXT_FROM_INT(TEXT_ESCAPED)) { print_escaped_text(TEXT_TO_INT(text_pool[t + 1]), TEXT_TO_INT(text_pool[t + 2])); } else { - printf("\nt=%d %d\n", t, TEXT_TO_INT(text_pool[t])); fatal_error("print_text: unexpected string tree node"); } } @@ -542,7 +540,7 @@ void assert_var_decl_is_safe(ast variable, bool local) { // Helper function for // In zsh, writing to argv assigns to $@, so we map argv to argv_, and forbid argv_. // This check only applies to local variables because globals are prefixed with _. if (local && (ident_probe == ARGV__ID || ident_probe == IFS_ID)) { - printf("%s ", name); + printf("\"%s\" ", name); fatal_error("variable name is invalid. It can't be 'IFS' or 'argv_'."); } @@ -550,7 +548,7 @@ void assert_var_decl_is_safe(ast variable, bool local) { // Helper function for // Local variables don't correspond to memory locations, and can't store // more than 1 number/pointer. if (get_op(type) == '[' || get_op(type) == STRUCT_KW) { - printf("%s ", name); + printf("\"%s\" variable: ", name); fatal_error("array/struct value type is not supported for shell backend. 
Use a reference type instead."); } } else { @@ -560,7 +558,7 @@ void assert_var_decl_is_safe(ast variable, bool local) { // Helper function for if ( (get_op(type) == '[' && get_op(get_child_('[', type, 0)) == STRUCT_KW) // Array of structs || (get_op(type) == '[' && get_op(get_child_('[', type, 0)) == '[') // Array of arrays || get_op(type) == STRUCT_KW) { // Struct value type - printf("%s ", name); + printf("\"%s\" variable: ", name); fatal_error("array of struct and struct value type are not supported in shell backend. Use a reference type instead."); } } @@ -920,7 +918,7 @@ ast handle_side_effects_go(ast node, bool executes_conditionally) { literals_inits = new_ast2(',', new_ast2('=', sub1, get_val_(STRING, node)), literals_inits); return sub1; } else { - printf("handle_side_effects_go: op=%d %c", op, op); + printf("op=%d %c", op, op); fatal_error("unexpected operator"); return 0; } @@ -934,7 +932,7 @@ ast handle_side_effects_go(ast node, bool executes_conditionally) { } else if (op == SIZEOF_KW) { return node; // sizeof is a compile-time operator } else { - printf("1: op=%d %c", op, op); + printf("op=%d %c", op, op); fatal_error("unexpected operator"); return 0; } @@ -979,35 +977,24 @@ ast handle_side_effects_go(ast node, bool executes_conditionally) { } else if (op == CAST) { return new_ast2(CAST, child0, handle_side_effects_go(child1, executes_conditionally)); } else { - printf("2: op=%d %c", op, op); fatal_error("unexpected operator"); return 0; } - } else if (nb_children == 3) { - if (op == '?') { - previous_conditional_fun_calls = conditional_fun_calls; - conditional_fun_calls = 0; - sub1 = handle_side_effects_go(child1, true); - left_conditional_fun_calls = conditional_fun_calls; - conditional_fun_calls = 0; - sub2 = handle_side_effects_go(child2, true); - right_conditional_fun_calls = conditional_fun_calls; - if (left_conditional_fun_calls != 0 || right_conditional_fun_calls != 0) { - fatal_error("Conditional function calls in ternary operator not 
allowed"); - } - - return new_ast3('?', handle_side_effects_go(child0, executes_conditionally), sub1, sub2); - } else { - printf("3: op=%d %c\n", op, op); - fatal_error("unexpected operator"); - return 0; + } else if (nb_children == 3 && op == '?') { + previous_conditional_fun_calls = conditional_fun_calls; + conditional_fun_calls = 0; + sub1 = handle_side_effects_go(child1, true); + left_conditional_fun_calls = conditional_fun_calls; + conditional_fun_calls = 0; + sub2 = handle_side_effects_go(child2, true); + right_conditional_fun_calls = conditional_fun_calls; + if (left_conditional_fun_calls != 0 || right_conditional_fun_calls != 0) { + fatal_error("Conditional function calls in ternary operator not allowed"); } - } else if (nb_children == 4) { - printf("4: op=%d %c\n", op, op); - fatal_error("unexpected operator"); - return 0; + + return new_ast3('?', handle_side_effects_go(child0, executes_conditionally), sub1, sub2); } else { - printf("5: op=%d %c with %d children\n", op, op, get_nb_children(node)); + printf("op=%d %c with %d children\n", op, op, get_nb_children(node)); fatal_error("unexpected operator"); return 0; } @@ -1141,9 +1128,6 @@ text comp_rvalue_go(ast node, int context, ast test_side_effects, int outer_op) } else if (op == IDENTIFIER || op == IDENTIFIER_INTERNAL || op == IDENTIFIER_STRING || op == IDENTIFIER_DOLLAR) { if (context == RVALUE_CTX_ARITH_EXPANSION) { return env_var_with_prefix(node, false); } else { return wrap_in_condition_if_needed(context, test_side_effects, string_concat(wrap_char('$'), env_var_with_prefix(node, true))); } - } else if (op == STRING) { - fatal_error("comp_rvalue_go: string should have been removed by handle_side_effects"); - return 0; } else { printf("op=%d %c", op, op); fatal_error("comp_rvalue_go: unknown rvalue with nb_children == 0"); @@ -1201,20 +1185,20 @@ text comp_rvalue_go(ast node, int context, ast test_side_effects, int outer_op) } else if (get_op(child0) == STRUCT_KW) { return wrap_if_needed(false, 
context, test_side_effects, struct_sizeof_var(get_child__(STRUCT_KW, IDENTIFIER, child0, 1)), outer_op, op); } else { - printf("op=%d %c", get_op(child0), get_op(child0)); - printf("op=%d %c", get_op(get_child(child0, 1)), get_op(get_child(child0, 1))); + printf("op=%d %c\n", get_op(child0), get_op(child0)); + printf("op=%d %c\n", get_op(get_child(child0, 1)), get_op(get_child(child0, 1))); fatal_error("comp_rvalue_go: sizeof is not supported for this type or expression"); return 0; } } else { - printf("op=%d %c", get_op(child0), get_op(child0)); + printf("op=%d %c\n", get_op(child0), get_op(child0)); fatal_error("comp_rvalue_go: sizeof is not supported for this type or expression"); return 0; } } else if (op == '&') { return wrap_if_needed(false, context, test_side_effects, comp_lvalue_address(child0), outer_op, op); } else { - printf("1: op=%d %c", op, op); + printf("op=%d %c", op, op); fatal_error("comp_rvalue_go: unexpected operator"); return 0; } @@ -1247,75 +1231,60 @@ text comp_rvalue_go(ast node, int context, ast test_side_effects, int outer_op) } } else if (op == CAST) { // Casts are no-op return comp_rvalue_go(child1, context, 0, op); - } else if (op == AMP_AMP || op == BAR_BAR) { - fatal_error("comp_rvalue_go: && and || should have 4 children by that point"); - return 0; } else { fatal_error("comp_rvalue_go: unknown rvalue with 2 children"); return 0; } - } else if (nb_children == 3) { - if (op == '?') { - sub1 = comp_rvalue_go(child0, RVALUE_CTX_ARITH_EXPANSION, 0, op); - sub2 = comp_rvalue_go(child1, RVALUE_CTX_ARITH_EXPANSION, 0, op); - sub3 = comp_rvalue_go(child2, RVALUE_CTX_ARITH_EXPANSION, 0, op); - return wrap_if_needed(true, context, test_side_effects, string_concat5(sub1, op_to_str(op), sub2, wrap_str_lit(": "), sub3), outer_op, op); - return 0; - } else { - printf("op=%d %c\n", op, op); - fatal_error("comp_rvalue_go: unknown rvalue with 3 children"); - return 0; - } - } else if (nb_children == 4) { - if (op == AMP_AMP || op == BAR_BAR) { - // 
Note, this could also be compiled in a single [ ] block using -a and - // -o, which I think are POSIX compliant but are deprecated. - if (context == RVALUE_CTX_TEST) { - // When compiling in a test context, && and || can be compiled to - // Shell's && and || with [ ... ] blocks. - // - // A notable difference between these operators in Shell and C is that - // in Shell, they have equal precedence while in C, && has higher - // precedence. This means that we need to add parenthesis that would not - // be needed in C. - // - // As a heuristic, we add parenthesis whenever the left or right side of - // the operator is a different comparison operator. - sub1 = non_parenthesized_operand(child0); // un-parenthesized lhs - sub2 = non_parenthesized_operand(child1); // un-parenthesized rhs - - // if lhs is && or ||, and different from the current operator - if ((get_op(sub1) == AMP_AMP || get_op(sub1) == BAR_BAR) && get_op(sub1) != op) { - sub1 = comp_rvalue_go(sub1, RVALUE_CTX_TEST, child2, op); - sub1 = string_concat3(wrap_str_lit("{ "), sub1, wrap_str_lit("; }")); - } else { - sub1 = comp_rvalue_go(sub1, RVALUE_CTX_TEST, child2, op); - } + } else if (nb_children == 3 && op == '?') { + sub1 = comp_rvalue_go(child0, RVALUE_CTX_ARITH_EXPANSION, 0, op); + sub2 = comp_rvalue_go(child1, RVALUE_CTX_ARITH_EXPANSION, 0, op); + sub3 = comp_rvalue_go(child2, RVALUE_CTX_ARITH_EXPANSION, 0, op); + return wrap_if_needed(true, context, test_side_effects, string_concat5(sub1, op_to_str(op), sub2, wrap_str_lit(": "), sub3), outer_op, op); + return 0; + } else if (nb_children == 4 && (op == AMP_AMP || op == BAR_BAR)) { + // Note, this could also be compiled in a single [ ] block using -a and + // -o, which I think are POSIX compliant but are deprecated. + if (context == RVALUE_CTX_TEST) { + // When compiling in a test context, && and || can be compiled to + // Shell's && and || with [ ... ] blocks. 
+ // + // A notable difference between these operators in Shell and C is that + // in Shell, they have equal precedence while in C, && has higher + // precedence. This means that we need to add parenthesis that would not + // be needed in C. + // + // As a heuristic, we add parenthesis whenever the left or right side of + // the operator is a different comparison operator. + sub1 = non_parenthesized_operand(child0); // un-parenthesized lhs + sub2 = non_parenthesized_operand(child1); // un-parenthesized rhs + + // if lhs is && or ||, and different from the current operator + if ((get_op(sub1) == AMP_AMP || get_op(sub1) == BAR_BAR) && get_op(sub1) != op) { + sub1 = comp_rvalue_go(sub1, RVALUE_CTX_TEST, child2, op); + sub1 = string_concat3(wrap_str_lit("{ "), sub1, wrap_str_lit("; }")); + } else { + sub1 = comp_rvalue_go(sub1, RVALUE_CTX_TEST, child2, op); + } - // if rhs is && or ||, and different from the current operator - if ((get_op(sub2) == AMP_AMP || get_op(sub2) == BAR_BAR) && get_op(sub2) != op) { - sub2 = comp_rvalue_go(sub2, RVALUE_CTX_TEST, child3, op); - sub2 = string_concat3(wrap_str_lit("{ "), sub2, wrap_str_lit("; }")); - } else { - sub2 = comp_rvalue_go(sub2, RVALUE_CTX_TEST, child3, op); - } - return string_concat3(sub1, op_to_str(op), sub2); + // if rhs is && or ||, and different from the current operator + if ((get_op(sub2) == AMP_AMP || get_op(sub2) == BAR_BAR) && get_op(sub2) != op) { + sub2 = comp_rvalue_go(sub2, RVALUE_CTX_TEST, child3, op); + sub2 = string_concat3(wrap_str_lit("{ "), sub2, wrap_str_lit("; }")); } else { - if (test_side_effects != 0 || child2 != 0 || child3 != 0) { - fatal_error("comp_rvalue_go: && and || with function calls can only be used in tests"); - } - sub1 = comp_rvalue_go(child0, RVALUE_CTX_ARITH_EXPANSION, 0, op); - sub2 = comp_rvalue_go(child1, RVALUE_CTX_ARITH_EXPANSION, 0, op); - return wrap_if_needed(true, context, test_side_effects, string_concat3(sub1, op_to_str(op), sub2), outer_op, op); + sub2 = 
comp_rvalue_go(sub2, RVALUE_CTX_TEST, child3, op); } + return string_concat3(sub1, op_to_str(op), sub2); } else { - printf("op=%d %c\n", op, op); - fatal_error("comp_rvalue_go: unknown rvalue with 4 children"); - return 0; + if (test_side_effects != 0 || child2 != 0 || child3 != 0) { + fatal_error("comp_rvalue_go: && and || with function calls can only be used in tests"); + } + sub1 = comp_rvalue_go(child0, RVALUE_CTX_ARITH_EXPANSION, 0, op); + sub2 = comp_rvalue_go(child1, RVALUE_CTX_ARITH_EXPANSION, 0, op); + return wrap_if_needed(true, context, test_side_effects, string_concat3(sub1, op_to_str(op), sub2), outer_op, op); } } else { printf("op=%d %c\n", op, op); - fatal_error("comp_rvalue_go: unknown rvalue with >4 children"); + fatal_error("comp_rvalue_go: unknown rvalue"); return 0; } } From 82dcc39308cffc5cbddf9156dcb3429bd8d6a462 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Tue, 28 Jan 2025 13:26:56 -0500 Subject: [PATCH 22/89] Don't insert ':' in empty function with local vars Empty functions with local variables have let/endlet calls so the ':' statement is not necessary. 
--- sh.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sh.c b/sh.c index 249e443d..f9a28033 100644 --- a/sh.c +++ b/sh.c @@ -2140,7 +2140,6 @@ void comp_glo_fun_decl(ast node) { params_ix = 2; while (params != 0) { var = get_child__(',', DECL, params, 0); - // TODO: Constant param optimization // Constant parameters don't need to be initialized comp_assignment(get_child_(DECL, var, 0), new_ast0(IDENTIFIER_DOLLAR, params_ix)); @@ -2150,8 +2149,6 @@ void comp_glo_fun_decl(ast node) { #endif comp_body(body, STMT_CTX_DEFAULT); - // functions cannot be empty so we insert ':' if it's empty - if (!any_active_glo_decls(start_glo_decl_idx)) append_glo_decl(wrap_char(':')); // Set local environment to cummulative for the save_local_vars/restore_local_vars cgc_locals = cgc_locals_fun; @@ -2166,6 +2163,9 @@ void comp_glo_fun_decl(ast node) { rest_loc_var_fixups = get_child_opt_(',', ',', rest_loc_var_fixups, 1); } + // functions cannot be empty so we insert ':' if it's empty + if (!any_active_glo_decls(start_glo_decl_idx)) append_glo_decl(wrap_char(':')); + nest_level -= 1; append_glo_decl(wrap_str_lit("}\n")); From 0ae6e820188e348eddf151ae343888d7498b3a23 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Tue, 28 Jan 2025 13:28:29 -0500 Subject: [PATCH 23/89] Add tests for pnut-sh safe guard in codegen Because not everything in C is supported by pnut-sh, there are safeguards in place that prevent the use of unsupported constructs. The tests allow us to confirm that the recent parser changes haven't broken the checks. 
--- run-tests.sh | 68 +++++++++++++++---- tests/_sh/checks/address_of_global.c | 6 ++ tests/_sh/checks/address_of_global.golden | 1 + tests/_sh/checks/address_of_local.c | 5 ++ tests/_sh/checks/address_of_local.golden | 1 + tests/_sh/checks/break_outside_loop.c | 4 ++ tests/_sh/checks/break_outside_loop.golden | 1 + tests/_sh/checks/continue_outside_loop.c | 4 ++ tests/_sh/checks/continue_outside_loop.golden | 1 + tests/_sh/checks/global_var_underscore.c | 6 ++ tests/_sh/checks/global_var_underscore.golden | 1 + tests/_sh/checks/invalid_printf.c | 4 ++ tests/_sh/checks/invalid_printf.golden | 1 + tests/_sh/checks/local_array_arg.c | 4 ++ tests/_sh/checks/local_array_arg.golden | 1 + tests/_sh/checks/local_array_param.c | 9 +++ tests/_sh/checks/local_array_param.golden | 1 + tests/_sh/checks/local_struct_arg.c | 9 +++ tests/_sh/checks/local_struct_arg.golden | 1 + tests/_sh/checks/local_struct_param.c | 9 +++ tests/_sh/checks/local_struct_param.golden | 1 + tests/_sh/checks/local_var_shadowing.c | 5 ++ tests/_sh/checks/local_var_shadowing.golden | 1 + tests/_sh/checks/local_var_underscore.c | 4 ++ tests/_sh/checks/local_var_underscore.golden | 1 + .../checks/shortcut_eval_outside_condition.c | 6 ++ .../shortcut_eval_outside_condition.golden | 1 + tests/_sh/checks/sizeof_array.c | 6 ++ tests/_sh/checks/sizeof_array.golden | 1 + tests/_sh/checks/sizeof_expr.c | 4 ++ tests/_sh/checks/sizeof_expr.golden | 1 + tests/_sh/checks/struct_no_nested_array.c | 10 +++ .../_sh/checks/struct_no_nested_array.golden | 1 + tests/_sh/checks/struct_no_nested_struct.c | 12 ++++ .../_sh/checks/struct_no_nested_struct.golden | 1 + tests/_sh/checks/switch_early_exit.c | 13 ++++ tests/_sh/checks/switch_early_exit.golden | 1 + tests/_sh/checks/switch_fallthrough.c | 10 +++ tests/_sh/checks/switch_fallthrough.golden | 1 + tests/_sh/checks/switch_no_body.c | 4 ++ tests/_sh/checks/switch_no_body.golden | 1 + tests/_sh/checks/ternary_with_fun_call.c | 5 ++ 
tests/_sh/checks/ternary_with_fun_call.golden | 1 + tests/_sh/checks/var_IFS.c | 4 ++ tests/_sh/checks/var_IFS.golden | 1 + 45 files changed, 220 insertions(+), 13 deletions(-) create mode 100644 tests/_sh/checks/address_of_global.c create mode 100644 tests/_sh/checks/address_of_global.golden create mode 100644 tests/_sh/checks/address_of_local.c create mode 100644 tests/_sh/checks/address_of_local.golden create mode 100644 tests/_sh/checks/break_outside_loop.c create mode 100644 tests/_sh/checks/break_outside_loop.golden create mode 100644 tests/_sh/checks/continue_outside_loop.c create mode 100644 tests/_sh/checks/continue_outside_loop.golden create mode 100644 tests/_sh/checks/global_var_underscore.c create mode 100644 tests/_sh/checks/global_var_underscore.golden create mode 100644 tests/_sh/checks/invalid_printf.c create mode 100644 tests/_sh/checks/invalid_printf.golden create mode 100644 tests/_sh/checks/local_array_arg.c create mode 100644 tests/_sh/checks/local_array_arg.golden create mode 100644 tests/_sh/checks/local_array_param.c create mode 100644 tests/_sh/checks/local_array_param.golden create mode 100644 tests/_sh/checks/local_struct_arg.c create mode 100644 tests/_sh/checks/local_struct_arg.golden create mode 100644 tests/_sh/checks/local_struct_param.c create mode 100644 tests/_sh/checks/local_struct_param.golden create mode 100644 tests/_sh/checks/local_var_shadowing.c create mode 100644 tests/_sh/checks/local_var_shadowing.golden create mode 100644 tests/_sh/checks/local_var_underscore.c create mode 100644 tests/_sh/checks/local_var_underscore.golden create mode 100644 tests/_sh/checks/shortcut_eval_outside_condition.c create mode 100644 tests/_sh/checks/shortcut_eval_outside_condition.golden create mode 100644 tests/_sh/checks/sizeof_array.c create mode 100644 tests/_sh/checks/sizeof_array.golden create mode 100644 tests/_sh/checks/sizeof_expr.c create mode 100644 tests/_sh/checks/sizeof_expr.golden create mode 100644 
tests/_sh/checks/struct_no_nested_array.c create mode 100644 tests/_sh/checks/struct_no_nested_array.golden create mode 100644 tests/_sh/checks/struct_no_nested_struct.c create mode 100644 tests/_sh/checks/struct_no_nested_struct.golden create mode 100644 tests/_sh/checks/switch_early_exit.c create mode 100644 tests/_sh/checks/switch_early_exit.golden create mode 100644 tests/_sh/checks/switch_fallthrough.c create mode 100644 tests/_sh/checks/switch_fallthrough.golden create mode 100644 tests/_sh/checks/switch_no_body.c create mode 100644 tests/_sh/checks/switch_no_body.golden create mode 100644 tests/_sh/checks/ternary_with_fun_call.c create mode 100644 tests/_sh/checks/ternary_with_fun_call.golden create mode 100644 tests/_sh/checks/var_IFS.c create mode 100644 tests/_sh/checks/var_IFS.golden diff --git a/run-tests.sh b/run-tests.sh index 9800ce23..fbadd4ae 100755 --- a/run-tests.sh +++ b/run-tests.sh @@ -135,10 +135,20 @@ test_args() { # not relevant to the bootstrap process. # // expect_failure_for: bash-2* # // expect_failure_for: yash -test_expect_falure_for_shells() { +test_expect_failure_for_shells() { echo `sed -n -e "/\/\/ expect_failure_for: /p" "$1" | sed -e "s/^\/\/ expect_failure_for: //"` } +# Some tests are expected to fail with a compilation error +# // expect_comp_failure +test_expect_comp_failure() { + if grep -q "// expect_comp_failure" "$1"; then + return 1 + else + return 0 + fi +} + # Some tests take a long time to run, so we set a timeout to prevent infinite # loops However, we don't want to set a high timeout for all tests, so we have # an option to set a specific timeout. 
@@ -147,7 +157,7 @@ test_timeout() { } test_expect_failure_for_shell() { # file: $1 - failing_shells=$(test_expect_falure_for_shells "$1") + failing_shells=$(test_expect_failure_for_shells "$1") for failing_shell in $failing_shells; do failing_shell_name=$(echo "$failing_shell" | sed 's/-.*//') failing_shell_version=$(echo "$failing_shell" | sed 's/.*-//') @@ -190,19 +200,23 @@ compile_test() { # c file: $1 run_test() { # file_to_test: $1 file="$1" - filename=$(basename "$file" .c) # Get the filename without extension - dir=$(dirname "$file") # Get the directory of the test file + filename=$(basename "$file" .c) # Get the filename without extension + dir=$(dirname "$file") # Get the directory of the test file + golden_file="$dir/$filename.golden" # Path of the expected output + + failed_pnut_comp=0 # Flag to indicate if compilation failed - golden_file="$dir/$filename.golden" + expect_failed_comp=0 # Flag to indicate if compilation is expected to fail + test_expect_comp_failure "$file" || expect_failed_comp=1 # Print file name before generating golden file so we know it's getting processed printf "$file: " # Generate golden file if it doesn't exist if [ ! -f "$golden_file" ]; then - compile_test "$file" > "$dir/$filename.$ext" 2> "$dir/$filename.pnut.err" && \ - gcc "$file" $(test_comp_options $file) -o "$dir/$filename-gcc.$ext" 2> "$dir/$filename.gcc.err" - if [ $? -eq 0 ]; then + compile_test "$file" > "$dir/$filename.$ext" && \ + gcc "$file" $(test_comp_options $file) -o "$dir/$filename-gcc.$ext" 2> "$dir/$filename-by-gcc.err" + if [ $? 
-eq 0 ] && [ "$expect_failed_comp" -eq 0 ]; then chmod +x "$dir/$filename.$ext" execute_test "$dir/$filename.$ext" "$(test_timeout $file)" "$(test_args $file)" > "$dir/$filename.output" $dir/$filename-gcc.$ext $(test_args $file) > "$dir/$filename-gcc.output" @@ -213,16 +227,23 @@ run_test() { # file_to_test: $1 echo "❌ Program compiled by gcc and pnut produced different outputs" fi + elif [ "$expect_failed_comp" -eq 1 ]; then + + echo "🟡 Golden file generated by pnut" + tail -n 1 "$dir/$filename.$ext" > "$golden_file" # Save the error message which is the last line + else - echo "❌ Failed to compile with pnut. See $dir/$filename.pnut.err and $dir/$filename.gcc.err" + echo "❌ Failed to compile with pnut. See $dir/$filename.$ext and $dir/$filename-by-gcc.err" fi return 1 fi # Compile the test file with pnut.exe - compile_test "$file" > "$dir/$filename.$ext" 2> "$dir/$filename.err" + compile_test "$file" > "$dir/$filename.$ext" + compile_test_exit_code="$?" + + if [ "$compile_test_exit_code" -eq 0 ] && [ "$expect_failed_comp" -eq 0 ]; then # If compilation was successful and not expected to fail - if [ $? -eq 0 ]; then # If compilation was successful if [ "$compile_only" -eq 1 ]; then echo "✅ Compiled $file" return 0 @@ -251,9 +272,30 @@ run_test() { # file_to_test: $1 echo "❌ Failed to run: $(cat "$dir/$filename.err")" return 1 fi - else - echo "❌ Failed to compile with pnut: $(cat "$dir/$filename.err")" + + elif [ "$expect_failed_comp" -eq 1 ]; then # Compilation failed as expected + + if [ "$compile_test_exit_code" -eq 0 ]; then + echo "❌ Compilation succeeded when it should have failed" + return 1 + else + diff_out=$(tail -n 1 "$dir/$filename.$ext" | diff - "$dir/$filename.golden") + if [ $? 
-eq 0 ]; then # If the error message matches the golden file + echo "✅ Test passed (compilation failed as expected)" + return 0 + else + echo "❌ Compilation failed for a different reason than expected:" + echo "diff (error vs expected)" + echo "$diff_out" + return 1 + fi + fi + + else # Compilation failed when it should have succeeded + + echo "❌ Failed to compile with pnut: $(cat "$dir/$filename.$ext")" return 1 + fi } diff --git a/tests/_sh/checks/address_of_global.c b/tests/_sh/checks/address_of_global.c new file mode 100644 index 00000000..a73f037c --- /dev/null +++ b/tests/_sh/checks/address_of_global.c @@ -0,0 +1,6 @@ +// expect_comp_failure +int a; + +void main() { + int* b = &a; +} diff --git a/tests/_sh/checks/address_of_global.golden b/tests/_sh/checks/address_of_global.golden new file mode 100644 index 00000000..6b2fe82a --- /dev/null +++ b/tests/_sh/checks/address_of_global.golden @@ -0,0 +1 @@ +comp_rvalue_go: can't take the address of a local variable diff --git a/tests/_sh/checks/address_of_local.c b/tests/_sh/checks/address_of_local.c new file mode 100644 index 00000000..8afd8a52 --- /dev/null +++ b/tests/_sh/checks/address_of_local.c @@ -0,0 +1,5 @@ +// expect_comp_failure +void main() { + int a; + int* b = &a; +} diff --git a/tests/_sh/checks/address_of_local.golden b/tests/_sh/checks/address_of_local.golden new file mode 100644 index 00000000..6b2fe82a --- /dev/null +++ b/tests/_sh/checks/address_of_local.golden @@ -0,0 +1 @@ +comp_rvalue_go: can't take the address of a local variable diff --git a/tests/_sh/checks/break_outside_loop.c b/tests/_sh/checks/break_outside_loop.c new file mode 100644 index 00000000..6390fca5 --- /dev/null +++ b/tests/_sh/checks/break_outside_loop.c @@ -0,0 +1,4 @@ +// expect_comp_failure +void main() { + break; +} diff --git a/tests/_sh/checks/break_outside_loop.golden b/tests/_sh/checks/break_outside_loop.golden new file mode 100644 index 00000000..1612b4cb --- /dev/null +++ 
b/tests/_sh/checks/break_outside_loop.golden @@ -0,0 +1 @@ +comp_statement: break not in loop or switch diff --git a/tests/_sh/checks/continue_outside_loop.c b/tests/_sh/checks/continue_outside_loop.c new file mode 100644 index 00000000..67e14312 --- /dev/null +++ b/tests/_sh/checks/continue_outside_loop.c @@ -0,0 +1,4 @@ +// expect_comp_failure +void main() { + continue; +} diff --git a/tests/_sh/checks/continue_outside_loop.golden b/tests/_sh/checks/continue_outside_loop.golden new file mode 100644 index 00000000..f2c3341e --- /dev/null +++ b/tests/_sh/checks/continue_outside_loop.golden @@ -0,0 +1 @@ +comp_statement: continue not in loop diff --git a/tests/_sh/checks/global_var_underscore.c b/tests/_sh/checks/global_var_underscore.c new file mode 100644 index 00000000..edc026bb --- /dev/null +++ b/tests/_sh/checks/global_var_underscore.c @@ -0,0 +1,6 @@ +// expect_comp_failure +int _IFS; + +void main() { + return; +} diff --git a/tests/_sh/checks/global_var_underscore.golden b/tests/_sh/checks/global_var_underscore.golden new file mode 100644 index 00000000..36b4e5cf --- /dev/null +++ b/tests/_sh/checks/global_var_underscore.golden @@ -0,0 +1 @@ +_IFS variable name is invalid. It can't start or end with '_'. 
diff --git a/tests/_sh/checks/invalid_printf.c b/tests/_sh/checks/invalid_printf.c new file mode 100644 index 00000000..bb456a92 --- /dev/null +++ b/tests/_sh/checks/invalid_printf.c @@ -0,0 +1,4 @@ +// expect_comp_failure +void main() { + printf("%d %d %d", 1, 2); // Too few arguments +} diff --git a/tests/_sh/checks/invalid_printf.golden b/tests/_sh/checks/invalid_printf.golden new file mode 100644 index 00000000..82e2be8d --- /dev/null +++ b/tests/_sh/checks/invalid_printf.golden @@ -0,0 +1 @@ +Not enough parameters for printf diff --git a/tests/_sh/checks/local_array_arg.c b/tests/_sh/checks/local_array_arg.c new file mode 100644 index 00000000..44fa1fe5 --- /dev/null +++ b/tests/_sh/checks/local_array_arg.c @@ -0,0 +1,4 @@ +// expect_comp_failure +void main() { + int a[10]; +} diff --git a/tests/_sh/checks/local_array_arg.golden b/tests/_sh/checks/local_array_arg.golden new file mode 100644 index 00000000..3dd621ea --- /dev/null +++ b/tests/_sh/checks/local_array_arg.golden @@ -0,0 +1 @@ +"a" variable: array/struct value type is not supported for shell backend. Use a reference type instead. diff --git a/tests/_sh/checks/local_array_param.c b/tests/_sh/checks/local_array_param.c new file mode 100644 index 00000000..26200b36 --- /dev/null +++ b/tests/_sh/checks/local_array_param.c @@ -0,0 +1,9 @@ +// expect_comp_failure +struct Pair { + int a; + int b; +}; + +void main(struct Pair a) { + return; +} diff --git a/tests/_sh/checks/local_array_param.golden b/tests/_sh/checks/local_array_param.golden new file mode 100644 index 00000000..3dd621ea --- /dev/null +++ b/tests/_sh/checks/local_array_param.golden @@ -0,0 +1 @@ +"a" variable: array/struct value type is not supported for shell backend. Use a reference type instead. 
diff --git a/tests/_sh/checks/local_struct_arg.c b/tests/_sh/checks/local_struct_arg.c new file mode 100644 index 00000000..d5030cd5 --- /dev/null +++ b/tests/_sh/checks/local_struct_arg.c @@ -0,0 +1,9 @@ +// expect_comp_failure +struct Pair { + int a; + int b; +}; + +void main() { + struct Pair a; +} diff --git a/tests/_sh/checks/local_struct_arg.golden b/tests/_sh/checks/local_struct_arg.golden new file mode 100644 index 00000000..3dd621ea --- /dev/null +++ b/tests/_sh/checks/local_struct_arg.golden @@ -0,0 +1 @@ +"a" variable: array/struct value type is not supported for shell backend. Use a reference type instead. diff --git a/tests/_sh/checks/local_struct_param.c b/tests/_sh/checks/local_struct_param.c new file mode 100644 index 00000000..26200b36 --- /dev/null +++ b/tests/_sh/checks/local_struct_param.c @@ -0,0 +1,9 @@ +// expect_comp_failure +struct Pair { + int a; + int b; +}; + +void main(struct Pair a) { + return; +} diff --git a/tests/_sh/checks/local_struct_param.golden b/tests/_sh/checks/local_struct_param.golden new file mode 100644 index 00000000..3dd621ea --- /dev/null +++ b/tests/_sh/checks/local_struct_param.golden @@ -0,0 +1 @@ +"a" variable: array/struct value type is not supported for shell backend. Use a reference type instead. 
diff --git a/tests/_sh/checks/local_var_shadowing.c b/tests/_sh/checks/local_var_shadowing.c new file mode 100644 index 00000000..db72ada9 --- /dev/null +++ b/tests/_sh/checks/local_var_shadowing.c @@ -0,0 +1,5 @@ +// expect_comp_failure +void main() { + int a; + int a; +} diff --git a/tests/_sh/checks/local_var_shadowing.golden b/tests/_sh/checks/local_var_shadowing.golden new file mode 100644 index 00000000..4418e3b8 --- /dev/null +++ b/tests/_sh/checks/local_var_shadowing.golden @@ -0,0 +1 @@ +Variable is already in local environment diff --git a/tests/_sh/checks/local_var_underscore.c b/tests/_sh/checks/local_var_underscore.c new file mode 100644 index 00000000..f1cb0023 --- /dev/null +++ b/tests/_sh/checks/local_var_underscore.c @@ -0,0 +1,4 @@ +// expect_comp_failure +void main() { + int _a; +} diff --git a/tests/_sh/checks/local_var_underscore.golden b/tests/_sh/checks/local_var_underscore.golden new file mode 100644 index 00000000..131b4019 --- /dev/null +++ b/tests/_sh/checks/local_var_underscore.golden @@ -0,0 +1 @@ +_a variable name is invalid. It can't start or end with '_'. 
diff --git a/tests/_sh/checks/shortcut_eval_outside_condition.c b/tests/_sh/checks/shortcut_eval_outside_condition.c new file mode 100644 index 00000000..f2f2f49f --- /dev/null +++ b/tests/_sh/checks/shortcut_eval_outside_condition.c @@ -0,0 +1,6 @@ +// expect_comp_failure +void main() { + int a = f() && g(); + int b = 1 && g(); + int c = f() && 1; +} diff --git a/tests/_sh/checks/shortcut_eval_outside_condition.golden b/tests/_sh/checks/shortcut_eval_outside_condition.golden new file mode 100644 index 00000000..172d7e39 --- /dev/null +++ b/tests/_sh/checks/shortcut_eval_outside_condition.golden @@ -0,0 +1 @@ +comp_rvalue_go: && and || with function calls can only be used in tests diff --git a/tests/_sh/checks/sizeof_array.c b/tests/_sh/checks/sizeof_array.c new file mode 100644 index 00000000..ccbe22d2 --- /dev/null +++ b/tests/_sh/checks/sizeof_array.c @@ -0,0 +1,6 @@ +// expect_comp_failure +typedef int arr[1000000]; + +void main() { + int a = sizeof(arr); +} diff --git a/tests/_sh/checks/sizeof_array.golden b/tests/_sh/checks/sizeof_array.golden new file mode 100644 index 00000000..b662b6a6 --- /dev/null +++ b/tests/_sh/checks/sizeof_array.golden @@ -0,0 +1 @@ +comp_rvalue_go: sizeof is not supported for this type or expression diff --git a/tests/_sh/checks/sizeof_expr.c b/tests/_sh/checks/sizeof_expr.c new file mode 100644 index 00000000..8f9c1812 --- /dev/null +++ b/tests/_sh/checks/sizeof_expr.c @@ -0,0 +1,4 @@ +// expect_comp_failure +void main() { + int a = sizeof a; +} diff --git a/tests/_sh/checks/sizeof_expr.golden b/tests/_sh/checks/sizeof_expr.golden new file mode 100644 index 00000000..b662b6a6 --- /dev/null +++ b/tests/_sh/checks/sizeof_expr.golden @@ -0,0 +1 @@ +comp_rvalue_go: sizeof is not supported for this type or expression diff --git a/tests/_sh/checks/struct_no_nested_array.c b/tests/_sh/checks/struct_no_nested_array.c new file mode 100644 index 00000000..c5092860 --- /dev/null +++ b/tests/_sh/checks/struct_no_nested_array.c @@ -0,0 +1,10 @@ 
+// expect_comp_failure + +struct A { + int a[3]; + int a; +}; + +void main() { + return; +} diff --git a/tests/_sh/checks/struct_no_nested_array.golden b/tests/_sh/checks/struct_no_nested_array.golden new file mode 100644 index 00000000..db0080b5 --- /dev/null +++ b/tests/_sh/checks/struct_no_nested_array.golden @@ -0,0 +1 @@ +Nested structures not supported by shell backend. Use a reference type instead. diff --git a/tests/_sh/checks/struct_no_nested_struct.c b/tests/_sh/checks/struct_no_nested_struct.c new file mode 100644 index 00000000..d007afbc --- /dev/null +++ b/tests/_sh/checks/struct_no_nested_struct.c @@ -0,0 +1,12 @@ +// expect_comp_failure + +struct A { + struct B { + int a; + int b; + } b; +}; + +void main() { + return; +} diff --git a/tests/_sh/checks/struct_no_nested_struct.golden b/tests/_sh/checks/struct_no_nested_struct.golden new file mode 100644 index 00000000..db0080b5 --- /dev/null +++ b/tests/_sh/checks/struct_no_nested_struct.golden @@ -0,0 +1 @@ +Nested structures not supported by shell backend. Use a reference type instead. 
diff --git a/tests/_sh/checks/switch_early_exit.c b/tests/_sh/checks/switch_early_exit.c new file mode 100644 index 00000000..c2d3842d --- /dev/null +++ b/tests/_sh/checks/switch_early_exit.c @@ -0,0 +1,13 @@ +// expect_comp_failure +void main() { + switch (1) { + case 1: + if (0) { + break; + } else { + // This should not be allowed + } + case 2: + return; + } +} diff --git a/tests/_sh/checks/switch_early_exit.golden b/tests/_sh/checks/switch_early_exit.golden new file mode 100644 index 00000000..14d221da --- /dev/null +++ b/tests/_sh/checks/switch_early_exit.golden @@ -0,0 +1 @@ +Early break out of a switch case is unsupported diff --git a/tests/_sh/checks/switch_fallthrough.c b/tests/_sh/checks/switch_fallthrough.c new file mode 100644 index 00000000..6717caf3 --- /dev/null +++ b/tests/_sh/checks/switch_fallthrough.c @@ -0,0 +1,10 @@ +// expect_comp_failure +void main() { + switch (1) { + case 1: + putchar('a'); + case 2: + putchar('b'); + break; + } +} diff --git a/tests/_sh/checks/switch_fallthrough.golden b/tests/_sh/checks/switch_fallthrough.golden new file mode 100644 index 00000000..217d1cd3 --- /dev/null +++ b/tests/_sh/checks/switch_fallthrough.golden @@ -0,0 +1 @@ +case/default must be at the beginning of a switch conditional block diff --git a/tests/_sh/checks/switch_no_body.c b/tests/_sh/checks/switch_no_body.c new file mode 100644 index 00000000..e420e9b8 --- /dev/null +++ b/tests/_sh/checks/switch_no_body.c @@ -0,0 +1,4 @@ +// expect_comp_failure +void main() { + switch (1) putchar('a'); +} diff --git a/tests/_sh/checks/switch_no_body.golden b/tests/_sh/checks/switch_no_body.golden new file mode 100644 index 00000000..283aa55f --- /dev/null +++ b/tests/_sh/checks/switch_no_body.golden @@ -0,0 +1 @@ +comp_statement: switch without body diff --git a/tests/_sh/checks/ternary_with_fun_call.c b/tests/_sh/checks/ternary_with_fun_call.c new file mode 100644 index 00000000..43bef78d --- /dev/null +++ b/tests/_sh/checks/ternary_with_fun_call.c @@ -0,0 +1,5 
@@ +// expect_comp_failure +void main() { + int a = f() ? 1 : 2; // Valid + int b = f() ? 1 : f(); // Invalid +} diff --git a/tests/_sh/checks/ternary_with_fun_call.golden b/tests/_sh/checks/ternary_with_fun_call.golden new file mode 100644 index 00000000..d3840948 --- /dev/null +++ b/tests/_sh/checks/ternary_with_fun_call.golden @@ -0,0 +1 @@ +Conditional function calls in ternary operator not allowed diff --git a/tests/_sh/checks/var_IFS.c b/tests/_sh/checks/var_IFS.c new file mode 100644 index 00000000..1f7b41cd --- /dev/null +++ b/tests/_sh/checks/var_IFS.c @@ -0,0 +1,4 @@ +// expect_comp_failure +void main() { + int IFS; +} diff --git a/tests/_sh/checks/var_IFS.golden b/tests/_sh/checks/var_IFS.golden new file mode 100644 index 00000000..d4c3293f --- /dev/null +++ b/tests/_sh/checks/var_IFS.golden @@ -0,0 +1 @@ +"IFS" variable name is invalid. It can't be 'IFS' or 'argv_'. From 863cf77290bfd3dc6e2aaab00ba2c2833f854f7a Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Tue, 28 Jan 2025 13:31:06 -0500 Subject: [PATCH 24/89] Fix base64.c example --- examples/base64.c | 3 --- examples/compiled/base64.sh | 6 ------ 2 files changed, 9 deletions(-) diff --git a/examples/base64.c b/examples/base64.c index dfcf028c..cfbfd6f2 100644 --- a/examples/base64.c +++ b/examples/base64.c @@ -12,9 +12,6 @@ char buf[BUF_SIZE]; -void cat_fd(int fd) { -} - char *codes = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; void encode() { diff --git a/examples/compiled/base64.sh b/examples/compiled/base64.sh index ac22f57b..0426e1db 100755 --- a/examples/compiled/base64.sh +++ b/examples/compiled/base64.sh @@ -13,12 +13,6 @@ _malloc() { # $2 = object size defarr() { _malloc $1 $2; } defarr _buf 1024 -: $((fd = 0)) -_cat_fd() { let fd $2 - : - endlet $1 fd -} - unpack_escaped_string() { __buf="$1" From 5a5076e2578fa79fba3dcae16f592f3de203bfcc Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Tue, 28 Jan 2025 14:35:18 -0500 Subject: [PATCH 25/89] Default 
failure tests to safe mode Otherwise, when running the tests in safe mode, the error message contains the error location which breaks the test. --- run-tests.sh | 37 +++++++++++++------ tests/_sh/checks/address_of_global.golden | 2 +- tests/_sh/checks/address_of_local.golden | 2 +- tests/_sh/checks/break_outside_loop.golden | 2 +- tests/_sh/checks/continue_outside_loop.golden | 2 +- tests/_sh/checks/global_var_underscore.golden | 2 +- tests/_sh/checks/invalid_printf.golden | 2 +- tests/_sh/checks/local_array_arg.golden | 2 +- tests/_sh/checks/local_array_param.golden | 2 +- tests/_sh/checks/local_struct_arg.golden | 2 +- tests/_sh/checks/local_struct_param.golden | 2 +- tests/_sh/checks/local_var_shadowing.golden | 2 +- tests/_sh/checks/local_var_underscore.golden | 2 +- .../shortcut_eval_outside_condition.golden | 2 +- tests/_sh/checks/sizeof_array.golden | 2 +- tests/_sh/checks/sizeof_expr.golden | 2 +- .../_sh/checks/struct_no_nested_array.golden | 2 +- .../_sh/checks/struct_no_nested_struct.golden | 2 +- tests/_sh/checks/switch_early_exit.golden | 2 +- tests/_sh/checks/switch_fallthrough.golden | 2 +- tests/_sh/checks/switch_no_body.golden | 2 +- tests/_sh/checks/ternary_with_fun_call.golden | 2 +- tests/_sh/checks/var_IFS.golden | 2 +- 23 files changed, 47 insertions(+), 34 deletions(-) diff --git a/run-tests.sh b/run-tests.sh index fbadd4ae..7b22bde4 100755 --- a/run-tests.sh +++ b/run-tests.sh @@ -67,25 +67,32 @@ fi # Compile pnut, either using gcc or with pnut itself. 
Set pnut_comp to the compiled pnut executable # The compiled pnut executable is cached in the tests folder to speed up the process -compile_pnut() { # extra pnut compilation options: $1 +compile_pnut() { # extra pnut compilation options: $1, expect_failed_compilation?: $2 pnut_source="pnut.c" extra_opts="$1" + expect_failed_compilation="${2:-0}" + if [ "$safe" -eq 0 ] && [ "$expect_failed_compilation" -eq 1 ]; then + extra_opts="$extra_opts -DSAFE_MODE" + fi + if [ -z "$extra_opts" ]; then extra_opts_id="base" else extra_opts_id=$(printf "%s" "$extra_opts" | md5sum | cut -c 1-16) # 16 characters should be enough fi - extra_opts_suffix=${extra_opts_id:+"-"}$extra_opts_id # Add a dash if there are extra options + extra_opts_suffix=${extra_opts_id:+"-"}$extra_opts_id # Add a dash if there are extra options pnut_exe="./tests/pnut-by-gcc${extra_opts_suffix}.exe" - pnut_exe_backend="./tests/pnut-$extra_opts_suffix.$ext" + pnut_exe_backend="./tests/pnut$extra_opts_suffix.$ext" if [ ! -f "$pnut_exe" ]; then - gcc "$pnut_source" $PNUT_EXE_OPTIONS $extra_opts -o "$pnut_exe" 2> /dev/null || fail "Error: Failed to compile $pnut_source with $backend" + gcc "$pnut_source" $PNUT_EXE_OPTIONS $extra_opts -o "$pnut_exe" 2> /dev/null \ + || fail "Error: Failed to compile $pnut_source with $backend" fi if [ "$bootstrap" -eq 1 ]; then if [ ! 
-f "$pnut_exe_backend" ]; then - $pnut_exe $PNUT_EXE_OPTIONS $extra_opts "$pnut_source" > "$pnut_exe_backend" || fail "Error: Failed to compile $pnut_source with $pnut_exe (bootstrap)" + $pnut_exe $PNUT_EXE_OPTIONS $extra_opts "$pnut_source" > "$pnut_exe_backend" \ + || fail "Error: Failed to compile $pnut_source with $pnut_exe (bootstrap)" chmod +x "$pnut_exe_backend" fi pnut_comp="$pnut_exe_backend" @@ -184,9 +191,9 @@ execute_test() { # executable: $1, timeout: $2, args: $3 fi } -compile_test() { # c file: $1 +compile_test() { # c file: $1, expect_failed_compilation?: $2 # 15s timeout to prevent infinite loops in pnut - compile_pnut $(test_pnut_comp_options $1) + compile_pnut "$(test_pnut_comp_options $1)" "$2" if [ $bootstrap -eq 1 ]; then if [ "$backend" = "sh" ]; then timeout 15 $shell $pnut_comp "$1" $(test_comp_options $1) @@ -214,9 +221,10 @@ run_test() { # file_to_test: $1 # Generate golden file if it doesn't exist if [ ! -f "$golden_file" ]; then - compile_test "$file" > "$dir/$filename.$ext" && \ + compile_test "$file" "$expect_failed_comp" > "$dir/$filename.$ext" && \ gcc "$file" $(test_comp_options $file) -o "$dir/$filename-gcc.$ext" 2> "$dir/$filename-by-gcc.err" - if [ $? -eq 0 ] && [ "$expect_failed_comp" -eq 0 ]; then + compile_test_exit_code="$?" 
+ if [ "$compile_test_exit_code" -eq 0 ] && [ "$expect_failed_comp" -eq 0 ]; then chmod +x "$dir/$filename.$ext" execute_test "$dir/$filename.$ext" "$(test_timeout $file)" "$(test_args $file)" > "$dir/$filename.output" $dir/$filename-gcc.$ext $(test_args $file) > "$dir/$filename-gcc.output" @@ -229,8 +237,13 @@ run_test() { # file_to_test: $1 elif [ "$expect_failed_comp" -eq 1 ]; then - echo "🟡 Golden file generated by pnut" - tail -n 1 "$dir/$filename.$ext" > "$golden_file" # Save the error message which is the last line + if [ "$compile_test_exit_code" -eq 0 ]; then + echo "❌ Compilation succeeded when it should have failed" + return 1 + else + echo "🟡 Golden file generated by pnut" + tail -n 1 "$dir/$filename.$ext" > "$golden_file" # Save the error message which is the last line + fi else echo "❌ Failed to compile with pnut. See $dir/$filename.$ext and $dir/$filename-by-gcc.err" @@ -239,7 +252,7 @@ run_test() { # file_to_test: $1 fi # Compile the test file with pnut.exe - compile_test "$file" > "$dir/$filename.$ext" + compile_test "$file" "$expect_failed_comp" > "$dir/$filename.$ext" compile_test_exit_code="$?" 
if [ "$compile_test_exit_code" -eq 0 ] && [ "$expect_failed_comp" -eq 0 ]; then # If compilation was successful and not expected to fail diff --git a/tests/_sh/checks/address_of_global.golden b/tests/_sh/checks/address_of_global.golden index 6b2fe82a..d18dfcfa 100644 --- a/tests/_sh/checks/address_of_global.golden +++ b/tests/_sh/checks/address_of_global.golden @@ -1 +1 @@ -comp_rvalue_go: can't take the address of a local variable +tests/_sh/checks/address_of_global.c:7:0 comp_rvalue_go: can't take the address of a local variable diff --git a/tests/_sh/checks/address_of_local.golden b/tests/_sh/checks/address_of_local.golden index 6b2fe82a..5638ed85 100644 --- a/tests/_sh/checks/address_of_local.golden +++ b/tests/_sh/checks/address_of_local.golden @@ -1 +1 @@ -comp_rvalue_go: can't take the address of a local variable +tests/_sh/checks/address_of_local.c:6:0 comp_rvalue_go: can't take the address of a local variable diff --git a/tests/_sh/checks/break_outside_loop.golden b/tests/_sh/checks/break_outside_loop.golden index 1612b4cb..210849b3 100644 --- a/tests/_sh/checks/break_outside_loop.golden +++ b/tests/_sh/checks/break_outside_loop.golden @@ -1 +1 @@ -comp_statement: break not in loop or switch +tests/_sh/checks/break_outside_loop.c:5:0 comp_statement: break not in loop or switch diff --git a/tests/_sh/checks/continue_outside_loop.golden b/tests/_sh/checks/continue_outside_loop.golden index f2c3341e..eaa599d6 100644 --- a/tests/_sh/checks/continue_outside_loop.golden +++ b/tests/_sh/checks/continue_outside_loop.golden @@ -1 +1 @@ -comp_statement: continue not in loop +tests/_sh/checks/continue_outside_loop.c:5:0 comp_statement: continue not in loop diff --git a/tests/_sh/checks/global_var_underscore.golden b/tests/_sh/checks/global_var_underscore.golden index 36b4e5cf..8b25d2d5 100644 --- a/tests/_sh/checks/global_var_underscore.golden +++ b/tests/_sh/checks/global_var_underscore.golden @@ -1 +1 @@ -_IFS variable name is invalid. 
It can't start or end with '_'. +_IFS tests/_sh/checks/global_var_underscore.c:3:0 variable name is invalid. It can't start or end with '_'. diff --git a/tests/_sh/checks/invalid_printf.golden b/tests/_sh/checks/invalid_printf.golden index 82e2be8d..8e3f48e2 100644 --- a/tests/_sh/checks/invalid_printf.golden +++ b/tests/_sh/checks/invalid_printf.golden @@ -1 +1 @@ -Not enough parameters for printf +tests/_sh/checks/invalid_printf.c:5:0 Not enough parameters for printf diff --git a/tests/_sh/checks/local_array_arg.golden b/tests/_sh/checks/local_array_arg.golden index 3dd621ea..4ae50f86 100644 --- a/tests/_sh/checks/local_array_arg.golden +++ b/tests/_sh/checks/local_array_arg.golden @@ -1 +1 @@ -"a" variable: array/struct value type is not supported for shell backend. Use a reference type instead. +"a" variable: tests/_sh/checks/local_array_arg.c:5:0 array/struct value type is not supported for shell backend. Use a reference type instead. diff --git a/tests/_sh/checks/local_array_param.golden b/tests/_sh/checks/local_array_param.golden index 3dd621ea..2da9bf63 100644 --- a/tests/_sh/checks/local_array_param.golden +++ b/tests/_sh/checks/local_array_param.golden @@ -1 +1 @@ -"a" variable: array/struct value type is not supported for shell backend. Use a reference type instead. +"a" variable: tests/_sh/checks/local_array_param.c:10:0 array/struct value type is not supported for shell backend. Use a reference type instead. diff --git a/tests/_sh/checks/local_struct_arg.golden b/tests/_sh/checks/local_struct_arg.golden index 3dd621ea..80a03b04 100644 --- a/tests/_sh/checks/local_struct_arg.golden +++ b/tests/_sh/checks/local_struct_arg.golden @@ -1 +1 @@ -"a" variable: array/struct value type is not supported for shell backend. Use a reference type instead. +"a" variable: tests/_sh/checks/local_struct_arg.c:10:0 array/struct value type is not supported for shell backend. Use a reference type instead. 
diff --git a/tests/_sh/checks/local_struct_param.golden b/tests/_sh/checks/local_struct_param.golden index 3dd621ea..49192a44 100644 --- a/tests/_sh/checks/local_struct_param.golden +++ b/tests/_sh/checks/local_struct_param.golden @@ -1 +1 @@ -"a" variable: array/struct value type is not supported for shell backend. Use a reference type instead. +"a" variable: tests/_sh/checks/local_struct_param.c:10:0 array/struct value type is not supported for shell backend. Use a reference type instead. diff --git a/tests/_sh/checks/local_var_shadowing.golden b/tests/_sh/checks/local_var_shadowing.golden index 4418e3b8..fc390f82 100644 --- a/tests/_sh/checks/local_var_shadowing.golden +++ b/tests/_sh/checks/local_var_shadowing.golden @@ -1 +1 @@ -Variable is already in local environment +tests/_sh/checks/local_var_shadowing.c:6:0 Variable is already in local environment diff --git a/tests/_sh/checks/local_var_underscore.golden b/tests/_sh/checks/local_var_underscore.golden index 131b4019..67c0b1f4 100644 --- a/tests/_sh/checks/local_var_underscore.golden +++ b/tests/_sh/checks/local_var_underscore.golden @@ -1 +1 @@ -_a variable name is invalid. It can't start or end with '_'. +_a tests/_sh/checks/local_var_underscore.c:5:0 variable name is invalid. It can't start or end with '_'. 
diff --git a/tests/_sh/checks/shortcut_eval_outside_condition.golden b/tests/_sh/checks/shortcut_eval_outside_condition.golden index 172d7e39..c6d78b06 100644 --- a/tests/_sh/checks/shortcut_eval_outside_condition.golden +++ b/tests/_sh/checks/shortcut_eval_outside_condition.golden @@ -1 +1 @@ -comp_rvalue_go: && and || with function calls can only be used in tests +tests/_sh/checks/shortcut_eval_outside_condition.c:7:0 comp_rvalue_go: && and || with function calls can only be used in tests diff --git a/tests/_sh/checks/sizeof_array.golden b/tests/_sh/checks/sizeof_array.golden index b662b6a6..d2825183 100644 --- a/tests/_sh/checks/sizeof_array.golden +++ b/tests/_sh/checks/sizeof_array.golden @@ -1 +1 @@ -comp_rvalue_go: sizeof is not supported for this type or expression +tests/_sh/checks/sizeof_array.c:7:0 comp_rvalue_go: sizeof is not supported for this type or expression diff --git a/tests/_sh/checks/sizeof_expr.golden b/tests/_sh/checks/sizeof_expr.golden index b662b6a6..57980bb7 100644 --- a/tests/_sh/checks/sizeof_expr.golden +++ b/tests/_sh/checks/sizeof_expr.golden @@ -1 +1 @@ -comp_rvalue_go: sizeof is not supported for this type or expression +tests/_sh/checks/sizeof_expr.c:5:0 comp_rvalue_go: sizeof is not supported for this type or expression diff --git a/tests/_sh/checks/struct_no_nested_array.golden b/tests/_sh/checks/struct_no_nested_array.golden index db0080b5..0efe4c60 100644 --- a/tests/_sh/checks/struct_no_nested_array.golden +++ b/tests/_sh/checks/struct_no_nested_array.golden @@ -1 +1 @@ -Nested structures not supported by shell backend. Use a reference type instead. +tests/_sh/checks/struct_no_nested_array.c:7:0 Nested structures not supported by shell backend. Use a reference type instead. 
diff --git a/tests/_sh/checks/struct_no_nested_struct.golden b/tests/_sh/checks/struct_no_nested_struct.golden index db0080b5..2a16b6ec 100644 --- a/tests/_sh/checks/struct_no_nested_struct.golden +++ b/tests/_sh/checks/struct_no_nested_struct.golden @@ -1 +1 @@ -Nested structures not supported by shell backend. Use a reference type instead. +tests/_sh/checks/struct_no_nested_struct.c:9:0 Nested structures not supported by shell backend. Use a reference type instead. diff --git a/tests/_sh/checks/switch_early_exit.golden b/tests/_sh/checks/switch_early_exit.golden index 14d221da..3e311751 100644 --- a/tests/_sh/checks/switch_early_exit.golden +++ b/tests/_sh/checks/switch_early_exit.golden @@ -1 +1 @@ -Early break out of a switch case is unsupported +tests/_sh/checks/switch_early_exit.c:14:0 Early break out of a switch case is unsupported diff --git a/tests/_sh/checks/switch_fallthrough.golden b/tests/_sh/checks/switch_fallthrough.golden index 217d1cd3..7fea6935 100644 --- a/tests/_sh/checks/switch_fallthrough.golden +++ b/tests/_sh/checks/switch_fallthrough.golden @@ -1 +1 @@ -case/default must be at the beginning of a switch conditional block +tests/_sh/checks/switch_fallthrough.c:11:0 case/default must be at the beginning of a switch conditional block diff --git a/tests/_sh/checks/switch_no_body.golden b/tests/_sh/checks/switch_no_body.golden index 283aa55f..88d73f46 100644 --- a/tests/_sh/checks/switch_no_body.golden +++ b/tests/_sh/checks/switch_no_body.golden @@ -1 +1 @@ -comp_statement: switch without body +tests/_sh/checks/switch_no_body.c:5:0 comp_statement: switch without body diff --git a/tests/_sh/checks/ternary_with_fun_call.golden b/tests/_sh/checks/ternary_with_fun_call.golden index d3840948..09a0ba55 100644 --- a/tests/_sh/checks/ternary_with_fun_call.golden +++ b/tests/_sh/checks/ternary_with_fun_call.golden @@ -1 +1 @@ -Conditional function calls in ternary operator not allowed +tests/_sh/checks/ternary_with_fun_call.c:6:0 Conditional function 
calls in ternary operator not allowed diff --git a/tests/_sh/checks/var_IFS.golden b/tests/_sh/checks/var_IFS.golden index d4c3293f..4395f421 100644 --- a/tests/_sh/checks/var_IFS.golden +++ b/tests/_sh/checks/var_IFS.golden @@ -1 +1 @@ -"IFS" variable name is invalid. It can't be 'IFS' or 'argv_'. +"IFS" tests/_sh/checks/var_IFS.c:5:0 variable name is invalid. It can't be 'IFS' or 'argv_'. From 3b71854e7d7629de7b22527e8ef67ebab42a96b9 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Wed, 29 Jan 2025 16:37:43 -0500 Subject: [PATCH 26/89] Parse initializer lists --- pnut.c | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/pnut.c b/pnut.c index fa9be1c9..8fdbc21d 100644 --- a/pnut.c +++ b/pnut.c @@ -195,6 +195,7 @@ enum { MINUS_MINUS_POST, ELLIPSIS, PARENS, + INITIALIZER_LIST, // Other tokens MACRO_ARG = 499, @@ -2261,6 +2262,8 @@ ast parse_enum(); ast parse_struct_or_union(int struct_or_union_tok); ast parse_declarator(bool abstract_decl, ast parent_type); ast parse_declaration_specifiers(); +ast parse_initializer_list(); +ast parse_initializer(); // The storage class specifier and type qualifier tokens are all between 300 (AUTO_KW) and 326 (VOLATILE_KW) so we store them as bits in an int. 
#define MK_TYPE_SPECIFIER(tok) (1 << (tok - AUTO_KW)) @@ -2686,6 +2689,38 @@ ast parse_declarator(bool abstract_decl, ast parent_type) { return decl; } +ast parse_initializer_list() { + ast result = 0, tail = 0; + + expect_tok('{'); + + while (tok != '}' && tok != EOF) { +#ifdef sh + if (tok == '{') fatal_error("nested initializer lists not supported"); +#endif + if (result == 0) { + tail = result = new_ast2(',', parse_initializer(), 0); + } else { + set_child(tail, 1, new_ast2(',', parse_initializer(), 0)); + tail = get_child_(',', tail, 1); + } + if (tok == ',') get_tok(); + else break; + } + + expect_tok('}'); + + return new_ast1(INITIALIZER_LIST, result); +} + +ast parse_initializer() { + if (tok == '{') { + return parse_initializer_list(); + } else { + return parse_assignment_expression(); + } +} + ast parse_declarator_and_initializer(ast type_specifier) { ast declarator = parse_declarator(false, type_specifier); @@ -2693,7 +2728,7 @@ ast parse_declarator_and_initializer(ast type_specifier) { if (tok == '=') { get_tok(); // parse_declarator returns a DECL node where the initializer is child#2 - set_child(declarator, 2, parse_conditional_expression()); + set_child(declarator, 2, parse_initializer()); } } From 13944c246e623c3fb8c607ae841da031a4e1f4a5 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Wed, 29 Jan 2025 17:16:10 -0500 Subject: [PATCH 27/89] Support initializer list for global array decl Since local variables cannot be arrays or structures, it doesn't make sense to support initializer lists in their case.
--- pnut.c | 2 +- sh-runtime.c | 57 ++++++++++++++++------- sh.c | 126 +++++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 149 insertions(+), 36 deletions(-) diff --git a/pnut.c b/pnut.c index 8fdbc21d..8b4a8204 100644 --- a/pnut.c +++ b/pnut.c @@ -615,7 +615,7 @@ int end_ident() { heap[probe+1] = string_start; heap[probe+2] = IDENTIFIER; heap[probe+3] = 0; // Token tag - heap[probe+4] = string_pool_alloc - string_start - 1; // string length + heap[probe+4] = string_pool_alloc - string_start - 1; // string length (excluding terminator) return probe; } diff --git a/sh-runtime.c b/sh-runtime.c index d8016e38..252ddc18 100644 --- a/sh-runtime.c +++ b/sh-runtime.c @@ -290,25 +290,43 @@ DEFINE_RUNTIME_FUN(malloc) putstr("}\n"); END_RUNTIME_FUN(malloc) -DEFINE_RUNTIME_FUN(initialize_memory) - putstr("# Initialize the memory to 0\n"); - putstr("initialize_memory() { # $1 = address, $2 = length\n"); - putstr(" __ix=$1\n"); - putstr(" __last=$(($1 + $2))\n"); - putstr(" while [ $__ix -lt $__last ]; do\n"); - putstr(" : $((_$__ix=0))\n"); - putstr(" : $((__ix += 1))\n"); +DEFINE_RUNTIME_FUN(initialize) + putstr("# Initialize memory with the list of values.\n"); + putstr("# When the expected number of elements is higher than the actual number of\n"); + putstr("# elements, the remaining elements are set to 0\n"); + putstr("initialize() { # $1 = var name, $2 = length, $3... 
= elements\n"); + putstr(" __ptr=$(($1))\n"); + putstr(" __size=$2\n"); + putstr(" __i=0\n"); + putstr(" while [ $# -ge 3 ]; do\n"); + putstr(" : $((_$((__ptr + __i)) = $3))\n"); + putstr(" : $((__i += 1))\n"); + putstr(" shift\n"); + putstr(" done\n"); + putstr("\n"); + putstr(" while [ $__i -lt $__size ]; do\n"); + putstr(" : $((_$((__ptr + __i)) = 0))\n"); + putstr(" : $((__i += 1))\n"); putstr(" done\n"); putstr("}\n"); -END_RUNTIME_FUN(initialize_memory) +END_RUNTIME_FUN(initialize) DEFINE_RUNTIME_FUN(defarr) DEPENDS_ON(malloc) #ifdef RT_NO_INIT_GLOBALS - putstr("defarr() { _malloc $1 $2; }\n"); + // If some array initializers were used, defarr is extended to support initialization + if (runtime_use_initialize) { + DEPENDS_ON(initialize) + putstr("defarr() {\n"); + putstr(" _malloc $1 $2;\n"); + putstr(" if [ $# -gt 2 ]; then initialize $@; fi\n"); + putstr("}\n"); + } else { + putstr("defarr() { _malloc $1 $2; }\n"); + } #else -DEPENDS_ON(initialize_memory) - putstr("defarr() { _malloc $1 $2; initialize_memory $(($1)) $2; }\n"); +DEPENDS_ON(initialize) + putstr("defarr() { _malloc $1 $2; initialize_memory $@; }\n"); #endif END_RUNTIME_FUN(defarr) @@ -436,11 +454,13 @@ DEPENDS_ON(char_to_int) extract_line_head(" ", "__us_buf16", "__us_buf256", ANY_STRING_16, "16", "") putstr("}\n"); #endif - putstr("unpack_escaped_string() {\n"); + putstr("unpack_escaped_string() { # $1 = string, $2 = size (optional)\n"); putstr(" __buf=\"$1\"\n"); + putstr(" __len=${2:-${#__buf}}\n"); putstr(" # Allocates enough space for all characters, assuming that no character is escaped\n"); - putstr(" _malloc __addr $((${#__buf} + 1))\n"); + putstr(" _malloc __addr $((${2:-${#__buf} + 1}))\n"); putstr(" __ptr=$__addr\n"); + putstr(" __end=$((__ptr + __len))\n"); #ifdef OPTIMIZE_LONG_LINES putstr(" __us_buf16=\n"); putstr(" __us_buf256=\n"); @@ -459,7 +479,10 @@ DEPENDS_ON(char_to_int) putstr(" : $((__ptr += 1))\n"); putstr(" done\n"); #endif - putstr(" : $((_$__ptr = 0))\n"); + putstr(" 
while [ $__ptr -le $__end ]; do\n"); + putstr(" : $((_$__ptr = 0))\n"); + putstr(" : $((__ptr += 1))\n"); + putstr(" done\n"); putstr("}\n"); END_RUNTIME_FUN(unpack_escaped_string) @@ -468,12 +491,12 @@ DEPENDS_ON(unpack_escaped_string) putstr("# Define a string, and return a reference to it in the varible taken as argument.\n"); putstr("# If the variable is already defined, this function does nothing.\n"); putstr("# Note that it's up to the caller to ensure that no 2 strings share the same variable.\n"); - putstr("defstr() { # $1 = variable name, $2 = string\n"); + putstr("defstr() { # $1 = variable name, $2 = string, $3 = size (optional)\n"); #ifndef RT_UNSAFE_HEAP putstr(" set +u # Necessary to allow the variable to be empty\n"); #endif putstr(" if [ $(($1)) -eq 0 ]; then\n"); - putstr(" unpack_escaped_string \"$2\"\n"); + putstr(" unpack_escaped_string \"$2\" $3\n"); putstr(" : $(($1 = __addr))\n"); putstr(" fi\n"); #ifndef RT_UNSAFE_HEAP diff --git a/sh.c b/sh.c index f9a28033..443f5dc2 100644 --- a/sh.c +++ b/sh.c @@ -1008,9 +1008,14 @@ ast handle_side_effects(ast node) { return handle_side_effects_go(node, false); } -void comp_defstr(ast ident, int string_probe) { +void comp_defstr(ast ident, int string_probe, int array_size) { char *string_start = string_pool + heap[string_probe + 1]; char *string_end = string_start + heap[string_probe + 4]; + text array_size_text = 0; + + if (array_size != -1) { + array_size_text = string_concat(wrap_char(' '), wrap_int(array_size)); + } if (top_level_stmt) { // If defstr is used at the top level, it needs to be included beforehand @@ -1019,11 +1024,57 @@ void comp_defstr(ast ident, int string_probe) { runtime_use_defstr = true; } - append_glo_decl(string_concat5( wrap_str_lit("defstr ") - , format_special_var(ident, false) - , wrap_str_lit(" \"") - , escape_text(wrap_str_imm(string_start, string_end), false) - , wrap_char('\"'))); + append_glo_decl(string_concat4( wrap_str_lit("defstr ") + , env_var(ident) + , 
string_concat3( wrap_str_lit(" \"") + , escape_text(wrap_str_imm(string_start, string_end), false) + , wrap_char('\"') + ) + , array_size_text)); +} + +int initializer_list_len(ast node) { + int res = 0; + + // Each element of the list has size 1 since nested initializers are not allowed + while (node != 0) { + res += 1; + node = get_child_(',', node, 1); + } + + return res; +} + +text comp_initializer_list(ast initializer_list, int expected_len) { + text args = 0; + ast element; + ast str_ident; + + runtime_use_initialize = true; + + while (initializer_list != 0) { + element = get_child_(',', initializer_list, 0); + switch (get_op(element)) { + case INTEGER: + args = concatenate_strings_with(args, wrap_int(-get_val_(INTEGER, element)), wrap_char(' ')); + break; + case CHARACTER: + // TODO: Character identifiers are only defined at the end of the script, so we can't use them here + args = concatenate_strings_with(args, wrap_int(get_val_(CHARACTER, element)), wrap_char(' ')); + break; + case STRING: + str_ident = fresh_string_ident(get_val_(STRING, element)); + comp_defstr(str_ident, get_val_(STRING, element), -1); + args = concatenate_strings_with(args, string_concat(wrap_char('$'), format_special_var(str_ident, true)), wrap_char(' ')); + break; + default: + // TODO: Support nested initializers and constant expressions + fatal_error("comp_initializer: unexpected operator"); + } + initializer_list = get_child_opt_(',', ',', initializer_list, 1); + } + + return args; } enum VALUE_CTX { @@ -1303,7 +1354,7 @@ text comp_rvalue(ast node, int context) { while (literals_inits != 0) { side_effect = get_child__(',', '=', literals_inits, 0); - comp_defstr(get_child_('=', side_effect, 0), get_child_('=', side_effect, 1)); + comp_defstr(get_child_('=', side_effect, 0), get_child_('=', side_effect, 1), -1); literals_inits = get_child_opt_(',', ',', literals_inits, 1); } @@ -2175,26 +2226,64 @@ void comp_glo_var_decl(ast node) { ast name = get_child__(DECL, IDENTIFIER, node, 0); 
ast type = get_child_(DECL, node, 1); ast init = get_child_(DECL, node, 2); + int arr_len, init_len; + text args = 0; if (get_op(type) == '(') return; // Ignore function declarations - if (init == 0) init = new_ast0(INTEGER, 0); - // TODO: Add enum/struct/union to env if it's not already there // handle_enum_struct_union_type_decl(type); assert_var_decl_is_safe(node, false); if (get_op(type) == '[') { // Array declaration - runtime_defarr(); - append_glo_decl( - string_concat4( - wrap_str_lit("defarr "), - env_var(name), - wrap_char(' '), - wrap_int(get_child_('[', type, 1)) - ) - ); + arr_len = get_child_('[', type, 1); + + // If the array is initialized with a string, we want to call defstr on the + // string, and then initialize the array variable with the variable passed + // to defstr. + if (init != 0 && get_op(init) == STRING) { + init_len = heap[get_val_(STRING, init) + 4] + 1; // string_end - string_start + if (arr_len != 0 && arr_len < init_len) { + fatal_error("Array type is too small for initializer"); + } + comp_defstr(name, get_val_(STRING, init), arr_len != 0 ? arr_len : init_len); + } else { + // If the array is initialized with an initializer list, we want to pass + // the list of values to the defarr function. Because the array size is + // optional, we need to calculate the size of the array from the + // initializer list if it's not provided. 
+ if (init != 0) { + if (get_op(init) != INITIALIZER_LIST) fatal_error("Array declaration with invalid initializer"); + init = get_child_(INITIALIZER_LIST, init, 0); + + runtime_use_initialize = true; + init_len = initializer_list_len(init); + args = comp_initializer_list(init, arr_len); + if (arr_len == 0) { + arr_len = init_len; + } else if (arr_len < init_len) { + fatal_error("Array type is too small for initializer"); + } + } + + if (arr_len == 0) { + fatal_error("Array declaration without size or initializer list"); + } + + runtime_defarr(); + + append_glo_decl( + concatenate_strings_with( + string_concat4( + wrap_str_lit("defarr "), + env_var(name), + wrap_char(' '), + wrap_int(arr_len)), + args, + wrap_char(' ')) + ); + } } else { #ifdef SUPPORT_ADDRESS_OF_OP runtime_defglo(); @@ -2207,6 +2296,7 @@ void comp_glo_var_decl(ast node) { ) ); #else + if (init == 0) init = new_ast0(INTEGER, 0); comp_assignment(name, init); #endif } From 012909ea39ff4beefa61439531b688e41212c9de Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Thu, 30 Jan 2025 11:39:55 -0500 Subject: [PATCH 28/89] Add tests --- tests/_sh/checks/initializer_nested.c | 6 ++++ tests/_sh/checks/initializer_nested.golden | 1 + tests/_sh/checks/initializer_too_large.c | 6 ++++ tests/_sh/checks/initializer_too_large.golden | 1 + tests/_sh/checks/initializer_too_large_str.c | 6 ++++ .../checks/initializer_too_large_str.golden | 1 + tests/_sh/initializer.c | 30 ++++++++++++++++++ tests/_sh/initializer.golden | Bin 0 -> 106 bytes 8 files changed, 51 insertions(+) create mode 100644 tests/_sh/checks/initializer_nested.c create mode 100644 tests/_sh/checks/initializer_nested.golden create mode 100644 tests/_sh/checks/initializer_too_large.c create mode 100644 tests/_sh/checks/initializer_too_large.golden create mode 100644 tests/_sh/checks/initializer_too_large_str.c create mode 100644 tests/_sh/checks/initializer_too_large_str.golden create mode 100644 tests/_sh/initializer.c create mode 100644 
tests/_sh/initializer.golden diff --git a/tests/_sh/checks/initializer_nested.c b/tests/_sh/checks/initializer_nested.c new file mode 100644 index 00000000..ca6a5f4f --- /dev/null +++ b/tests/_sh/checks/initializer_nested.c @@ -0,0 +1,6 @@ +// expect_comp_failure +int arr[] = { {1, 2, 3}, {4, 5, 6} }; + +void main() { + return; +} diff --git a/tests/_sh/checks/initializer_nested.golden b/tests/_sh/checks/initializer_nested.golden new file mode 100644 index 00000000..292864bc --- /dev/null +++ b/tests/_sh/checks/initializer_nested.golden @@ -0,0 +1 @@ +tests/_sh/checks/initializer_nested.c:2:14 nested initializer lists not supported diff --git a/tests/_sh/checks/initializer_too_large.c b/tests/_sh/checks/initializer_too_large.c new file mode 100644 index 00000000..f7fe8690 --- /dev/null +++ b/tests/_sh/checks/initializer_too_large.c @@ -0,0 +1,6 @@ +// expect_comp_failure +int arr[2] = {1, 2, 3}; + +void main() { + return; +} diff --git a/tests/_sh/checks/initializer_too_large.golden b/tests/_sh/checks/initializer_too_large.golden new file mode 100644 index 00000000..c763a0d3 --- /dev/null +++ b/tests/_sh/checks/initializer_too_large.golden @@ -0,0 +1 @@ +tests/_sh/checks/initializer_too_large.c:3:0 Array type is too small for initializer diff --git a/tests/_sh/checks/initializer_too_large_str.c b/tests/_sh/checks/initializer_too_large_str.c new file mode 100644 index 00000000..b27f756d --- /dev/null +++ b/tests/_sh/checks/initializer_too_large_str.c @@ -0,0 +1,6 @@ +// expect_comp_failure +char arr[2] = "ab"; + +void main() { + return; +} diff --git a/tests/_sh/checks/initializer_too_large_str.golden b/tests/_sh/checks/initializer_too_large_str.golden new file mode 100644 index 00000000..c800f564 --- /dev/null +++ b/tests/_sh/checks/initializer_too_large_str.golden @@ -0,0 +1 @@ +tests/_sh/checks/initializer_too_large_str.c:3:0 Array type is too small for initializer diff --git a/tests/_sh/initializer.c b/tests/_sh/initializer.c new file mode 100644 index 
00000000..efa96abb --- /dev/null +++ b/tests/_sh/initializer.c @@ -0,0 +1,30 @@ +char str1[6] = "abcde"; +char str2[12] = "abcdef"; +char str3[] = "abcdef\t\t\t\t\t\t"; + +int arr1[5] = { 1, 2, 3, 'a', 'b' }; +int arr2[15] = { 1, 2, 3, 'a', 'b' }; + +void main() { + int i; + + for (i = 0; i < 6; i++) { + printf("%c\n", str1[i]); + } + + for (i = 0; i < 12; i++) { + printf("%c\n", str2[i]); + } + + for (i = 0; i < 13; i++) { + printf("%c\n", str3[i]); + } + + for (i = 0; i < 5; i++) { + printf("%d\n", arr1[i]); + } + + for (i = 0; i < 15; i++) { + printf("%d\n", arr2[i]); + } +} diff --git a/tests/_sh/initializer.golden b/tests/_sh/initializer.golden new file mode 100644 index 0000000000000000000000000000000000000000..1c4d86c416d26011b65715f92eb63705a25af5af GIT binary patch literal 106 ycmYe~O5#f9O5sZ7V&F=Iv(tb)1Vk3+ Date: Thu, 30 Jan 2025 13:17:37 -0500 Subject: [PATCH 29/89] Remove size from var binding, simplify env lookup --- env.c | 34 +++++++------- exe.c | 139 ++++++++++++++++++++++++++++++---------------------------- 2 files changed, 88 insertions(+), 85 deletions(-) diff --git a/env.c b/env.c index c57c057d..78becc2f 100644 --- a/env.c +++ b/env.c @@ -115,24 +115,23 @@ int cgc_loop_depth(int binding) { return loop_depth; } -int cgc_add_local(enum BINDING binding_type, int ident, int size, ast type, int env) { - int binding = alloc_obj(6); +int cgc_add_local(enum BINDING binding_type, int ident, ast type, int env) { + int binding = alloc_obj(5); heap[binding+0] = env; heap[binding+1] = binding_type; heap[binding+2] = ident; - heap[binding+3] = size; - heap[binding+4] = cgc_fs; - heap[binding+5] = type; + heap[binding+3] = cgc_fs; + heap[binding+4] = type; return binding; } #ifdef sh void cgc_add_local_var(enum BINDING binding_type, int ident, ast type) { cgc_fs += 1; - cgc_locals = cgc_add_local(binding_type, ident, 1, type, cgc_locals); + cgc_locals = cgc_add_local(binding_type, ident, type, cgc_locals); // Add to cgc_locals_fun as well, if not already 
there if (cgc_lookup_var(ident, cgc_locals_fun) == 0) { - cgc_locals_fun = cgc_add_local(binding_type, ident, 1, type, cgc_locals_fun); + cgc_locals_fun = cgc_add_local(binding_type, ident, type, cgc_locals_fun); } } @@ -153,14 +152,14 @@ void cgc_add_enclosing_switch(bool in_tail_position) { cgc_locals = binding; } #else -void cgc_add_local_param(int ident, int size, ast type) { - cgc_locals = cgc_add_local(BINDING_PARAM_LOCAL, ident, size, type, cgc_locals); - cgc_fs -= size; +void cgc_add_local_param(int ident, int width, ast type) { + cgc_locals = cgc_add_local(BINDING_PARAM_LOCAL, ident, type, cgc_locals); + cgc_fs -= width; } -void cgc_add_local_var(int ident, int size, ast type) { - cgc_fs += size; - cgc_locals = cgc_add_local(BINDING_VAR_LOCAL, ident, size, type, cgc_locals); +void cgc_add_local_var(int ident, int width, ast type) { + cgc_fs += width; + cgc_locals = cgc_add_local(BINDING_VAR_LOCAL, ident, type, cgc_locals); } void cgc_add_enclosing_loop(int loop_fs, int break_lbl, ast continue_lbl) { @@ -183,14 +182,13 @@ void cgc_add_enclosing_switch(int loop_fs, int break_lbl, int next_case_lbl) { cgc_locals = binding; } -void cgc_add_global(int ident, int size, int width, ast type) { - int binding = alloc_obj(6); +void cgc_add_global(int ident, int width, ast type) { + int binding = alloc_obj(5); heap[binding+0] = cgc_globals; heap[binding+1] = BINDING_VAR_GLOBAL; heap[binding+2] = ident; - heap[binding+3] = size; - heap[binding+4] = cgc_global_alloc; - heap[binding+5] = type; + heap[binding+3] = cgc_global_alloc; + heap[binding+4] = type; cgc_global_alloc += width; cgc_globals = binding; } diff --git a/exe.c b/exe.c index 8481ce5c..1dde5a66 100644 --- a/exe.c +++ b/exe.c @@ -537,7 +537,7 @@ ast canonicalize_type(ast type) { if (binding == 0) { putstr("type="); putstr(STRING_BUF(get_val_(IDENTIFIER, get_child(type, 1)))); putchar('\n'); - fatal_error("canonicalize_type: Type is not defined"); + fatal_error("canonicalize_type: type is not defined"); } res 
= heap[binding+3]; @@ -658,6 +658,20 @@ ast dereference_type(ast type) { } } +int resolve_identifier(int ident_probe) { + int binding = cgc_lookup_var(ident_probe, cgc_locals); + if (binding != 0) return binding; + + binding = cgc_lookup_var(ident_probe, cgc_globals); + if (binding != 0) return binding; + + binding = cgc_lookup_enum_value(ident_probe, cgc_globals); + if (binding != 0) return binding; + + putstr("ident = "); putstr(string_pool + probe_string(ident_probe)); putchar('\n'); + fatal_error("identifier not found"); +} + // Compute the type of an expression ast value_type(ast node) { int op = get_op(node); @@ -679,26 +693,23 @@ ast value_type(ast node) { return string_type; } else if (op == IDENTIFIER) { ident = get_val_(IDENTIFIER, node); - binding = cgc_lookup_var(ident, cgc_locals); - if (binding != 0) { - return heap[binding+5]; - } else { - binding = cgc_lookup_var(ident, cgc_globals); - if (binding != 0) { - return heap[binding+5]; - } else { - binding = cgc_lookup_enum_value(ident, cgc_globals); - if (binding != 0) { - return int_type; // Enums are always integers - } else { - putstr("ident = "); - putstr(string_pool + probe_string(ident)); - putchar('\n'); - fatal_error("value_type: identifier not found"); - return -1; - } - } + binding = resolve_identifier(ident); + switch (binding_kind(binding)) { + case BINDING_PARAM_LOCAL: + case BINDING_VAR_LOCAL: + return heap[binding+4]; + case BINDING_VAR_GLOBAL: + return heap[binding+4]; + case BINDING_ENUM_CST: + return int_type; + default: + putstr("ident = "); + putstr(string_pool + probe_string(ident)); + putchar('\n'); + fatal_error("value_type: unknown identifier"); + return -1; } + } else { putstr("op="); putint(op); putchar('\n'); fatal_error("value_type: unknown expression with nb_children == 0"); @@ -1003,22 +1014,24 @@ int codegen_lvalue(ast node) { if (nb_children == 0) { if (op == IDENTIFIER) { - binding = cgc_lookup_var(get_val_(IDENTIFIER, node), cgc_locals); - if (binding != 0) { - 
mov_reg_imm(reg_X, (cgc_fs - heap[binding+4]) * word_size); - add_reg_reg(reg_X, reg_SP); - push_reg(reg_X); - } else { - binding = cgc_lookup_var(get_val_(IDENTIFIER, node), cgc_globals); - if (binding != 0) { - mov_reg_imm(reg_X, heap[binding+4]); + binding = resolve_identifier(get_val_(IDENTIFIER, node)); + switch (binding_kind(binding)) { + case BINDING_PARAM_LOCAL: + case BINDING_VAR_LOCAL: + mov_reg_imm(reg_X, (cgc_fs - heap[binding+3]) * word_size); + add_reg_reg(reg_X, reg_SP); + push_reg(reg_X); + break; + case BINDING_VAR_GLOBAL: + mov_reg_imm(reg_X, heap[binding+3]); add_reg_reg(reg_X, reg_glo); push_reg(reg_X); - } else { + break; + default: fatal_error("codegen_lvalue: identifier not found"); - } + break; } - lvalue_width = type_width(heap[binding+5], true, true); + lvalue_width = type_width(heap[binding+4], true, true); } else { putstr("op="); putint(op); putchar('\n'); fatal_error("codegen_lvalue: unknown lvalue with nb_children == 0"); @@ -1121,7 +1134,6 @@ void codegen_rvalue(ast node) { int op = get_op(node); int nb_children = get_nb_children(node); int binding; - int ident; int lbl1, lbl2; int left_width; ast type1, type2; @@ -1138,36 +1150,36 @@ void codegen_rvalue(ast node) { mov_reg_imm(reg_X, get_val_(CHARACTER, node)); push_reg(reg_X); } else if (op == IDENTIFIER) { - ident = get_val_(IDENTIFIER, node); - binding = cgc_lookup_var(ident, cgc_locals); - if (binding != 0) { - mov_reg_imm(reg_X, (cgc_fs - heap[binding+4]) * word_size); - add_reg_reg(reg_X, reg_SP); - // local arrays/structs/unions are allocated on the stack, so no need to dereference - if (get_op(heap[binding+5]) != '[' && get_op(heap[binding+5]) != STRUCT_KW && get_op(heap[binding+5]) != UNION_KW) { - mov_reg_mem(reg_X, reg_X, 0); - } - push_reg(reg_X); - } else { - binding = cgc_lookup_var(ident, cgc_globals); - if (binding != 0) { - mov_reg_imm(reg_X, heap[binding+4]); + binding = resolve_identifier(get_val_(IDENTIFIER, node)); + switch (binding_kind(binding)) { + case 
BINDING_PARAM_LOCAL: + case BINDING_VAR_LOCAL: + mov_reg_imm(reg_X, (cgc_fs - heap[binding+3]) * word_size); + add_reg_reg(reg_X, reg_SP); + // local arrays/structs/unions are allocated on the stack, so no need to dereference + if (get_op(heap[binding+4]) != '[' && get_op(heap[binding+4]) != STRUCT_KW && get_op(heap[binding+4]) != UNION_KW) { + mov_reg_mem(reg_X, reg_X, 0); + } + push_reg(reg_X); + break; + case BINDING_VAR_GLOBAL: + mov_reg_imm(reg_X, heap[binding+3]); add_reg_reg(reg_X, reg_glo); // global arrays/structs/unions are also allocated on the stack, so no need to dereference - if (get_op(heap[binding+5]) != '[' && get_op(heap[binding+5]) != STRUCT_KW && get_op(heap[binding+5]) != UNION_KW) { + if (get_op(heap[binding+4]) != '[' && get_op(heap[binding+4]) != STRUCT_KW && get_op(heap[binding+4]) != UNION_KW) { mov_reg_mem(reg_X, reg_X, 0); } push_reg(reg_X); - } else { - binding = cgc_lookup_enum_value(ident, cgc_globals); - if (binding != 0) { - mov_reg_imm(reg_X, -get_val_(INTEGER, heap[binding+3])); - push_reg(reg_X); - } else { - putstr("ident = "); putstr(string_pool + probe_string(ident)); putchar('\n'); - fatal_error("codegen_rvalue: identifier not found"); - } - } + break; + case BINDING_ENUM_CST: + mov_reg_imm(reg_X, -get_val_(INTEGER, heap[binding+3])); + push_reg(reg_X); + break; + + default: + putstr("ident = "); putstr(string_pool + probe_string(get_val_(IDENTIFIER, node))); putchar('\n'); + fatal_error("codegen_rvalue: identifier not found"); + break; } } else if (op == STRING) { codegen_string(get_val_(STRING, node)); @@ -1506,15 +1518,8 @@ void codegen_glo_var_decl(ast node) { ast type = get_child_(DECL, node, 1); ast init = get_child_(DECL, node, 2); int name_probe = get_val_(IDENTIFIER, name); - int size; int binding = cgc_lookup_var(name_probe, cgc_globals); - if (get_op(type) == '[') { // Array declaration - size = get_child_('[', type, 0); - } else { - // All non-array types have size 1 - size = 1; - } if (get_op(type) == '(') { // 
Forward declaration binding = cgc_lookup_fun(name_probe, cgc_globals); @@ -1524,7 +1529,7 @@ void codegen_glo_var_decl(ast node) { handle_enum_struct_union_type_decl(type); if (binding == 0) { - cgc_add_global(name_probe, size, type_width(type, true, true), type); + cgc_add_global(name_probe, type_width(type, true, true), type); binding = cgc_globals; } @@ -1544,7 +1549,7 @@ void codegen_glo_var_decl(ast node) { pop_reg(reg_X); grow_fs(-1); - mov_mem_reg(reg_glo, heap[binding+4], reg_X); + mov_mem_reg(reg_glo, heap[binding+3], reg_X); jump(init_next_lbl); } From cb372cf8e9876e8cbcad7c78e55a1ac416dce468 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Fri, 31 Jan 2025 15:27:14 -0500 Subject: [PATCH 30/89] Support array/struct initializers in pnut-exe --- exe.c | 220 +++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 193 insertions(+), 27 deletions(-) diff --git a/exe.c b/exe.c index 1dde5a66..14860f32 100644 --- a/exe.c +++ b/exe.c @@ -152,6 +152,18 @@ void copy_obj(int dst_base, int dst_offset, int src_base, int src_offset, int wi } } +// Initialize a memory location with a value +void initialize_memory(int val, int base, int offset, int width) { + int i; + mov_reg_imm(reg_Z, val); + for (i = 0; i < width / word_size; i += 1) { + mov_mem_reg(base, offset + i * word_size, reg_Z); + } + for (i = width - width % word_size; i < width; i += 1) { + mov_mem8_reg(base, offset + i, reg_Z); + } +} + int is_power_of_2(int n) { return n != 0 && (n & (n - 1)) == 0; } @@ -670,6 +682,7 @@ int resolve_identifier(int ident_probe) { putstr("ident = "); putstr(string_pool + probe_string(ident_probe)); putchar('\n'); fatal_error("identifier not found"); + return 0; } // Compute the type of an expression @@ -1513,6 +1526,174 @@ void handle_enum_struct_union_type_decl(ast type) { // If not an enum, struct, or union, do nothing } +void codegen_initializer_string(int string_probe, ast type, int base_reg, int offset) { + char *string_start = string_pool + 
heap[string_probe + 1]; + int i = 0; + int str_len = heap[string_probe + 4]; + int arr_len; + + // Only acceptable types are char[] or char* + if (get_op(type) == '[' && get_op(get_child_('[', type, 0)) == CHAR_KW) { + arr_len = get_child_('[', type, 1); + if (str_len >= arr_len) fatal_error("codegen_initializer: string initializer is too long for char[]"); + + // Place the bytes of the string in the memory location allocated for the array + for (; i < arr_len; i += 1) { + mov_reg_imm(reg_X, i < str_len ? string_start[i] : 0); + write_mem_location(base_reg, offset + i, reg_X, 1); + } + } else if (get_op(type) == '*' && get_op(get_child_('*', type, 1)) == CHAR_KW) { + // Create the string and assign global variable to the pointer + codegen_string(string_probe); + pop_reg(reg_X); + write_mem_location(base_reg, offset, reg_X, word_size); + } else { + fatal_error("codegen_initializer: string initializer must be assigned to a char[] or char*"); + } +} + +// Initialize a variable with an initializer +void codegen_initializer(bool local, ast init, ast type, int base_reg, int offset, bool in_array) { + ast members; + ast inner_type; + int arr_len; + int inner_type_width; + + type = canonicalize_type(type); + + // printf("codegen_initializer: init = %d, type = %d, in_array = %d offset = %d\n", get_op(init), get_op(type), in_array, offset); + + switch (get_op(init)) { + case STRING: + codegen_initializer_string(get_val_(STRING, init), type, base_reg, offset); + break; + + case INITIALIZER_LIST: + init = get_child_(INITIALIZER_LIST, init, 0); + // Acceptable types are: + // arrays + // structs + // union (if the initializer list has only one element) + // scalars (if the initializer list has only one element) + switch (get_op(type)) { + case '[': + inner_type = get_child_('[', type, 0); + arr_len = get_child_('[', type, 1); + inner_type_width = type_width(get_child_('[', type, 0), true, false); + + while (init != 0 && arr_len != 0) { + codegen_initializer(local, 
get_child_(',', init, 0), inner_type, base_reg, offset, true); + offset += inner_type_width; + init = get_child_opt_(',', ',', init, 1); + arr_len -= 1; // decrement the number of elements left to initialize to make sure we don't overflow + } + + if (init != 0) { + fatal_error("codegen_initializer: too many elements in initializer list"); + } + + // If there are still elements to initialize, set them to 0. + // If it's not a local variable, we don't need to initialize the + // memory since the stack is zeroed during setup. + if (local && arr_len > 0) initialize_memory(0, base_reg, offset, inner_type_width * arr_len); + break; + + case STRUCT_KW: + members = get_child_(STRUCT_KW, type, 2); + while (init != 0 && members != 0) { + inner_type = get_child_(DECL, get_child__(',', DECL, members, 0), 1); + codegen_initializer(local, get_child_(',', init, 0), inner_type, base_reg, offset, false); + offset += type_width(inner_type, true, true); + init = get_child_opt_(',', ',', init, 1); + members = get_child_opt_(',', ',', members, 1); + } + + // Initialize rest of the members to 0 + while (local && members != 0) { + inner_type = get_child_(DECL, get_child__(',', DECL, members, 0), 1); + initialize_memory(0, base_reg, offset, type_width(inner_type, true, true)); + offset += type_width(inner_type, true, true); + members = get_child_opt_(',', ',', members, 1); + } + break; + + case UNION_KW: + members = get_child_(STRUCT_KW, type, 2); + if (get_child_opt_(',', ',', init, 1) != 0) { + fatal_error("codegen_initializer: union initializer list has more than one element"); + } else if (members == 0) { + fatal_error("codegen_initializer: union has no members"); + } + codegen_initializer(local, get_child_(',', init, 0), get_child_(DECL, get_child__(',', DECL, members, 0), 1), base_reg, offset, false); + break; + + default: + if (get_child_opt_(',', ',', init, 1) != 0 // More than 1 element + || get_op(get_child_(',', init, 0)) == INITIALIZER_LIST) { // Or nested initializer list + 
fatal_error("codegen_initializer: scalar initializer list has more than one element"); + } + codegen_rvalue(get_child_(',', init, 0)); + pop_reg(reg_X); + grow_fs(-1); + write_mem_location(base_reg, offset, reg_X, type_width(type, true, in_array)); + break; + } + + break; + + default: + if (is_struct_or_union_type(type)) { + // Struct assignment, we copy the struct. + codegen_lvalue(init); + pop_reg(reg_X); + grow_fs(-1); + copy_obj(base_reg, offset, reg_X, 0, type_width(type, true, true)); + } else if (get_op(type) != '[') { + codegen_rvalue(init); + pop_reg(reg_X); + grow_fs(-1); + write_mem_location(base_reg, offset, reg_X, type_width(type, true, in_array)); + } else { + fatal_error("codegen_initializer: cannot initialize array with scalar value"); + } + break; + } +} + +// Return size of initializer. +// If it's an initializer list, return the number of elements +// If it's a string, return the length of the string and delimiter. +int initializer_size(ast initializer) { + int size = 0; + + switch (get_op(initializer)) { + case INITIALIZER_LIST: + initializer = get_child_(INITIALIZER_LIST, initializer, 0); + while (initializer != 0) { + size += 1; + initializer = get_child_opt_(',', ',', initializer, 1); + } + return size; + + case STRING: + return heap[get_val_(STRING, initializer) + 4] + 1; + + default: + fatal_error("initializer_size: unknown initializer"); + return -1; + } +} + +void infer_array_length(ast type, ast init) { + // Array declaration with no size + if (get_op(type) == '[' && get_child_('[', type, 1) == 0) { + if (init == 0) { + fatal_error("Array declaration with no size must have an initializer"); + } + set_child(type, 1, initializer_size(init)); + } +} + void codegen_glo_var_decl(ast node) { ast name = get_child__(DECL, IDENTIFIER, node, 0); ast type = get_child_(DECL, node, 1); @@ -1527,30 +1708,17 @@ void codegen_glo_var_decl(ast node) { } else { handle_enum_struct_union_type_decl(type); + infer_array_length(type, init); if (binding == 0) { 
cgc_add_global(name_probe, type_width(type, true, true), type); binding = cgc_globals; } - if (get_op(type) != '[') { // not array declaration - + if (init != 0) { def_label(init_next_lbl); init_next_lbl = alloc_label("init_next"); - - if (init != 0) { - codegen_rvalue(init); - } else { - xor_reg_reg(reg_X, reg_X); - push_reg(reg_X); - grow_fs(1); - } - - pop_reg(reg_X); - grow_fs(-1); - - mov_mem_reg(reg_glo, heap[binding+3], reg_X); - + codegen_initializer(false, init, type, reg_glo, heap[binding + 3], false); // heap[binding + 3] = offset jump(init_next_lbl); } } @@ -1562,23 +1730,21 @@ void codegen_local_var_decl(ast node) { ast init = get_child_(DECL, node, 2); int size; + infer_array_length(type, init); + if (is_aggregate_type(type)) { // Array/struct/union declaration - size = type_width(type, true, true); // size in bytes (word aligned) - grow_stack_bytes(size); - size /= word_size; // size in words + size = type_width(type, true, true) / word_size; // size in bytes (word aligned) } else { - // All non-array types are represented as a word, even if they are smaller - if (init != 0) { - codegen_rvalue(init); - grow_fs(-1); - } else { - xor_reg_reg(reg_X, reg_X); - push_reg(reg_X); - } size = 1; } cgc_add_local_var(get_val_(IDENTIFIER, name), size, type); + grow_stack(size); // Make room for the local variable + + if (init != 0) { + // offset (cgc_fs - heap[cgc_locals + 3]) should be 0 since we just allocated the space + codegen_initializer(true, init, type, reg_SP, 0, false); + } } void codegen_body(ast node) { From dd85b94a4bf2510926d1d3aeb0d36f3f1dbba078 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Fri, 31 Jan 2025 15:27:26 -0500 Subject: [PATCH 31/89] Add tests --- tests/_exe/initializers-global.c | 160 ++++++++++++++++++++++++++ tests/_exe/initializers-global.golden | 27 +++++ tests/_exe/initializers-local.c | 110 ++++++++++++++++++ tests/_exe/initializers-local.golden | 28 +++++ 4 files changed, 325 insertions(+) create mode 100644 
tests/_exe/initializers-global.c create mode 100644 tests/_exe/initializers-global.golden create mode 100644 tests/_exe/initializers-local.c create mode 100644 tests/_exe/initializers-local.golden diff --git a/tests/_exe/initializers-global.c b/tests/_exe/initializers-global.c new file mode 100644 index 00000000..71fa5a66 --- /dev/null +++ b/tests/_exe/initializers-global.c @@ -0,0 +1,160 @@ +// Test initializers for global variables + +#include + +#ifdef PNUT_CC +// pnut does not support unsigned and we want gcc to use unsigned chars +#define unsigned +#endif + +struct S1 { + int a; + int b; +}; + +struct S2 { + struct S1 s[3]; + int b; +}; + +void putint_aux(int n) { + if (n <= -10) putint_aux(n / 10); + putchar('0' - (n % 10)); +} + +void putint(int n) { + if (n < 0) { + putchar('-'); + putint_aux(n); + } else { + putint_aux(-n); + } +} + +void print_chars(unsigned char* str, int len) { + int i = 0; + while (i < len) { + putchar('\''); putint(str[i]); putchar('\''); putchar(' '); + i++; + } +} + +void print_ints(int* arr, int len) { + int i = 0; + while (i < len) { + putint(arr[i]); putchar(' '); + i++; + } +} + +void print_s1(struct S1 s) { + putint(s.a); putchar(' '); putint(s.b); putchar('\n'); +} + +void print_s2(struct S2 s) { + int i = 0; + while (i < 3) { + print_s1(s.s[i]); + i++; + } + putint(s.b); putchar('\n'); +} + +// Copied from tccpp.c +#define TOK_LAND 0x90 +#define TOK_LOR 0x91 +#define TOK_EQ 0x94 +#define TOK_NE 0x95 +#define TOK_GE 0x9d +#define TOK_LE 0x9e + +#define TOK_DEC 0x80 /* -- */ +#define TOK_INC 0x82 /* ++ */ +#define TOK_SHL '<' /* shift left */ +#define TOK_SAR '>' /* signed shift right */ + +#define TOK_A_ADD 0xb0 +#define TOK_A_SUB 0xb1 +#define TOK_A_MUL 0xb2 +#define TOK_A_DIV 0xb3 +#define TOK_A_MOD 0xb4 +#define TOK_A_AND 0xb5 +#define TOK_A_OR 0xb6 +#define TOK_A_XOR 0xb7 +#define TOK_ARROW 0xa0 /* -> */ +#define TOK_TWODOTS 0xa2 /* C++ token ? 
*/ +#define TOK_TWOSHARPS 0xa3 /* ## preprocessing token */ + +static const unsigned char tok_two_chars[] = { + '<','=', TOK_LE, + '>','=', TOK_GE, + '!','=', TOK_NE, + '&','&', TOK_LAND, + '|','|', TOK_LOR, + '+','+', TOK_INC, + '-','-', TOK_DEC, + '=','=', TOK_EQ, + '<','<', TOK_SHL, + '>','>', TOK_SAR, + '+','=', TOK_A_ADD, + '-','=', TOK_A_SUB, + '*','=', TOK_A_MUL, + '/','=', TOK_A_DIV, + '%','=', TOK_A_MOD, + '&','=', TOK_A_AND, + '^','=', TOK_A_XOR, + '|','=', TOK_A_OR, + '-','>', TOK_ARROW, + '.','.', TOK_TWODOTS, + '#','#', TOK_TWOSHARPS, + 0 +}; + +int arr[4] = {1, 2, 3, 4}; +int arr_partial[4] = {1, 0xcc}; // Rest should be 0 +int arr_inferred[] = {1, 2, 3, 4}; +int scalar = {42}; // This is a scalar, not an array, it should be + +char no_size_arr1[] = {1, 12343141, 3, 4, 12321}; +char no_size_arr2[] = "abcde"; + +struct S1 struct1 = { 1, 2 }; +struct S1 struct_partial = { 1, }; // Note trailing comma +struct S1 global_struct_scalar = { { 1 }, 2 }; +struct S1 inferred_struct[] = { { 4321, 123 }, { 12000, 110 } }; +struct S1 struct_arr[3] = { { 42, 39 }, { 32, 23 } }; + +struct S2 struct2 = { { { 123, 432131 }, { 4311, 53141 }, { 5311, 421313 } }, 2131321 }; +struct S2 struct2_partial1 = { { { 12321, 21321 } }, 421321431 }; // Nested array is partially initialized +struct S2 struct2_partial2 = { { { 231321, 4531321 } }, }; // Outer struct is partially initialized + +void test_global_initializers() { + print_chars(tok_two_chars, 64); putchar('\n'); + + print_ints(arr, 4); putchar('\n'); + print_ints(arr_partial, 4); putchar('\n'); + print_ints(arr_inferred, 4); putchar('\n'); + putint(scalar); putchar('\n'); + + print_chars(no_size_arr1, 5); putchar('\n'); + print_chars(no_size_arr2, 6); putchar('\n'); + + print_s1(struct1); + print_s1(struct_partial); + print_s1(global_struct_scalar); + print_s1(inferred_struct[0]); + print_s1(inferred_struct[1]); + print_s1(struct_arr[0]); + print_s1(struct_arr[1]); + print_s1(struct_arr[2]); + + print_s2(struct2); + 
print_s2(struct2_partial1); + print_s2(struct2_partial2); +} + +int main() { + test_global_initializers(); + + return 0; +} diff --git a/tests/_exe/initializers-global.golden b/tests/_exe/initializers-global.golden new file mode 100644 index 00000000..c69bf829 --- /dev/null +++ b/tests/_exe/initializers-global.golden @@ -0,0 +1,27 @@ +'60' '61' '158' '62' '61' '157' '33' '61' '149' '38' '38' '144' '124' '124' '145' '43' '43' '130' '45' '45' '128' '61' '61' '148' '60' '60' '60' '62' '62' '62' '43' '61' '176' '45' '61' '177' '42' '61' '178' '47' '61' '179' '37' '61' '180' '38' '61' '181' '94' '61' '183' '124' '61' '182' '45' '62' '160' '46' '46' '162' '35' '35' '163' '0' +1 2 3 4 +1 204 0 0 +1 2 3 4 +42 +'1' '101' '3' '4' '33' +'97' '98' '99' '100' '101' '0' +1 2 +1 0 +1 2 +4321 123 +12000 110 +42 39 +32 23 +0 0 +123 432131 +4311 53141 +5311 421313 +2131321 +12321 21321 +0 0 +0 0 +421321431 +231321 4531321 +0 0 +0 0 +0 diff --git a/tests/_exe/initializers-local.c b/tests/_exe/initializers-local.c new file mode 100644 index 00000000..20df3e35 --- /dev/null +++ b/tests/_exe/initializers-local.c @@ -0,0 +1,110 @@ + +#include + +struct S1 { + int a; + int b; +}; + +struct S2 { + struct S1 s[3]; + int b; +}; + +void putint_aux(int n) { + if (n <= -10) putint_aux(n / 10); + putchar('0' - (n % 10)); +} + +void putint(int n) { + if (n < 0) { + putchar('-'); + putint_aux(n); + } else { + putint_aux(-n); + } +} + +void print_chars(char* str, int len) { + int i = 0; + while (i < len) { + putchar('\''); putint(str[i]); putchar('\''); putchar(' '); + i++; + } +} + +void print_ints(int* arr, int len) { + int i = 0; + while (i < len) { + putint(arr[i]); putchar(' '); + i++; + } +} + +void print_s1(struct S1 s) { + putint(s.a); putchar(' '); putint(s.b); putchar('\n'); +} + +void print_s2(struct S2 s) { + int i = 0; + while (i < 3) { + print_s1(s.s[i]); + i++; + } + putint(s.b); putchar('\n'); +} + +void test_local_initializers() { + // I know assignment between 2 structs copies the 
fields from one to the other + // But what happens if assignment is inside rvalue? + struct S1 s1 = { 13, 29 }; + struct S1 s2 = s1; + + int arr[4] = {1, 2, 3, 4}; + int arr_partial[4] = {1, 0xcc}; // Rest should be 0 + int arr_inferred[] = {1, 2, 3, 4}; + int scalar = {42}; // This is a scalar, not an array, it should be + + char no_size_arr1[] = {1, 12343141, 3, 4, 12321}; + char no_size_arr2[] = "abcde"; + + struct S1 struct1 = { 1, 2 }; + struct S1 struct_partial = { 1, }; // Note trailing comma + struct S1 global_struct_scalar = { { 1 }, 2 }; + struct S1 inferred_struct[] = { { 4321, 123 }, { 12000, 110 } }; + struct S1 struct_arr[3] = { { 42, 39 }, { 32, 23 } }; + + struct S2 struct2 = { { { 123, 432131 }, { 4311, 53141 }, { 5311, 421313 } }, 2131321 }; + struct S2 struct2_partial1 = { { { 12321, 21321 } }, 421321431 }; // Nested array is partially initialized + struct S2 struct2_partial2 = { { { 231321, 4531321 } }, }; // Outer struct is partially initialized + + print_s1(s1); + print_s1(s2); + + print_ints(arr, 4); putchar('\n'); + print_ints(arr_partial, 4); putchar('\n'); + print_ints(arr_inferred, 4); putchar('\n'); + putint(scalar); putchar('\n'); + + print_chars(no_size_arr1, 5); putchar('\n'); + print_chars(no_size_arr2, 6); putchar('\n'); + + print_s1(struct1); + print_s1(struct_partial); + print_s1(global_struct_scalar); + print_s1(inferred_struct[0]); + print_s1(inferred_struct[1]); + print_s1(struct_arr[0]); + print_s1(struct_arr[1]); + print_s1(struct_arr[2]); + + print_s2(struct2); + print_s2(struct2_partial1); + print_s2(struct2_partial2); +} + +int main() { + test_local_initializers(); + + return 0; +} \ No newline at end of file diff --git a/tests/_exe/initializers-local.golden b/tests/_exe/initializers-local.golden new file mode 100644 index 00000000..db9ccc4e --- /dev/null +++ b/tests/_exe/initializers-local.golden @@ -0,0 +1,28 @@ +13 29 +13 29 +1 2 3 4 +1 204 0 0 +1 2 3 4 +42 +'1' '101' '3' '4' '33' +'97' '98' '99' '100' '101' '0' +1 2 +1 
0 +1 2 +4321 123 +12000 110 +42 39 +32 23 +0 0 +123 432131 +4311 53141 +5311 421313 +2131321 +12321 21321 +0 0 +0 0 +421321431 +231321 4531321 +0 0 +0 0 +0 From cda58e0eb2c4497b4d00336bbe2e21075e767a24 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Fri, 31 Jan 2025 15:40:01 -0500 Subject: [PATCH 32/89] Don't treat empty fun decls as forward decl --- exe.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/exe.c b/exe.c index 14860f32..cebaa6a2 100644 --- a/exe.c +++ b/exe.c @@ -2020,7 +2020,6 @@ void codegen_glo_fun_decl(ast node) { ast fun_type = get_child__(DECL, '(', decl, 1); ast params = get_child_opt_('(', ',', fun_type, 1); ast fun_return_type = get_child_('(', fun_type, 0); - int lbl; int binding; int save_locals_fun = cgc_locals_fun; @@ -2037,29 +2036,24 @@ void codegen_glo_fun_decl(ast node) { binding = cgc_lookup_fun(name_probe, cgc_globals); if (binding == 0) { - lbl = alloc_label(STRING_BUF(name_probe)); - cgc_add_global_fun(name_probe, lbl, fun_type); + cgc_add_global_fun(name_probe, alloc_label(STRING_BUF(name_probe)), fun_type); binding = cgc_globals; } - if (body != 0) { // 0 is empty body - - lbl = heap[binding+4]; + def_label(heap[binding+4]); - def_label(lbl); + cgc_fs = -1; // space for return address + cgc_locals = 0; + add_params(params); + cgc_fs = 0; - cgc_fs = -1; // space for return address - cgc_locals = 0; - add_params(params); - cgc_fs = 0; + codegen_body(body); - codegen_body(body); + grow_stack(-cgc_fs); + cgc_fs = 0; - grow_stack(-cgc_fs); - cgc_fs = 0; + ret(); - ret(); - } cgc_locals_fun = save_locals_fun; } From a1f65dbe636539f57fc4126ee3123cc4a93d308f Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Fri, 31 Jan 2025 19:20:29 -0500 Subject: [PATCH 33/89] Simplify unpack_escaped_string, update examples --- examples/compiled/base64.sh | 14 +++++++++----- examples/compiled/c4.sh | 14 +++++++++----- examples/compiled/repl.sh | 14 +++++++++----- examples/compiled/sha256sum.sh | 
14 +++++++++----- sh-runtime.c | 3 +-- 5 files changed, 37 insertions(+), 22 deletions(-) diff --git a/examples/compiled/base64.sh b/examples/compiled/base64.sh index 0426e1db..5ec7b154 100755 --- a/examples/compiled/base64.sh +++ b/examples/compiled/base64.sh @@ -14,11 +14,12 @@ defarr() { _malloc $1 $2; } defarr _buf 1024 -unpack_escaped_string() { +unpack_escaped_string() { # $1 = string, $2 = size (optional) __buf="$1" # Allocates enough space for all characters, assuming that no character is escaped - _malloc __addr $((${#__buf} + 1)) + _malloc __addr $((${2:-${#__buf} + 1})) __ptr=$__addr + __end=$((__ptr + ${2:-${#__buf} + 1})) # End of allocated memory while [ -n "$__buf" ] ; do case "$__buf" in '\'*) @@ -49,16 +50,19 @@ unpack_escaped_string() { : $((_$__ptr = __c)) : $((__ptr += 1)) done - : $((_$__ptr = 0)) + while [ $__ptr -le $__end ]; do + : $((_$__ptr = 0)) + : $((__ptr += 1)) + done } # Define a string, and return a reference to it in the varible taken as argument. # If the variable is already defined, this function does nothing. # Note that it's up to the caller to ensure that no 2 strings share the same variable. 
-defstr() { # $1 = variable name, $2 = string +defstr() { # $1 = variable name, $2 = string, $3 = size (optional) set +u # Necessary to allow the variable to be empty if [ $(($1)) -eq 0 ]; then - unpack_escaped_string "$2" + unpack_escaped_string "$2" $3 : $(($1 = __addr)) fi set -u diff --git a/examples/compiled/c4.sh b/examples/compiled/c4.sh index 2308fc3b..51389321 100755 --- a/examples/compiled/c4.sh +++ b/examples/compiled/c4.sh @@ -1374,11 +1374,12 @@ next_sub_buffer() { fi fi } -unpack_escaped_string() { +unpack_escaped_string() { # $1 = string, $2 = size (optional) __buf="$1" # Allocates enough space for all characters, assuming that no character is escaped - _malloc __addr $((${#__buf} + 1)) + _malloc __addr $((${2:-${#__buf} + 1})) __ptr=$__addr + __end=$((__ptr + ${2:-${#__buf} + 1})) # End of allocated memory __us_buf16= __us_buf256= while [ ! -z "$__buf" ] || [ ! -z "$__us_buf256" ] ; do @@ -1415,15 +1416,18 @@ unpack_escaped_string() { : $((__ptr += 1)) done done - : $((_$__ptr = 0)) + while [ $__ptr -le $__end ]; do + : $((_$__ptr = 0)) + : $((__ptr += 1)) + done } # Define a string, and return a reference to it in the varible taken as argument. # If the variable is already defined, this function does nothing. # Note that it's up to the caller to ensure that no 2 strings share the same variable. 
-defstr() { # $1 = variable name, $2 = string +defstr() { # $1 = variable name, $2 = string, $3 = size (optional) if [ $(($1)) -eq 0 ]; then - unpack_escaped_string "$2" + unpack_escaped_string "$2" $3 : $(($1 = __addr)) fi } diff --git a/examples/compiled/repl.sh b/examples/compiled/repl.sh index 6dc40100..bd580762 100755 --- a/examples/compiled/repl.sh +++ b/examples/compiled/repl.sh @@ -135,11 +135,12 @@ next_sub_buffer() { fi fi } -unpack_escaped_string() { +unpack_escaped_string() { # $1 = string, $2 = size (optional) __buf="$1" # Allocates enough space for all characters, assuming that no character is escaped - _malloc __addr $((${#__buf} + 1)) + _malloc __addr $((${2:-${#__buf} + 1})) __ptr=$__addr + __end=$((__ptr + ${2:-${#__buf} + 1})) # End of allocated memory __us_buf16= __us_buf256= while [ ! -z "$__buf" ] || [ ! -z "$__us_buf256" ] ; do @@ -176,16 +177,19 @@ unpack_escaped_string() { : $((__ptr += 1)) done done - : $((_$__ptr = 0)) + while [ $__ptr -le $__end ]; do + : $((_$__ptr = 0)) + : $((__ptr += 1)) + done } # Define a string, and return a reference to it in the varible taken as argument. # If the variable is already defined, this function does nothing. # Note that it's up to the caller to ensure that no 2 strings share the same variable. 
-defstr() { # $1 = variable name, $2 = string +defstr() { # $1 = variable name, $2 = string, $3 = size (optional) set +u # Necessary to allow the variable to be empty if [ $(($1)) -eq 0 ]; then - unpack_escaped_string "$2" + unpack_escaped_string "$2" $3 : $(($1 = __addr)) fi set -u diff --git a/examples/compiled/sha256sum.sh b/examples/compiled/sha256sum.sh index 9ced31d5..5d6fe551 100755 --- a/examples/compiled/sha256sum.sh +++ b/examples/compiled/sha256sum.sh @@ -235,11 +235,12 @@ _main() { let argc $2; let myargv $3 readonly __NEWLINE__=10 # Runtime library -unpack_escaped_string() { +unpack_escaped_string() { # $1 = string, $2 = size (optional) __buf="$1" # Allocates enough space for all characters, assuming that no character is escaped - _malloc __addr $((${#__buf} + 1)) + _malloc __addr $((${2:-${#__buf} + 1})) __ptr=$__addr + __end=$((__ptr + ${2:-${#__buf} + 1})) # End of allocated memory while [ -n "$__buf" ] ; do case "$__buf" in '\'*) @@ -270,16 +271,19 @@ unpack_escaped_string() { : $((_$__ptr = __c)) : $((__ptr += 1)) done - : $((_$__ptr = 0)) + while [ $__ptr -le $__end ]; do + : $((_$__ptr = 0)) + : $((__ptr += 1)) + done } # Define a string, and return a reference to it in the varible taken as argument. # If the variable is already defined, this function does nothing. # Note that it's up to the caller to ensure that no 2 strings share the same variable. 
-defstr() { # $1 = variable name, $2 = string +defstr() { # $1 = variable name, $2 = string, $3 = size (optional) set +u # Necessary to allow the variable to be empty if [ $(($1)) -eq 0 ]; then - unpack_escaped_string "$2" + unpack_escaped_string "$2" $3 : $(($1 = __addr)) fi set -u diff --git a/sh-runtime.c b/sh-runtime.c index 252ddc18..a7d4875b 100644 --- a/sh-runtime.c +++ b/sh-runtime.c @@ -456,11 +456,10 @@ DEPENDS_ON(char_to_int) #endif putstr("unpack_escaped_string() { # $1 = string, $2 = size (optional)\n"); putstr(" __buf=\"$1\"\n"); - putstr(" __len=${2:-${#__buf}}\n"); putstr(" # Allocates enough space for all characters, assuming that no character is escaped\n"); putstr(" _malloc __addr $((${2:-${#__buf} + 1}))\n"); putstr(" __ptr=$__addr\n"); - putstr(" __end=$((__ptr + __len))\n"); + putstr(" __end=$((__ptr + ${2:-${#__buf} + 1})) # End of allocated memory\n"); #ifdef OPTIMIZE_LONG_LINES putstr(" __us_buf16=\n"); putstr(" __us_buf256=\n"); From 10136b6651ffe27cb4e92b37881ebb3a2c198c83 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Fri, 31 Jan 2025 20:17:30 -0500 Subject: [PATCH 34/89] Write characters of array as bytes --- exe.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/exe.c b/exe.c index cebaa6a2..1bb3b742 100644 --- a/exe.c +++ b/exe.c @@ -1561,8 +1561,6 @@ void codegen_initializer(bool local, ast init, ast type, int base_reg, int offse type = canonicalize_type(type); - // printf("codegen_initializer: init = %d, type = %d, in_array = %d offset = %d\n", get_op(init), get_op(type), in_array, offset); - switch (get_op(init)) { case STRING: codegen_initializer_string(get_val_(STRING, init), type, base_reg, offset); @@ -1635,7 +1633,7 @@ void codegen_initializer(bool local, ast init, ast type, int base_reg, int offse codegen_rvalue(get_child_(',', init, 0)); pop_reg(reg_X); grow_fs(-1); - write_mem_location(base_reg, offset, reg_X, type_width(type, true, in_array)); + write_mem_location(base_reg, offset, reg_X, 
type_width(type, true, !in_array)); break; } @@ -1652,7 +1650,7 @@ void codegen_initializer(bool local, ast init, ast type, int base_reg, int offse codegen_rvalue(init); pop_reg(reg_X); grow_fs(-1); - write_mem_location(base_reg, offset, reg_X, type_width(type, true, in_array)); + write_mem_location(base_reg, offset, reg_X, type_width(type, true, !in_array)); } else { fatal_error("codegen_initializer: cannot initialize array with scalar value"); } From 4c1ccd70a5e2a30fd79e97a43456a1734abaff1a Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sat, 1 Feb 2025 12:47:05 -0500 Subject: [PATCH 35/89] Define PNUT_ARCH macros depending on comp target --- pnut.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pnut.c b/pnut.c index 8b4a8204..01757ed2 100644 --- a/pnut.c +++ b/pnut.c @@ -1514,6 +1514,20 @@ void init_builtin_int_macro(int macro_id, int value) { void init_pnut_macros() { init_ident(MACRO, "PNUT_CC"); + +#if defined(sh) + init_ident(MACRO, "PNUT_SH"); +#elif defined(target_i386_linux) + init_ident(MACRO, "PNUT_I386"); + init_ident(MACRO, "PNUT_I386_LINUX"); +#elif defined (target_x86_64_linux) + init_ident(MACRO, "PNUT_X86_64"); + init_ident(MACRO, "PNUT_X86_64_LINUX"); +#elif defined (target_x86_64_mac) + init_ident(MACRO, "PNUT_X86_64"); + init_ident(MACRO, "PNUT_X86_64_MAC"); +#endif + FILE__ID = init_ident(MACRO, "__FILE__"); LINE__ID = init_ident(MACRO, "__LINE__"); DATE__ID = init_ident(MACRO, "__DATE__"); From 2d3750fd4e7d297c76239d55198325ddba2a37f3 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sat, 1 Feb 2025 12:47:45 -0500 Subject: [PATCH 36/89] Add undo_token helper function --- pnut.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/pnut.c b/pnut.c index 01757ed2..9d7e19bf 100644 --- a/pnut.c +++ b/pnut.c @@ -1649,6 +1649,14 @@ void play_macro(int tokens, int args) { } } +// Undoes the effect of get_tok by replacing the current token with the previous +// token and saving the 
current token to be returned by the next call to get_tok. +void undo_token(int prev_tok, int prev_val) { + play_macro(cons(cons(tok, val), 0), 0); // Push the current token back + tok = prev_tok; + val = prev_val; +} + // Try to expand a macro. // If a function-like macro is not called with (), it is not expanded and the identifier is returned as is. // If the wrong number of arguments is passed to a function-like macro, a fatal error is raised. @@ -1679,9 +1687,7 @@ bool attempt_macro_expansion(int macro) { // There was no argument list, i.e. not a function-like macro call even though it is a function-like macro if (new_macro_args == -1) { // get_macro_args_toks looked at the next token so we need to save it - play_macro(cons(cons(tok, val), 0), 0); - tok = IDENTIFIER; - val = macro; + undo_token(IDENTIFIER, macro); return false; } else { play_macro(tokens, new_macro_args); @@ -3036,7 +3042,6 @@ ast parse_unary_expression() { } ast parse_cast_expression() { - int tokens = 0; ast result; ast type; @@ -3064,10 +3069,7 @@ ast parse_cast_expression() { return result; } else { // We need to put the current token and '(' back on the token stream. - tokens = cons(cons(tok, val), 0); - play_macro(tokens, 0); - tok = '('; - val = 0; + undo_token('(', 0); } } From ea9f9822b075d1fe3cea778e65262145972f57f3 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sat, 1 Feb 2025 12:48:09 -0500 Subject: [PATCH 37/89] Parse `sizeof (expr)` correctly When looking at the C grammar, it looks like sizeof has 2 production rules (`sizeof (type)` and `sizeof expr`) that can be distinguished by the presence of parenthesis. When a parenthesis is present, it must be followed by a type, and no parenthesis means it must be followed by an expression. However, this is not true since expressions can be parenthesised meaning that the 2 rules can be differentiated by considering if the token that follows the parenthesis is part of a type. 
--- pnut.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pnut.c b/pnut.c index 9d7e19bf..0a987241 100644 --- a/pnut.c +++ b/pnut.c @@ -3011,8 +3011,16 @@ ast parse_unary_expression() { get_tok(); if (tok == '(') { get_tok(); + // May be a type or an expression + if (is_type_starter(tok)) { result = parse_declarator(true, parse_declaration_specifiers()); expect_tok(')'); + } else { + // We need to put the current token and '(' back on the token stream. + // Otherwise, sizeof (cast_expression) fails to parse. + undo_token('(', 0); + result = parse_unary_expression(); + } } else { result = parse_unary_expression(); } From 75b852cabf46b82cb9748cba7f5c77c339431aae Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sat, 1 Feb 2025 12:54:21 -0500 Subject: [PATCH 38/89] Add _exe test for sizeof --- tests/_exe/sizeof.c | 42 ++++++++++++++++++++++++++++++++++++++++ tests/_exe/sizeof.golden | 9 +++++++++ 2 files changed, 51 insertions(+) create mode 100644 tests/_exe/sizeof.c create mode 100644 tests/_exe/sizeof.golden diff --git a/tests/_exe/sizeof.c b/tests/_exe/sizeof.c new file mode 100644 index 00000000..d65831ea --- /dev/null +++ b/tests/_exe/sizeof.c @@ -0,0 +1,42 @@ +#include <stdio.h> + +// The testing infrastructure doesn't distinguish between 32 bit and 64 bit +// tests, so we need to divide the size by 2 in the 64 bit case. 
+ +#if defined (PNUT_I386) +#define MUL 1 +#elif defined (PNUT_X86_64) +#define MUL 2 +#else +#define MUL 2 +#endif + +void putint_aux(int n) { + if (n <= -10) putint_aux(n / 10); + putchar('0' - (n % 10)); +} + +void putint(int n) { + if (n < 0) { + putchar('-'); + putint_aux(n); + } else { + putint_aux(-n); + } +} + +int main() { + int a[10] = { 0, 1, 2, 3 }; + int b[] = { 0, 1, 2, 3 }; // Infer size from initializer (4 elements) + putint(sizeof(char)); putchar('\n'); // No division needed because sizeof(char) is always 1 + putint(sizeof(int) / MUL); putchar('\n'); + putint(sizeof(int[10]) / MUL); putchar('\n'); + putint(sizeof(int[10][2]) / MUL); putchar('\n'); + putint(sizeof(a) / MUL); putchar('\n'); // sizeof (expr) + putint(sizeof a / MUL); putchar('\n'); // sizeof expr + putint(sizeof(b) / MUL); putchar('\n'); // sizeof (expr) + putint(sizeof b / MUL); putchar('\n'); // sizeof expr + putint(sizeof((void *) a) / MUL); putchar('\n'); // sizeof (cast_expr) + + return 0; +} diff --git a/tests/_exe/sizeof.golden b/tests/_exe/sizeof.golden new file mode 100644 index 00000000..4c9e599a --- /dev/null +++ b/tests/_exe/sizeof.golden @@ -0,0 +1,9 @@ +1 +4 +40 +80 +40 +40 +16 +16 +4 From 6a830bb38f3c35cd63e3d7f20bf97d82e9bc3372 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sat, 1 Feb 2025 12:54:31 -0500 Subject: [PATCH 39/89] Fix abstract declarator parser When an abstract declarator reaches its terminal case, it must still parse the postfix part of the declarator for arrays and functions. Otherwise, types such as int[10] cannot be parsed. 
--- pnut.c | 1 - 1 file changed, 1 deletion(-) diff --git a/pnut.c b/pnut.c index 0a987241..a862b391 100644 --- a/pnut.c +++ b/pnut.c @@ -2670,7 +2670,6 @@ ast parse_declarator(bool abstract_decl, ast parent_type) { } else { parse_error("Invalid declarator, expected an identifier but declarator doesn't have one", tok); } - return result; } // At this point, the only non-recursive declarator is an identifier From 6eebea96958c4f7bc7f94f1827997f3adcb1e5a6 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sat, 1 Feb 2025 13:06:56 -0500 Subject: [PATCH 40/89] Add HANDLE_SIGNALS option to help debug tokenizer. --- debug.c | 13 +++++++++++++ pnut.c | 6 +++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/debug.c b/debug.c index a0586dbd..b23b5ac8 100644 --- a/debug.c +++ b/debug.c @@ -1,3 +1,16 @@ + +#ifdef HANDLE_SIGNALS +#include <signal.h> + +void signal_callback_handler(int signum) { + if (signum == SIGINT){ + printf("Caught signal %d\n",signum); + printf("Tokenizer at %s:%d:%d\n", fp_filepath, line_number, column_number); + exit(1); + } +} +#endif + void print_string_char(int c) { if (c == 7) putstr("\\a"); else if (c == 8) putstr("\\b"); diff --git a/pnut.c b/pnut.c index a862b391..8dd213a6 100644 --- a/pnut.c +++ b/pnut.c @@ -2202,7 +2202,7 @@ void get_tok() { } // parser -#if defined DEBUG_CPP || defined DEBUG_EXPAND_INCLUDES || defined NICE_ERR_MSG +#if defined DEBUG_CPP || defined DEBUG_EXPAND_INCLUDES || defined NICE_ERR_MSG || defined HANDLE_SIGNALS #include "debug.c" #endif @@ -3530,6 +3530,10 @@ int main(int argc, char **argv) { int i; ast decl; +#ifdef HANDLE_SIGNALS + signal(SIGINT, signal_callback_handler); +#endif + init_ident_table(); init_pnut_macros(); From 0b80d7000984c7547129658941a8830c89c39624 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sat, 1 Feb 2025 19:04:26 -0500 Subject: [PATCH 41/89] Prevent expansion of self-referencing macros It's not perfect, since this code expands to A(1 + 1 + x) # define x 1 + x # define A(a1) 
B(a1) # define B(a1) A(a1) A(x) --- pnut.c | 124 +++--- .../_all/preprocessor/macro/recursion-limit.c | 412 +++++++++--------- .../preprocessor/macro/recursion-limit.golden | 1 - 3 files changed, 275 insertions(+), 262 deletions(-) diff --git a/pnut.c b/pnut.c index 8dd213a6..0d6d8706 100644 --- a/pnut.c +++ b/pnut.c @@ -624,9 +624,11 @@ int probe_string(int probe) { return heap[probe+1]; // return the start of the string } +#define expect_tok(expected_tok) expect_tok_(expected_tok, __FILE__, __LINE__) + void get_tok(); void get_ident(); -void expect_tok(int expected); +void expect_tok_(int expected_tok, char* file, int line); #define IFDEF_DEPTH_MAX 20 bool if_macro_stack[IFDEF_DEPTH_MAX]; // Stack of if macro states @@ -644,12 +646,13 @@ bool expand_macro_arg = true; // Don't produce newline tokens. Used when reading the tokens of a macro definition. bool skip_newlines = true; -#define MACRO_RECURSION_MAX 100 +#define MACRO_RECURSION_MAX 180 // Supports up to 60 (180 / 3) nested macro expansions. 
int macro_stack[MACRO_RECURSION_MAX]; int macro_stack_ix = 0; int macro_tok_lst = 0; // Current list of tokens to replay for the macro being expanded int macro_args = 0; // Current list of arguments for the macro being expanded +int macro_ident = 0; // The identifier of the macro being expanded (if any) int macro_args_count; // Number of arguments for the current macro being expanded bool paste_last_token = false; // Whether the last token was a ## or not @@ -1584,14 +1587,7 @@ void check_macro_arity(int macro_args_count, int macro) { int get_macro_args_toks(int macro) { int args = 0; int macro_args_count = 0; - bool prev_is_comma = false; - get_tok_macro_expand(); // Skip the macro identifier - - if (tok != '(') { // Function-like macro with 0 arguments - check_macro_arity(macro_args_count, macro); - return -1; // No arguments - } - + bool prev_is_comma = tok == ','; get_tok_macro_expand(); // Skip '(' while (tok != ')' && tok != EOF) { @@ -1633,65 +1629,89 @@ int get_macro_arg(int ix) { return car(arg); } -void play_macro(int tokens, int args) { - if (tokens != 0) { - if (macro_tok_lst != 0) { - if (macro_stack_ix + 2 >= MACRO_RECURSION_MAX) { - syntax_error("Macro recursion depth exceeded."); - } - macro_stack[macro_stack_ix] = macro_tok_lst; - macro_stack[macro_stack_ix + 1] = macro_args; - macro_stack_ix += 2; - } +// "Pops" the current macro expansion and restores the previous macro expansion context. +// This is done when the current macro expansion is done. +void return_to_parent_macro() { + if (macro_stack_ix == 0) fatal_error("return_to_parent_macro: no parent macro"); + + macro_stack_ix -= 3; + macro_tok_lst = macro_stack[macro_stack_ix]; + macro_args = macro_stack[macro_stack_ix + 1]; + macro_ident = macro_stack[macro_stack_ix + 2]; +} + +// Begins a new macro expansion context, saving the current context on the macro stack. +// Takes as argument the name of the macro, the tokens to be expanded and the arguments. 
+void begin_macro_expansion(int ident, int tokens, int args) { + if (macro_stack_ix + 3 >= MACRO_RECURSION_MAX) { + fatal_error("Macro recursion depth exceeded."); + } + + macro_stack[macro_stack_ix] = macro_tok_lst; + macro_stack[macro_stack_ix + 1] = macro_args; + macro_stack[macro_stack_ix + 2] = macro_ident; + macro_stack_ix += 3; + + macro_ident = ident; + macro_tok_lst = tokens; + macro_args = args; +} + +// Search the macro stack to see if the macro is already expanding. +bool macro_is_already_expanding(int ident) { + int i = macro_stack_ix; + if (ident == 0 || macro_ident == 0) return false; // Unnamed macro or no macro is expanding + if (ident == macro_ident) return true; // The same macro is already expanding - macro_tok_lst = tokens; - macro_args = args; + // Traverse the stack to see if the macro is already expanding + while (i > 0) { + i -= 3; + if (macro_stack[i + 2] == ident) return true; } + return false; } // Undoes the effect of get_tok by replacing the current token with the previous // token and saving the current token to be returned by the next call to get_tok. void undo_token(int prev_tok, int prev_val) { - play_macro(cons(cons(tok, val), 0), 0); // Push the current token back + begin_macro_expansion(0, cons(cons(tok, val), 0), 0); // Push the current token back tok = prev_tok; val = prev_val; } -// Try to expand a macro. -// If a function-like macro is not called with (), it is not expanded and the identifier is returned as is. +// Try to expand a macro and returns if the macro was expanded. +// A macro is not expanded if it is already expanding or if it's a function-like +// macro that is not called with parenthesis. In that case, the macro identifier +// is returned as a normal identifier. // If the wrong number of arguments is passed to a function-like macro, a fatal error is raised. -// For object like macros, the macro tokens are played back without any other parsing. -// Returns 1 if the macro was expanded, 0 otherwise. 
bool attempt_macro_expansion(int macro) { - int new_macro_args; // We must save the tokens because the macro may be redefined while reading the arguments int tokens = car(heap[macro + 3]); - macro = val; - if (cdr(heap[macro + 3]) == -1) { // Object-like macro - // Note: Redefining __{FILE,LINE}__ macros, either with the #define or #line - // directives is not supported. + + if (macro_is_already_expanding(macro)) { // Self referencing macro + tok = IDENTIFIER; + val = macro; + return false; + } else if (cdr(heap[macro + 3]) == -1) { // Object-like macro + // Note: Redefining __{FILE,LINE}__ macros, either with the #define or #line directives is not supported. if (macro == FILE__ID) { - play_macro(cons(cons(STRING, intern_str(fp_filepath)), 0), 0); + tokens = cons(cons(STRING, intern_str(fp_filepath)), 0); } #ifdef INCLUDE_LINE_NUMBER_ON_ERROR else if (macro == LINE__ID) { - play_macro(cons(cons(INTEGER, -line_number), 0), 0); + tokens = cons(cons(INTEGER, -line_number), 0); } #endif - else { - play_macro(tokens, 0); - } + begin_macro_expansion(macro, tokens, 0); return true; - } else { - new_macro_args = get_macro_args_toks(macro); - // There was no argument list, i.e. 
not a function-like macro call even though it is a function-like macro - if (new_macro_args == -1) { - // get_macro_args_toks looked at the next token so we need to save it + } else { // Function-like macro + expect_tok(MACRO); // Skip macro identifier + if (tok == '(') { + begin_macro_expansion(macro, tokens, get_macro_args_toks(macro)); + return true; + } else { undo_token(IDENTIFIER, macro); return false; - } else { - play_macro(tokens, new_macro_args); - return true; } } } @@ -1741,7 +1761,7 @@ void paste_tokens(int left_tok, int left_val) { val = left_val; return; } else { - play_macro(get_macro_arg(val), 0); // Play the tokens of the macro argument + begin_macro_expansion(0, get_macro_arg(val), 0); // Play the tokens of the macro argument get_tok_macro(); } } @@ -1838,9 +1858,7 @@ void get_tok() { // checked by read_macro_tokens. syntax_error("## cannot appear at the end of a macro expansion"); } - macro_stack_ix -= 2; - macro_tok_lst = macro_stack[macro_stack_ix]; - macro_args = macro_stack[macro_stack_ix + 1]; + return_to_parent_macro(); paste_last_token = false; // We are done pasting paste_tokens(tok, val); } @@ -1851,7 +1869,7 @@ void get_tok() { } break; } else if (tok == MACRO_ARG && expand_macro_arg) { - play_macro(get_macro_arg(val), 0); // Play the tokens of the macro argument + begin_macro_expansion(0, get_macro_arg(val), 0); // Play the tokens of the macro argument continue; } else if (tok == '#') { // Stringizing! 
stringify(); @@ -1859,9 +1877,7 @@ void get_tok() { } break; } else if (macro_stack_ix != 0) { - macro_stack_ix -= 2; - macro_tok_lst = macro_stack[macro_stack_ix]; - macro_args = macro_stack[macro_stack_ix + 1]; + return_to_parent_macro(); continue; } else if (ch <= ' ') { @@ -2272,8 +2288,6 @@ void expect_tok_(int expected_tok, char* file, int line) { get_tok(); } -#define expect_tok(expected_tok) expect_tok_(expected_tok, __FILE__, __LINE__) - ast parse_comma_expression(); ast parse_cast_expression(); ast parse_compound_statement(); diff --git a/tests/_all/preprocessor/macro/recursion-limit.c b/tests/_all/preprocessor/macro/recursion-limit.c index 3dad12f4..b64dd36a 100644 --- a/tests/_all/preprocessor/macro/recursion-limit.c +++ b/tests/_all/preprocessor/macro/recursion-limit.c @@ -3,209 +3,211 @@ // putchar #include -// Generally, the recursion limit for macros is 100 / 2 as defined by MACRO_RECURSION_MAX. -// However, when the macro is in "tail position", there's no limit on nesting depth. -// Testing with a chain of 200 macros to test this "tail call optimization". 
-#define A1 1 -#define A2 A1 -#define A3 A2 -#define A4 A3 -#define A5 A4 -#define A6 A5 -#define A7 A6 -#define A8 A7 -#define A9 A8 -#define A10 A9 -#define A11 A10 -#define A12 A11 -#define A13 A12 -#define A14 A13 -#define A15 A14 -#define A16 A15 -#define A17 A16 -#define A18 A17 -#define A19 A18 -#define A20 A19 -#define A21 A20 -#define A22 A21 -#define A23 A22 -#define A24 A23 -#define A25 A24 -#define A26 A25 -#define A27 A26 -#define A28 A27 -#define A29 A28 -#define A30 A29 -#define A31 A30 -#define A32 A31 -#define A33 A32 -#define A34 A33 -#define A35 A34 -#define A36 A35 -#define A37 A36 -#define A38 A37 -#define A39 A38 -#define A40 A39 -#define A41 A40 -#define A42 A41 -#define A43 A42 -#define A44 A43 -#define A45 A44 -#define A46 A45 -#define A47 A46 -#define A48 A47 -#define A49 A48 -#define A50 A49 -#define A51 A50 -#define A52 A51 -#define A53 A52 -#define A54 A53 -#define A55 A54 -#define A56 A55 -#define A57 A56 -#define A58 A57 -#define A59 A58 -#define A60 A59 -#define A61 A60 -#define A62 A61 -#define A63 A62 -#define A64 A63 -#define A65 A64 -#define A66 A65 -#define A67 A66 -#define A68 A67 -#define A69 A68 -#define A70 A69 -#define A71 A70 -#define A72 A71 -#define A73 A72 -#define A74 A73 -#define A75 A74 -#define A76 A75 -#define A77 A76 -#define A78 A77 -#define A79 A78 -#define A80 A79 -#define A81 A80 -#define A82 A81 -#define A83 A82 -#define A84 A83 -#define A85 A84 -#define A86 A85 -#define A87 A86 -#define A88 A87 -#define A89 A88 -#define A90 A89 -#define A91 A90 -#define A92 A91 -#define A93 A92 -#define A94 A93 -#define A95 A94 -#define A96 A95 -#define A97 A96 -#define A98 A97 -#define A99 A98 -#define A100 A99 -#define A101 A100 -#define A102 A101 -#define A103 A102 -#define A104 A103 -#define A105 A104 -#define A106 A105 -#define A107 A106 -#define A108 A107 -#define A109 A108 -#define A110 A109 -#define A111 A110 -#define A112 A111 -#define A113 A112 -#define A114 A113 -#define A115 A114 -#define A116 A115 -#define A117 
A116 -#define A118 A117 -#define A119 A118 -#define A120 A119 -#define A121 A120 -#define A122 A121 -#define A123 A122 -#define A124 A123 -#define A125 A124 -#define A126 A125 -#define A127 A126 -#define A128 A127 -#define A129 A128 -#define A130 A129 -#define A131 A130 -#define A132 A131 -#define A133 A132 -#define A134 A133 -#define A135 A134 -#define A136 A135 -#define A137 A136 -#define A138 A137 -#define A139 A138 -#define A140 A139 -#define A141 A140 -#define A142 A141 -#define A143 A142 -#define A144 A143 -#define A145 A144 -#define A146 A145 -#define A147 A146 -#define A148 A147 -#define A149 A148 -#define A150 A149 -#define A151 A150 -#define A152 A151 -#define A153 A152 -#define A154 A153 -#define A155 A154 -#define A156 A155 -#define A157 A156 -#define A158 A157 -#define A159 A158 -#define A160 A159 -#define A161 A160 -#define A162 A161 -#define A163 A162 -#define A164 A163 -#define A165 A164 -#define A166 A165 -#define A167 A166 -#define A168 A167 -#define A169 A168 -#define A170 A169 -#define A171 A170 -#define A172 A171 -#define A173 A172 -#define A174 A173 -#define A175 A174 -#define A176 A175 -#define A177 A176 -#define A178 A177 -#define A179 A178 -#define A180 A179 -#define A181 A180 -#define A182 A181 -#define A183 A182 -#define A184 A183 -#define A185 A184 -#define A186 A185 -#define A187 A186 -#define A188 A187 -#define A189 A188 -#define A190 A189 -#define A191 A190 -#define A192 A191 -#define A193 A192 -#define A194 A193 -#define A195 A194 -#define A196 A195 -#define A197 A196 -#define A198 A197 -#define A199 A198 -#define A200 A199 +// Note(2025-02-01): The tail call optimization was removed to support self-referencing macros. +// +// // Generally, the recursion limit for macros is 100 / 2 as defined by MACRO_RECURSION_MAX. +// // However, when the macro is in "tail position", there's no limit on nesting depth. +// // Testing with a chain of 200 macros to test this "tail call optimization". 
+// #define A1 1 +// #define A2 A1 +// #define A3 A2 +// #define A4 A3 +// #define A5 A4 +// #define A6 A5 +// #define A7 A6 +// #define A8 A7 +// #define A9 A8 +// #define A10 A9 +// #define A11 A10 +// #define A12 A11 +// #define A13 A12 +// #define A14 A13 +// #define A15 A14 +// #define A16 A15 +// #define A17 A16 +// #define A18 A17 +// #define A19 A18 +// #define A20 A19 +// #define A21 A20 +// #define A22 A21 +// #define A23 A22 +// #define A24 A23 +// #define A25 A24 +// #define A26 A25 +// #define A27 A26 +// #define A28 A27 +// #define A29 A28 +// #define A30 A29 +// #define A31 A30 +// #define A32 A31 +// #define A33 A32 +// #define A34 A33 +// #define A35 A34 +// #define A36 A35 +// #define A37 A36 +// #define A38 A37 +// #define A39 A38 +// #define A40 A39 +// #define A41 A40 +// #define A42 A41 +// #define A43 A42 +// #define A44 A43 +// #define A45 A44 +// #define A46 A45 +// #define A47 A46 +// #define A48 A47 +// #define A49 A48 +// #define A50 A49 +// #define A51 A50 +// #define A52 A51 +// #define A53 A52 +// #define A54 A53 +// #define A55 A54 +// #define A56 A55 +// #define A57 A56 +// #define A58 A57 +// #define A59 A58 +// #define A60 A59 +// #define A61 A60 +// #define A62 A61 +// #define A63 A62 +// #define A64 A63 +// #define A65 A64 +// #define A66 A65 +// #define A67 A66 +// #define A68 A67 +// #define A69 A68 +// #define A70 A69 +// #define A71 A70 +// #define A72 A71 +// #define A73 A72 +// #define A74 A73 +// #define A75 A74 +// #define A76 A75 +// #define A77 A76 +// #define A78 A77 +// #define A79 A78 +// #define A80 A79 +// #define A81 A80 +// #define A82 A81 +// #define A83 A82 +// #define A84 A83 +// #define A85 A84 +// #define A86 A85 +// #define A87 A86 +// #define A88 A87 +// #define A89 A88 +// #define A90 A89 +// #define A91 A90 +// #define A92 A91 +// #define A93 A92 +// #define A94 A93 +// #define A95 A94 +// #define A96 A95 +// #define A97 A96 +// #define A98 A97 +// #define A99 A98 +// #define A100 A99 +// #define A101 
A100 +// #define A102 A101 +// #define A103 A102 +// #define A104 A103 +// #define A105 A104 +// #define A106 A105 +// #define A107 A106 +// #define A108 A107 +// #define A109 A108 +// #define A110 A109 +// #define A111 A110 +// #define A112 A111 +// #define A113 A112 +// #define A114 A113 +// #define A115 A114 +// #define A116 A115 +// #define A117 A116 +// #define A118 A117 +// #define A119 A118 +// #define A120 A119 +// #define A121 A120 +// #define A122 A121 +// #define A123 A122 +// #define A124 A123 +// #define A125 A124 +// #define A126 A125 +// #define A127 A126 +// #define A128 A127 +// #define A129 A128 +// #define A130 A129 +// #define A131 A130 +// #define A132 A131 +// #define A133 A132 +// #define A134 A133 +// #define A135 A134 +// #define A136 A135 +// #define A137 A136 +// #define A138 A137 +// #define A139 A138 +// #define A140 A139 +// #define A141 A140 +// #define A142 A141 +// #define A143 A142 +// #define A144 A143 +// #define A145 A144 +// #define A146 A145 +// #define A147 A146 +// #define A148 A147 +// #define A149 A148 +// #define A150 A149 +// #define A151 A150 +// #define A152 A151 +// #define A153 A152 +// #define A154 A153 +// #define A155 A154 +// #define A156 A155 +// #define A157 A156 +// #define A158 A157 +// #define A159 A158 +// #define A160 A159 +// #define A161 A160 +// #define A162 A161 +// #define A163 A162 +// #define A164 A163 +// #define A165 A164 +// #define A166 A165 +// #define A167 A166 +// #define A168 A167 +// #define A169 A168 +// #define A170 A169 +// #define A171 A170 +// #define A172 A171 +// #define A173 A172 +// #define A174 A173 +// #define A175 A174 +// #define A176 A175 +// #define A177 A176 +// #define A178 A177 +// #define A179 A178 +// #define A180 A179 +// #define A181 A180 +// #define A182 A181 +// #define A183 A182 +// #define A184 A183 +// #define A185 A184 +// #define A186 A185 +// #define A187 A186 +// #define A188 A187 +// #define A189 A188 +// #define A190 A189 +// #define A191 A190 +// #define 
A192 A191 +// #define A193 A192 +// #define A194 A193 +// #define A195 A194 +// #define A196 A195 +// #define A197 A196 +// #define A198 A197 +// #define A199 A198 +// #define A200 A199 // Testing with a chain of 50 non-tail macro expansions to test the recursion limit. #define B1 0 + 0 @@ -312,14 +314,12 @@ #define B101 B100 + 0 #define B102 B101 + 0 -// Testing with a chain of 200 macros to test this "tail call optimization". - void putdigit(int n) { putchar('0' + n); putchar('\n'); } void main() { - putdigit(A200); + // putdigit(A200); // Disabled, see note above putdigit(B50); } diff --git a/tests/_all/preprocessor/macro/recursion-limit.golden b/tests/_all/preprocessor/macro/recursion-limit.golden index b261da18..573541ac 100644 --- a/tests/_all/preprocessor/macro/recursion-limit.golden +++ b/tests/_all/preprocessor/macro/recursion-limit.golden @@ -1,2 +1 @@ -1 0 From da14bbd2e85b25e4748a0ffeff845ca771eb0833 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sun, 2 Feb 2025 21:51:11 -0500 Subject: [PATCH 42/89] Show #include location when file is not found --- pnut.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pnut.c b/pnut.c index 0d6d8706..6e9dbb8b 100644 --- a/pnut.c +++ b/pnut.c @@ -838,8 +838,8 @@ FILE *fopen_source_file(char *file_name, char *relative_to) { } fp = fopen(fp_filepath, "r"); if (fp == 0) { - putstr("Could not open file: "); putstr(fp_filepath); putchar('\n'); - exit(1); + putstr(fp_filepath); putchar('\n'); + fatal_error("Could not open file"); } return fp; } From d4f87ce4e3be624d7d7ccb3d9de47d4594b7e226 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sun, 2 Feb 2025 21:52:05 -0500 Subject: [PATCH 43/89] Expand macros that are token pasted together --- pnut.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/pnut.c b/pnut.c index 6e9dbb8b..a52e11a7 100644 --- a/pnut.c +++ b/pnut.c @@ -1747,7 +1747,7 @@ int paste_integers(int left_val, int right_val) { } // 
Support token pasting between identifiers and non-negative integers -void paste_tokens(int left_tok, int left_val) { +bool paste_tokens(int left_tok, int left_val) { int right_tok; int right_val; expand_macro_arg = false; @@ -1759,7 +1759,7 @@ void paste_tokens(int left_tok, int left_val) { if (get_macro_arg(val) == 0) { tok = left_tok; val = left_val; - return; + return false; } else { begin_macro_expansion(0, get_macro_arg(val), 0); // Play the tokens of the macro argument get_tok_macro(); @@ -1767,12 +1767,12 @@ void paste_tokens(int left_tok, int left_val) { } right_tok = tok; right_val = val; - if (left_tok == IDENTIFIER || left_tok == MACRO || left_tok <= WHILE_KW) { + if (left_tok == IDENTIFIER || left_tok == TYPE || left_tok == MACRO || left_tok <= WHILE_KW) { // Something that starts with an identifier can only be an identifier begin_string(); accum_string_string(left_val); - if (right_tok == IDENTIFIER || right_tok == MACRO || right_tok <= WHILE_KW) { + if (right_tok == IDENTIFIER || right_tok == TYPE || right_tok == MACRO || right_tok <= WHILE_KW) { accum_string_string(right_val); } else if (right_tok == INTEGER) { accum_string_integer(-right_val); @@ -1803,6 +1803,13 @@ void paste_tokens(int left_tok, int left_val) { putstr("left_tok="); putint(left_tok); putstr(", right_tok="); putint(right_tok); putchar('\n'); syntax_error("cannot paste a non-identifier or non-integer"); } + + if (tok == MACRO) { + // If the result of the pasting is a macro, it is expanded + return attempt_macro_expansion(val); + } else { + return false; + } } void get_tok() { @@ -1848,8 +1855,11 @@ void get_tok() { } else { // macro_tok_lst is not empty because read_macro_tokens checked for trailing ## macro_tok_lst = cdr(macro_tok_lst); // Skip the ## - paste_tokens(tok, val); + if (paste_tokens(tok, val)) { + continue; + } else { break; + } } } else if (macro_tok_lst == 0 && paste_last_token) { // We finished expanding the left-hand side of ## if (macro_stack_ix == 0) { @@ -1860,7 
+1870,11 @@ void get_tok() { } return_to_parent_macro(); paste_last_token = false; // We are done pasting - paste_tokens(tok, val); + if (paste_tokens(tok, val)) { + continue; + } else { + break; + } } if (tok == MACRO) { // Nested macro expansion! From 1997bf45d429a41b4bc38f334c317750e5443893 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sun, 2 Feb 2025 22:05:37 -0500 Subject: [PATCH 44/89] Improve print_tok indentation --- debug.c | 48 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/debug.c b/debug.c index b23b5ac8..e0a037fe 100644 --- a/debug.c +++ b/debug.c @@ -34,10 +34,47 @@ void print_tok_string(int string_probe) { } } -int print_tok_indent = 0; +int print_tok_indent_level = 0; +int print_tok_preceding_nl_count = 0; +void print_tok_indent() { + int i; + for (i = 0; i < print_tok_indent_level; i += 1) putchar(' '); +} + void print_tok(int tok, int val) { int i; + // print_tok treats '{', '}' and '\n' specially: + // - '{' increases the indent level by 2 + // - '}' decreases the indent level by 2 + // - '\n' prints a newline and increments print_tok_preceding_nl_count + + // When print_tok_preceding_nl_count is not 0, print_tok_indent is called + // before printing the token. This ensures that tokens are properly indented + // after a newline. 
+ + if (tok == '\n') { + if (print_tok_preceding_nl_count >= 2) return; // Skip consecutive newlines + print_tok_preceding_nl_count += 1; + putchar('\n'); + return; + } else if (tok == '{') { + print_tok_indent(); + putchar(tok); + print_tok_indent_level += 2; + return; + } else if (tok == '}') { + print_tok_indent_level -= 2; + print_tok_indent(); + putchar(tok); + return; + } + + if (print_tok_preceding_nl_count != 0) { + print_tok_indent(); + print_tok_preceding_nl_count = 0; + } + if (tok == AUTO_KW) putstr("auto"); else if (tok == BREAK_KW) putstr("break"); else if (tok == CASE_KW) putstr("case"); @@ -115,15 +152,6 @@ void print_tok(int tok, int val) { putchar('"'); } else if (tok == MACRO_ARG) { putstr("ARG["); putint(val); putstr("]"); - } else if (tok == '{') { - putchar(tok); - print_tok_indent += 2; - } else if (tok == '}') { - print_tok_indent -= 2; - putchar(tok); - } else if (tok == '\n') { - putchar(tok); - for (i = 0; i < print_tok_indent; i++) putchar(' '); } else { putchar(tok); } From aea6f80580dabfc8475b88d7d060fd9554027949 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sun, 2 Feb 2025 22:06:14 -0500 Subject: [PATCH 45/89] Add debugging function to see macro expansion ctx --- debug.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/debug.c b/debug.c index e0a037fe..92e84f6b 100644 --- a/debug.c +++ b/debug.c @@ -254,3 +254,49 @@ void show_ast(char* name, ast obj) { // members = get_child(members, 2); // } // } + +void print_tokens(int tokens) { + while (tokens != 0) { + print_tok(car(car(tokens)), cdr(car(tokens))); + tokens = cdr(tokens); + } +} + +void print_macro_args(int args) { + int arg; + if (args != 0) { + print_macro_args(cdr(args)); + print_tokens(car(args)); + if (cdr(args) != 0) putchar(','); + } +} + +void print_macro_ctx(int ix, int ident, int tokens, int args) { + int arg; + if (ident == 0) { + printf("# %-3d: ", ix); + } else { + printf("# %-3d: %s", ix, STRING_BUF(ident)); 
+ } + if (args) { + putchar('('); + print_macro_args(args); + putchar(')'); + } + if (tokens != 0) { + printf(" -> "); + print_tokens(tokens); + } +} + +void print_macro_stack() { + int i = 0; + putstr("\n######### macro_stack: #########\n"); + while (3 * i < macro_stack_ix) { + print_macro_ctx(i, macro_stack[i * 3 + 2], macro_stack[i * 3], macro_stack[i * 3 + 1]); + putchar('\n'); + i += 1; + } + print_macro_ctx(i, macro_ident, macro_tok_lst, macro_args); + putstr("\n################################\n"); +} From d71a0665baf1d2e69fdc18d822be5ac602fcf670 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 3 Feb 2025 12:28:49 -0500 Subject: [PATCH 46/89] Add test where macro recursion depth is exceeded --- .../macro/recursion-limit-error.c | 216 ++++++++++++++++++ .../macro/recursion-limit-error.golden | 1 + 2 files changed, 217 insertions(+) create mode 100644 tests/_all/preprocessor/macro/recursion-limit-error.c create mode 100644 tests/_all/preprocessor/macro/recursion-limit-error.golden diff --git a/tests/_all/preprocessor/macro/recursion-limit-error.c b/tests/_all/preprocessor/macro/recursion-limit-error.c new file mode 100644 index 00000000..63a84432 --- /dev/null +++ b/tests/_all/preprocessor/macro/recursion-limit-error.c @@ -0,0 +1,216 @@ +// expect_comp_failure + +// tests for recursion depth of macros + +#include + +// The recursion limit for macros is 100 as defined by MACRO_RECURSION_MAX. 
+#define A1 1 +#define A2 A1 +#define A3 A2 +#define A4 A3 +#define A5 A4 +#define A6 A5 +#define A7 A6 +#define A8 A7 +#define A9 A8 +#define A10 A9 +#define A11 A10 +#define A12 A11 +#define A13 A12 +#define A14 A13 +#define A15 A14 +#define A16 A15 +#define A17 A16 +#define A18 A17 +#define A19 A18 +#define A20 A19 +#define A21 A20 +#define A22 A21 +#define A23 A22 +#define A24 A23 +#define A25 A24 +#define A26 A25 +#define A27 A26 +#define A28 A27 +#define A29 A28 +#define A30 A29 +#define A31 A30 +#define A32 A31 +#define A33 A32 +#define A34 A33 +#define A35 A34 +#define A36 A35 +#define A37 A36 +#define A38 A37 +#define A39 A38 +#define A40 A39 +#define A41 A40 +#define A42 A41 +#define A43 A42 +#define A44 A43 +#define A45 A44 +#define A46 A45 +#define A47 A46 +#define A48 A47 +#define A49 A48 +#define A50 A49 +#define A51 A50 +#define A52 A51 +#define A53 A52 +#define A54 A53 +#define A55 A54 +#define A56 A55 +#define A57 A56 +#define A58 A57 +#define A59 A58 +#define A60 A59 +#define A61 A60 +#define A62 A61 +#define A63 A62 +#define A64 A63 +#define A65 A64 +#define A66 A65 +#define A67 A66 +#define A68 A67 +#define A69 A68 +#define A70 A69 +#define A71 A70 +#define A72 A71 +#define A73 A72 +#define A74 A73 +#define A75 A74 +#define A76 A75 +#define A77 A76 +#define A78 A77 +#define A79 A78 +#define A80 A79 +#define A81 A80 +#define A82 A81 +#define A83 A82 +#define A84 A83 +#define A85 A84 +#define A86 A85 +#define A87 A86 +#define A88 A87 +#define A89 A88 +#define A90 A89 +#define A91 A90 +#define A92 A91 +#define A93 A92 +#define A94 A93 +#define A95 A94 +#define A96 A95 +#define A97 A96 +#define A98 A97 +#define A99 A98 +#define A100 A99 +#define A101 A100 +#define A102 A101 +#define A103 A102 +#define A104 A103 +#define A105 A104 +#define A106 A105 +#define A107 A106 +#define A108 A107 +#define A109 A108 +#define A110 A109 +#define A111 A110 +#define A112 A111 +#define A113 A112 +#define A114 A113 +#define A115 A114 +#define A116 A115 +#define A117 
A116 +#define A118 A117 +#define A119 A118 +#define A120 A119 +#define A121 A120 +#define A122 A121 +#define A123 A122 +#define A124 A123 +#define A125 A124 +#define A126 A125 +#define A127 A126 +#define A128 A127 +#define A129 A128 +#define A130 A129 +#define A131 A130 +#define A132 A131 +#define A133 A132 +#define A134 A133 +#define A135 A134 +#define A136 A135 +#define A137 A136 +#define A138 A137 +#define A139 A138 +#define A140 A139 +#define A141 A140 +#define A142 A141 +#define A143 A142 +#define A144 A143 +#define A145 A144 +#define A146 A145 +#define A147 A146 +#define A148 A147 +#define A149 A148 +#define A150 A149 +#define A151 A150 +#define A152 A151 +#define A153 A152 +#define A154 A153 +#define A155 A154 +#define A156 A155 +#define A157 A156 +#define A158 A157 +#define A159 A158 +#define A160 A159 +#define A161 A160 +#define A162 A161 +#define A163 A162 +#define A164 A163 +#define A165 A164 +#define A166 A165 +#define A167 A166 +#define A168 A167 +#define A169 A168 +#define A170 A169 +#define A171 A170 +#define A172 A171 +#define A173 A172 +#define A174 A173 +#define A175 A174 +#define A176 A175 +#define A177 A176 +#define A178 A177 +#define A179 A178 +#define A180 A179 +#define A181 A180 +#define A182 A181 +#define A183 A182 +#define A184 A183 +#define A185 A184 +#define A186 A185 +#define A187 A186 +#define A188 A187 +#define A189 A188 +#define A190 A189 +#define A191 A190 +#define A192 A191 +#define A193 A192 +#define A194 A193 +#define A195 A194 +#define A196 A195 +#define A197 A196 +#define A198 A197 +#define A199 A198 +#define A200 A199 + +void putdigit(int n) { + putchar('0' + n); + putchar('\n'); +} + +void main() { + putdigit(A200); // Disabled, see note above +} diff --git a/tests/_all/preprocessor/macro/recursion-limit-error.golden b/tests/_all/preprocessor/macro/recursion-limit-error.golden new file mode 100644 index 00000000..2466d804 --- /dev/null +++ b/tests/_all/preprocessor/macro/recursion-limit-error.golden @@ -0,0 +1 @@ 
+tests/_all/preprocessor/macro/recursion-limit-error.c:215:11 Macro recursion depth exceeded. From 498ea0eb30b52bdb152a243557170b28d534b7e0 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Tue, 4 Feb 2025 12:20:13 -0500 Subject: [PATCH 47/89] Add annotation for tests that are broken Adding `// expect_failure` in a test source file will allow the golden file to not match. --- run-tests.sh | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/run-tests.sh b/run-tests.sh index 7b22bde4..1852e64e 100755 --- a/run-tests.sh +++ b/run-tests.sh @@ -146,13 +146,23 @@ test_expect_failure_for_shells() { echo `sed -n -e "/\/\/ expect_failure_for: /p" "$1" | sed -e "s/^\/\/ expect_failure_for: //"` } +# Pnut has a bug that causes it to miscompile when a certain feature is used. +# // expect_failure +test_expect_failure() { + if grep -q "// expect_failure" "$1"; then + return 0 + else + return 1 + fi +} + # Some tests are expected to fail with a compilation error # // expect_comp_failure test_expect_comp_failure() { if grep -q "// expect_comp_failure" "$1"; then - return 1 - else return 0 + else + return 1 fi } @@ -163,15 +173,22 @@ test_timeout() { echo `sed -n -e "/\/\/ timeout: /p" "$1" | sed -e "s/^\/\/ timeout: //"` } -test_expect_failure_for_shell() { # file: $1 +test_failure_is_expected() { # file: $1 + if test_expect_failure "$1"; then + reason="Expected to fail" + return 0 + fi + failing_shells=$(test_expect_failure_for_shells "$1") for failing_shell in $failing_shells; do failing_shell_name=$(echo "$failing_shell" | sed 's/-.*//') failing_shell_version=$(echo "$failing_shell" | sed 's/.*-//') if [ "$failing_shell_name" = "$shell" ]; then # First match on the shell name, then on the version if any if [ -z "$failing_shell_version" ] || [ "$failing_shell_version" = "$failing_shell" ]; then + reason="Expected to fail on $failing_shell_name" return 0 # No version specified, match! 
elif shell_version "$shell" | grep -q -E "$failing_shell_version"; then + reason="Expected to fail on $failing_shell_name" return 0 # version matched! else return 1 # version didn't match! @@ -214,7 +231,7 @@ run_test() { # file_to_test: $1 failed_pnut_comp=0 # Flag to indicate if compilation failed expect_failed_comp=0 # Flag to indicate if compilation is expected to fail - test_expect_comp_failure "$file" || expect_failed_comp=1 + if test_expect_comp_failure "$file"; then expect_failed_comp=1; fi # Print file name before generating golden file so we know it's getting processed printf "$file: " @@ -269,8 +286,8 @@ run_test() { # file_to_test: $1 if [ $? -eq 0 ]; then # If the output matches the golden file echo "✅ Test passed" return 0 - elif test_expect_failure_for_shell "$file"; then - echo "⚠️ Test disabled for $shell" + elif test_failure_is_expected "$file"; then + echo "⚠️ Test disabled ($reason)" return 0 else echo "❌ Test failed" @@ -278,8 +295,8 @@ run_test() { # file_to_test: $1 echo "$diff_out" return 1 fi - elif test_expect_failure_for_shell "$file"; then - echo "⚠️ Test disabled for $shell" + elif test_failure_is_expected "$file"; then + echo "⚠️ Test disabled ($reason)" return 0 else echo "❌ Failed to run: $(cat "$dir/$filename.err")" From f042f772aa0d43b54426225dd94d5272c467068c Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Tue, 4 Feb 2025 12:22:12 -0500 Subject: [PATCH 48/89] Add tests --- .../_all/preprocessor/macro/self-reference.c | 45 +++++++++++++ .../preprocessor/macro/self-reference.golden | 9 +++ tests/_all/tcc-expansion.c | 67 +++++++++++++++++++ tests/_all/tcc-expansion.golden | 3 + .../self-reference-macro-with-macro-args.c | 31 +++++++++ ...elf-reference-macro-with-macro-args.golden | 2 + 6 files changed, 157 insertions(+) create mode 100644 tests/_all/preprocessor/macro/self-reference.c create mode 100644 tests/_all/preprocessor/macro/self-reference.golden create mode 100644 tests/_all/tcc-expansion.c create mode 100644 
tests/_all/tcc-expansion.golden create mode 100644 tests/_bug/self-reference-macro-with-macro-args.c create mode 100644 tests/_bug/self-reference-macro-with-macro-args.golden diff --git a/tests/_all/preprocessor/macro/self-reference.c b/tests/_all/preprocessor/macro/self-reference.c new file mode 100644 index 00000000..04859f9e --- /dev/null +++ b/tests/_all/preprocessor/macro/self-reference.c @@ -0,0 +1,45 @@ +// tests for recursion depth of macros + +// putchar +#include + +int test1 = 1; +int test2 = 2; +int test3 = 3; +int x = 4; +int y = 5; + +void putdigit(int n) { + putchar('0' + n); + putchar('\n'); +} + +void A(int a, int b) { + putdigit(a); + putdigit(b); +} + +void B(int a, int b) { + putdigit(a); + putdigit(b); +} + +#define test1 test1 +#define test2 test3 +#define test3 test2 +#define x 1 + y +#define y 1 + x +#define A(a1, a2) A(a1, a2) +#define B(a1, a2) C(a1, a2) +#define C(a1, a2) B(a1, a2) + +void main() { + putdigit(test1); + putdigit(test2); + putdigit(test3); + putdigit(x); + putdigit(y); + A(test1, test2); + A(test3, x); + // B(x, y); +} diff --git a/tests/_all/preprocessor/macro/self-reference.golden b/tests/_all/preprocessor/macro/self-reference.golden new file mode 100644 index 00000000..05578747 --- /dev/null +++ b/tests/_all/preprocessor/macro/self-reference.golden @@ -0,0 +1,9 @@ +1 +2 +3 +6 +7 +1 +2 +3 +6 diff --git a/tests/_all/tcc-expansion.c b/tests/_all/tcc-expansion.c new file mode 100644 index 00000000..b42fa383 --- /dev/null +++ b/tests/_all/tcc-expansion.c @@ -0,0 +1,67 @@ +#include + +#define DEF(id, str) str "\0" +#define DEF_ASM(x) DEF(TOK_ASM_ ## x, #x) + +#define DEF_ASMTEST(x,suffix) \ + DEF_ASM(x ## o ## suffix) \ + DEF_ASM(x ## no ## suffix) \ + DEF_ASM(x ## b ## suffix) \ + DEF_ASM(x ## c ## suffix) \ + DEF_ASM(x ## nae ## suffix) \ + DEF_ASM(x ## nb ## suffix) \ + DEF_ASM(x ## nc ## suffix) \ + DEF_ASM(x ## ae ## suffix) \ + DEF_ASM(x ## e ## suffix) \ + DEF_ASM(x ## z ## suffix) \ + DEF_ASM(x ## ne ## suffix) \ + 
DEF_ASM(x ## nz ## suffix) \ + DEF_ASM(x ## be ## suffix) \ + DEF_ASM(x ## na ## suffix) \ + DEF_ASM(x ## nbe ## suffix) \ + DEF_ASM(x ## a ## suffix) \ + DEF_ASM(x ## s ## suffix) \ + DEF_ASM(x ## ns ## suffix) \ + DEF_ASM(x ## p ## suffix) \ + DEF_ASM(x ## pe ## suffix) \ + DEF_ASM(x ## np ## suffix) \ + DEF_ASM(x ## po ## suffix) \ + DEF_ASM(x ## l ## suffix) \ + DEF_ASM(x ## nge ## suffix) \ + DEF_ASM(x ## nl ## suffix) \ + DEF_ASM(x ## ge ## suffix) \ + DEF_ASM(x ## le ## suffix) \ + DEF_ASM(x ## ng ## suffix) \ + DEF_ASM(x ## nle ## suffix) \ + DEF_ASM(x ## g ## suffix) + +const char tcc_keywords[] = + DEF_ASMTEST(j,) + "\0" + ; + +#define str str2 +#define T(X, Y) X ## Y + +void putstr(char* str) { + while (*str != '\0') { + putchar(*str); + str += 1; + } +} + +void main() { + char* str2 = "abcdef"; + char* kw = tcc_keywords; + while (*kw != '\0' || *(kw + 1) != '\0') { + if (*kw != 0) { + putchar(*kw); + } else { + putchar(' '); + } + kw += 1; + } + putchar('\n'); + putstr(T(str, 2)); putchar('\n'); + putstr(T(str2, )); putchar('\n'); +} diff --git a/tests/_all/tcc-expansion.golden b/tests/_all/tcc-expansion.golden new file mode 100644 index 00000000..b97235fd --- /dev/null +++ b/tests/_all/tcc-expansion.golden @@ -0,0 +1,3 @@ +jo jno jb jc jnae jnb jnc jae je jz jne jnz jbe jna jnbe ja js jns jp jpe jnp jpo jl jnge jnl jge jle jng jnle jg +abcdef +abcdef diff --git a/tests/_bug/self-reference-macro-with-macro-args.c b/tests/_bug/self-reference-macro-with-macro-args.c new file mode 100644 index 00000000..ccdf70eb --- /dev/null +++ b/tests/_bug/self-reference-macro-with-macro-args.c @@ -0,0 +1,31 @@ +// When a macro argument is expanded, tokens that are themselves macros are +// expanded. This is problematic when the macro argument contains a +// self-referencial macro, since each time the macro is passed as argument, it +// is expanded for one step, even when the macro should not be expanded because +// it was expanded previously. 
+ +// expect_failure + +#include + +int x = 4; +int y = 5; + +void putdigit(int n) { + putchar('0' + n); + putchar('\n'); +} + +void B(int a) { + putdigit(a); +} + +#define x 1 + y +#define y 1 + x +#define A(a1) B(a1) +#define B(a1) A(a1) + +void main() { + B(x); + B(y); +} diff --git a/tests/_bug/self-reference-macro-with-macro-args.golden b/tests/_bug/self-reference-macro-with-macro-args.golden new file mode 100644 index 00000000..baf19666 --- /dev/null +++ b/tests/_bug/self-reference-macro-with-macro-args.golden @@ -0,0 +1,2 @@ +6 +7 From 3ba0a5c9dfcf264dd01c6dddd50052a11c85c436 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Tue, 4 Feb 2025 12:45:26 -0500 Subject: [PATCH 49/89] Revert part of the changes to token pasting Upon reviewing the changes to the token pasting code, I realized that pasted tokens already pass through the "is this a macro" check, and so the changes in d4f87ce4e3be624d7d7ccb3d9de47d4594b7e226 were unnecessary. --- pnut.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/pnut.c b/pnut.c index a52e11a7..6a662aa9 100644 --- a/pnut.c +++ b/pnut.c @@ -1747,7 +1747,7 @@ int paste_integers(int left_val, int right_val) { } // Support token pasting between identifiers and non-negative integers -bool paste_tokens(int left_tok, int left_val) { +void paste_tokens(int left_tok, int left_val) { int right_tok; int right_val; expand_macro_arg = false; @@ -1759,7 +1759,7 @@ bool paste_tokens(int left_tok, int left_val) { if (get_macro_arg(val) == 0) { tok = left_tok; val = left_val; - return false; + return; } else { begin_macro_expansion(0, get_macro_arg(val), 0); // Play the tokens of the macro argument get_tok_macro(); @@ -1803,13 +1803,6 @@ bool paste_tokens(int left_tok, int left_val) { putstr("left_tok="); putint(left_tok); putstr(", right_tok="); putint(right_tok); putchar('\n'); syntax_error("cannot paste a non-identifier or non-integer"); } - - if (tok == MACRO) { - // If the result of the pasting 
is a macro, it is expanded - return attempt_macro_expansion(val); - } else { - return false; - } } void get_tok() { @@ -1855,11 +1848,7 @@ void get_tok() { } else { // macro_tok_lst is not empty because read_macro_tokens checked for trailing ## macro_tok_lst = cdr(macro_tok_lst); // Skip the ## - if (paste_tokens(tok, val)) { - continue; - } else { - break; - } + paste_tokens(tok, val); } } else if (macro_tok_lst == 0 && paste_last_token) { // We finished expanding the left-hand side of ## if (macro_stack_ix == 0) { @@ -1870,11 +1859,7 @@ void get_tok() { } return_to_parent_macro(); paste_last_token = false; // We are done pasting - if (paste_tokens(tok, val)) { - continue; - } else { - break; - } + paste_tokens(tok, val); } if (tok == MACRO) { // Nested macro expansion! From 6f5bf18896a4e56e9431f093c2417629494649be Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sun, 9 Feb 2025 22:19:44 -0500 Subject: [PATCH 50/89] parse_fun_def: Fix check that all params are named --- pnut.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pnut.c b/pnut.c index 6a662aa9..25c51022 100644 --- a/pnut.c +++ b/pnut.c @@ -2789,10 +2789,11 @@ void add_typedef(ast declarator) { } ast parse_fun_def(ast declarator) { - ast params = get_child_(DECL, declarator, 1); + ast fun_type = get_child__(DECL, '(', declarator, 1); + ast params = get_child_('(', fun_type, 1); // Check that the parameters are all named since declarator may be abstract - while (get_op(params) == ',') { + while (params != 0) { if (get_child_(DECL, get_child__(',', DECL, params, 0), 0) == 0) { parse_error("Parameter name expected", tok); } From 3515cf570a256f9d75ad6a1325020e390d563dbd Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 10 Feb 2025 12:46:21 -0500 Subject: [PATCH 51/89] Add LIST node type to replace ',' nodes ',' nodes were used to make lists, but that made it confusing since ',' is also used for the comma operator whose nodes had a slightly different shape (the last ',' 
always contained 2 non-null children). --- exe.c | 85 +++++++++++++++++++++++++++++----------------------------- pnut.c | 48 +++++++++++++++++---------------- sh.c | 85 ++++++++++++++++++++++++++++++---------------------------- 3 files changed, 112 insertions(+), 106 deletions(-) diff --git a/exe.c b/exe.c index 1bb3b742..a85aabf9 100644 --- a/exe.c +++ b/exe.c @@ -537,11 +537,11 @@ ast canonicalize_type(ast type) { ast res = type; int binding; - if (get_op(type) == STRUCT_KW && get_child_opt_(STRUCT_KW, ',', type, 2) == 0) { // struct with empty def => reference + if (get_op(type) == STRUCT_KW && get_child_opt_(STRUCT_KW, LIST, type, 2) == 0) { // struct with empty def => reference binding = cgc_lookup_struct(get_val_(IDENTIFIER, get_child__(STRUCT_KW, IDENTIFIER, type, 1)), cgc_globals); - } else if (get_op(type) == UNION_KW && get_child_opt_(UNION_KW, ',', type, 2) == 0) { // union with empty def => reference + } else if (get_op(type) == UNION_KW && get_child_opt_(UNION_KW, LIST, type, 2) == 0) { // union with empty def => reference binding = cgc_lookup_union(get_val_(IDENTIFIER, get_child__(UNION_KW, IDENTIFIER, type, 1)), cgc_globals); - } else if (get_op(type) == ENUM_KW && get_child_opt_(ENUM_KW, ',', type, 2) == 0) { // enum with empty def => reference + } else if (get_op(type) == ENUM_KW && get_child_opt_(ENUM_KW, LIST, type, 2) == 0) { // enum with empty def => reference binding = cgc_lookup_enum(get_val_(IDENTIFIER, get_child__(ENUM_KW, IDENTIFIER, type, 1)), cgc_globals); } else { return res; @@ -566,9 +566,9 @@ int struct_union_size(ast type) { type = canonicalize_type(type); members = get_child(type, 2); - while (get_op(members) == ',') { - member_type = get_child_(DECL, get_child__(',', DECL, members, 0), 1); - members = get_child_opt_(',', ',', members, 1); + while (members != 0) { + member_type = get_child_(DECL, get_child__(LIST, DECL, members, 0), 1); + members = get_child_opt_(LIST, LIST, members, 1); member_size = type_width(member_type, true, 
true); sum_size += member_size; // Struct size is the sum of its members if (member_size > max_size) max_size = member_size; // Union size is the max of its members @@ -586,7 +586,7 @@ int struct_member_offset_go(ast struct_type, ast member_ident) { ast decl, ident; while (members != 0) { - decl = get_child_opt_(',', DECL, members, 0); + decl = get_child_opt_(LIST, DECL, members, 0); ident = get_child_opt_(DECL, IDENTIFIER, decl, 0); if (ident == 0) { // Anonymous struct member, search that struct sub_offset = struct_member_offset_go(get_child_(DECL, decl, 1), member_ident); @@ -601,7 +601,7 @@ int struct_member_offset_go(ast struct_type, ast member_ident) { // final offset is not 0. offset += round_up_to_word_size(type_width(get_child_(DECL, decl, 1), true, true)); } - members = get_child_opt_(',', ',', members, 1); + members = get_child_opt_(LIST, LIST, members, 1); } return -1; @@ -619,7 +619,7 @@ ast struct_member_go(ast struct_type, ast member_ident) { ast decl, ident; while (members != 0) { - decl = get_child_opt_(',', DECL, members, 0); + decl = get_child_opt_(LIST, DECL, members, 0); ident = get_child_opt_(DECL, IDENTIFIER, decl, 0); if (ident == 0) { // Anonymous struct member, search that struct ident = struct_member_go(get_child_(DECL, decl, 1), member_ident); @@ -627,7 +627,7 @@ ast struct_member_go(ast struct_type, ast member_ident) { } else if (get_val_(IDENTIFIER, member_ident) == get_val_(IDENTIFIER, ident)) { return decl; } - members = get_child_opt_(',', ',', members, 1); + members = get_child_opt_(LIST, LIST, members, 1); } return -1; @@ -1476,7 +1476,8 @@ void handle_enum_struct_union_type_decl(ast type); void codegen_enum(ast node) { ast name = get_child_opt_(ENUM_KW, IDENTIFIER, node, 1); - ast cases = get_child_opt_(ENUM_KW, ',', node, 2); + ast cases = get_child_opt_(ENUM_KW, LIST, node, 2); + ast cas; int binding; if (name != 0 && cases != 0) { // if enum has a name and members (not a reference to an existing type) @@ -1485,9 +1486,10 @@ 
void codegen_enum(ast node) { cgc_add_typedef(get_val_(IDENTIFIER, name), BINDING_TYPE_ENUM, node); } - while (get_op(cases) == ',') { - cgc_add_enum(get_val_(IDENTIFIER, get_child__(',', IDENTIFIER, cases, 0)), get_child__(',', INTEGER, cases, 1)); - cases = get_child_opt_(',', ',', cases, 2); + while (cases != 0) { + cas = get_child_(LIST, cases, 0); + cgc_add_enum(get_val_(IDENTIFIER, get_child__('=', IDENTIFIER, cas, 0)), get_child__('=', INTEGER, cas, 1)); + cases = get_child_opt_(LIST, LIST, cases, 1); } } @@ -1509,8 +1511,8 @@ void codegen_struct_or_union(ast node, enum BINDING kind) { // This is not the right semantic because inner declarations are scoped to // this declaration, but it's probably good enough for TCC. while (members != 0) { - handle_enum_struct_union_type_decl(get_child_(DECL, get_child__(',', DECL, members, 0), 1)); - members = get_child_opt_(',', ',', members, 1); + handle_enum_struct_union_type_decl(get_child_(DECL, get_child__(LIST, DECL, members, 0), 1)); + members = get_child_opt_(LIST, LIST, members, 1); } } @@ -1580,9 +1582,9 @@ void codegen_initializer(bool local, ast init, ast type, int base_reg, int offse inner_type_width = type_width(get_child_('[', type, 0), true, false); while (init != 0 && arr_len != 0) { - codegen_initializer(local, get_child_(',', init, 0), inner_type, base_reg, offset, true); + codegen_initializer(local, get_child_(LIST, init, 0), inner_type, base_reg, offset, true); offset += inner_type_width; - init = get_child_opt_(',', ',', init, 1); + init = get_child_opt_(LIST, LIST, init, 1); arr_len -= 1; // decrement the number of elements left to initialize to make sure we don't overflow } @@ -1599,38 +1601,38 @@ void codegen_initializer(bool local, ast init, ast type, int base_reg, int offse case STRUCT_KW: members = get_child_(STRUCT_KW, type, 2); while (init != 0 && members != 0) { - inner_type = get_child_(DECL, get_child__(',', DECL, members, 0), 1); - codegen_initializer(local, get_child_(',', init, 0), 
inner_type, base_reg, offset, false); + inner_type = get_child_(DECL, get_child__(LIST, DECL, members, 0), 1); + codegen_initializer(local, get_child_(LIST, init, 0), inner_type, base_reg, offset, false); offset += type_width(inner_type, true, true); - init = get_child_opt_(',', ',', init, 1); - members = get_child_opt_(',', ',', members, 1); + init = get_child_opt_(LIST, LIST, init, 1); + members = get_child_opt_(LIST, LIST, members, 1); } // Initialize rest of the members to 0 while (local && members != 0) { - inner_type = get_child_(DECL, get_child__(',', DECL, members, 0), 1); + inner_type = get_child_(DECL, get_child__(LIST, DECL, members, 0), 1); initialize_memory(0, base_reg, offset, type_width(inner_type, true, true)); offset += type_width(inner_type, true, true); - members = get_child_opt_(',', ',', members, 1); + members = get_child_opt_(LIST, LIST, members, 1); } break; case UNION_KW: members = get_child_(STRUCT_KW, type, 2); - if (get_child_opt_(',', ',', init, 1) != 0) { + if (get_child_opt_(LIST, LIST, init, 1) != 0) { fatal_error("codegen_initializer: union initializer list has more than one element"); } else if (members == 0) { fatal_error("codegen_initializer: union has no members"); } - codegen_initializer(local, get_child_(',', init, 0), get_child_(DECL, get_child__(',', DECL, members, 0), 1), base_reg, offset, false); + codegen_initializer(local, get_child_(LIST, init, 0), get_child_(DECL, get_child__(LIST, DECL, members, 0), 1), base_reg, offset, false); break; default: - if (get_child_opt_(',', ',', init, 1) != 0 // More than 1 element - || get_op(get_child_(',', init, 0)) == INITIALIZER_LIST) { // Or nested initializer list + if (get_child_opt_(LIST, LIST, init, 1) != 0 // More than 1 element + || get_op(get_child_(LIST, init, 0)) == INITIALIZER_LIST) { // Or nested initializer list fatal_error("codegen_initializer: scalar initializer list has more than one element"); } - codegen_rvalue(get_child_(',', init, 0)); + 
codegen_rvalue(get_child_(LIST, init, 0)); pop_reg(reg_X); grow_fs(-1); write_mem_location(base_reg, offset, reg_X, type_width(type, true, !in_array)); @@ -1669,7 +1671,7 @@ int initializer_size(ast initializer) { initializer = get_child_(INITIALIZER_LIST, initializer, 0); while (initializer != 0) { size += 1; - initializer = get_child_opt_(',', ',', initializer, 1); + initializer = get_child_opt_(LIST, LIST, initializer, 1); } return size; @@ -1754,10 +1756,10 @@ void codegen_body(ast node) { while (node != 0) { stmt = get_child_('{', node, 0); if (get_op(stmt) == DECLS) { // Variable declaration - declarations = get_child__(DECLS, ',', stmt, 0); + declarations = get_child__(DECLS, LIST, stmt, 0); while (declarations != 0) { // Multiple variable declarations - codegen_local_var_decl(get_child__(',', DECL, declarations, 0)); - declarations = get_child_opt_(',', ',', declarations, 1); + codegen_local_var_decl(get_child__(LIST, DECL, declarations, 0)); + declarations = get_child_opt_(LIST, LIST, declarations, 1); } } else { codegen_statement(stmt); @@ -2000,14 +2002,14 @@ void add_params(ast params) { int ident; while (params != 0) { - decl = get_child__(',', DECL, params, 0); + decl = get_child__(LIST, DECL, params, 0); ident = get_val_(IDENTIFIER, get_child__(DECL, IDENTIFIER, decl, 0)); type = get_child_(DECL, decl, 1); if (cgc_lookup_var(ident, cgc_locals) != 0) fatal_error("add_params: duplicate parameter"); cgc_add_local_param(ident, type_width(type, false, true) / word_size, type); - params = get_child_opt_(',', ',', params, 1); + params = get_child_opt_(LIST, LIST, params, 1); } } @@ -2016,7 +2018,7 @@ void codegen_glo_fun_decl(ast node) { ast body = get_child_opt_(FUN_DECL, '{', node, 1); ast name_probe = get_val_(IDENTIFIER, get_child__(DECL, IDENTIFIER, decl, 0)); ast fun_type = get_child__(DECL, '(', decl, 1); - ast params = get_child_opt_('(', ',', fun_type, 1); + ast params = get_child_opt_('(', LIST, fun_type, 1); ast fun_return_type = get_child_('(', 
fun_type, 0); int binding; int save_locals_fun = cgc_locals_fun; @@ -2052,7 +2054,6 @@ void codegen_glo_fun_decl(ast node) { ret(); - cgc_locals_fun = save_locals_fun; } @@ -2060,8 +2061,8 @@ void codegen_glo_fun_decl(ast node) { // The only thing we need to do is to call handle_enum_struct_union_type_decl // on the type specifier, which is the same for all declarations. void handle_typedef(ast node) { - ast decls = get_child__(TYPEDEF_KW, ',', node, 0); - ast decl = get_child__(',', DECL, decls, 0); + ast decls = get_child__(TYPEDEF_KW, LIST, node, 0); + ast decl = get_child__(LIST, DECL, decls, 0); ast type = get_child_(DECL, decl, 1); handle_enum_struct_union_type_decl(get_type_specifier(type)); @@ -2072,10 +2073,10 @@ void codegen_glo_decl(ast node) { int op = get_op(node); if (op == DECLS) { - decls = get_child__(DECLS, ',', node, 0); // Declaration list + decls = get_child__(DECLS, LIST, node, 0); // Declaration list while (decls != 0) { // Multiple variable declarations - codegen_glo_var_decl(get_child__(',', DECL, decls, 0)); - decls = get_child_opt_(',', ',', decls, 1); // Next variable declaration + codegen_glo_var_decl(get_child__(LIST, DECL, decls, 0)); + decls = get_child_opt_(LIST, LIST, decls, 1); // Next variable declaration } } else if (op == FUN_DECL) { codegen_glo_fun_decl(node); diff --git a/pnut.c b/pnut.c index 25c51022..02f9f0f8 100644 --- a/pnut.c +++ b/pnut.c @@ -202,6 +202,8 @@ enum { IDENTIFIER = 500, TYPE = 501, MACRO = 502, + + LIST = 600, // List object }; void putstr(char *str) { @@ -2379,11 +2381,11 @@ ast parse_enum() { } if (result == 0) { - result = new_ast3(',', ident, value, 0); + result = new_ast2(LIST, new_ast2('=', ident, value), 0); tail = result; } else { - set_child(tail, 2, new_ast3(',', ident, value, 0)); - tail = get_child_(',', tail, 2); + set_child(tail, 1, new_ast2(LIST, new_ast2('=', ident, value), 0)); + tail = get_child_(LIST, tail, 1); } if (tok == ',') { @@ -2436,19 +2438,19 @@ ast parse_struct_or_union(int 
struct_or_union_tok) { decl = new_ast3(DECL, 0, type_specifier, 0); if (result == 0) { - tail = result = new_ast2(',', decl, 0); + tail = result = new_ast2(LIST, decl, 0); } else { - set_child(tail, 1, new_ast2(',', decl, 0)); - tail = get_child_(',', tail, 1); + set_child(tail, 1, new_ast2(LIST, decl, 0)); + tail = get_child_(LIST, tail, 1); } } else { while (1) { decl = parse_declarator(false, type_specifier); if (result == 0) { - tail = result = new_ast2(',', decl, 0); + tail = result = new_ast2(LIST, decl, 0); } else { - set_child(tail, 1, new_ast2(',', decl, 0)); - tail = get_child_(',', tail, 1); + set_child(tail, 1, new_ast2(LIST, decl, 0)); + tail = get_child_(LIST, tail, 1); } if (get_child_(DECL, decl, 1) == VOID_KW) parse_error("member with void type not allowed in struct/union", tok); @@ -2633,10 +2635,10 @@ int parse_param_list() { if (tok == ',') get_tok(); if (result == 0) { - tail = result = new_ast2(',', decl, 0); + tail = result = new_ast2(LIST, decl, 0); } else { - set_child(tail, 1, new_ast2(',', decl, 0)); - tail = get_child_(',', tail, 1); + set_child(tail, 1, new_ast2(LIST, decl, 0)); + tail = get_child_(LIST, tail, 1); } } @@ -2731,10 +2733,10 @@ ast parse_initializer_list() { if (tok == '{') fatal_error("nested initializer lists not supported"); #endif if (result == 0) { - tail = result = new_ast2(',', parse_initializer(), 0); + tail = result = new_ast2(LIST, parse_initializer(), 0); } else { - set_child(tail, 1, new_ast2(',', parse_initializer(), 0)); - tail = get_child_(',', tail, 1); + set_child(tail, 1, new_ast2(LIST, parse_initializer(), 0)); + tail = get_child_(LIST, tail, 1); } if (tok == ',') get_tok(); else break; @@ -2794,10 +2796,10 @@ ast parse_fun_def(ast declarator) { // Check that the parameters are all named since declarator may be abstract while (params != 0) { - if (get_child_(DECL, get_child__(',', DECL, params, 0), 0) == 0) { + if (get_child_(DECL, get_child__(LIST, DECL, params, 0), 0) == 0) { parse_error("Parameter 
name expected", tok); } - params = get_child_(',', params, 1); + params = get_child_(LIST, params, 1); } if (get_child_(DECL, declarator, 2) != 0) parse_error("Initializer not allowed in function definition", tok); return new_ast2(FUN_DECL, declarator, parse_compound_statement()); @@ -2831,15 +2833,15 @@ ast parse_declaration(bool local) { return parse_fun_def(declarator); } - declarators = new_ast2(',', declarator, 0); // Wrap the declarators in a list + declarators = new_ast2(LIST, declarator, 0); // Wrap the declarators in a list tail = declarators; // Otherwise, this is a variable or declaration while (tok != ';') { if (tok == ',') { get_tok(); - set_child(tail, 1, new_ast2(',', parse_declarator_and_initializer(type_specifier), 0)); - tail = get_child__(',', ',', tail, 1); + set_child(tail, 1, new_ast2(LIST, parse_declarator_and_initializer(type_specifier), 0)); + tail = get_child__(LIST, LIST, tail, 1); } else { parse_error("';' or ',' expected", tok); } @@ -2850,9 +2852,9 @@ ast parse_declaration(bool local) { // type table. 
if (get_child(type_specifier, 0) & MK_TYPE_SPECIFIER(TYPEDEF_KW)) { type_specifier = declarators; // Save declarators in type_specifier - while (get_op(declarators) == ',') { - add_typedef(get_child__(',', DECL, declarators, 0)); - declarators = get_child_opt_(',', ',', declarators, 1); + while (declarators != 0) { + add_typedef(get_child__(LIST, DECL, declarators, 0)); + declarators = get_child_opt_(LIST, LIST, declarators, 1); } result = new_ast1(TYPEDEF_KW, type_specifier); } else { diff --git a/sh.c b/sh.c index 443f5dc2..8b7e5280 100644 --- a/sh.c +++ b/sh.c @@ -516,8 +516,8 @@ void add_var_to_local_env(ast decl, enum BINDING kind) { void add_fun_params_to_local_env(ast lst) { while (lst != 0) { - add_var_to_local_env(get_child__(',', DECL, lst, 0), BINDING_PARAM_LOCAL); - lst = get_child_opt_(',', ',', lst, 1); + add_var_to_local_env(get_child__(LIST, DECL, lst, 0), BINDING_PARAM_LOCAL); + lst = get_child_opt_(LIST, LIST, lst, 1); } } @@ -566,8 +566,8 @@ void assert_var_decl_is_safe(ast variable, bool local) { // Helper function for void check_decls(ast lst) { while (lst != 0) { - assert_var_decl_is_safe(get_child__(',', DECL, lst, 0), true); - lst = get_child_(',', lst, 1); + assert_var_decl_is_safe(get_child__(LIST, DECL, lst, 0), true); + lst = get_child_opt_(LIST, LIST, lst, 1); } } @@ -659,9 +659,9 @@ text let_params(int params) { while (params != 0) { // TODO: Constant param optimization - ident = get_child__(DECL, IDENTIFIER, get_child__(',', DECL, params, 0), 0); + ident = get_child__(DECL, IDENTIFIER, get_child__(LIST, DECL, params, 0), 0); res = concatenate_strings_with(res, string_concat4(wrap_str_lit("let "), env_var_with_prefix(ident, false), wrap_char(' '), format_special_var(new_ast0(IDENTIFIER_DOLLAR, params_ix), false)), wrap_str_lit("; ")); - params = get_child_opt_(',', ',', params, 1); + params = get_child_opt_(LIST, LIST, params, 1); params_ix += 1; } @@ -864,7 +864,7 @@ ast handle_fun_call_side_effect(ast node, ast assign_to, bool 
executes_condition sub1 = node; // For 1 param, the parent node is the fun call node // If there are 2 or more params, we traverse the ',' nodes ... while (get_op(sub2) == ',') { - sub1 = sub2;; // .. and the parent node is the ',' node + sub1 = sub2; // .. and the parent node is the ',' node set_child(sub1, 0, handle_side_effects_go(get_child_(',', sub2, 0), executes_conditionally)); sub2 = get_child_(',', sub2, 1); } @@ -877,7 +877,7 @@ ast handle_fun_call_side_effect(ast node, ast assign_to, bool executes_condition gensym_ix = start_gensym_ix; sub1 = new_ast2('=', assign_to, node); - sub1 = new_ast2(',', sub1, 0); + sub1 = new_ast2(LIST, sub1, 0); if (executes_conditionally) { if (conditional_fun_calls == 0) { conditional_fun_calls = sub1; } else { set_child(conditional_fun_calls_tail, 1, sub1); } @@ -915,7 +915,7 @@ ast handle_side_effects_go(ast node, bool executes_conditionally) { } else if (op == STRING) { /* We must initialize strings before the expression */ sub1 = fresh_string_ident(get_val_(STRING, node)); - literals_inits = new_ast2(',', new_ast2('=', sub1, get_val_(STRING, node)), literals_inits); + literals_inits = new_ast2(LIST, new_ast2('=', sub1, get_val_(STRING, node)), literals_inits); return sub1; } else { printf("op=%d %c", op, op); @@ -1039,7 +1039,7 @@ int initializer_list_len(ast node) { // Each element of the list has size 1 since nested initializers are not allowed while (node != 0) { res += 1; - node = get_child_(',', node, 1); + node = get_child_(LIST, node, 1); } return res; @@ -1053,7 +1053,7 @@ text comp_initializer_list(ast initializer_list, int expected_len) { runtime_use_initialize = true; while (initializer_list != 0) { - element = get_child_(',', initializer_list, 0); + element = get_child_(LIST, initializer_list, 0); switch (get_op(element)) { case INTEGER: args = concatenate_strings_with(args, wrap_int(-get_val_(INTEGER, element)), wrap_char(' ')); @@ -1071,7 +1071,7 @@ text comp_initializer_list(ast initializer_list, int 
expected_len) { // TODO: Support nested initializers and constant expressions fatal_error("comp_initializer: unexpected operator"); } - initializer_list = get_child_opt_(',', ',', initializer_list, 1); + initializer_list = get_child_opt_(LIST, LIST, initializer_list, 1); } return args; @@ -1089,12 +1089,12 @@ text with_prefixed_side_effects(ast test_side_effects, text code) { ast side_effect; while (test_side_effects != 0) { - side_effect = get_child__(',', '=', test_side_effects, 0); + side_effect = get_child__(LIST, '=', test_side_effects, 0); test_side_effects_code = string_concat3(test_side_effects_code, comp_fun_call_code(get_child_('=', side_effect, 1), get_child_('=', side_effect, 0)), wrap_str_lit("; ")); - test_side_effects = get_child_(',', test_side_effects, 1); + test_side_effects = get_child_(LIST, test_side_effects, 1); } if (test_side_effects_code != 0) { return string_concat4(wrap_str_lit("{ "), test_side_effects_code, code, wrap_str_lit("; }")); @@ -1353,9 +1353,9 @@ text comp_rvalue(ast node, int context) { fun_call_decl_start = glo_decl_ix; while (literals_inits != 0) { - side_effect = get_child__(',', '=', literals_inits, 0); + side_effect = get_child__(LIST, '=', literals_inits, 0); comp_defstr(get_child_('=', side_effect, 0), get_child_('=', side_effect, 1), -1); - literals_inits = get_child_opt_(',', ',', literals_inits, 1); + literals_inits = get_child_opt_(LIST, LIST, literals_inits, 1); } // We don't want to call defstr on every iteration, so we only capture fun @@ -1366,9 +1366,9 @@ text comp_rvalue(ast node, int context) { fun_call_decl_start = glo_decl_ix; while (replaced_fun_calls2 != 0) { - side_effect = get_child__(',', '=', replaced_fun_calls2, 0); + side_effect = get_child__(LIST, '=', replaced_fun_calls2, 0); comp_fun_call(get_child_('=', side_effect, 1), get_child_('=', side_effect, 0)); - replaced_fun_calls2 = get_child_opt_(',', ',', replaced_fun_calls2, 1); + replaced_fun_calls2 = get_child_opt_(LIST, LIST, 
replaced_fun_calls2, 1); } // When compiling a test, we place the function side effects inline with the condition. @@ -2020,7 +2020,7 @@ bool comp_return(ast return_value) { append_glo_decl(wrap_str_lit("break")); } } else if (!in_tail_position) { - rest_loc_var_fixups = new_ast2(',', append_glo_decl_fixup(), rest_loc_var_fixups); + rest_loc_var_fixups = new_ast2(LIST, append_glo_decl_fixup(), rest_loc_var_fixups); append_glo_decl(wrap_str_lit("return")); } @@ -2030,10 +2030,10 @@ bool comp_return(ast return_value) { void comp_var_decls(ast node) { ast var_decl; - node = get_child_opt_(DECLS, ',', node, 0); + node = get_child_opt_(DECLS, LIST, node, 0); while (node != 0) { // Add to local env and cummulative env, then initialize - var_decl = get_child__(',', DECL, node, 0); + var_decl = get_child__(LIST, DECL, node, 0); assert_var_decl_is_safe(var_decl, true); add_var_to_local_env(var_decl, BINDING_VAR_LOCAL); if (get_child_(DECL, var_decl, 2) != 0) { // Initializer @@ -2044,7 +2044,7 @@ void comp_var_decls(ast node) { comp_assignment(new_ast0(IDENTIFIER, get_child__(DECL, IDENTIFIER, var, 0)), new_ast0(INTEGER, 0)); } #endif - node = get_child_opt_(',', ',', node, 1); // Next variable + node = get_child_opt_(LIST, LIST, node, 1); // Next variable } } @@ -2134,7 +2134,7 @@ void comp_glo_fun_decl(ast node) { ast body = get_child_opt_(FUN_DECL, '{', node, 1); ast name_probe = get_val_(IDENTIFIER, get_child__(DECL, IDENTIFIER, decl, 0)); ast fun_type = get_child__(DECL, '(', decl, 1); - ast params = get_child_opt_('(', ',', fun_type, 1); + ast params = get_child_opt_('(', LIST, fun_type, 1); text trailing_txt = 0; int params_ix = 2; // Start at 2 because $1 is assigned to the return location ast var; @@ -2165,9 +2165,9 @@ void comp_glo_fun_decl(ast node) { if (trailing_txt == 0) { // Show the mapping between the function parameters and $1, $2, etc. 
while (params != 0) { - var = get_child__(',', DECL, params, 0); + var = get_child__(LIST, DECL, params, 0); trailing_txt = concatenate_strings_with(trailing_txt, string_concat3(wrap_str_pool(probe_string(get_val_(IDENTIFIER, get_child_(DECL, var, 0)))), wrap_str_lit(": $"), wrap_int(params_ix)), wrap_str_lit(", ")); - params = get_child_(',', params, 1); + params = get_child_opt_(LIST, LIST, params, 1); params_ix += 1; } if (trailing_txt != 0) trailing_txt = string_concat(wrap_str_lit(" # "), trailing_txt); @@ -2187,14 +2187,14 @@ void comp_glo_fun_decl(ast node) { #ifndef SH_INITIALIZE_PARAMS_WITH_LET // Initialize parameters - params = get_child_opt_('(', ',', fun_type, 1); // Reload params because params is now = 0 + params = get_child_opt_('(', LIST, fun_type, 1); // Reload params because params is now = 0 params_ix = 2; while (params != 0) { - var = get_child__(',', DECL, params, 0); + var = get_child__(LIST, DECL, params, 0); // TODO: Constant param optimization // Constant parameters don't need to be initialized comp_assignment(get_child_(DECL, var, 0), new_ast0(IDENTIFIER_DOLLAR, params_ix)); - params = get_child_opt_(',', ',', params, 1); + params = get_child_opt_(LIST, LIST, params, 1); params_ix += 1; } #endif @@ -2210,8 +2210,8 @@ void comp_glo_fun_decl(ast node) { // So we fixup the calls to save_vars and unsave_vars at the end. 
fixup_glo_decl(save_loc_vars_fixup, save_local_vars()); while (rest_loc_var_fixups != 0) { - fixup_glo_decl(get_child_(',', rest_loc_var_fixups, 0), restore_local_vars(params_ix - 1)); - rest_loc_var_fixups = get_child_opt_(',', ',', rest_loc_var_fixups, 1); + fixup_glo_decl(get_child_(LIST, rest_loc_var_fixups, 0), restore_local_vars(params_ix - 1)); + rest_loc_var_fixups = get_child_opt_(LIST, LIST, rest_loc_var_fixups, 1); } // functions cannot be empty so we insert ':' if it's empty @@ -2310,14 +2310,17 @@ void comp_assignment_constant(text constant_name, ast rhs) { // Since anything that's not a local variable is considered global, this makes // it easy to implement enums. void comp_enum_cases(ast ident, ast cases) { + ast cas; if (ident != 0) { append_glo_decl(string_concat3(wrap_str_lit("# "), wrap_str_pool(probe_string(get_val_(IDENTIFIER, ident))), wrap_str_lit(" enum declaration"))); } else { append_glo_decl(wrap_str_lit("# Enum declaration")); } - while (get_op(cases) == ',') { - comp_assignment_constant(env_var(get_child__(',', IDENTIFIER, cases, 0)), get_child_(',', cases, 1)); - cases = get_child_opt_(',', ',', cases, 2); + + while (cases != 0) { + cas = get_child__(LIST, '=', cases, 0); + comp_assignment_constant(env_var(get_child__('=', IDENTIFIER, cas, 0)), get_child_('=', cas, 1)); + cases = get_child_opt_(LIST, LIST, cases, 1); } } @@ -2357,8 +2360,8 @@ void comp_struct(ast ident, ast members) { } else { append_glo_decl(wrap_str_lit("# Struct member declarations")); } - while (get_op(members) == ',') { - decl = get_child__(',', DECL, members, 0); + while (members != 0) { + decl = get_child__(LIST, DECL, members, 0); field_type = get_child_(DECL, decl, 1); // Arrays and struct value types are not supported for now. 
// When we have type information on the local and global variables, we'll @@ -2368,7 +2371,7 @@ void comp_struct(ast ident, ast members) { } comp_assignment_constant(struct_member_var(get_child_opt_(DECL, IDENTIFIER, decl, 0)), offset); - members = get_child_opt_(',', ',', members, 1); + members = get_child_opt_(LIST, LIST, members, 1); set_val(offset, get_val_(INTEGER, offset) - 1); } @@ -2395,8 +2398,8 @@ void handle_enum_struct_union_type_decl(ast type) { // The only thing we need to do is to call handle_enum_struct_union_type_decl // on the type specifier. void handle_typedef(ast node) { - ast decls = get_child__(TYPEDEF_KW, ',', node, 0); - ast decl = get_child__(',', DECL, decls, 0); + ast decls = get_child__(TYPEDEF_KW, LIST, node, 0); + ast decl = get_child__(LIST, DECL, decls, 0); ast type = get_child_(DECL, decl, 1); handle_enum_struct_union_type_decl(get_type_specifier(type)); @@ -2419,10 +2422,10 @@ void comp_glo_decl(ast node) { if (op == '=') { // Assignments comp_assignment(get_child_('=', node, 0), get_child_('=', node, 1)); } else if (op == DECLS) { // Variable declarations - declarations = get_child__(DECLS, ',', node, 0); + declarations = get_child__(DECLS, LIST, node, 0); while (declarations != 0) { // Multiple variable declarations - comp_glo_var_decl(get_child__(',', DECL, declarations, 0)); - declarations = get_child_opt_(',', ',', declarations, 1); + comp_glo_var_decl(get_child__(LIST, DECL, declarations, 0)); + declarations = get_child_opt_(LIST, LIST, declarations, 1); } } else if (op == FUN_DECL) { comp_glo_fun_decl(node); From 3831bea2091f9f66272166e7e4e384e8421a1ac1 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 10 Feb 2025 13:23:05 -0500 Subject: [PATCH 52/89] Use cons/car/cdr to manipulate lists --- exe.c | 62 ++++++++++++++++++++++++++--------------------------- pnut.c | 67 ++++++++++++++++++++++------------------------------------ sh.c | 67 +++++++++++++++++++++++++++++----------------------------- 3 files changed, 89 
insertions(+), 107 deletions(-) diff --git a/exe.c b/exe.c index a85aabf9..9a4d6078 100644 --- a/exe.c +++ b/exe.c @@ -567,8 +567,8 @@ int struct_union_size(ast type) { members = get_child(type, 2); while (members != 0) { - member_type = get_child_(DECL, get_child__(LIST, DECL, members, 0), 1); - members = get_child_opt_(LIST, LIST, members, 1); + member_type = get_child_(DECL, car_(DECL, members), 1); + members = tail(members); member_size = type_width(member_type, true, true); sum_size += member_size; // Struct size is the sum of its members if (member_size > max_size) max_size = member_size; // Union size is the max of its members @@ -586,7 +586,7 @@ int struct_member_offset_go(ast struct_type, ast member_ident) { ast decl, ident; while (members != 0) { - decl = get_child_opt_(LIST, DECL, members, 0); + decl = car_(DECL, members); ident = get_child_opt_(DECL, IDENTIFIER, decl, 0); if (ident == 0) { // Anonymous struct member, search that struct sub_offset = struct_member_offset_go(get_child_(DECL, decl, 1), member_ident); @@ -601,7 +601,7 @@ int struct_member_offset_go(ast struct_type, ast member_ident) { // final offset is not 0. 
offset += round_up_to_word_size(type_width(get_child_(DECL, decl, 1), true, true)); } - members = get_child_opt_(LIST, LIST, members, 1); + members = tail(members); } return -1; @@ -619,7 +619,7 @@ ast struct_member_go(ast struct_type, ast member_ident) { ast decl, ident; while (members != 0) { - decl = get_child_opt_(LIST, DECL, members, 0); + decl = car_(DECL, members); ident = get_child_opt_(DECL, IDENTIFIER, decl, 0); if (ident == 0) { // Anonymous struct member, search that struct ident = struct_member_go(get_child_(DECL, decl, 1), member_ident); @@ -627,7 +627,7 @@ ast struct_member_go(ast struct_type, ast member_ident) { } else if (get_val_(IDENTIFIER, member_ident) == get_val_(IDENTIFIER, ident)) { return decl; } - members = get_child_opt_(LIST, LIST, members, 1); + members = tail(members); } return -1; @@ -1487,9 +1487,9 @@ void codegen_enum(ast node) { } while (cases != 0) { - cas = get_child_(LIST, cases, 0); + cas = car_('=', cases); cgc_add_enum(get_val_(IDENTIFIER, get_child__('=', IDENTIFIER, cas, 0)), get_child__('=', INTEGER, cas, 1)); - cases = get_child_opt_(LIST, LIST, cases, 1); + cases = tail(cases); } } @@ -1511,8 +1511,8 @@ void codegen_struct_or_union(ast node, enum BINDING kind) { // This is not the right semantic because inner declarations are scoped to // this declaration, but it's probably good enough for TCC. 
while (members != 0) { - handle_enum_struct_union_type_decl(get_child_(DECL, get_child__(LIST, DECL, members, 0), 1)); - members = get_child_opt_(LIST, LIST, members, 1); + handle_enum_struct_union_type_decl(get_child_(DECL, car_(DECL, members), 1)); + members = tail(members); } } @@ -1582,9 +1582,9 @@ void codegen_initializer(bool local, ast init, ast type, int base_reg, int offse inner_type_width = type_width(get_child_('[', type, 0), true, false); while (init != 0 && arr_len != 0) { - codegen_initializer(local, get_child_(LIST, init, 0), inner_type, base_reg, offset, true); + codegen_initializer(local, car(init), inner_type, base_reg, offset, true); offset += inner_type_width; - init = get_child_opt_(LIST, LIST, init, 1); + init = tail(init); arr_len -= 1; // decrement the number of elements left to initialize to make sure we don't overflow } @@ -1601,38 +1601,38 @@ void codegen_initializer(bool local, ast init, ast type, int base_reg, int offse case STRUCT_KW: members = get_child_(STRUCT_KW, type, 2); while (init != 0 && members != 0) { - inner_type = get_child_(DECL, get_child__(LIST, DECL, members, 0), 1); - codegen_initializer(local, get_child_(LIST, init, 0), inner_type, base_reg, offset, false); + inner_type = get_child_(DECL, car_(DECL, members), 1); + codegen_initializer(local, car(init), inner_type, base_reg, offset, false); offset += type_width(inner_type, true, true); - init = get_child_opt_(LIST, LIST, init, 1); - members = get_child_opt_(LIST, LIST, members, 1); + init = tail(init); + members = tail(members); } // Initialize rest of the members to 0 while (local && members != 0) { - inner_type = get_child_(DECL, get_child__(LIST, DECL, members, 0), 1); + inner_type = get_child_(DECL, car_(DECL, members), 1); initialize_memory(0, base_reg, offset, type_width(inner_type, true, true)); offset += type_width(inner_type, true, true); - members = get_child_opt_(LIST, LIST, members, 1); + members = tail(members); } break; case UNION_KW: members = 
get_child_(STRUCT_KW, type, 2); - if (get_child_opt_(LIST, LIST, init, 1) != 0) { + if (tail(init) != 0) { fatal_error("codegen_initializer: union initializer list has more than one element"); } else if (members == 0) { fatal_error("codegen_initializer: union has no members"); } - codegen_initializer(local, get_child_(LIST, init, 0), get_child_(DECL, get_child__(LIST, DECL, members, 0), 1), base_reg, offset, false); + codegen_initializer(local, car(init), get_child_(DECL, car_(DECL, members), 1), base_reg, offset, false); break; default: - if (get_child_opt_(LIST, LIST, init, 1) != 0 // More than 1 element - || get_op(get_child_(LIST, init, 0)) == INITIALIZER_LIST) { // Or nested initializer list + if (tail(init) != 0 // More than 1 element + || get_op(car(init)) == INITIALIZER_LIST) { // Or nested initializer list fatal_error("codegen_initializer: scalar initializer list has more than one element"); } - codegen_rvalue(get_child_(LIST, init, 0)); + codegen_rvalue(car(init)); pop_reg(reg_X); grow_fs(-1); write_mem_location(base_reg, offset, reg_X, type_width(type, true, !in_array)); @@ -1671,7 +1671,7 @@ int initializer_size(ast initializer) { initializer = get_child_(INITIALIZER_LIST, initializer, 0); while (initializer != 0) { size += 1; - initializer = get_child_opt_(LIST, LIST, initializer, 1); + initializer = tail(initializer); } return size; @@ -1758,8 +1758,8 @@ void codegen_body(ast node) { if (get_op(stmt) == DECLS) { // Variable declaration declarations = get_child__(DECLS, LIST, stmt, 0); while (declarations != 0) { // Multiple variable declarations - codegen_local_var_decl(get_child__(LIST, DECL, declarations, 0)); - declarations = get_child_opt_(LIST, LIST, declarations, 1); + codegen_local_var_decl(car_(DECL, declarations)); + declarations = tail(declarations); } } else { codegen_statement(stmt); @@ -2002,14 +2002,14 @@ void add_params(ast params) { int ident; while (params != 0) { - decl = get_child__(LIST, DECL, params, 0); + decl = car_(DECL, 
params); ident = get_val_(IDENTIFIER, get_child__(DECL, IDENTIFIER, decl, 0)); type = get_child_(DECL, decl, 1); if (cgc_lookup_var(ident, cgc_locals) != 0) fatal_error("add_params: duplicate parameter"); cgc_add_local_param(ident, type_width(type, false, true) / word_size, type); - params = get_child_opt_(LIST, LIST, params, 1); + params = tail(params); } } @@ -2062,7 +2062,7 @@ void codegen_glo_fun_decl(ast node) { // on the type specifier, which is the same for all declarations. void handle_typedef(ast node) { ast decls = get_child__(TYPEDEF_KW, LIST, node, 0); - ast decl = get_child__(LIST, DECL, decls, 0); + ast decl = car_(DECL, decls); ast type = get_child_(DECL, decl, 1); handle_enum_struct_union_type_decl(get_type_specifier(type)); @@ -2075,8 +2075,8 @@ void codegen_glo_decl(ast node) { if (op == DECLS) { decls = get_child__(DECLS, LIST, node, 0); // Declaration list while (decls != 0) { // Multiple variable declarations - codegen_glo_var_decl(get_child__(LIST, DECL, decls, 0)); - decls = get_child_opt_(LIST, LIST, decls, 1); // Next variable declaration + codegen_glo_var_decl(car_(DECL, decls)); + decls = tail(decls); // Next variable declaration } } else if (op == FUN_DECL) { codegen_glo_fun_decl(node); diff --git a/pnut.c b/pnut.c index 02f9f0f8..a600556a 100644 --- a/pnut.c +++ b/pnut.c @@ -290,34 +290,6 @@ int alloc_obj(int size) { return alloc_result; } -int cons(int child0, int child1) { - - int result = alloc_obj(2); - - heap[result] = child0; - heap[result+1] = child1; - - return result; -} - -int car(int pair) { - return heap[pair]; -} - -int cdr(int pair) { - return heap[pair+1]; -} - -int set_car(int pair, int value) { - heap[pair] = value; - return value; -} - -int set_cdr(int pair, int value) { - heap[pair+1] = value; - return value; -} - int get_op(ast node) { return heap[node] & 1023; } @@ -523,6 +495,17 @@ ast clone_ast(ast orig) { return ast_result; } +// TODO: Use macro to avoid indirection? +// Functions used to create and access lists. 
+ast cons(int child0, int child1) { return new_ast2(LIST, child0, child1); } +ast car(int pair) { return get_child_(LIST, pair, 0); } +ast car_(int expected_op, int pair) { return get_child__(LIST, expected_op, pair, 0); } +ast cdr(int pair) { return get_child_(LIST, pair, 1); } +ast cdr_(int expected_op, int pair) { return get_child_opt_(LIST, expected_op, pair, 1); } +void set_car(int pair, int value) { return set_child(pair, 0, value); } +void set_cdr(int pair, int value) { return set_child(pair, 1, value); } +#define tail(x) cdr_(LIST, x) + // Simple accessor to get the string from the string pool #define STRING_BUF(string_val) (string_pool + heap[string_val+1]) #define STRING_LEN(string_val) (heap[string_val+4]) @@ -1060,7 +1043,7 @@ int read_macro_tokens(int args) { tail = toks; get_tok_macro(); while (tok != '\n' && tok != EOF) { - heap[tail + 1] = cons(lookup_macro_token(args, tok, val), 0); + set_cdr(tail, cons(lookup_macro_token(args, tok, val), 0)); tail = cdr(tail); // Advance tail get_tok_macro(); } @@ -1563,7 +1546,7 @@ int macro_parse_argument() { arg_tokens = cons(cons(tok, val), 0); tail = arg_tokens; } else { - heap[tail + 1] = cons(cons(tok, val), 0); + set_cdr(tail, cons(cons(tok, val), 0)); tail = cdr(tail); } get_tok_macro_expand(); @@ -2381,10 +2364,10 @@ ast parse_enum() { } if (result == 0) { - result = new_ast2(LIST, new_ast2('=', ident, value), 0); + result = cons(new_ast2('=', ident, value), 0); tail = result; } else { - set_child(tail, 1, new_ast2(LIST, new_ast2('=', ident, value), 0)); + set_child(tail, 1, cons(new_ast2('=', ident, value), 0)); tail = get_child_(LIST, tail, 1); } @@ -2438,18 +2421,18 @@ ast parse_struct_or_union(int struct_or_union_tok) { decl = new_ast3(DECL, 0, type_specifier, 0); if (result == 0) { - tail = result = new_ast2(LIST, decl, 0); + tail = result = cons(decl, 0); } else { - set_child(tail, 1, new_ast2(LIST, decl, 0)); + set_child(tail, 1, cons(decl, 0)); tail = get_child_(LIST, tail, 1); } } else { while 
(1) { decl = parse_declarator(false, type_specifier); if (result == 0) { - tail = result = new_ast2(LIST, decl, 0); + tail = result = cons(decl, 0); } else { - set_child(tail, 1, new_ast2(LIST, decl, 0)); + set_child(tail, 1, cons(decl, 0)); tail = get_child_(LIST, tail, 1); } @@ -2635,9 +2618,9 @@ int parse_param_list() { if (tok == ',') get_tok(); if (result == 0) { - tail = result = new_ast2(LIST, decl, 0); + tail = result = cons(decl, 0); } else { - set_child(tail, 1, new_ast2(LIST, decl, 0)); + set_child(tail, 1, cons(decl, 0)); tail = get_child_(LIST, tail, 1); } } @@ -2733,9 +2716,9 @@ ast parse_initializer_list() { if (tok == '{') fatal_error("nested initializer lists not supported"); #endif if (result == 0) { - tail = result = new_ast2(LIST, parse_initializer(), 0); + tail = result = cons(parse_initializer(), 0); } else { - set_child(tail, 1, new_ast2(LIST, parse_initializer(), 0)); + set_child(tail, 1, cons(parse_initializer(), 0)); tail = get_child_(LIST, tail, 1); } if (tok == ',') get_tok(); @@ -2833,14 +2816,14 @@ ast parse_declaration(bool local) { return parse_fun_def(declarator); } - declarators = new_ast2(LIST, declarator, 0); // Wrap the declarators in a list + declarators = cons(declarator, 0); // Wrap the declarators in a list tail = declarators; // Otherwise, this is a variable or declaration while (tok != ';') { if (tok == ',') { get_tok(); - set_child(tail, 1, new_ast2(LIST, parse_declarator_and_initializer(type_specifier), 0)); + set_child(tail, 1, cons(parse_declarator_and_initializer(type_specifier), 0)); tail = get_child__(LIST, LIST, tail, 1); } else { parse_error("';' or ',' expected", tok); diff --git a/sh.c b/sh.c index 8b7e5280..43cf60db 100644 --- a/sh.c +++ b/sh.c @@ -516,8 +516,8 @@ void add_var_to_local_env(ast decl, enum BINDING kind) { void add_fun_params_to_local_env(ast lst) { while (lst != 0) { - add_var_to_local_env(get_child__(LIST, DECL, lst, 0), BINDING_PARAM_LOCAL); - lst = get_child_opt_(LIST, LIST, lst, 1); + 
add_var_to_local_env(car_(DECL, lst), BINDING_PARAM_LOCAL); + lst = tail(lst); } } @@ -566,8 +566,8 @@ void assert_var_decl_is_safe(ast variable, bool local) { // Helper function for void check_decls(ast lst) { while (lst != 0) { - assert_var_decl_is_safe(get_child__(LIST, DECL, lst, 0), true); - lst = get_child_opt_(LIST, LIST, lst, 1); + assert_var_decl_is_safe(car_(DECL, lst), true); + lst = tail(lst); } } @@ -659,9 +659,9 @@ text let_params(int params) { while (params != 0) { // TODO: Constant param optimization - ident = get_child__(DECL, IDENTIFIER, get_child__(LIST, DECL, params, 0), 0); + ident = get_child__(DECL, IDENTIFIER, car_(DECL, params), 0); res = concatenate_strings_with(res, string_concat4(wrap_str_lit("let "), env_var_with_prefix(ident, false), wrap_char(' '), format_special_var(new_ast0(IDENTIFIER_DOLLAR, params_ix), false)), wrap_str_lit("; ")); - params = get_child_opt_(LIST, LIST, params, 1); + params = tail(params); params_ix += 1; } @@ -876,8 +876,7 @@ ast handle_fun_call_side_effect(ast node, ast assign_to, bool executes_condition // reused after the function call, so resetting the gensym counter. 
gensym_ix = start_gensym_ix; - sub1 = new_ast2('=', assign_to, node); - sub1 = new_ast2(LIST, sub1, 0); + sub1 = cons(new_ast2('=', assign_to, node), 0); if (executes_conditionally) { if (conditional_fun_calls == 0) { conditional_fun_calls = sub1; } else { set_child(conditional_fun_calls_tail, 1, sub1); } @@ -915,7 +914,7 @@ ast handle_side_effects_go(ast node, bool executes_conditionally) { } else if (op == STRING) { /* We must initialize strings before the expression */ sub1 = fresh_string_ident(get_val_(STRING, node)); - literals_inits = new_ast2(LIST, new_ast2('=', sub1, get_val_(STRING, node)), literals_inits); + literals_inits = cons(new_ast2('=', sub1, get_val_(STRING, node)), literals_inits); return sub1; } else { printf("op=%d %c", op, op); @@ -1039,7 +1038,7 @@ int initializer_list_len(ast node) { // Each element of the list has size 1 since nested initializers are not allowed while (node != 0) { res += 1; - node = get_child_(LIST, node, 1); + node = tail(node); } return res; @@ -1053,7 +1052,7 @@ text comp_initializer_list(ast initializer_list, int expected_len) { runtime_use_initialize = true; while (initializer_list != 0) { - element = get_child_(LIST, initializer_list, 0); + element = car(initializer_list); switch (get_op(element)) { case INTEGER: args = concatenate_strings_with(args, wrap_int(-get_val_(INTEGER, element)), wrap_char(' ')); @@ -1071,7 +1070,7 @@ text comp_initializer_list(ast initializer_list, int expected_len) { // TODO: Support nested initializers and constant expressions fatal_error("comp_initializer: unexpected operator"); } - initializer_list = get_child_opt_(LIST, LIST, initializer_list, 1); + initializer_list = tail(initializer_list); } return args; @@ -1089,12 +1088,12 @@ text with_prefixed_side_effects(ast test_side_effects, text code) { ast side_effect; while (test_side_effects != 0) { - side_effect = get_child__(LIST, '=', test_side_effects, 0); + side_effect = car_('=', test_side_effects); test_side_effects_code = 
string_concat3(test_side_effects_code, comp_fun_call_code(get_child_('=', side_effect, 1), get_child_('=', side_effect, 0)), wrap_str_lit("; ")); - test_side_effects = get_child_(LIST, test_side_effects, 1); + test_side_effects = tail(test_side_effects); } if (test_side_effects_code != 0) { return string_concat4(wrap_str_lit("{ "), test_side_effects_code, code, wrap_str_lit("; }")); @@ -1353,9 +1352,9 @@ text comp_rvalue(ast node, int context) { fun_call_decl_start = glo_decl_ix; while (literals_inits != 0) { - side_effect = get_child__(LIST, '=', literals_inits, 0); + side_effect = car_('=', literals_inits); comp_defstr(get_child_('=', side_effect, 0), get_child_('=', side_effect, 1), -1); - literals_inits = get_child_opt_(LIST, LIST, literals_inits, 1); + literals_inits = tail(literals_inits); } // We don't want to call defstr on every iteration, so we only capture fun @@ -1366,9 +1365,9 @@ text comp_rvalue(ast node, int context) { fun_call_decl_start = glo_decl_ix; while (replaced_fun_calls2 != 0) { - side_effect = get_child__(LIST, '=', replaced_fun_calls2, 0); + side_effect = car_('=', replaced_fun_calls2); comp_fun_call(get_child_('=', side_effect, 1), get_child_('=', side_effect, 0)); - replaced_fun_calls2 = get_child_opt_(LIST, LIST, replaced_fun_calls2, 1); + replaced_fun_calls2 = tail(replaced_fun_calls2); } // When compiling a test, we place the function side effects inline with the condition. 
@@ -2020,7 +2019,7 @@ bool comp_return(ast return_value) { append_glo_decl(wrap_str_lit("break")); } } else if (!in_tail_position) { - rest_loc_var_fixups = new_ast2(LIST, append_glo_decl_fixup(), rest_loc_var_fixups); + rest_loc_var_fixups = cons(append_glo_decl_fixup(), rest_loc_var_fixups); append_glo_decl(wrap_str_lit("return")); } @@ -2033,7 +2032,7 @@ void comp_var_decls(ast node) { node = get_child_opt_(DECLS, LIST, node, 0); while (node != 0) { // Add to local env and cummulative env, then initialize - var_decl = get_child__(LIST, DECL, node, 0); + var_decl = car_(DECL, node); assert_var_decl_is_safe(var_decl, true); add_var_to_local_env(var_decl, BINDING_VAR_LOCAL); if (get_child_(DECL, var_decl, 2) != 0) { // Initializer @@ -2044,7 +2043,7 @@ void comp_var_decls(ast node) { comp_assignment(new_ast0(IDENTIFIER, get_child__(DECL, IDENTIFIER, var, 0)), new_ast0(INTEGER, 0)); } #endif - node = get_child_opt_(LIST, LIST, node, 1); // Next variable + node = tail(node); // Next variable } } @@ -2165,9 +2164,9 @@ void comp_glo_fun_decl(ast node) { if (trailing_txt == 0) { // Show the mapping between the function parameters and $1, $2, etc. 
while (params != 0) { - var = get_child__(LIST, DECL, params, 0); + var = car_(DECL, params); trailing_txt = concatenate_strings_with(trailing_txt, string_concat3(wrap_str_pool(probe_string(get_val_(IDENTIFIER, get_child_(DECL, var, 0)))), wrap_str_lit(": $"), wrap_int(params_ix)), wrap_str_lit(", ")); - params = get_child_opt_(LIST, LIST, params, 1); + params = tail(params); params_ix += 1; } if (trailing_txt != 0) trailing_txt = string_concat(wrap_str_lit(" # "), trailing_txt); @@ -2190,11 +2189,11 @@ void comp_glo_fun_decl(ast node) { params = get_child_opt_('(', LIST, fun_type, 1); // Reload params because params is now = 0 params_ix = 2; while (params != 0) { - var = get_child__(LIST, DECL, params, 0); + var = car_(DECL, params); // TODO: Constant param optimization // Constant parameters don't need to be initialized comp_assignment(get_child_(DECL, var, 0), new_ast0(IDENTIFIER_DOLLAR, params_ix)); - params = get_child_opt_(LIST, LIST, params, 1); + params = tail(params); params_ix += 1; } #endif @@ -2210,8 +2209,8 @@ void comp_glo_fun_decl(ast node) { // So we fixup the calls to save_vars and unsave_vars at the end. 
fixup_glo_decl(save_loc_vars_fixup, save_local_vars()); while (rest_loc_var_fixups != 0) { - fixup_glo_decl(get_child_(LIST, rest_loc_var_fixups, 0), restore_local_vars(params_ix - 1)); - rest_loc_var_fixups = get_child_opt_(LIST, LIST, rest_loc_var_fixups, 1); + fixup_glo_decl(car(rest_loc_var_fixups), restore_local_vars(params_ix - 1)); + rest_loc_var_fixups = tail(rest_loc_var_fixups); } // functions cannot be empty so we insert ':' if it's empty @@ -2318,9 +2317,9 @@ void comp_enum_cases(ast ident, ast cases) { } while (cases != 0) { - cas = get_child__(LIST, '=', cases, 0); + cas = car_('=', cases); comp_assignment_constant(env_var(get_child__('=', IDENTIFIER, cas, 0)), get_child_('=', cas, 1)); - cases = get_child_opt_(LIST, LIST, cases, 1); + cases = tail(cases); } } @@ -2361,7 +2360,8 @@ void comp_struct(ast ident, ast members) { append_glo_decl(wrap_str_lit("# Struct member declarations")); } while (members != 0) { - decl = get_child__(LIST, DECL, members, 0); + decl = car_(DECL, members); + members = tail(members); field_type = get_child_(DECL, decl, 1); // Arrays and struct value types are not supported for now. // When we have type information on the local and global variables, we'll @@ -2371,7 +2371,6 @@ void comp_struct(ast ident, ast members) { } comp_assignment_constant(struct_member_var(get_child_opt_(DECL, IDENTIFIER, decl, 0)), offset); - members = get_child_opt_(LIST, LIST, members, 1); set_val(offset, get_val_(INTEGER, offset) - 1); } @@ -2399,7 +2398,7 @@ void handle_enum_struct_union_type_decl(ast type) { // on the type specifier. 
void handle_typedef(ast node) { ast decls = get_child__(TYPEDEF_KW, LIST, node, 0); - ast decl = get_child__(LIST, DECL, decls, 0); + ast decl = car_(DECL, decls); ast type = get_child_(DECL, decl, 1); handle_enum_struct_union_type_decl(get_type_specifier(type)); @@ -2424,8 +2423,8 @@ void comp_glo_decl(ast node) { } else if (op == DECLS) { // Variable declarations declarations = get_child__(DECLS, LIST, node, 0); while (declarations != 0) { // Multiple variable declarations - comp_glo_var_decl(get_child__(LIST, DECL, declarations, 0)); - declarations = get_child_opt_(LIST, LIST, declarations, 1); + comp_glo_var_decl(car_(DECL, declarations)); + declarations = tail(declarations); } } else if (op == FUN_DECL) { comp_glo_fun_decl(node); From ae35b1fb1344ab64d4ad22be8ebc6f94f0fbfc95 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 10 Feb 2025 13:29:04 -0500 Subject: [PATCH 53/89] Parse call params as LIST instead of ',' expr --- exe.c | 12 ++---------- pnut.c | 25 ++++++++++++++++++++++++- sh.c | 52 +++++++++++++++++++--------------------------------- 3 files changed, 45 insertions(+), 44 deletions(-) diff --git a/exe.c b/exe.c index 9a4d6078..940359d4 100644 --- a/exe.c +++ b/exe.c @@ -959,17 +959,9 @@ int codegen_params(ast params) { int fs = 0; - // Function params are comma expressions that aren't exactly like comma lists. - // Comma lists end with a new_ast2(',', last, 0) node, while function params - // end with a new_ast2(',', second_last, last) if there are more than one param - // and are just the last param if there is only one. 
if (params != 0) { - if (get_op(params) == ',') { - fs = codegen_params(get_child_(',', params, 1)); - fs += codegen_param(get_child_(',', params, 0)); - } else { - fs = codegen_param(params); - } + fs = codegen_params(get_child_opt_(LIST, LIST, params, 1)); + fs += codegen_param(get_child_(LIST, params, 0)); } return fs; diff --git a/pnut.c b/pnut.c index a600556a..80bee2e3 100644 --- a/pnut.c +++ b/pnut.c @@ -506,6 +506,16 @@ void set_car(int pair, int value) { return set_child(pair, 0, value); } void set_cdr(int pair, int value) { return set_child(pair, 1, value); } #define tail(x) cdr_(LIST, x) +// Returns the only element of a singleton list, if it is a singleton list. +// Otherwise, returns 0. +ast list_singleton(ast list) { + if (list != 0 && tail(list) == 0) { + return car(list); + } else { + return 0; + } +} + // Simple accessor to get the string from the string pool #define STRING_BUF(string_val) (string_pool + heap[string_val+1]) #define STRING_LEN(string_val) (heap[string_val+4]) @@ -2273,6 +2283,7 @@ void expect_tok_(int expected_tok, char* file, int line) { } ast parse_comma_expression(); +ast parse_call_params(); ast parse_cast_expression(); ast parse_compound_statement(); ast parse_conditional_expression(); @@ -2939,7 +2950,7 @@ ast parse_postfix_expression() { if (tok == ')') { child = 0; } else { - child = parse_comma_expression(); + child = parse_call_params(); } result = new_ast2('(', result, child); expect_tok(')'); @@ -3305,6 +3316,18 @@ ast parse_comma_expression() { return result; } +ast parse_call_params() { + ast result = parse_assignment_expression(); + result = new_ast2(LIST, result, 0); + + if (tok == ',') { + get_tok(); + set_child(result, 1, parse_call_params()); + } + + return result; +} + ast parse_comma_expression_opt() { ast result; diff --git a/sh.c b/sh.c index 43cf60db..9a3ed4b8 100644 --- a/sh.c +++ b/sh.c @@ -859,17 +859,10 @@ ast handle_fun_call_side_effect(ast node, ast assign_to, bool executes_condition // Traverse the 
arguments and replace them with the result of // handle_side_effects_go sub is the parent node of the current argument - sub2 = get_child_('(', node, 1); - if (sub2 != 0) { // Check if not an empty list - sub1 = node; // For 1 param, the parent node is the fun call node - // If there are 2 or more params, we traverse the ',' nodes ... - while (get_op(sub2) == ',') { - sub1 = sub2; // .. and the parent node is the ',' node - set_child(sub1, 0, handle_side_effects_go(get_child_(',', sub2, 0), executes_conditionally)); - sub2 = get_child_(',', sub2, 1); - } - // Handle the last argument - set_child(sub1, 1, handle_side_effects_go(sub2, executes_conditionally)); + sub1 = get_child_('(', node, 1); + while (sub1 != 0) { + set_child(sub1, 0, handle_side_effects_go(car(sub1), executes_conditionally)); + sub1 = tail(sub1); } // All the temporary variables used for the function parameters can be @@ -1459,14 +1452,10 @@ text fun_call_params(ast params) { ast param; text code_params = 0; - if (params != 0) { // Check if not an empty list - while (get_op(params) == ',') { - param = comp_rvalue(get_child_(',', params, 0), RVALUE_CTX_BASE); - code_params = concatenate_strings_with(code_params, param, wrap_char(' ')); - params = get_child_(',', params, 1); - } - param = comp_rvalue(params, RVALUE_CTX_BASE); // Last parameter + while (params != 0) { + param = comp_rvalue(car(params), RVALUE_CTX_BASE); code_params = concatenate_strings_with(code_params, param, wrap_char(' ')); + params = tail(params); } return code_params; @@ -1551,13 +1540,8 @@ void handle_printf_call(char *format_str, ast params) { while (*format_str != '\0') { // Param is consumed, get the next one if (param == 0 && params != 0) { - if (get_op(params) == ',') { - param = get_child_(',', params, 0); - params = get_child_(',', params, 1); - } else { - param = params; - params = 0; - } + param = car(params); + params = tail(params); } if (mod) { @@ -1682,25 +1666,27 @@ text comp_fun_call_code(ast node, ast 
assign_to) { ast name = get_child__('(', IDENTIFIER, node, 0); ast params = get_child_('(', node, 1); int name_id = get_val_(IDENTIFIER, name); + ast param; text res; #ifdef SH_AVOID_PRINTF_USE if (get_op(assign_to) == IDENTIFIER_EMPTY) { if (((name_id == PUTS_ID || name_id == PUTSTR_ID || name_id == PRINTF_ID) - && params != 0 && get_op(params) == STRING)) { // puts("..."), putstr("..."), printf("...") - return printf_call(STRING_BUF(get_val_(STRING, params)), 0, 0, true); - } else if (name_id == PRINTF_ID && get_op(get_child(params, 0)) == STRING) { - handle_printf_call(STRING_BUF(get_val_(STRING, get_child(params, 0))), get_child(params, 1)); + && (param = list_singleton(params)) != 0 + && get_op(param) == STRING)) { // puts("..."), putstr("..."), printf("...") + return printf_call(STRING_BUF(get_val_(STRING, param)), 0, 0, true); + } else if (name_id == PRINTF_ID && params != 0 && get_op(car(params)) == STRING) { // printf("...", ...) + handle_printf_call(STRING_BUF(get_val_(STRING, car(params))), tail(params)); return 0; } #ifdef SH_INLINE_PUTCHAR - else if (name_id == PUTCHAR_ID && params != 0 && get_op(params) != ',') { // putchar with 1 param - return comp_putchar_inline(params); + else if (name_id == PUTCHAR_ID && (param = list_singleton(params)) != 0) { // putchar with 1 param + return comp_putchar_inline(param); } #endif #ifdef SH_INLINE_EXIT - else if (name_id == EXIT_ID && params != 0 && get_op(params) != ',') { // exit with 1 param - res = comp_rvalue(params, RVALUE_CTX_BASE); + else if (name_id == EXIT_ID && (param = list_singleton(params)) != 0) { // exit with 1 param + res = comp_rvalue(param, RVALUE_CTX_BASE); return string_concat(wrap_str_lit("exit "), res); } #endif From b7aa5f13bf313084b12f8ff604a84bb2de0c29eb Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 10 Feb 2025 14:24:01 -0500 Subject: [PATCH 54/89] Remove unused variable --- sh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sh.c b/sh.c index 
9a3ed4b8..0d83da6b 100644 --- a/sh.c +++ b/sh.c @@ -845,7 +845,7 @@ bool contains_side_effects = 0; ast handle_fun_call_side_effect(ast node, ast assign_to, bool executes_conditionally) { int start_gensym_ix = gensym_ix; - ast sub1, sub2; + ast sub1; if (assign_to == 0) { assign_to = fresh_ident(); // Unique identifier for the function call From 7774d988db745f3f03dd04dcf17fb3eb6ffdaa36 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 10 Feb 2025 14:54:21 -0500 Subject: [PATCH 55/89] Fix bug where for(;;) would crash pnut-exe --- exe.c | 14 +++++++++----- tests/_all/for-empty.c | 12 ++++++++++++ tests/_all/for-empty.golden | 1 + 3 files changed, 22 insertions(+), 5 deletions(-) create mode 100644 tests/_all/for-empty.c create mode 100644 tests/_all/for-empty.golden diff --git a/exe.c b/exe.c index 940359d4..e91e4d3f 100644 --- a/exe.c +++ b/exe.c @@ -1830,11 +1830,15 @@ void codegen_statement(ast node) { def_label(lbl1); codegen_statement(get_child_(FOR_KW, node, 2)); // post loop action def_label(lbl3); - codegen_rvalue(get_child_(FOR_KW, node, 1)); // test - pop_reg(reg_X); - grow_fs(-1); - xor_reg_reg(reg_Y, reg_Y); - jump_cond_reg_reg(EQ, lbl2, reg_X, reg_Y); + if (get_child_(FOR_KW, node, 1) != 0) { + codegen_rvalue(get_child_(FOR_KW, node, 1)); // test + pop_reg(reg_X); + grow_fs(-1); + xor_reg_reg(reg_Y, reg_Y); + jump_cond_reg_reg(EQ, lbl2, reg_X, reg_Y); + } + // if no test, we always fall down to the body + codegen_statement(get_child_(FOR_KW, node, 3)); jump(lbl1); def_label(lbl2); diff --git a/tests/_all/for-empty.c b/tests/_all/for-empty.c new file mode 100644 index 00000000..3b3c5cda --- /dev/null +++ b/tests/_all/for-empty.c @@ -0,0 +1,12 @@ +#include <stdio.h> + +int main() { + int i = 0; + for (;;) { + if (i == 10) { + return 0; + } + putchar('0' + i); + i += 1; + } +} diff --git a/tests/_all/for-empty.golden b/tests/_all/for-empty.golden new file mode 100644 index 00000000..ad471007 --- /dev/null +++ b/tests/_all/for-empty.golden @@ -0,0 +1 @@
+0123456789 \ No newline at end of file From c57b063f2624463b6e7e6fdd104745ebb11e6dc2 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 10 Feb 2025 14:54:50 -0500 Subject: [PATCH 56/89] Add timings to bootstrap-pnut-exe.sh script --- bootstrap-pnut-exe.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bootstrap-pnut-exe.sh b/bootstrap-pnut-exe.sh index ec1a2c91..f6285b9e 100755 --- a/bootstrap-pnut-exe.sh +++ b/bootstrap-pnut-exe.sh @@ -35,7 +35,8 @@ bootstrap_with_gcc() { chmod +x $TEMP_DIR/pnut-x86-by-pnut-x86-by-gcc.exe - ./$TEMP_DIR/pnut-x86-by-pnut-x86-by-gcc.exe $PNUT_EXE_OPTIONS pnut.c > $TEMP_DIR/pnut-x86-by-pnut-x86-by-pnut-x86-by-gcc.exe + printf_timing "pnut-x86-by-gcc.exe compiling pnut.c -> pnut-x86-by-pnut-x86-by-gcc.exe" \ + "./$TEMP_DIR/pnut-x86-by-pnut-x86-by-gcc.exe $PNUT_EXE_OPTIONS pnut.c > $TEMP_DIR/pnut-x86-by-pnut-x86-by-pnut-x86-by-gcc.exe" if [ -s $TEMP_DIR/pnut-x86-by-pnut-x86-by-pnut-x86-by-gcc.exe ] ; then if diff $TEMP_DIR/pnut-x86-by-pnut-x86-by-gcc.exe $TEMP_DIR/pnut-x86-by-pnut-x86-by-pnut-x86-by-gcc.exe 2>&1 > /dev/null ; then From 0e51e31e9c177f8172628b49fa811e3c499849fd Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 10 Feb 2025 14:39:59 -0500 Subject: [PATCH 57/89] Hide string_pool from code generators The string_pool is an internal data structure that is not meant to be used by code generators. This commit removes all references to it from sh.c and exe.c, using the STRING_BUF, STRING_BUF_END macros instead. 
--- exe.c | 14 +++++++------- pnut.c | 1 + sh.c | 43 ++++++++++++++++++++++--------------------- 3 files changed, 30 insertions(+), 28 deletions(-) diff --git a/exe.c b/exe.c index 940359d4..761ff184 100644 --- a/exe.c +++ b/exe.c @@ -680,7 +680,7 @@ int resolve_identifier(int ident_probe) { binding = cgc_lookup_enum_value(ident_probe, cgc_globals); if (binding != 0) return binding; - putstr("ident = "); putstr(string_pool + probe_string(ident_probe)); putchar('\n'); + putstr("ident = "); putstr(STRING_BUF(ident_probe)); putchar('\n'); fatal_error("identifier not found"); return 0; } @@ -717,7 +717,7 @@ ast value_type(ast node) { return int_type; default: putstr("ident = "); - putstr(string_pool + probe_string(ident)); + putstr(STRING_BUF(ident)); putchar('\n'); fatal_error("value_type: unknown identifier"); return -1; @@ -790,7 +790,7 @@ ast value_type(ast node) { return heap[binding+5]; } else { putstr("ident = "); - putstr(string_pool + probe_string(get_val_(IDENTIFIER, child0))); + putstr(STRING_BUF(get_val_(IDENTIFIER, child0))); putchar('\n'); fatal_error("value_type: function not found"); return -1; @@ -977,7 +977,7 @@ void codegen_call(ast node) { if (binding == 0) { putstr("ident = "); - putstr(string_pool + probe_string(ident_probe)); + putstr(STRING_BUF(ident_probe)); putchar('\n'); fatal_error("codegen_call: function not found"); } @@ -1120,7 +1120,7 @@ int codegen_lvalue(ast node) { void codegen_string(int string_probe) { int lbl = alloc_label(0); - char *string_start = string_pool + heap[string_probe + 1]; + char *string_start = STRING_BUF(string_probe); char *string_end = string_start + heap[string_probe + 4]; call(lbl); @@ -1182,7 +1182,7 @@ void codegen_rvalue(ast node) { break; default: - putstr("ident = "); putstr(string_pool + probe_string(get_val_(IDENTIFIER, node))); putchar('\n'); + putstr("ident = "); putstr(STRING_BUF(get_val_(IDENTIFIER, node))); putchar('\n'); fatal_error("codegen_rvalue: identifier not found"); break; } @@ -1521,7 
+1521,7 @@ void handle_enum_struct_union_type_decl(ast type) { } void codegen_initializer_string(int string_probe, ast type, int base_reg, int offset) { - char *string_start = string_pool + heap[string_probe + 1]; + char *string_start = STRING_BUF(string_probe); int i = 0; int str_len = heap[string_probe + 4]; int arr_len; diff --git a/pnut.c b/pnut.c index 80bee2e3..a5514c83 100644 --- a/pnut.c +++ b/pnut.c @@ -519,6 +519,7 @@ ast list_singleton(ast list) { // Simple accessor to get the string from the string pool #define STRING_BUF(string_val) (string_pool + heap[string_val+1]) #define STRING_LEN(string_val) (heap[string_val+4]) +#define STRING_BUF_END(string_val) (STRING_BUF(string_val) + STRING_LEN(string_val)) void begin_string() { string_start = string_pool_alloc; diff --git a/sh.c b/sh.c index 0d83da6b..71be8075 100644 --- a/sh.c +++ b/sh.c @@ -166,8 +166,8 @@ text wrap_str_lit(char *s) { return wrap_str_imm(s, 0); } -text wrap_str_pool(int s) { - return wrap_str_imm(string_pool + s, 0); +text wrap_str_pool(int ident_probe) { + return wrap_str_imm(STRING_BUF(ident_probe), 0); } text concatenate_strings_with(text t1, text t2, text sep) { @@ -245,6 +245,7 @@ void print_escaped_text(text t, bool for_printf) { void print_text(text t) { int i; + char *s; if (t == 0) return; @@ -263,13 +264,13 @@ void print_text(text t) { } else if (text_pool[t] == TEXT_FROM_INT(TEXT_INTEGER)) { putint(TEXT_TO_INT(text_pool[t + 1])); } else if (text_pool[t] == TEXT_FROM_INT(TEXT_STRING)) { - if (TEXT_TO_INT(text_pool[t + 2]) == 0) { + if (TEXT_TO_INT(text_pool[t + 2]) == 0) { // null-terminated string putstr((char*) text_pool[t + 1]); - } else { - i = text_pool[t + 1]; // start - while (i < TEXT_TO_INT(text_pool[t + 2])) { - putchar(string_pool[i]); - i += 1; + } else { // string ends at the address in text_pool[t + 2] + s = (char*) text_pool[t + 1]; // start + while (s < text_pool[t + 2] || *s != 0) { + putchar(*s); + s += 1; } } } else if (text_pool[t] == 
TEXT_FROM_INT(TEXT_ESCAPED)) { @@ -443,15 +444,15 @@ text format_special_var(ast ident, ast prefixed_with_dollar) { } text struct_member_var(ast member_name_ident) { - return string_concat(wrap_str_lit("__"), wrap_str_pool(probe_string(get_val_(IDENTIFIER, member_name_ident)))); + return string_concat(wrap_str_lit("__"), wrap_str_pool(get_val_(IDENTIFIER, member_name_ident))); } text struct_sizeof_var(ast struct_name_ident) { - return string_concat(wrap_str_lit("__sizeof__"), wrap_str_pool(probe_string(get_val_(IDENTIFIER, struct_name_ident)))); + return string_concat(wrap_str_lit("__sizeof__"), wrap_str_pool(get_val_(IDENTIFIER, struct_name_ident))); } text global_var(ast ident) { - return string_concat(wrap_char('_'), wrap_str_pool(probe_string(ident))); + return string_concat(wrap_char('_'), wrap_str_pool(ident)); } text env_var_with_prefix(ast ident, ast prefixed_with_dollar) { @@ -461,7 +462,7 @@ text env_var_with_prefix(ast ident, ast prefixed_with_dollar) { if (get_val_(IDENTIFIER, ident) == ARGV_ID) { return wrap_str_lit("argv_"); } else { - return wrap_str_pool(probe_string(get_val_(IDENTIFIER, ident))); + return wrap_str_pool(get_val_(IDENTIFIER, ident)); } } else { return global_var(get_val_(IDENTIFIER, ident)); @@ -476,7 +477,7 @@ text env_var(ast ident) { } text function_name(int ident_tok) { - return string_concat(wrap_char('_'), wrap_str_pool(probe_string(ident_tok))); + return string_concat(wrap_char('_'), wrap_str_pool(ident_tok)); } ast fresh_ident() { @@ -506,7 +507,7 @@ void add_var_to_local_env(ast decl, enum BINDING kind) { // Make sure we're not shadowing an existing local variable if (cgc_lookup_var(ident_probe, cgc_locals)) { - putstr("var="); putstr(string_pool + probe_string(ident_probe)); putchar('\n'); + putstr("var="); putstr(STRING_BUF(ident_probe)); putchar('\n'); fatal_error("Variable is already in local environment"); } @@ -528,7 +529,7 @@ void add_fun_params_to_local_env(ast lst) { // Also, the shell backend doesn't support 
variables with aggregate types. void assert_var_decl_is_safe(ast variable, bool local) { // Helper function for assert_idents_are_safe ast ident_probe = get_val_(IDENTIFIER, get_child__(DECL, IDENTIFIER, variable, 0)); - char* name = string_pool + probe_string(ident_probe); + char* name = STRING_BUF(ident_probe); ast type = get_child_(DECL, variable, 1); if (name[0] == '_' || (name[0] != '\0' && name[1] == '_' && name[2] == '\0')) { // Check for a_ variables that could conflict with character constants @@ -1001,8 +1002,8 @@ ast handle_side_effects(ast node) { } void comp_defstr(ast ident, int string_probe, int array_size) { - char *string_start = string_pool + heap[string_probe + 1]; - char *string_end = string_start + heap[string_probe + 4]; + char *string_start = STRING_BUF(string_probe); + char *string_end = STRING_BUF_END(string_probe); text array_size_text = 0; if (array_size != -1) { @@ -2094,7 +2095,7 @@ bool comp_statement(ast node, STMT_CTX stmt_ctx) { } else if (op == ':') { // Labelled statement are not very useful as gotos are not supported in the // Shell backend, but we still emit a label comment for readability. - append_glo_decl(string_concat3(wrap_str_lit("# "), wrap_str_pool(probe_string(get_val_(IDENTIFIER, get_child_(':', node, 0)))), wrap_char(':'))); + append_glo_decl(string_concat3(wrap_str_lit("# "), wrap_str_pool(get_val_(IDENTIFIER, get_child_(':', node, 0))), wrap_char(':'))); return comp_statement(get_child_(':', node, 1), stmt_ctx); } else if (op == GOTO_KW) { fatal_error("goto statements not supported"); @@ -2151,7 +2152,7 @@ void comp_glo_fun_decl(ast node) { // Show the mapping between the function parameters and $1, $2, etc. 
while (params != 0) { var = car_(DECL, params); - trailing_txt = concatenate_strings_with(trailing_txt, string_concat3(wrap_str_pool(probe_string(get_val_(IDENTIFIER, get_child_(DECL, var, 0)))), wrap_str_lit(": $"), wrap_int(params_ix)), wrap_str_lit(", ")); + trailing_txt = concatenate_strings_with(trailing_txt, string_concat3(wrap_str_pool(get_val_(IDENTIFIER, get_child_(DECL, var, 0))), wrap_str_lit(": $"), wrap_int(params_ix)), wrap_str_lit(", ")); params = tail(params); params_ix += 1; } @@ -2297,7 +2298,7 @@ void comp_assignment_constant(text constant_name, ast rhs) { void comp_enum_cases(ast ident, ast cases) { ast cas; if (ident != 0) { - append_glo_decl(string_concat3(wrap_str_lit("# "), wrap_str_pool(probe_string(get_val_(IDENTIFIER, ident))), wrap_str_lit(" enum declaration"))); + append_glo_decl(string_concat3(wrap_str_lit("# "), wrap_str_pool(get_val_(IDENTIFIER, ident)), wrap_str_lit(" enum declaration"))); } else { append_glo_decl(wrap_str_lit("# Enum declaration")); } @@ -2341,7 +2342,7 @@ void comp_struct(ast ident, ast members) { int offset = new_ast0(INTEGER, 0); int field_type; if (ident != 0) { - append_glo_decl(string_concat3(wrap_str_lit("# "), wrap_str_pool(probe_string(get_val_(IDENTIFIER, ident))), wrap_str_lit(" struct member declarations"))); + append_glo_decl(string_concat3(wrap_str_lit("# "), wrap_str_pool(get_val_(IDENTIFIER, ident)), wrap_str_lit(" struct member declarations"))); } else { append_glo_decl(wrap_str_lit("# Struct member declarations")); } From 736e9737252fbc6c42af9c6a6a6c30599572fd34 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 10 Feb 2025 14:40:47 -0500 Subject: [PATCH 58/89] Store name pointed by goto in IDENTIFIER node The children of AST nodes should all be AST nodes, unless the node is terminal and has no children. 
--- exe.c | 2 +- pnut.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/exe.c b/exe.c index 761ff184..045aa0b9 100644 --- a/exe.c +++ b/exe.c @@ -991,7 +991,7 @@ void codegen_call(ast node) { } void codegen_goto(ast node) { - ast label_ident = get_val_(GOTO_KW, node); + ast label_ident = get_val_(IDENTIFIER, get_child__(GOTO_KW, IDENTIFIER, node, 0)); int binding = cgc_lookup_goto_label(label_ident, cgc_locals_fun); int goto_lbl; diff --git a/pnut.c b/pnut.c index a5514c83..587cd7f7 100644 --- a/pnut.c +++ b/pnut.c @@ -3434,7 +3434,7 @@ ast parse_statement() { get_tok(); expect_tok(IDENTIFIER); - result = new_ast0(GOTO_KW, val); + result = new_ast1(GOTO_KW, new_ast0(IDENTIFIER, val)); expect_tok(';'); } else if (tok == CONTINUE_KW) { From 5c8b9e7fb5c7403aed5e5ea4a1b9b2f0747672ed Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 10 Feb 2025 14:43:10 -0500 Subject: [PATCH 59/89] Parse array/function declarator in the right order Array and function declarators were added to the outside of the type AST while they should have been "pushed" inside the type AST. See comment in the code for more details. --- pnut.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 70 insertions(+), 10 deletions(-) diff --git a/pnut.c b/pnut.c index 587cd7f7..83c79271 100644 --- a/pnut.c +++ b/pnut.c @@ -2642,32 +2642,88 @@ int parse_param_list() { return result; } -// abstract_decl: true if the declarator may omit the identifier +ast get_inner_type(ast type) { + switch (get_op(type)) { + case DECL: + case '*': + return get_child(type, 1); + case '[': + case '(': + return get_child(type, 0); + default: + fatal_error("Invalid type"); + return 0; + } +} + +void update_inner_type(ast parent_type, ast inner_type) { + switch (get_op(parent_type)) { + case DECL: + case '*': + set_child(parent_type, 1, inner_type); + break; + + case '[': + case '(': + set_child(parent_type, 0, inner_type); + break; + } +} + +// Parse a declarator.
In C, declarators are written as they are used, meaning +// that the identifier appears inside the declarator, and is surrounded by the +// operators that are used to access the declared object. +// +// When manipulating declarator and type objects, it's much more convenient to +// have the identifier as the outermost node, and the order of the operators +// reversed, ending with the type specifier (base type). +// For example, `int *a[10]` is parsed as `(decl a (array 10 (pointer int)))` +// even if the parser parses `int`, `*`, identifier and `[10]` in that order. +// +// To achieve this, parse_declarator takes the inner type as an argument, and +// the inner type is extended as the declarator is parsed. The parent_type is +// then used in the declarator base case, the identifier, and which +// creates the DECL node. +// +// There's a small twist to this however, caused by array and function +// declarators appearing postfixed to the declarator. Because tokens are only +// read once, we can't skip ahead to expand the inner type with array/function +// declarator and then recursively call parse_declarator with the extended type. +// Instead, parse_declarator keeps track of the node that wraps the inner type +// and returns it in `parse_declarator_parent_type_parent`. Using the reference +// to the node containing the inner type, it is then possible to insert the +// array/function declarator in the right location, that is around the inner +// type. 
+// +// Parameters: abstract_decl: true if the declarator may omit the identifier +ast parse_declarator_parent_type_parent; ast parse_declarator(bool abstract_decl, ast parent_type) { bool first_tok = tok; // Indicates if the declarator is a noptr-declarator ast result = 0; ast decl; ast arr_size_expr; + ast parent_type_parent; switch (tok) { case IDENTIFIER: result = new_ast3(DECL, new_ast0(IDENTIFIER, val), parent_type, 0); // child#2 is the initializer + parent_type_parent = result; get_tok(); break; case '*': get_tok(); // Pointers may be const-qualified - result = pointer_type(parent_type, tok == CONST_KW); + parent_type_parent = pointer_type(parent_type, tok == CONST_KW); if (tok == CONST_KW) get_tok(); - - result = parse_declarator(abstract_decl, result); + result = parse_declarator(abstract_decl, parent_type_parent); break; // Parenthesis delimit the specifier-and-qualifier part of the declaration from the declarator case '(': get_tok(); result = parse_declarator(abstract_decl, parent_type); + parent_type_parent = parse_declarator_parent_type_parent; expect_tok(')'); break; @@ -2677,6 +2733,7 @@ ast parse_declarator(bool abstract_decl, ast parent_type) { // In that case, we create a DECL node with no identifier. if (abstract_decl) { result = new_ast3(DECL, 0, parent_type, 0); // child#0 is the identifier, child#2 is the initializer + parent_type_parent = result; } else { parse_error("Invalid declarator, expected an identifier but declarator doesn't have one", tok); } @@ -2687,7 +2744,7 @@ ast parse_declarator(bool abstract_decl, ast parent_type) { // Because we want the DECL to stay as the outermost node, we temporarily // unwrap the DECL parent_type. 
decl = result; - result = get_child_(DECL, result, 1); // child#1 is the type + result = get_child_(DECL, decl, 1); while (first_tok != '*') { // noptr-declarator may be followed by [ constant-expression ] to declare an @@ -2696,7 +2753,7 @@ ast parse_declarator(bool abstract_decl, ast parent_type) { if (tok == '[') { // Check if not a void array if (get_op(result) == VOID_KW) parse_error("void array not allowed", tok); - get_tok(); + get_tok(); if (tok == ']') { val = 0; } else { @@ -2704,17 +2761,20 @@ ast parse_declarator(bool abstract_decl, ast parent_type) { if (arr_size_expr == 0) parse_error("Array size must be an integer constant", tok); val = eval_constant(arr_size_expr, false); } - result = new_ast2('[', result, val); // 0 is used to represent an unsized array + result = new_ast2('[', get_inner_type(parent_type_parent), val); + update_inner_type(parent_type_parent, result); + parent_type_parent = result; expect_tok(']'); } else if (tok == '(') { - result = new_ast2('(', result, parse_param_list()); + result = new_ast2('(', get_inner_type(parent_type_parent), parse_param_list()); + update_inner_type(parent_type_parent, result); + parent_type_parent = result; } else { break; } } - // And now we wrap the DECL back around the result. 
- set_child(decl, 1, result); // child#1 is the type + parse_declarator_parent_type_parent = parent_type_parent; return decl; } From 1ac80f2ae91d35a18c3efa53e6fb94e060884589 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 10 Feb 2025 14:47:02 -0500 Subject: [PATCH 60/89] Add debug function to convert AST to S-exp --- debug.c | 265 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- pnut.c | 17 ++-- 2 files changed, 263 insertions(+), 19 deletions(-) diff --git a/debug.c b/debug.c index 92e84f6b..64218b17 100644 --- a/debug.c +++ b/debug.c @@ -112,14 +112,18 @@ void print_tok(int tok, int val) { else if (tok == AMP_EQ) putstr("&="); else if (tok == ARROW) putstr("->"); else if (tok == BAR_BAR) putstr("||"); +#ifdef GAMBIT_MODE + else if (tok == BAR_EQ) putstr("||="); +#else else if (tok == BAR_EQ) putstr("|="); +#endif else if (tok == CARET_EQ) putstr("^="); else if (tok == EQ_EQ) putstr("=="); else if (tok == GT_EQ) putstr(">="); else if (tok == LSHIFT_EQ) putstr("<<="); else if (tok == LSHIFT) putstr("<<"); else if (tok == LT_EQ) putstr("<="); - else if (tok == MINUS_EQ) putstr(" -= "); // Adding spaces around -= so - is not interpreted as an option to printf + else if (tok == MINUS_EQ) putstr("-="); else if (tok == MINUS_MINUS) putstr("--"); else if (tok == EXCL_EQ) putstr("!="); else if (tok == PERCENT_EQ) putstr("%="); @@ -130,9 +134,17 @@ void print_tok(int tok, int val) { else if (tok == STAR_EQ) putstr("*="); else if (tok == HASH_HASH) putstr("##"); else if (tok == PLUS_PLUS_PRE) putstr("++"); - else if (tok == MINUS_MINUS_PRE) putstr(" -- "); // Adding spaces around -= so - is not interpreted as an option to printf + else if (tok == MINUS_MINUS_PRE) putstr("--"); else if (tok == PLUS_PLUS_POST) putstr("++"); - else if (tok == MINUS_MINUS_POST) putstr(" -- "); // Adding spaces around -= so - is not interpreted as an option to printf + else if (tok == MINUS_MINUS_POST) putstr("--"); + else if (tok == PARENS) putstr("("); + + else if (tok == 
FUN_DECL) putstr("fun_decl"); + else if (tok == CAST) putstr("cast"); + else if (tok == INITIALIZER_LIST) putstr("initializer_list"); + else if (tok == DECL) putstr("decl"); + else if (tok == DECLS) putstr("decls"); + else if (tok == LIST) putstr("list"); else if (tok == IDENTIFIER) { putstr(string_pool + heap[val+1]); @@ -153,6 +165,10 @@ void print_tok(int tok, int val) { } else if (tok == MACRO_ARG) { putstr("ARG["); putint(val); putstr("]"); } else { +#ifdef GAMBIT_MODE + if (tok == '|') putstr("bar"); + else +#endif putchar(tok); } @@ -200,14 +216,18 @@ void print_tok_type(int tok) { else if (tok == AMP_EQ) putstr("&="); else if (tok == ARROW) putstr("->"); else if (tok == BAR_BAR) putstr("||"); +#ifdef GAMBIT_MODE + else if (tok == BAR_EQ) putstr("||="); +#else else if (tok == BAR_EQ) putstr("|="); +#endif else if (tok == CARET_EQ) putstr("^="); else if (tok == EQ_EQ) putstr("=="); else if (tok == GT_EQ) putstr(">="); else if (tok == LSHIFT_EQ) putstr("<<="); else if (tok == LSHIFT) putstr("<<"); else if (tok == LT_EQ) putstr("<="); - else if (tok == MINUS_EQ) putstr(" -= "); // Adding spaces around -= so - is not interpreted as an option to printf + else if (tok == MINUS_EQ) putstr("-="); else if (tok == MINUS_MINUS) putstr("--"); else if (tok == EXCL_EQ) putstr("!="); else if (tok == PERCENT_EQ) putstr("%="); @@ -218,9 +238,19 @@ void print_tok_type(int tok) { else if (tok == STAR_EQ) putstr("*="); else if (tok == HASH_HASH) putstr("##"); else if (tok == PLUS_PLUS_PRE) putstr("++"); - else if (tok == MINUS_MINUS_PRE) putstr(" -- "); // Adding spaces around -= so - is not interpreted as an option to printf + else if (tok == MINUS_MINUS_PRE) putstr("--"); else if (tok == PLUS_PLUS_POST) putstr("++"); - else if (tok == MINUS_MINUS_POST) putstr(" -- "); // Adding spaces around -= so - is not interpreted as an option to printf + else if (tok == MINUS_MINUS_POST) putstr("--"); + + else if (tok == PARENS) putstr("("); + + else if (tok == FUN_DECL) 
putstr("fun_decl"); + else if (tok == CAST) putstr("cast"); + else if (tok == INITIALIZER_LIST) putstr("initializer_list"); + else if (tok == DECL) putstr("decl"); + else if (tok == DECLS) putstr("decls"); + else if (tok == LIST) putstr("list"); + else if (tok == IDENTIFIER) putstr("identifier"); else if (tok == INTEGER) putstr("integer"); else if (tok == CHARACTER) putstr("character"); @@ -229,15 +259,226 @@ void print_tok_type(int tok) { else if (tok == MACRO_ARG) putstr("macro argument"); else if (tok == EOF) putstr("end of file"); else if (tok == '\n') putstr("newline"); - else { putchar('\''); putchar(tok); putchar('\''); } + else if (' ' < tok && tok < 127) { +#ifdef GAMBIT_MODE + if (tok == '|') { + putstr("bar"); + } else +#endif + + putchar(tok); + } + else { + printf("tok=%d\n", tok); + fatal_error("print_tok_type: unknown token"); + } } -void show_ast(char* name, ast obj) { - int i; +void ast_to_sexp(ast obj); + +void ast_list_to_sexp(ast obj) { + while (obj != 0) { + ast_to_sexp(car(obj)); + obj = tail(obj); + if (obj != 0) putchar(' '); // Separate elements with a space + } +} + +void type_ast_to_sexp(ast type) { + switch (get_op(type)) { + case '*': + printf("(* "); + type_ast_to_sexp(get_child_('*', type, 1)); + printf(")"); + break; + + case '[': + printf("["); + type_ast_to_sexp(get_child_('[', type, 0)); + printf(" "); + putint(get_child_('[', type, 1)); + printf("]"); + break; + + case '(': + printf("(-> ("); + ast_list_to_sexp(get_child_opt_('(', LIST, type, 1)); // Function args + printf(") "); + type_ast_to_sexp(get_child_('(', type, 0)); + printf(")"); + break; + + case CHAR_KW: + case INT_KW: + case VOID_KW: + case SHORT_KW: + case SIGNED_KW: + case UNSIGNED_KW: + case LONG_KW: + case FLOAT_KW: + case DOUBLE_KW: + print_tok_type(get_op(type)); + break; + + case STRUCT_KW: + case UNION_KW: + case ENUM_KW: + printf("("); + print_tok_type(get_op(type)); + printf(" "); + ast_to_sexp(get_child(type, 0)); // Struct/union name + printf(" "); + 
ast_list_to_sexp(get_child(type, 2)); // Struct/union members + printf(")"); + break; + + default: + printf("", get_op(type)); + exit(1); + } +} + +void ast_to_sexp(ast obj) { + if (obj == 0) { + printf("#f"); + return; + } + + int i = 0; int nb_children = get_nb_children(obj); - if (nb_children == 0) nb_children = 1; // Account for value of ast nodes with no child - for (i = 0; i < nb_children + 1; i++) { - printf("%s[%d] = %d\n", name, i, (int) heap[obj + i]); + int op = get_op(obj); + + switch (op) { + case IDENTIFIER: + putstr(STRING_BUF(get_val_(IDENTIFIER, obj))); + break; + + case STRING: + putchar('"'); + print_tok_string(get_val_(STRING, obj)); + putchar('"'); + break; + + case INTEGER: + putint(-get_val_(INTEGER, obj)); + break; + + case CHARACTER: + // Removed so gambit scheme can read the output + // If printable ASCII: print as character, otherwise print as octal + // if (get_val_(CHARACTER, obj) >= 32 && get_val_(CHARACTER, obj) < 127) { + // putchar('\''); + // print_string_char(get_val_(CHARACTER, obj)); + // putchar('\''); + // } + printf("(char "); + putint(get_val_(CHARACTER, obj)); + putchar(')'); + break; + + case DECLS: + // For clarity, we print the declarations without a parent `DECLS` node + ast_list_to_sexp(get_child_opt_(DECLS, LIST, obj, 0)); + break; + + case TYPEDEF_KW: + printf("(typedef "); + ast_list_to_sexp(get_child_opt_(TYPEDEF_KW, LIST, obj, 0)); + printf(")"); + return; + + case ENUM_KW: + case STRUCT_KW: + case UNION_KW: + type_ast_to_sexp(obj); + break; + + case DECL: + // Nodes of type DECL are a bit special because they contain a type, and types have their own structure + putstr("(decl "); + ast_to_sexp(get_child__(DECL, IDENTIFIER, obj, 0)); + putchar(' '); + type_ast_to_sexp(get_child_(DECL, obj, 1)); + putchar(' '); + ast_to_sexp(get_child_(DECL, obj, 2)); + printf(")"); + break; + + case FUN_DECL: + printf("(define-fun "); + putstr(STRING_BUF(get_val_(IDENTIFIER, get_child__(DECL, IDENTIFIER, get_child__(FUN_DECL, DECL, 
obj, 0), 0)))); + putchar(' '); + type_ast_to_sexp(get_child_(DECL, get_child__(FUN_DECL, DECL, obj, 0), 1)); // Get type out of decl + putchar(' '); + ast_to_sexp(get_child_(FUN_DECL, obj, 1)); // Body + printf(")"); + break; + + case CAST: + printf("(cast "); + type_ast_to_sexp(get_child_(DECL, get_child__(CAST, DECL, obj, 0), 1)); // Get type out of decl + printf(" "); + ast_to_sexp(get_child_(CAST, obj, 1)); + printf(")"); + break; + + case SIZEOF_KW: + printf("(sizeof "); + if (get_op(get_child_(SIZEOF_KW, obj, 0)) == DECL) { + type_ast_to_sexp(get_child_(DECL, get_child_(SIZEOF_KW, obj, 0), 1)); + } else { + ast_to_sexp(get_child_(SIZEOF_KW, obj, 0)); + } + printf(")"); + break; + + case PARENS: + // Ignore parens nodes + ast_to_sexp(get_child_(PARENS, obj, 0)); + break; + + case '[': + printf("(array_at"); + ast_to_sexp(get_child_('[', obj, 0)); + printf(" "); + ast_to_sexp(get_child_('[', obj, 1)); + printf(")"); + break; + + case '(': // Function calls, we print the function and its arguments + printf("("); + ast_to_sexp(get_child_('(', obj, 0)); + if (get_child_('(', obj, 1) != 0) { + printf(" "); + ast_to_sexp(get_child_('(', obj, 1)); + } + printf(")"); + break; + + case LIST: + printf("(list "); + ast_list_to_sexp(obj); + printf(")"); + break; + + case '{': + while (obj != 0) { + ast_to_sexp(get_child_('{', obj, 0)); + obj = get_child_opt_('{', '{', obj, 1); + if (obj != 0) putchar(' '); + } + break; + + default: + putchar('('); + print_tok_type(op); + putchar(' '); + for (; i < get_nb_children(obj); i += 1) { + ast_to_sexp(get_child(obj, i)); + if (get_child(obj, i) != 0 && i < get_nb_children(obj) - 1) putchar(' '); + } + putchar(')'); + break; } } diff --git a/pnut.c b/pnut.c index 83c79271..49c6921e 100644 --- a/pnut.c +++ b/pnut.c @@ -126,6 +126,7 @@ char* include_search_path = 0; // Search path for include files // Tokens and AST nodes enum { + // Keywords AUTO_KW = 300, BREAK_KW, CASE_KW, @@ -158,15 +159,12 @@ enum { VOID_KW, VOLATILE_KW, 
WHILE_KW, - DECL, - DECLS, - FUN_DECL, - CAST, // Non-character operands INTEGER = 401, CHARACTER, STRING, + AMP_AMP, AMP_EQ, ARROW, @@ -196,10 +194,12 @@ enum { ELLIPSIS, PARENS, INITIALIZER_LIST, - - // Other tokens + DECL, + DECLS, + FUN_DECL, + CAST, MACRO_ARG = 499, - IDENTIFIER = 500, + IDENTIFIER = 500, // 500 because it's easy to remember TYPE = 501, MACRO = 502, @@ -3675,6 +3675,9 @@ int main(int argc, char **argv) { get_tok(); while (tok != EOF) { decl = parse_declaration(false); + printf("# %s:%d:%d\n", fp_filepath, line_number, column_number); + ast_to_sexp(decl); + putchar('\n'); } #else codegen_begin(); From aa366ce7d7f73a77fa13a8c9dc08fe4cac46d359 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 10 Feb 2025 14:55:53 -0500 Subject: [PATCH 61/89] Fix warning --- sh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sh.c b/sh.c index 71be8075..eb06d8ee 100644 --- a/sh.c +++ b/sh.c @@ -268,7 +268,7 @@ void print_text(text t) { putstr((char*) text_pool[t + 1]); } else { // string ends at the address in text_pool[t + 2] s = (char*) text_pool[t + 1]; // start - while (s < text_pool[t + 2] || *s != 0) { + while (s < (char*) text_pool[t + 2] || *s != 0) { putchar(*s); s += 1; } From a52a3d4b845f9c84915a47f31a0bca32fc0fca88 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 10 Feb 2025 15:02:06 -0500 Subject: [PATCH 62/89] Call ast_to_sexp if DEBUG_PARSER_SEXP is defined --- pnut.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pnut.c b/pnut.c index 49c6921e..36ee847e 100644 --- a/pnut.c +++ b/pnut.c @@ -2213,7 +2213,7 @@ void get_tok() { } // parser -#if defined DEBUG_CPP || defined DEBUG_EXPAND_INCLUDES || defined NICE_ERR_MSG || defined HANDLE_SIGNALS +#if defined DEBUG_CPP || defined DEBUG_EXPAND_INCLUDES || defined NICE_ERR_MSG || defined HANDLE_SIGNALS || defined DEBUG_PARSER_SEXP #include "debug.c" #endif @@ -3675,8 +3675,12 @@ int main(int argc, char **argv) { get_tok(); while (tok != EOF) { 
decl = parse_declaration(false); +#ifdef DEBUG_PARSER_SEXP +#ifdef INCLUDE_LINE_NUMBER_ON_ERROR printf("# %s:%d:%d\n", fp_filepath, line_number, column_number); +#endif ast_to_sexp(decl); +#endif putchar('\n'); } #else From 44226e27a22b0b8f0093e695f383745f92ce86fa Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 10 Feb 2025 15:33:08 -0500 Subject: [PATCH 63/89] Fix warnings for gcc 2.95.4 (used by Debian Woody) --- debug.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/debug.c b/debug.c index 64218b17..d711906c 100644 --- a/debug.c +++ b/debug.c @@ -339,16 +339,14 @@ void type_ast_to_sexp(ast type) { } void ast_to_sexp(ast obj) { + int i = 0; + if (obj == 0) { printf("#f"); return; } - int i = 0; - int nb_children = get_nb_children(obj); - int op = get_op(obj); - - switch (op) { + switch (get_op(obj)) { case IDENTIFIER: putstr(STRING_BUF(get_val_(IDENTIFIER, obj))); break; @@ -385,7 +383,7 @@ void ast_to_sexp(ast obj) { printf("(typedef "); ast_list_to_sexp(get_child_opt_(TYPEDEF_KW, LIST, obj, 0)); printf(")"); - return; + break; case ENUM_KW: case STRUCT_KW: @@ -471,7 +469,7 @@ void ast_to_sexp(ast obj) { default: putchar('('); - print_tok_type(op); + print_tok_type(get_op(obj)); putchar(' '); for (; i < get_nb_children(obj); i += 1) { ast_to_sexp(get_child(obj, i)); From d5b0b6623d0d2d580865525c243fb7d96f6d0929 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Fri, 7 Feb 2025 23:07:05 -0500 Subject: [PATCH 64/89] Add x86 instructions for indirect calls --- exe.c | 3 +++ x86.c | 46 ++++++++++++++++++++++++++++++++++++---------- 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/exe.c b/exe.c index 02bbf723..579509c3 100644 --- a/exe.c +++ b/exe.c @@ -90,6 +90,7 @@ void mov_reg_mem(int dst, int base, int offset); void mov_reg_mem8(int dst, int base, int offset); void add_reg_imm(int dst, int imm); +void add_reg_lbl(int dst, int lbl); void add_reg_reg(int dst, int src); void or_reg_reg (int dst, int src); 
void and_reg_reg(int dst, int src); @@ -100,6 +101,7 @@ void div_reg_reg(int dst, int src); void rem_reg_reg(int dst, int src); void shl_reg_reg(int dst, int src); void sar_reg_reg(int dst, int src); +void mov_reg_lbl(int reg, int lbl); void push_reg(int src); void pop_reg (int dst); @@ -107,6 +109,7 @@ void pop_reg (int dst); void jump(int lbl); void jump_rel(int offset); void call(int lbl); +void call_reg(int reg); void ret(); void dup(int reg) { diff --git a/x86.c b/x86.c index ae0b3d5f..4d63bca9 100644 --- a/x86.c +++ b/x86.c @@ -159,12 +159,24 @@ void add_reg_imm(int dst, int imm) { // ADD dst_reg, imm ;; Add 32 bit immediate value to register // See: https://web.archive.org/web/20240407051903/https://www.felixcloutier.com/x86/add + rex_prefix(0, dst); emit_i8(0x81); mod_rm(0, dst); emit_i32_le(imm); } +void add_reg_lbl(int dst, int lbl) { + + // ADD dst_reg, rel addr ;; Add 32 bit displacement between next instruction and label to register + // See: https://web.archive.org/web/20240407051903/https://www.felixcloutier.com/x86/add + + rex_prefix(0, dst); + emit_i8(0x81); + mod_rm(0, dst); + use_label(lbl); // 32 bit placeholder for distance +} + void mov_memory(int op, int reg, int base, int offset) { // Move word between register and memory @@ -318,16 +330,6 @@ void push_reg(int src) { emit_i8(0x50 + src); } -void push_imm32_le(int imm) { - - // PUSH imm32 ;; Push 32 bit immediate value to stack - // See: https://web.archive.org/web/20240407051929/https://www.felixcloutier.com/x86/push - - emit_i8(0x68); - emit_i32_le(imm); -} - - void pop_reg (int dst) { // POP dst_reg ;; Pop word from stack to register @@ -363,6 +365,15 @@ void call(int lbl) { use_label(lbl); } +void call_reg(int reg) { + + // CALL reg ;; Indirect call to address in register + // See: https://web.archive.org/web/20240323052931/https://www.felixcloutier.com/x86/call + + emit_i8(0xff); + mod_rm(2, reg); +} + void ret() { // RET ;; Return to calling procedure @@ -441,6 +452,21 @@ void 
setup_proc_args(int global_vars_size) { push_reg(reg_Y); // push argc } +void mov_reg_lbl(int reg, int lbl) { + // Since we can't do rip-relative addressing in 32 bit mode, we need to push + // the address to the stack and then some arithmetic to get the address in a + // register. + + int lbl_for_pc = alloc_label("lbl_for_pc"); + + call(lbl_for_pc); // call lbl + def_label(lbl_for_pc); // end label + // <--- The stack now has the address of the next instruction + pop_reg(reg); // pop reg_X (1 byte) + add_reg_lbl(reg, lbl); // load address of label to reg_X (6 or 7 bytes if 32 or 64 bit) + add_reg_imm(reg, word_size == 8 ? 8 : 7); // adjust for the pop and add instructions +} + // For 32 bit linux. #ifdef target_i386_linux From 73d51ee4faa7ca86dc1a7a02e31231d9546c1fa3 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sat, 8 Feb 2025 23:34:08 -0500 Subject: [PATCH 65/89] Give the right type to functions of built-in lib --- exe.c | 28 ++++++++++++++-------------- pnut.c | 16 ++++++++++++++++ 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/exe.c b/exe.c index 579509c3..f2a024c2 100644 --- a/exe.c +++ b/exe.c @@ -1423,46 +1423,46 @@ void codegen_begin() { void_star_type = pointer_type(new_ast0(VOID_KW, 0), false); main_lbl = alloc_label("main"); - cgc_add_global_fun(init_ident(IDENTIFIER, "main"), main_lbl, void_type); + cgc_add_global_fun(init_ident(IDENTIFIER, "main"), main_lbl, function_type(void_type, 0)); exit_lbl = alloc_label("exit"); - cgc_add_global_fun(init_ident(IDENTIFIER, "exit"), exit_lbl, void_type); + cgc_add_global_fun(init_ident(IDENTIFIER, "exit"), exit_lbl, function_type1(void_type, int_type)); getchar_lbl = alloc_label("getchar"); - cgc_add_global_fun(init_ident(IDENTIFIER, "getchar"), getchar_lbl, char_type); + cgc_add_global_fun(init_ident(IDENTIFIER, "getchar"), getchar_lbl, function_type(char_type, 0)); putchar_lbl = alloc_label("putchar"); - cgc_add_global_fun(init_ident(IDENTIFIER, "putchar"), putchar_lbl, void_type); + 
cgc_add_global_fun(init_ident(IDENTIFIER, "putchar"), putchar_lbl, function_type1(void_type, char_type)); fopen_lbl = alloc_label("fopen"); - cgc_add_global_fun(init_ident(IDENTIFIER, "fopen"), fopen_lbl, int_type); + cgc_add_global_fun(init_ident(IDENTIFIER, "fopen"), fopen_lbl, function_type2(int_type, string_type, string_type)); fclose_lbl = alloc_label("fclose"); - cgc_add_global_fun(init_ident(IDENTIFIER, "fclose"), fclose_lbl, void_type); + cgc_add_global_fun(init_ident(IDENTIFIER, "fclose"), fclose_lbl, function_type1(int_type, int_type)); fgetc_lbl = alloc_label("fgetc"); - cgc_add_global_fun(init_ident(IDENTIFIER, "fgetc"), fgetc_lbl, char_type); + cgc_add_global_fun(init_ident(IDENTIFIER, "fgetc"), fgetc_lbl, function_type1(int_type, int_type)); malloc_lbl = alloc_label("malloc"); - cgc_add_global_fun(init_ident(IDENTIFIER, "malloc"), malloc_lbl, void_star_type); + cgc_add_global_fun(init_ident(IDENTIFIER, "malloc"), malloc_lbl, function_type1(void_star_type, int_type)); free_lbl = alloc_label("free"); - cgc_add_global_fun(init_ident(IDENTIFIER, "free"), free_lbl, char_type); + cgc_add_global_fun(init_ident(IDENTIFIER, "free"), free_lbl, function_type1(void_type, void_star_type)); read_lbl = alloc_label("read"); - cgc_add_global_fun(init_ident(IDENTIFIER, "read"), read_lbl, int_type); + cgc_add_global_fun(init_ident(IDENTIFIER, "read"), read_lbl, function_type3(int_type, int_type, void_star_type, int_type)); write_lbl = alloc_label("write"); - cgc_add_global_fun(init_ident(IDENTIFIER, "write"), write_lbl, int_type); + cgc_add_global_fun(init_ident(IDENTIFIER, "write"), write_lbl, function_type3(int_type, int_type, void_star_type, int_type)); open_lbl = alloc_label("open"); - cgc_add_global_fun(init_ident(IDENTIFIER, "open"), open_lbl, int_type); + cgc_add_global_fun(init_ident(IDENTIFIER, "open"), open_lbl, function_type3(int_type, string_type, int_type, int_type)); close_lbl = alloc_label("close"); - cgc_add_global_fun(init_ident(IDENTIFIER, "close"), 
close_lbl, int_type); + cgc_add_global_fun(init_ident(IDENTIFIER, "close"), close_lbl, function_type1(int_type, int_type)); printf_lbl = alloc_label("printf"); - cgc_add_global_fun(init_ident(IDENTIFIER, "printf"), printf_lbl, void_type); + cgc_add_global_fun(init_ident(IDENTIFIER, "printf"), printf_lbl, function_type1(int_type, string_type)); jump(setup_lbl); } diff --git a/pnut.c b/pnut.c index 36ee847e..e73b20fc 100644 --- a/pnut.c +++ b/pnut.c @@ -2320,6 +2320,22 @@ ast pointer_type(ast parent_type, bool is_const) { return new_ast2('*', is_const ? MK_TYPE_SPECIFIER(CONST_KW) : 0, parent_type); } +ast function_type(ast parent_type, ast params) { + return new_ast2('(', parent_type, params); +} + +ast function_type1(ast parent_type, ast param1) { + return new_ast2('(', parent_type, cons(param1, 0)); +} + +ast function_type2(ast parent_type, ast param1, ast param2) { + return new_ast2('(', parent_type, cons(param1, cons(param2, 0))); +} + +ast function_type3(ast parent_type, ast param1, ast param2, ast param3) { + return new_ast2('(', parent_type, cons(param1, cons(param2, cons(param3, 0)))); +} + // Type and declaration parser int is_type_starter(int tok) { return tok == INT_KW || tok == CHAR_KW || tok == SHORT_KW || tok == LONG_KW // Numeric types From 941db7208c5b032f812571cbd7bf947f6eac138a Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sat, 8 Feb 2025 23:36:44 -0500 Subject: [PATCH 66/89] Support indirect function calls in pnut-exe --- exe.c | 84 ++++++++++++++++++++++++-------- tests/_exe/indirect-calls.c | 69 ++++++++++++++++++++++++++ tests/_exe/indirect-calls.golden | 24 +++++++++ 3 files changed, 157 insertions(+), 20 deletions(-) create mode 100644 tests/_exe/indirect-calls.c create mode 100644 tests/_exe/indirect-calls.golden diff --git a/exe.c b/exe.c index f2a024c2..c5f89edb 100644 --- a/exe.c +++ b/exe.c @@ -484,6 +484,16 @@ bool is_pointer_type(ast type) { return op == '[' || op == '*'; } +bool is_function_type(ast type) { + int op = 
get_op(type); + if (op == '*') { + if (get_op(get_child_('*', type, 1)) == '(') { + return true; + } + } + return op == '('; +} + bool is_struct_or_union_type(ast type) { int op = get_op(type); return op == STRUCT_KW || op == UNION_KW; @@ -680,6 +690,9 @@ int resolve_identifier(int ident_probe) { binding = cgc_lookup_var(ident_probe, cgc_globals); if (binding != 0) return binding; + binding = cgc_lookup_fun(ident_probe, cgc_globals); + if (binding != 0) return binding; + binding = cgc_lookup_enum_value(ident_probe, cgc_globals); if (binding != 0) return binding; @@ -693,7 +706,6 @@ ast value_type(ast node) { int op = get_op(node); int nb_children = get_nb_children(node); int binding; - int ident; ast left_type, right_type; ast child0, child1; @@ -708,8 +720,7 @@ ast value_type(ast node) { } else if (op == STRING) { return string_type; } else if (op == IDENTIFIER) { - ident = get_val_(IDENTIFIER, node); - binding = resolve_identifier(ident); + binding = resolve_identifier(get_val_(IDENTIFIER, node)); switch (binding_kind(binding)) { case BINDING_PARAM_LOCAL: case BINDING_VAR_LOCAL: @@ -718,9 +729,11 @@ ast value_type(ast node) { return heap[binding+4]; case BINDING_ENUM_CST: return int_type; + case BINDING_FUN: + return heap[binding+5]; default: putstr("ident = "); - putstr(STRING_BUF(ident)); + putstr(STRING_BUF(get_val_(IDENTIFIER, node))); putchar('\n'); fatal_error("value_type: unknown identifier"); return -1; @@ -736,7 +749,11 @@ ast value_type(ast node) { if (op == '*') { left_type = value_type(child0); - return dereference_type(left_type); + if (is_function_type(left_type)) { + return left_type; + } else { + return dereference_type(left_type); + } } else if (op == '&') { left_type = value_type(child0); return pointer_type(left_type, false); @@ -971,21 +988,27 @@ int codegen_params(ast params) { } void codegen_call(ast node) { - ast fun_ident = get_child__('(', IDENTIFIER, node, 0); - ast ident_probe = get_val_(IDENTIFIER, fun_ident); - ast params = 
get_child(node, 1); + ast fun = get_child_('(', node, 0); + ast params = get_child_('(', node, 1); ast nb_params = codegen_params(params); + int binding = 0; - int binding = cgc_lookup_fun(ident_probe, cgc_globals); - - if (binding == 0) { - putstr("ident = "); - putstr(STRING_BUF(ident_probe)); - putchar('\n'); - fatal_error("codegen_call: function not found"); + // Check if the function is a direct call, find the binding if it is + if (get_op(fun) == IDENTIFIER) { + binding = resolve_identifier(get_val_(IDENTIFIER, fun)); + if (binding_kind(binding) != BINDING_FUN) binding = 0; } - call(heap[binding+4]); + if (binding != 0) { + // Generate a fast path for direct calls + call(heap[binding+4]); + } else { + // Otherwise we go through the function pointer + codegen_rvalue(fun); + pop_reg(reg_X); + grow_fs(-1); + call_reg(reg_X); + } grow_stack(-nb_params); grow_fs(-nb_params); @@ -1035,6 +1058,10 @@ int codegen_lvalue(ast node) { add_reg_reg(reg_X, reg_glo); push_reg(reg_X); break; + case BINDING_FUN: + mov_reg_lbl(reg_X, heap[binding+4]); + push_reg(reg_X); + break; default: fatal_error("codegen_lvalue: identifier not found"); break; @@ -1161,6 +1188,16 @@ void codegen_rvalue(ast node) { binding = resolve_identifier(get_val_(IDENTIFIER, node)); switch (binding_kind(binding)) { case BINDING_PARAM_LOCAL: + mov_reg_imm(reg_X, (cgc_fs - heap[binding+3]) * word_size); + add_reg_reg(reg_X, reg_SP); + // structs/unions are allocated on the stack, so no need to dereference + // For arrays, we need to dereference the pointer since they are passed as pointers + if (get_op(heap[binding+4]) != STRUCT_KW && get_op(heap[binding+4]) != UNION_KW) { + mov_reg_mem(reg_X, reg_X, 0); + } + push_reg(reg_X); + break; + case BINDING_VAR_LOCAL: mov_reg_imm(reg_X, (cgc_fs - heap[binding+3]) * word_size); add_reg_reg(reg_X, reg_SP); @@ -1184,6 +1221,11 @@ void codegen_rvalue(ast node) { push_reg(reg_X); break; + case BINDING_FUN: + mov_reg_lbl(reg_X, heap[binding+4]); + push_reg(reg_X); + 
break; + default: putstr("ident = "); putstr(STRING_BUF(get_val_(IDENTIFIER, node))); putchar('\n'); fatal_error("codegen_rvalue: identifier not found"); @@ -1198,15 +1240,17 @@ void codegen_rvalue(ast node) { } else if (nb_children == 1) { if (op == '*') { + type1 = value_type(child0); codegen_rvalue(child0); - pop_reg(reg_Y); grow_fs(-1); - if (is_pointer_type(value_type(child0))) { - load_mem_location(reg_X, reg_Y, 0, ref_type_width(value_type(child0))); + if (is_function_type(type1)) { + } else if (is_pointer_type(type1)) { + pop_reg(reg_X); + load_mem_location(reg_X, reg_X, 0, ref_type_width(value_type(child0))); + push_reg(reg_X); } else { fatal_error("codegen_rvalue: non-pointer is being dereferenced with *"); } - push_reg(reg_X); } else if (op == '+' || op == PARENS) { codegen_rvalue(child0); grow_fs(-1); diff --git a/tests/_exe/indirect-calls.c b/tests/_exe/indirect-calls.c new file mode 100644 index 00000000..0531de37 --- /dev/null +++ b/tests/_exe/indirect-calls.c @@ -0,0 +1,69 @@ +#include <stdio.h> + +void putstr(char *s) { + while (*s) { + putchar(*s); + s += 1; + } +} + +void f(int direct) { + if (direct) { + putstr("direct\n"); + } else { + putstr("indirect\n"); + } +} + +void fun1() { + putstr("fun1\n"); +} + +void fun2() { + putstr("fun2\n"); +} + +void fun3() { + putstr("fun3\n"); +} + +void fun4() { + putstr("fun4\n"); +} + +void (*funs[4])() = {&fun1, &fun2, *fun3, **fun4}; + +void calls_funs(void (*funs[])(), int n) { + int i; + for (i = 0; i < n; i++) { + funs[i](); + } +} + +void call_fun(void (fun)(), char *msg) { + void (fun2)(); + fun(msg); + (*fun)(msg); + (**fun)(msg); +} + +int main() { + calls_funs(funs, 4); + void (*f_ptr1)() = f; + void (*f_ptr2)() = &f; + void (*f_ptr3)() = *f; + void (*f_ptr4)() = *****f; + f(1); + f_ptr1(0); + (*f_ptr1)(0); + (*f_ptr1)(0); + (***f_ptr1)(0); + f_ptr2(0); + f_ptr3(0); + f_ptr4(0); + call_fun(putstr, "hello\n"); + call_fun(*putstr, "hello\n"); + call_fun(&putstr, "hello\n"); + call_fun(**putstr, "hello\n"); +
return 0; +} diff --git a/tests/_exe/indirect-calls.golden b/tests/_exe/indirect-calls.golden new file mode 100644 index 00000000..791fbe6f --- /dev/null +++ b/tests/_exe/indirect-calls.golden @@ -0,0 +1,24 @@ +fun1 +fun2 +fun3 +fun4 +direct +indirect +indirect +indirect +indirect +indirect +indirect +indirect +hello +hello +hello +hello +hello +hello +hello +hello +hello +hello +hello +hello From 369745096203d7a51be2ed8f0c5ca64329ffeb00 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 10 Feb 2025 21:51:19 -0500 Subject: [PATCH 67/89] Add variadic flag to fun type AST node --- debug.c | 7 +++++-- pnut.c | 23 +++++++++++++++++------ 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/debug.c b/debug.c index d711906c..ead76e5f 100644 --- a/debug.c +++ b/debug.c @@ -303,6 +303,7 @@ void type_ast_to_sexp(ast type) { case '(': printf("(-> ("); ast_list_to_sexp(get_child_opt_('(', LIST, type, 1)); // Function args + if (get_child_('(', type, 2)) printf(" ..."); // Varargs printf(") "); type_ast_to_sexp(get_child_('(', type, 0)); printf(")"); @@ -397,8 +398,10 @@ void ast_to_sexp(ast obj) { ast_to_sexp(get_child__(DECL, IDENTIFIER, obj, 0)); putchar(' '); type_ast_to_sexp(get_child_(DECL, obj, 1)); - putchar(' '); - ast_to_sexp(get_child_(DECL, obj, 2)); + if (get_child_(DECL, obj, 2)) { // Initializer, if present + putchar(' '); + ast_to_sexp(get_child_(DECL, obj, 2)); + } printf(")"); break; diff --git a/pnut.c b/pnut.c index e73b20fc..d471f6c8 100644 --- a/pnut.c +++ b/pnut.c @@ -2321,19 +2321,24 @@ ast pointer_type(ast parent_type, bool is_const) { } ast function_type(ast parent_type, ast params) { - return new_ast2('(', parent_type, params); + return new_ast3('(', parent_type, params, false); } ast function_type1(ast parent_type, ast param1) { - return new_ast2('(', parent_type, cons(param1, 0)); + return new_ast3('(', parent_type, cons(param1, 0), 0); } ast function_type2(ast parent_type, ast param1, ast param2) { - return new_ast2('(', 
parent_type, cons(param1, cons(param2, 0))); + return new_ast3('(', parent_type, cons(param1, cons(param2, 0)), 0); } ast function_type3(ast parent_type, ast param1, ast param2, ast param3) { - return new_ast2('(', parent_type, cons(param1, cons(param2, cons(param3, 0)))); + return new_ast3('(', parent_type, cons(param1, cons(param2, cons(param3, 0))), 0); +} + +ast make_variadic_func(ast func_type) { + set_child(func_type, 2, true); // Set the variadic flag + return func_type; } // Type and declaration parser @@ -2617,11 +2622,14 @@ ast parse_declaration_specifiers() { return type_specifier; } +bool parse_param_list_is_variadic = false; int parse_param_list() { ast result = 0; ast tail; ast decl; + parse_param_list_is_variadic = false; + expect_tok('('); while (tok != ')' && tok != EOF) { @@ -2637,7 +2645,9 @@ int parse_param_list() { get_tok(); } else if (tok == ELLIPSIS) { // ignore ELLIPSIS nodes for now, but it should be the last parameter + if (result == 0) parse_error("Function must have a named parameter before ellipsis parameter", tok); get_tok(); + parse_param_list_is_variadic = true; break; } else { parse_error("Parameter declaration expected", tok); @@ -2782,7 +2792,8 @@ ast parse_declarator(bool abstract_decl, ast parent_type) { parent_type_parent = result; expect_tok(']'); } else if (tok == '(') { - result = new_ast2('(', get_inner_type(parent_type_parent), parse_param_list()); + result = new_ast3('(', get_inner_type(parent_type_parent), parse_param_list(), false); + if (parse_param_list_is_variadic) result = make_variadic_func(result); update_inner_type(parent_type_parent, result); parent_type_parent = result; } else { @@ -3696,8 +3707,8 @@ int main(int argc, char **argv) { printf("# %s:%d:%d\n", fp_filepath, line_number, column_number); #endif ast_to_sexp(decl); -#endif putchar('\n'); +#endif } #else codegen_begin(); From 96ca42280dd6a38bc7e5ee843f7ca0771ad91da2 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 10 Feb 2025 21:51:46 -0500 
Subject: [PATCH 68/89] Add test for function pointers inside struct --- tests/_exe/record-of-functions.c | 115 ++++++++++++++++++++++++++ tests/_exe/record-of-functions.golden | 3 + 2 files changed, 118 insertions(+) create mode 100755 tests/_exe/record-of-functions.c create mode 100644 tests/_exe/record-of-functions.golden diff --git a/tests/_exe/record-of-functions.c b/tests/_exe/record-of-functions.c new file mode 100755 index 00000000..fee96fd9 --- /dev/null +++ b/tests/_exe/record-of-functions.c @@ -0,0 +1,115 @@ +#include <stdio.h> +#include <stdlib.h> + +#define EOF (-1) +#ifndef NULL +#define NULL (0) +#endif + +void putstr(char *s) { + while (*s) { + putchar(*s); + s += 1; + } +} + +// Record that holds two functions for input and output +// Used to parameterize generic input/output functions +struct IOFunctions { + char (*in_char)(void *); + void (*out_char)(void *, char); + void *param; // Parameter passed to in_char and out_char +}; + +struct IOBuffer { + char *buf; + int len; + int pos; +}; + +// State for stubbed input/output functions +struct StubbedIO { + struct IOBuffer input; + struct IOBuffer output; +}; + +// Default putchar/getchar functions, ignores the extra parameter +void putchar_(void *param, char c) { putchar(c); } +char getchar_(void *param) { return getchar(); } + +// Stubbed putchar/getchar functions +// Their extra param is a StubbedIO struct that's used to keep state between calls. +void putchar_stub(struct StubbedIO *stubData, char c) { + struct IOBuffer *buf = &stubData->output; + if (buf->pos < buf->len) { + buf->buf[buf->pos] = c; + buf->pos += 1; + } else { + putstr("Output buffer overflow\n"); + exit(1); + } +} + +char getchar_stub(struct StubbedIO *stubData) { + struct IOBuffer *buf = &stubData->input; + if (buf->pos >= buf->len) return EOF; + + buf->pos += 1; + return buf->buf[buf->pos - 1]; +} + +// Parameterized input/output functions, using the IOFunctions record.
+int in_string(struct IOFunctions *record, char delim, char *buf, int len) { + int i = 0; + char c; + while (i < len) { + c = record->in_char(record->param); + if (c == EOF || c == delim) break; + + buf[i] = c; + i += 1; + } + + return i; +} + +void out_string(struct IOFunctions *record, char *s) { + while (*s) { + record->out_char(record->param, *s); + s += 1; + } +} + +struct IOFunctions stdin_stdout = { &getchar_, &putchar_, NULL }; + +struct IOFunctions *stub_io(char *input, int input_buf_len, int output_buf_len) { + struct IOFunctions *record = malloc(sizeof(struct IOFunctions)); + struct StubbedIO *stubData = malloc(sizeof(struct StubbedIO)); + record->in_char = (char (*)(void *)) &getchar_stub; + record->out_char = (void (*)(void *, char)) &putchar_stub; + record->param = stubData; + + stubData->input.buf = input; + stubData->input.len = input_buf_len; + stubData->input.pos = 0; + + stubData->output.buf = malloc(output_buf_len); + stubData->output.len = output_buf_len; + stubData->output.pos = 0; + + return record; +} + +int main() { + // Testing default output, but not input since it requires user interaction + char in_buf[100]; + out_string(&stdin_stdout, "hello\n"); + + // Testing stubbed input/output, no user interaction required + struct IOFunctions *stubbedIO = stub_io("WOOO WEEE\n", 14, 10); + out_string(stubbedIO, "WEEE WOOO\n"); + in_string(stubbedIO, '\0', in_buf, 100); + putstr(((struct StubbedIO *)stubbedIO->param)->output.buf); // WEEE WOOO + putstr(in_buf); // WOOO WEEE + return 0; +} diff --git a/tests/_exe/record-of-functions.golden b/tests/_exe/record-of-functions.golden new file mode 100644 index 00000000..93f821af --- /dev/null +++ b/tests/_exe/record-of-functions.golden @@ -0,0 +1,3 @@ +hello +WEEE WOOO +WOOO WEEE From 3b2b5561f3d8d81a8ba06e699c1890327d6c2cf0 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sun, 9 Feb 2025 22:20:23 -0500 Subject: [PATCH 69/89] Add check that calls pass the right number of args --- exe.c | 44 
++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/exe.c b/exe.c index c5f89edb..84ea0fd0 100644 --- a/exe.c +++ b/exe.c @@ -975,14 +975,36 @@ int codegen_param(ast param) { return type_width(type, false, true) / word_size; } +#ifdef SAFE_MODE +int codegen_params(ast params, ast params_type, bool allow_extra_params) { +#else int codegen_params(ast params) { +#endif int fs = 0; if (params != 0) { - fs = codegen_params(get_child_opt_(LIST, LIST, params, 1)); - fs += codegen_param(get_child_(LIST, params, 0)); +#ifdef SAFE_MODE + if (!allow_extra_params && params_type == 0) { + fatal_error("codegen_params: Function expects less parameters than provided"); + } + + // Check that the number of parameters is correct + if (params_type != 0) params_type = tail(params_type); +#endif + +#ifdef SAFE_MODE + fs = codegen_params(tail(params), params_type, allow_extra_params); +#else + fs = codegen_params(tail(params)); +#endif + fs += codegen_param(car(params)); } + #ifdef SAFE_MODE + else if (params_type != 0) { + fatal_error("codegen_params: Function expects more parameters than provided"); + } + #endif return fs; } @@ -990,7 +1012,7 @@ int codegen_params(ast params) { void codegen_call(ast node) { ast fun = get_child_('(', node, 0); ast params = get_child_('(', node, 1); - ast nb_params = codegen_params(params); + ast nb_params; int binding = 0; // Check if the function is a direct call, find the binding if it is @@ -999,6 +1021,20 @@ void codegen_call(ast node) { if (binding_kind(binding) != BINDING_FUN) binding = 0; } +#ifdef SAFE_MODE + // Make sure fun has a type that can be called, either a function pointer or a function + ast type = value_type(fun); + if (!is_function_type(type)) { + putstr("type="); putint(get_op(type)); putchar('\n'); + fatal_error("Called object is not a function or function pointer"); + } + if (get_op(type) == '*') type = get_child_('*', type, 1); // Dereference function pointer + // 
allow_extra_params is true if the function is called indirectly or if the function is variadic + nb_params = codegen_params(params, get_child_('(', type, 1), get_child_('(', type, 2) || binding == 0); +#else + nb_params = codegen_params(params); +#endif + if (binding != 0) { // Generate a fast path for direct calls call(heap[binding+4]); @@ -1506,7 +1542,7 @@ void codegen_begin() { cgc_add_global_fun(init_ident(IDENTIFIER, "close"), close_lbl, function_type1(int_type, int_type)); printf_lbl = alloc_label("printf"); - cgc_add_global_fun(init_ident(IDENTIFIER, "printf"), printf_lbl, function_type1(int_type, string_type)); + cgc_add_global_fun(init_ident(IDENTIFIER, "printf"), printf_lbl, make_variadic_func(function_type1(int_type, string_type))); jump(setup_lbl); } From 7f573b5c2d530736c2cee41a2a4911867f145437 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 10 Feb 2025 09:35:46 -0500 Subject: [PATCH 70/89] Zero out buffer in record-of-functions test --- tests/_exe/record-of-functions.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/tests/_exe/record-of-functions.c b/tests/_exe/record-of-functions.c index fee96fd9..99ae3b9c 100755 --- a/tests/_exe/record-of-functions.c +++ b/tests/_exe/record-of-functions.c @@ -82,6 +82,23 @@ void out_string(struct IOFunctions *record, char *s) { struct IOFunctions stdin_stdout = { &getchar_, &putchar_, NULL }; +void zero_buf(char *buf, int size) { + int i; + for (i = 0; i < size; i++) { + ((char *)buf)[i] = 0; + } +} + +struct IOBuffer *new_buf(int len) { + struct IOBuffer * buf = malloc(sizeof(struct IOBuffer)); + int i = 0; + buf->buf = malloc(len); + zero_buf(buf->buf, len); + buf->len = len; + buf->pos = 0; + return buf; +} + struct IOFunctions *stub_io(char *input, int input_buf_len, int output_buf_len) { struct IOFunctions *record = malloc(sizeof(struct IOFunctions)); struct StubbedIO *stubData = malloc(sizeof(struct StubbedIO)); @@ -93,16 +110,14 @@ struct IOFunctions 
*stub_io(char *input, int input_buf_len, int output_buf_len) stubData->input.len = input_buf_len; stubData->input.pos = 0; - stubData->output.buf = malloc(output_buf_len); - stubData->output.len = output_buf_len; - stubData->output.pos = 0; + stubData->output = *new_buf(output_buf_len); return record; } int main() { // Testing default output, but not input since it requires user interaction - char in_buf[100]; + char in_buf[100] = {0}; out_string(&stdin_stdout, "hello\n"); // Testing stubbed input/output, no user interaction required From a13e343a37cdd7ad55db1791e612eef1fbd608ef Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Thu, 13 Feb 2025 09:58:11 -0500 Subject: [PATCH 71/89] Allocate global variables on heap instead of stack Having globals on the stack is a bad idea because the stack is limited in size and can be easily overflowed. This commit moves the globals to the heap, which isn't limited in size. --- exe.c | 30 ++++++++++++++++-------------- x86.c | 2 ++ 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/exe.c b/exe.c index 84ea0fd0..2e1a3009 100644 --- a/exe.c +++ b/exe.c @@ -2217,27 +2217,29 @@ void codegen_end() { def_label(setup_lbl); - // Set to 0 the part of the stack that's used for global variables - mov_reg_imm(reg_X, 0); // reg_X = 0 constant - mov_reg_reg(reg_Y, reg_SP); // reg_Y = end of global variables (excluded) - grow_stack_bytes(cgc_global_alloc); // Allocate space for global variables - mov_reg_reg(reg_glo, reg_SP); // reg_glo = start of global variables - - def_label(glo_setup_loop_lbl); // Loop over words of global variables table - mov_mem_reg(reg_glo, 0, reg_X); // Set to 0 - add_reg_imm(reg_glo, word_size); // Move to next entry - jump_cond_reg_reg(LT, glo_setup_loop_lbl, reg_glo, reg_Y); - - mov_reg_reg(reg_glo, reg_SP); // Reset global variables pointer + // Allocate some space for the global variables. 
+ // The global variables used to be on the stack, but because the stack has a + // limited size, it is better to allocate a separate memory region so global + // variables are not limited by the stack size. + // + // We then allocate a separate memory region for the heap. Having a separate + // memory space for the heap makes it easier to detect out-of-bound accesses + // on global variables. + // + // Regarding initialization, os_allocate_memory uses mmap with the + // MAP_ANONYMOUS flag so the memory should already be zeroed. + // + os_allocate_memory(cgc_global_alloc); // Returns the globals table start address in reg_X + mov_reg_reg(reg_glo, reg_X); // reg_glo = globals table start os_allocate_memory(RT_HEAP_SIZE); // Returns the heap start address in reg_X - mov_mem_reg(reg_glo, 0, reg_X); // init heap start + mov_mem_reg(reg_glo, 0, reg_X); // Set init heap start mov_mem_reg(reg_glo, word_size, reg_X); // init bump pointer jump(init_start_lbl); def_label(init_next_lbl); - setup_proc_args(cgc_global_alloc); + setup_proc_args(0); call(main_lbl); if (!main_returns) mov_reg_imm(reg_X, 0); // exit process with 0 if main returns void push_reg(reg_X); // exit process with result of main diff --git a/x86.c b/x86.c index 4d63bca9..8defa2d1 100644 --- a/x86.c +++ b/x86.c @@ -443,6 +443,8 @@ void setup_proc_args(int global_vars_size) { // [esp + 8] : global table start (global_vars_size bytes long) // ... // For x86-64, it works similarly with [rsp + 0] for argc and [rsp + 8] for argv. + // + // Note(13/02/2025): Global variables are now allocated in a separate memory region so global_vars_size is 0. 
mov_reg_reg(reg_X, SP); add_reg_imm(reg_X, global_vars_size + word_size); // compute address of argv From 328b4fbbff0b18a7dd744dface66dde180b2b5c9 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Thu, 13 Feb 2025 09:59:14 -0500 Subject: [PATCH 72/89] Increase size of pnut's string_pool and heap 10x This should be enough to compile TCC, and shows that globals are no longer limited by stack size. Increasing the size of the string pool and heap by 10x segfaulted before the previous commit. --- pnut.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pnut.c b/pnut.c index d471f6c8..ea25035d 100644 --- a/pnut.c +++ b/pnut.c @@ -262,7 +262,7 @@ int prev_ch = EOF; int tok; int val; -#define STRING_POOL_SIZE 50000 +#define STRING_POOL_SIZE 500000 char string_pool[STRING_POOL_SIZE]; int string_pool_alloc = 0; int string_start; @@ -271,7 +271,7 @@ int hash; // These parameters give a perfect hashing of the C keywords #define HASH_PARAM 1026 #define HASH_PRIME 1009 -#define HEAP_SIZE 200000 +#define HEAP_SIZE 2000000 intptr_t heap[HEAP_SIZE]; int heap_alloc = HASH_PRIME; From b34fb6a5ffd2641f37b8d0c41f821f7258398f79 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Thu, 13 Feb 2025 10:05:49 -0500 Subject: [PATCH 73/89] Remove unused glo_setup_loop_lbl local variable --- exe.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/exe.c b/exe.c index 2e1a3009..523abbcb 100644 --- a/exe.c +++ b/exe.c @@ -2213,8 +2213,6 @@ void rt_free() { void codegen_end() { - int glo_setup_loop_lbl = alloc_label("glo_setup_loop"); - def_label(setup_lbl); // Allocate some space for the global variables. 
From 2dd345509285ce1de95d138a04b817a60d393dfb Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sat, 15 Feb 2025 22:15:11 -0500 Subject: [PATCH 74/89] Emit numeric literal in the base they are parsed --- pnut.c | 92 +++++++++++++++++++++++++++++++++++++++++++--------------- sh.c | 75 ++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 129 insertions(+), 38 deletions(-) diff --git a/pnut.c b/pnut.c index ea25035d..aa3af315 100644 --- a/pnut.c +++ b/pnut.c @@ -161,7 +161,11 @@ enum { WHILE_KW, // Non-character operands - INTEGER = 401, + INTEGER = 401, // Integer written in decimal +#ifdef sh + INTEGER_HEX = 402, // Integer written in hexadecimal + INTEGER_OCT = 403, // Integer written in octal +#endif CHARACTER, STRING, @@ -1112,6 +1116,16 @@ void handle_define() { } +#ifdef sh +// Remove PARENS node from an expression, useful when we want to check what's +// the top level operator of an expression without considering the parenthesis. +ast non_parenthesized_operand(ast node) { + while (get_op(node) == PARENS) node = get_child_(PARENS, node, 0); + + return node; +} +#endif + int eval_constant(ast expr, bool if_macro) { int op = get_op(expr); int op1; @@ -1122,11 +1136,16 @@ int eval_constant(ast expr, bool if_macro) { if (get_nb_children(expr) >= 2) child1 = get_child(expr, 1); switch (op) { - case PARENS: return eval_constant(child0, if_macro); - case INTEGER: return -get_val_(INTEGER, expr); - case CHARACTER: return get_val_(CHARACTER, expr); - case '~': return ~eval_constant(child0, if_macro); - case '!': return !eval_constant(child0, if_macro); + case PARENS: return eval_constant(child0, if_macro); + case INTEGER: +#ifdef sh + case INTEGER_HEX: + case INTEGER_OCT: +#endif + return -get_val(expr); + case CHARACTER: return get_val_(CHARACTER, expr); + case '~': return ~eval_constant(child0, if_macro); + case '!': return !eval_constant(child0, if_macro); case '-': case '+': op1 = eval_constant(child0, if_macro); @@ -1770,7 +1789,11 @@ void 
paste_tokens(int left_tok, int left_val) { if (right_tok == IDENTIFIER || right_tok == TYPE || right_tok == MACRO || right_tok <= WHILE_KW) { accum_string_string(right_val); - } else if (right_tok == INTEGER) { + } else if (right_tok == INTEGER +#ifdef sh + || right_tok == INTEGER_HEX || right_tok == INTEGER_OCT +#endif + ) { accum_string_integer(-right_val); } else { putstr("left_tok="); putint(left_tok); putstr(", right_tok="); putint(right_tok); putchar('\n'); @@ -1781,8 +1804,16 @@ void paste_tokens(int left_tok, int left_val) { val = end_ident(); tok = heap[val+2]; // The kind of the identifier - } else if (left_tok == INTEGER) { - if (right_tok == INTEGER) { + } else if (left_tok == INTEGER +#ifdef sh + || left_tok == INTEGER_HEX || left_tok == INTEGER_OCT +#endif + ) { + if (right_tok == INTEGER +#ifdef sh + || right_tok == INTEGER_HEX || right_tok == INTEGER_OCT +#endif + ) { val = -paste_integers(-left_val, -right_val); } else if (right_tok == IDENTIFIER || right_tok == MACRO || right_tok <= WHILE_KW) { begin_string(); @@ -1933,8 +1964,12 @@ void get_tok() { get_ch(); + tok = INTEGER; if (val == 0) { // val == 0 <=> ch == '0' if (ch == 'x' || ch == 'X') { +#ifdef sh + tok = INTEGER_HEX; +#endif get_ch(); val = 0; if (accum_digit(16)) { @@ -1943,13 +1978,15 @@ void get_tok() { syntax_error("invalid hex integer -- it must have at least one digit"); } } else { +#ifdef sh + tok = INTEGER_OCT; +#endif while (accum_digit(8)); } } else { while (accum_digit(10)); } - tok = INTEGER; break; @@ -2362,6 +2399,7 @@ ast parse_enum() { ast tail; ast value = 0; int next_value = 0; + int last_literal_type = INTEGER; // Default to decimal integer for enum values expect_tok(ENUM_KW); @@ -2389,10 +2427,22 @@ ast parse_enum() { get_tok(); value = parse_assignment_expression(); if (value == 0) parse_error("Enum value must be a constant expression", tok); + +#ifdef sh + // Preserve the type of integer literals (dec/hex/oct) by only creating + // a new node if the value is not 
already a literal. We use the last + // literal type to determine which type to use when creating a new node. + value = non_parenthesized_operand(value); + if (get_op(value) != INTEGER || get_op(value) != INTEGER_HEX || get_op(value) != INTEGER_OCT) { + value = new_ast0(last_literal_type, -eval_constant(value, false)); + } + last_literal_type = get_op(value); +#else value = new_ast0(INTEGER, -eval_constant(value, false)); - next_value = get_val_(INTEGER, value) - 1; // Next value is the current value + 1, but val is negative +#endif + next_value = get_val(value) - 1; // Next value is the current value + 1, but val is negative } else { - value = new_ast0(INTEGER, next_value); + value = new_ast0(last_literal_type, next_value); next_value -= 1; } @@ -2966,19 +3016,13 @@ ast parse_primary_expression() { ast result; ast tail; - if (tok == IDENTIFIER) { - - result = new_ast0(IDENTIFIER, val); - get_tok(); - - } else if (tok == INTEGER) { - - result = new_ast0(INTEGER, val); - get_tok(); - - } else if (tok == CHARACTER) { + if (tok == IDENTIFIER || tok == CHARACTER || tok == INTEGER +#ifdef sh + || tok == INTEGER_HEX || tok == INTEGER_OCT +#endif + ) { - result = new_ast0(CHARACTER, val); + result = new_ast0(tok, val); get_tok(); } else if (tok == STRING) { diff --git a/sh.c b/sh.c index eb06d8ee..e0664b85 100644 --- a/sh.c +++ b/sh.c @@ -35,6 +35,8 @@ int text_alloc = 1; // Start at 1 because 0 is the empty text enum TEXT_NODES { TEXT_TREE, TEXT_INTEGER, + TEXT_INTEGER_HEX, + TEXT_INTEGER_OCT, TEXT_STRING, TEXT_ESCAPED }; @@ -84,6 +86,33 @@ text wrap_int(int i) { return (text_alloc += 2) - 2; } +text wrap_int_hex(int i) { + if (text_alloc + 2 >= TEXT_POOL_SIZE) fatal_error("string tree pool overflow"); + text_pool[text_alloc] = TEXT_FROM_INT(TEXT_INTEGER_HEX); + text_pool[text_alloc + 1] = TEXT_FROM_INT(i); + return (text_alloc += 2) - 2; +} + +text wrap_int_oct(int i) { + if (text_alloc + 2 >= TEXT_POOL_SIZE) fatal_error("string tree pool overflow"); + 
text_pool[text_alloc] = TEXT_FROM_INT(TEXT_INTEGER_OCT); + text_pool[text_alloc + 1] = TEXT_FROM_INT(i); + return (text_alloc += 2) - 2; +} + +text wrap_integer(int multiply, int obj) { + switch (get_op(obj)) { + case INTEGER: + return wrap_int(multiply * -get_val_(INTEGER, obj)); + case INTEGER_HEX: + return wrap_int_hex(multiply * -get_val_(INTEGER_HEX, obj)); + case INTEGER_OCT: + return wrap_int_oct(multiply * -get_val_(INTEGER_OCT, obj)); + default: + fatal_error("wrap_integer: unknown integer type"); + } +} + text escape_text(text t, bool for_printf) { if (text_alloc + 3 >= TEXT_POOL_SIZE) fatal_error("string tree pool overflow"); @@ -176,6 +205,19 @@ text concatenate_strings_with(text t1, text t2, text sep) { return string_concat3(t1, sep, t2); } +// Output unsigned integer in hex +void puthex_unsigned(int n) { + // Because n is signed, we clear the upper bits after shifting in case n was negative + if (n & ~15) puthex_unsigned((n >> 4) & 0x0fffffff); + putchar("0123456789abcdef"[n & 15]); +} + +void putoct_unsigned(int n) { + // Because n is signed, we clear the upper bits after shifting in case n was negative + if (n & ~7) putoct_unsigned((n >> 3) & 0x1fffffff); + putchar('0' + (n & 7)); +} + void print_escaped_char(char c, int for_printf) { // C escape sequences if (c == '\0') { putchar('\\'); putchar('0'); } @@ -234,6 +276,11 @@ void print_escaped_text(text t, bool for_printf) { } } else if (text_pool[t] == TEXT_FROM_INT(TEXT_INTEGER)) { putint(TEXT_TO_INT(text_pool[t + 1])); + } else if (text_pool[t] == TEXT_FROM_INT(TEXT_INTEGER_HEX)) { + putchar('0'); putchar('x'); + puthex_unsigned(TEXT_TO_INT(text_pool[t + 1])); + } else if (text_pool[t] == TEXT_FROM_INT(TEXT_INTEGER_OCT)) { + putoct_unsigned(TEXT_TO_INT(text_pool[t + 1])); } else if (text_pool[t] == TEXT_FROM_INT(TEXT_STRING)) { print_escaped_string((char*) text_pool[t + 1], (char*) text_pool[t + 2], for_printf); } else if (text_pool[t] == TEXT_FROM_INT(TEXT_ESCAPED)) { @@ -263,6 +310,11 @@ void 
print_text(text t) { } } else if (text_pool[t] == TEXT_FROM_INT(TEXT_INTEGER)) { putint(TEXT_TO_INT(text_pool[t + 1])); + } else if (text_pool[t] == TEXT_FROM_INT(TEXT_INTEGER_HEX)) { + putchar('0'); putchar('x'); + puthex_unsigned(TEXT_TO_INT(text_pool[t + 1])); + } else if (text_pool[t] == TEXT_FROM_INT(TEXT_INTEGER_OCT)) { + putoct_unsigned(TEXT_TO_INT(text_pool[t + 1])); } else if (text_pool[t] == TEXT_FROM_INT(TEXT_STRING)) { if (TEXT_TO_INT(text_pool[t + 2]) == 0) { // null-terminated string putstr((char*) text_pool[t + 1]); @@ -903,7 +955,8 @@ ast handle_side_effects_go(ast node, bool executes_conditionally) { if (nb_children >= 3) { child2 = get_child(node, 2); } if (nb_children == 0) { - if (op == IDENTIFIER || op == IDENTIFIER_INTERNAL || op == IDENTIFIER_STRING || op == IDENTIFIER_DOLLAR || op == INTEGER || op == CHARACTER) { + if ( op == IDENTIFIER || op == IDENTIFIER_INTERNAL || op == IDENTIFIER_STRING || op == IDENTIFIER_DOLLAR + || op == CHARACTER || op == INTEGER || op == INTEGER_HEX || op == INTEGER_OCT) { return node; } else if (op == STRING) { /* We must initialize strings before the expression */ @@ -1049,7 +1102,9 @@ text comp_initializer_list(ast initializer_list, int expected_len) { element = car(initializer_list); switch (get_op(element)) { case INTEGER: - args = concatenate_strings_with(args, wrap_int(-get_val_(INTEGER, element)), wrap_char(' ')); + case INTEGER_HEX: + case INTEGER_OCT: + args = concatenate_strings_with(args, wrap_integer(1, element), wrap_char(' ')); break; case CHARACTER: // TODO: Character identifiers are only defined at the end of the script, so we can't use them here @@ -1127,14 +1182,6 @@ text wrap_if_needed(int parens_otherwise, int context, ast test_side_effects, te } } -int non_parenthesized_operand(ast node) { - while (get_op(node) == PARENS) { - node = get_child_(PARENS, node, 0); - } - - return node; -} - // Used to supports the case `if/while (c) { ... }`, where c is a variable or a literal. 
// This is otherwise handled by wrap-if-needed, but we don't want to wrap in $(( ... )) here. text wrap_in_condition_if_needed(int context, ast test_side_effects, text code) { @@ -1157,8 +1204,8 @@ text comp_rvalue_go(ast node, int context, ast test_side_effects, int outer_op) if (nb_children >= 4) { child3 = get_child(node, 3); } if (nb_children == 0) { - if (op == INTEGER) { - return wrap_in_condition_if_needed(context, test_side_effects, wrap_int(-get_val_(INTEGER, node))); + if (op == INTEGER || op == INTEGER_HEX || op == INTEGER_OCT) { + return wrap_in_condition_if_needed(context, test_side_effects, wrap_integer(1, node)); } else if (op == CHARACTER) { #ifdef SH_INLINE_CHAR_LITERAL return wrap_in_condition_if_needed(context, test_side_effects, wrap_int(get_val_(CHARACTER, node))); @@ -1190,8 +1237,8 @@ text comp_rvalue_go(ast node, int context, ast test_side_effects, int outer_op) } else if (op == '-') { // Check if the rest of ast is a literal, if so directly return the negated value. // Note: I think this can be simplified by not wrapped in () in the else case. 
- if (get_op(child0) == INTEGER) { - return wrap_in_condition_if_needed(context, test_side_effects, wrap_int(get_val_(INTEGER, child0))); + if (get_op(child0) == INTEGER || op == INTEGER_HEX || op == INTEGER_OCT) { + return wrap_in_condition_if_needed(context, test_side_effects, wrap_integer(-1, child0)); } else { sub1 = comp_rvalue_go(child0, RVALUE_CTX_ARITH_EXPANSION, 0, op); return wrap_if_needed(false, context, test_side_effects, string_concat3(wrap_str_lit("-("), sub1, wrap_char(')')), outer_op, op); From d27b9ae951f3cfbb769cef3cc858e8489d5c016f Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sat, 15 Feb 2025 22:15:50 -0500 Subject: [PATCH 75/89] Regenerate examples --- examples/compiled/base64.sh | 16 +-- examples/compiled/sha256sum.sh | 180 ++++++++++++++++----------------- 2 files changed, 98 insertions(+), 98 deletions(-) diff --git a/examples/compiled/base64.sh b/examples/compiled/base64.sh index 5ec7b154..237b6347 100755 --- a/examples/compiled/base64.sh +++ b/examples/compiled/base64.sh @@ -81,20 +81,20 @@ _encode() { _getchar b2 printf \\$(((_$((_codes + (b1 >> 2))))/64))$(((_$((_codes + (b1 >> 2))))/8%8))$(((_$((_codes + (b1 >> 2))))%8)) if [ $b2 -lt 0 ] ; then - printf \\$(((_$((_codes + (63 & (b1 << 4)))))/64))$(((_$((_codes + (63 & (b1 << 4)))))/8%8))$(((_$((_codes + (63 & (b1 << 4)))))%8)) + printf \\$(((_$((_codes + (0x3f & (b1 << 4)))))/64))$(((_$((_codes + (0x3f & (b1 << 4)))))/8%8))$(((_$((_codes + (0x3f & (b1 << 4)))))%8)) printf "=" printf "=" break else - printf \\$(((_$((_codes + (63 & ((b1 << 4) | (b2 >> 4))))))/64))$(((_$((_codes + (63 & ((b1 << 4) | (b2 >> 4))))))/8%8))$(((_$((_codes + (63 & ((b1 << 4) | (b2 >> 4))))))%8)) + printf \\$(((_$((_codes + (0x3f & ((b1 << 4) | (b2 >> 4))))))/64))$(((_$((_codes + (0x3f & ((b1 << 4) | (b2 >> 4))))))/8%8))$(((_$((_codes + (0x3f & ((b1 << 4) | (b2 >> 4))))))%8)) _getchar b3 if [ $b3 -lt 0 ] ; then - printf \\$(((_$((_codes + (63 & (b2 << 2)))))/64))$(((_$((_codes + (63 & (b2 << 
2)))))/8%8))$(((_$((_codes + (63 & (b2 << 2)))))%8)) + printf \\$(((_$((_codes + (0x3f & (b2 << 2)))))/64))$(((_$((_codes + (0x3f & (b2 << 2)))))/8%8))$(((_$((_codes + (0x3f & (b2 << 2)))))%8)) printf "=" break else - printf \\$(((_$((_codes + (63 & ((b2 << 2) | (b3 >> 6))))))/64))$(((_$((_codes + (63 & ((b2 << 2) | (b3 >> 6))))))/8%8))$(((_$((_codes + (63 & ((b2 << 2) | (b3 >> 6))))))%8)) - printf \\$(((_$((_codes + (63 & b3))))/64))$(((_$((_codes + (63 & b3))))/8%8))$(((_$((_codes + (63 & b3))))%8)) + printf \\$(((_$((_codes + (0x3f & ((b2 << 2) | (b3 >> 6))))))/64))$(((_$((_codes + (0x3f & ((b2 << 2) | (b3 >> 6))))))/8%8))$(((_$((_codes + (0x3f & ((b2 << 2) | (b3 >> 6))))))%8)) + printf \\$(((_$((_codes + (0x3f & b3))))/64))$(((_$((_codes + (0x3f & b3))))/8%8))$(((_$((_codes + (0x3f & b3))))%8)) fi fi done @@ -125,7 +125,7 @@ _decode() { done i=0 while [ $i -lt 64 ]; do - : $((_$((_lut + 255 & _$((_codes + i)))) = i)) + : $((_$((_lut + 0xff & _$((_codes + i)))) = i)) : $((i += 1)) done while _get c1; [ $c1 -ge 0 ]; do @@ -136,11 +136,11 @@ _decode() { if _get c3; [ $c3 -lt 0 ] ; then break fi - printf \\$(((255 & ((c2 << 4) | (c3 >> 2)))/64))$(((255 & ((c2 << 4) | (c3 >> 2)))/8%8))$(((255 & ((c2 << 4) | (c3 >> 2)))%8)) + printf \\$(((0xff & ((c2 << 4) | (c3 >> 2)))/64))$(((0xff & ((c2 << 4) | (c3 >> 2)))/8%8))$(((0xff & ((c2 << 4) | (c3 >> 2)))%8)) if _get c4; [ $c4 -lt 0 ] ; then break fi - printf \\$(((255 & ((c3 << 6) | c4))/64))$(((255 & ((c3 << 6) | c4))/8%8))$(((255 & ((c3 << 6) | c4))%8)) + printf \\$(((0xff & ((c3 << 6) | c4))/64))$(((0xff & ((c3 << 6) | c4))/8%8))$(((0xff & ((c3 << 6) | c4))%8)) done endlet $1 c4 c3 c2 c1 i } diff --git a/examples/compiled/sha256sum.sh b/examples/compiled/sha256sum.sh index 5d6fe551..2319c660 100755 --- a/examples/compiled/sha256sum.sh +++ b/examples/compiled/sha256sum.sh @@ -14,70 +14,70 @@ defarr() { _malloc $1 $2; } defarr _k 64 _sha256_setup() { - : $((_$((_k + 0)) = 1116352408)) - : $((_$((_k + 1)) = 1899447441)) - 
: $((_$((_k + 2)) = -1245643825)) - : $((_$((_k + 3)) = -373957723)) - : $((_$((_k + 4)) = 961987163)) - : $((_$((_k + 5)) = 1508970993)) - : $((_$((_k + 6)) = -1841331548)) - : $((_$((_k + 7)) = -1424204075)) - : $((_$((_k + 8)) = -670586216)) - : $((_$((_k + 9)) = 310598401)) - : $((_$((_k + 10)) = 607225278)) - : $((_$((_k + 11)) = 1426881987)) - : $((_$((_k + 12)) = 1925078388)) - : $((_$((_k + 13)) = -2132889090)) - : $((_$((_k + 14)) = -1680079193)) - : $((_$((_k + 15)) = -1046744716)) - : $((_$((_k + 16)) = -459576895)) - : $((_$((_k + 17)) = -272742522)) - : $((_$((_k + 18)) = 264347078)) - : $((_$((_k + 19)) = 604807628)) - : $((_$((_k + 20)) = 770255983)) - : $((_$((_k + 21)) = 1249150122)) - : $((_$((_k + 22)) = 1555081692)) - : $((_$((_k + 23)) = 1996064986)) - : $((_$((_k + 24)) = -1740746414)) - : $((_$((_k + 25)) = -1473132947)) - : $((_$((_k + 26)) = -1341970488)) - : $((_$((_k + 27)) = -1084653625)) - : $((_$((_k + 28)) = -958395405)) - : $((_$((_k + 29)) = -710438585)) - : $((_$((_k + 30)) = 113926993)) - : $((_$((_k + 31)) = 338241895)) - : $((_$((_k + 32)) = 666307205)) - : $((_$((_k + 33)) = 773529912)) - : $((_$((_k + 34)) = 1294757372)) - : $((_$((_k + 35)) = 1396182291)) - : $((_$((_k + 36)) = 1695183700)) - : $((_$((_k + 37)) = 1986661051)) - : $((_$((_k + 38)) = -2117940946)) - : $((_$((_k + 39)) = -1838011259)) - : $((_$((_k + 40)) = -1564481375)) - : $((_$((_k + 41)) = -1474664885)) - : $((_$((_k + 42)) = -1035236496)) - : $((_$((_k + 43)) = -949202525)) - : $((_$((_k + 44)) = -778901479)) - : $((_$((_k + 45)) = -694614492)) - : $((_$((_k + 46)) = -200395387)) - : $((_$((_k + 47)) = 275423344)) - : $((_$((_k + 48)) = 430227734)) - : $((_$((_k + 49)) = 506948616)) - : $((_$((_k + 50)) = 659060556)) - : $((_$((_k + 51)) = 883997877)) - : $((_$((_k + 52)) = 958139571)) - : $((_$((_k + 53)) = 1322822218)) - : $((_$((_k + 54)) = 1537002063)) - : $((_$((_k + 55)) = 1747873779)) - : $((_$((_k + 56)) = 1955562222)) - : $((_$((_k + 57)) = 
2024104815)) - : $((_$((_k + 58)) = -2067236844)) - : $((_$((_k + 59)) = -1933114872)) - : $((_$((_k + 60)) = -1866530822)) - : $((_$((_k + 61)) = -1538233109)) - : $((_$((_k + 62)) = -1090935817)) - : $((_$((_k + 63)) = -965641998)) + : $((_$((_k + 0)) = 0x428a2f98)) + : $((_$((_k + 1)) = 0x71374491)) + : $((_$((_k + 2)) = 0xb5c0fbcf)) + : $((_$((_k + 3)) = 0xe9b5dba5)) + : $((_$((_k + 4)) = 0x3956c25b)) + : $((_$((_k + 5)) = 0x59f111f1)) + : $((_$((_k + 6)) = 0x923f82a4)) + : $((_$((_k + 7)) = 0xab1c5ed5)) + : $((_$((_k + 8)) = 0xd807aa98)) + : $((_$((_k + 9)) = 0x12835b01)) + : $((_$((_k + 10)) = 0x243185be)) + : $((_$((_k + 11)) = 0x550c7dc3)) + : $((_$((_k + 12)) = 0x72be5d74)) + : $((_$((_k + 13)) = 0x80deb1fe)) + : $((_$((_k + 14)) = 0x9bdc06a7)) + : $((_$((_k + 15)) = 0xc19bf174)) + : $((_$((_k + 16)) = 0xe49b69c1)) + : $((_$((_k + 17)) = 0xefbe4786)) + : $((_$((_k + 18)) = 0xfc19dc6)) + : $((_$((_k + 19)) = 0x240ca1cc)) + : $((_$((_k + 20)) = 0x2de92c6f)) + : $((_$((_k + 21)) = 0x4a7484aa)) + : $((_$((_k + 22)) = 0x5cb0a9dc)) + : $((_$((_k + 23)) = 0x76f988da)) + : $((_$((_k + 24)) = 0x983e5152)) + : $((_$((_k + 25)) = 0xa831c66d)) + : $((_$((_k + 26)) = 0xb00327c8)) + : $((_$((_k + 27)) = 0xbf597fc7)) + : $((_$((_k + 28)) = 0xc6e00bf3)) + : $((_$((_k + 29)) = 0xd5a79147)) + : $((_$((_k + 30)) = 0x6ca6351)) + : $((_$((_k + 31)) = 0x14292967)) + : $((_$((_k + 32)) = 0x27b70a85)) + : $((_$((_k + 33)) = 0x2e1b2138)) + : $((_$((_k + 34)) = 0x4d2c6dfc)) + : $((_$((_k + 35)) = 0x53380d13)) + : $((_$((_k + 36)) = 0x650a7354)) + : $((_$((_k + 37)) = 0x766a0abb)) + : $((_$((_k + 38)) = 0x81c2c92e)) + : $((_$((_k + 39)) = 0x92722c85)) + : $((_$((_k + 40)) = 0xa2bfe8a1)) + : $((_$((_k + 41)) = 0xa81a664b)) + : $((_$((_k + 42)) = 0xc24b8b70)) + : $((_$((_k + 43)) = 0xc76c51a3)) + : $((_$((_k + 44)) = 0xd192e819)) + : $((_$((_k + 45)) = 0xd6990624)) + : $((_$((_k + 46)) = 0xf40e3585)) + : $((_$((_k + 47)) = 0x106aa070)) + : $((_$((_k + 48)) = 0x19a4c116)) + : $((_$((_k 
+ 49)) = 0x1e376c08)) + : $((_$((_k + 50)) = 0x2748774c)) + : $((_$((_k + 51)) = 0x34b0bcb5)) + : $((_$((_k + 52)) = 0x391c0cb3)) + : $((_$((_k + 53)) = 0x4ed8aa4a)) + : $((_$((_k + 54)) = 0x5b9cca4f)) + : $((_$((_k + 55)) = 0x682e6ff3)) + : $((_$((_k + 56)) = 0x748f82ee)) + : $((_$((_k + 57)) = 0x78a5636f)) + : $((_$((_k + 58)) = 0x84c87814)) + : $((_$((_k + 59)) = 0x8cc70208)) + : $((_$((_k + 60)) = 0x90befffa)) + : $((_$((_k + 61)) = 0xa4506ceb)) + : $((_$((_k + 62)) = 0xbef9a3f7)) + : $((_$((_k + 63)) = 0xc67178f2)) } defarr _w 64 @@ -86,14 +86,14 @@ defarr _hash 8 defarr _temp 8 _sha256_init() { _nbits=0 - : $((_$((_hash + 0)) = 1779033703)) - : $((_$((_hash + 1)) = -1150833019)) - : $((_$((_hash + 2)) = 1013904242)) - : $((_$((_hash + 3)) = -1521486534)) - : $((_$((_hash + 4)) = 1359893119)) - : $((_$((_hash + 5)) = -1694144372)) - : $((_$((_hash + 6)) = 528734635)) - : $((_$((_hash + 7)) = 1541459225)) + : $((_$((_hash + 0)) = 0x6a09e667)) + : $((_$((_hash + 1)) = 0xbb67ae85)) + : $((_$((_hash + 2)) = 0x3c6ef372)) + : $((_$((_hash + 3)) = 0xa54ff53a)) + : $((_$((_hash + 4)) = 0x510e527f)) + : $((_$((_hash + 5)) = 0x9b05688c)) + : $((_$((_hash + 6)) = 0x1f83d9ab)) + : $((_$((_hash + 7)) = 0x5be0cd19)) } : $((t2 = ma = t1 = ch = i = s1 = s0 = b3 = b2 = b1 = b0 = bytes = 0)) @@ -101,18 +101,18 @@ _sha256_add_block() { let bytes $2 let b0; let b1; let b2; let b3; let s0; let s1; let i; let ch; let t1; let ma; let t2 i=0 while [ $i -lt 16 ]; do - b0=$((255 & _$((bytes + (i * 4))))) - b1=$((255 & _$((bytes + (i * 4) + 1)))) - b2=$((255 & _$((bytes + (i * 4) + 2)))) - b3=$((255 & _$((bytes + (i * 4) + 3)))) + b0=$((0xff & _$((bytes + (i * 4))))) + b1=$((0xff & _$((bytes + (i * 4) + 1)))) + b2=$((0xff & _$((bytes + (i * 4) + 2)))) + b3=$((0xff & _$((bytes + (i * 4) + 3)))) : $((_$((_w + i)) = (b0 << 24) | (b1 << 16) | (b2 << 8) | b3)) : $((i += 1)) done i=16 while [ $i -lt 64 ]; do - s0=$(((((_$((_w + (i - 15))) >> 7) & (2147483647 >> (7 - 1))) | ((_$((_w + (i - 
15))) << (32 - 7)) & -1)) ^ (((_$((_w + (i - 15))) >> 18) & (2147483647 >> (18 - 1))) | ((_$((_w + (i - 15))) << (32 - 18)) & -1)) ^ ((_$((_w + (i - 15))) >> 3) & 536870911))) - s1=$(((((_$((_w + (i - 2))) >> 17) & (2147483647 >> (17 - 1))) | ((_$((_w + (i - 2))) << (32 - 17)) & -1)) ^ (((_$((_w + (i - 2))) >> 19) & (2147483647 >> (19 - 1))) | ((_$((_w + (i - 2))) << (32 - 19)) & -1)) ^ ((_$((_w + (i - 2))) >> 10) & 4194303))) - : $((_$((_w + i)) = (_$((_w + (i - 16))) + s0 + _$((_w + (i - 7))) + s1) & -1)) + s0=$(((((_$((_w + (i - 15))) >> 7) & (0x7fffffff >> (7 - 1))) | ((_$((_w + (i - 15))) << (32 - 7)) & 0xffffffff)) ^ (((_$((_w + (i - 15))) >> 18) & (0x7fffffff >> (18 - 1))) | ((_$((_w + (i - 15))) << (32 - 18)) & 0xffffffff)) ^ ((_$((_w + (i - 15))) >> 3) & 0x1fffffff))) + s1=$(((((_$((_w + (i - 2))) >> 17) & (0x7fffffff >> (17 - 1))) | ((_$((_w + (i - 2))) << (32 - 17)) & 0xffffffff)) ^ (((_$((_w + (i - 2))) >> 19) & (0x7fffffff >> (19 - 1))) | ((_$((_w + (i - 2))) << (32 - 19)) & 0xffffffff)) ^ ((_$((_w + (i - 2))) >> 10) & 0x3fffff))) + : $((_$((_w + i)) = (_$((_w + (i - 16))) + s0 + _$((_w + (i - 7))) + s1) & 0xffffffff)) : $((i += 1)) done i=0 @@ -122,25 +122,25 @@ _sha256_add_block() { let bytes $2 done i=0 while [ $i -lt 64 ]; do - s1=$(((((_$((_temp + 4)) >> 6) & (2147483647 >> (6 - 1))) | ((_$((_temp + 4)) << (32 - 6)) & -1)) ^ (((_$((_temp + 4)) >> 11) & (2147483647 >> (11 - 1))) | ((_$((_temp + 4)) << (32 - 11)) & -1)) ^ (((_$((_temp + 4)) >> 25) & (2147483647 >> (25 - 1))) | ((_$((_temp + 4)) << (32 - 25)) & -1)))) + s1=$(((((_$((_temp + 4)) >> 6) & (0x7fffffff >> (6 - 1))) | ((_$((_temp + 4)) << (32 - 6)) & 0xffffffff)) ^ (((_$((_temp + 4)) >> 11) & (0x7fffffff >> (11 - 1))) | ((_$((_temp + 4)) << (32 - 11)) & 0xffffffff)) ^ (((_$((_temp + 4)) >> 25) & (0x7fffffff >> (25 - 1))) | ((_$((_temp + 4)) << (32 - 25)) & 0xffffffff)))) ch=$(((_$((_temp + 4)) & _$((_temp + 5))) ^ (~(_$((_temp + 4))) & _$((_temp + 6))))) - t1=$(((_$((_temp + 7)) + s1 + ch 
+ _$((_k + i)) + _$((_w + i))) & -1)) - s0=$(((((_$((_temp + 0)) >> 2) & (2147483647 >> (2 - 1))) | ((_$((_temp + 0)) << (32 - 2)) & -1)) ^ (((_$((_temp + 0)) >> 13) & (2147483647 >> (13 - 1))) | ((_$((_temp + 0)) << (32 - 13)) & -1)) ^ (((_$((_temp + 0)) >> 22) & (2147483647 >> (22 - 1))) | ((_$((_temp + 0)) << (32 - 22)) & -1)))) + t1=$(((_$((_temp + 7)) + s1 + ch + _$((_k + i)) + _$((_w + i))) & 0xffffffff)) + s0=$(((((_$((_temp + 0)) >> 2) & (0x7fffffff >> (2 - 1))) | ((_$((_temp + 0)) << (32 - 2)) & 0xffffffff)) ^ (((_$((_temp + 0)) >> 13) & (0x7fffffff >> (13 - 1))) | ((_$((_temp + 0)) << (32 - 13)) & 0xffffffff)) ^ (((_$((_temp + 0)) >> 22) & (0x7fffffff >> (22 - 1))) | ((_$((_temp + 0)) << (32 - 22)) & 0xffffffff)))) ma=$(((_$((_temp + 0)) & _$((_temp + 1))) ^ (_$((_temp + 0)) & _$((_temp + 2))) ^ (_$((_temp + 1)) & _$((_temp + 2))))) - t2=$(((s0 + ma) & -1)) + t2=$(((s0 + ma) & 0xffffffff)) : $((_$((_temp + 7)) = _$((_temp + 6)))) : $((_$((_temp + 6)) = _$((_temp + 5)))) : $((_$((_temp + 5)) = _$((_temp + 4)))) - : $((_$((_temp + 4)) = (_$((_temp + 3)) + t1) & -1)) + : $((_$((_temp + 4)) = (_$((_temp + 3)) + t1) & 0xffffffff)) : $((_$((_temp + 3)) = _$((_temp + 2)))) : $((_$((_temp + 2)) = _$((_temp + 1)))) : $((_$((_temp + 1)) = _$((_temp + 0)))) - : $((_$((_temp + 0)) = (t1 + t2) & -1)) + : $((_$((_temp + 0)) = (t1 + t2) & 0xffffffff)) : $((i += 1)) done i=0 while [ $i -lt 8 ]; do - : $((_$((_hash + i)) = (_$((_hash + i)) + _$((_temp + i))) & -1)) + : $((_$((_hash + i)) = (_$((_hash + i)) + _$((_temp + i))) & 0xffffffff)) : $((i += 1)) done endlet $1 t2 ma t1 ch i s1 s0 b3 b2 b1 b0 bytes @@ -152,8 +152,8 @@ _hex() { let byte $2 let digits defstr __str_0 "0123456789abcdef" digits=$__str_0 - printf \\$(((_$((digits + (15 & (byte >> 4)))))/64))$(((_$((digits + (15 & (byte >> 4)))))/8%8))$(((_$((digits + (15 & (byte >> 4)))))%8)) - printf \\$(((_$((digits + (15 & byte))))/64))$(((_$((digits + (15 & byte))))/8%8))$(((_$((digits + (15 & byte))))%8)) + printf 
\\$(((_$((digits + (0xf & (byte >> 4)))))/64))$(((_$((digits + (0xf & (byte >> 4)))))/8%8))$(((_$((digits + (0xf & (byte >> 4)))))%8)) + printf \\$(((_$((digits + (0xf & byte))))/64))$(((_$((digits + (0xf & byte))))/8%8))$(((_$((digits + (0xf & byte))))%8)) endlet $1 digits byte } @@ -173,7 +173,7 @@ _process_file() { let filename $2 fi : $((_nbits += (8 * n))) if [ $n -lt 64 ] ; then - : $((_$((_buf + n)) = 128)) + : $((_$((_buf + n)) = 0x80)) i=$((n + 1)) while [ $i -lt 64 ]; do : $((_$((_buf + i)) = 0)) @@ -189,7 +189,7 @@ _process_file() { let filename $2 fi i=1 while [ $i -le 8 ]; do - : $((_$((_buf + 64 - i)) = 255 & _nbits)) + : $((_$((_buf + 64 - i)) = 0xff & _nbits)) : $((_nbits >>= 8)) : $((i += 1)) done From a4bf9647d28715a2e78a38a443d341f8b3a6ecc8 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sat, 15 Feb 2025 22:23:58 -0500 Subject: [PATCH 76/89] Fix warning --- sh.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sh.c b/sh.c index e0664b85..0f9ee262 100644 --- a/sh.c +++ b/sh.c @@ -110,6 +110,7 @@ text wrap_integer(int multiply, int obj) { return wrap_int_oct(multiply * -get_val_(INTEGER_OCT, obj)); default: fatal_error("wrap_integer: unknown integer type"); + return 0; } } From 1e44bce1e36519b9e1149dd34deda58456c2ae2a Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sat, 15 Feb 2025 22:34:43 -0500 Subject: [PATCH 77/89] Fix hex/oct literals for enum --- pnut.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pnut.c b/pnut.c index aa3af315..6436a093 100644 --- a/pnut.c +++ b/pnut.c @@ -2433,12 +2433,12 @@ ast parse_enum() { // a new node if the value is not already a literal. We use the last // literal type to determine which type to use when creating a new node. 
value = non_parenthesized_operand(value); - if (get_op(value) != INTEGER || get_op(value) != INTEGER_HEX || get_op(value) != INTEGER_OCT) { + if (get_op(value) != INTEGER && get_op(value) != INTEGER_HEX && get_op(value) != INTEGER_OCT) { value = new_ast0(last_literal_type, -eval_constant(value, false)); } last_literal_type = get_op(value); #else - value = new_ast0(INTEGER, -eval_constant(value, false)); + value = new_ast0(last_literal_type, -eval_constant(value, false)); #endif next_value = get_val(value) - 1; // Next value is the current value + 1, but val is negative } else { From 2b99f4b84cc6f64e1ab4432b421b3efa3970f3fc Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sat, 15 Feb 2025 22:48:19 -0500 Subject: [PATCH 78/89] Convert oct/hex literals to decimal outside $(( )) In the generated shell code we often assume that numbers have a canonical representation, for example when comparing numbers or when matching on a case constant in a switch statement. By wrapping the octal and hexadecimal literals in $(( )), we prevent them from "escaping" into variables and in contexts where decimal numbers are expected. 
--- examples/compiled/c4.sh | 4 ++-- examples/compiled/repl.sh | 2 +- pnut.c | 5 +++-- sh.c | 15 +++++++++++---- 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/examples/compiled/c4.sh b/examples/compiled/c4.sh index 51389321..7649fe68 100755 --- a/examples/compiled/c4.sh +++ b/examples/compiled/c4.sh @@ -337,7 +337,7 @@ _expr() { # lev: $2 while [ $_tk = $__DQUOTE__ ]; do _next __ done - _data=$(((_data + 1) & -(1))) + _data=$(((_data + 1) & -1)) _ty=$_PTR elif [ $_tk = $_Sizeof ] ; then _next __ @@ -483,7 +483,7 @@ _expr() { # lev: $2 _next __ : $((_$((_e += 1)) = _IMM)) if [ $_tk = $_Num ] ; then - : $((_$((_e += 1)) = -(_ival))) + : $((_$((_e += 1)) = -_ival)) _next __ else : $((_$((_e += 1)) = -1)) diff --git a/examples/compiled/repl.sh b/examples/compiled/repl.sh index bd580762..4d796821 100755 --- a/examples/compiled/repl.sh +++ b/examples/compiled/repl.sh @@ -747,7 +747,7 @@ _prim() { # no: $2 _pop y _pop x if [ $((y >> 1)) -lt 0 ] ; then - _push2 __ $(((-(((x >> 1) / -((y >> 1)))) << 1) | 1)) $(((0 << 1) | 1)) + _push2 __ $(((-((x >> 1) / -(y >> 1)) << 1) | 1)) $(((0 << 1) | 1)) else _push2 __ $(((((x >> 1) / (y >> 1)) << 1) | 1)) $(((0 << 1) | 1)) fi diff --git a/pnut.c b/pnut.c index 6436a093..4d4554e7 100644 --- a/pnut.c +++ b/pnut.c @@ -1978,10 +1978,11 @@ void get_tok() { syntax_error("invalid hex integer -- it must have at least one digit"); } } else { + while (accum_digit(8)); #ifdef sh - tok = INTEGER_OCT; + // 0 is a valid octal number, but we don't want to mark it as octal since it's so common + tok = val == 0 ? 
INTEGER : INTEGER_OCT; #endif - while (accum_digit(8)); } } else { while (accum_digit(10)); diff --git a/sh.c b/sh.c index 0f9ee262..bdb17892 100644 --- a/sh.c +++ b/sh.c @@ -1103,9 +1103,12 @@ text comp_initializer_list(ast initializer_list, int expected_len) { element = car(initializer_list); switch (get_op(element)) { case INTEGER: + args = concatenate_strings_with(args, wrap_int(-get_val_(INTEGER, element)), wrap_char(' ')); + break; case INTEGER_HEX: case INTEGER_OCT: - args = concatenate_strings_with(args, wrap_integer(1, element), wrap_char(' ')); + // We need to wrap in $(( ... )) to make sure the number is converted to base 10 when stored in a variable. + args = concatenate_strings_with(args, string_concat3(wrap_str_lit("$(("), wrap_integer(1, element), wrap_str_lit("))")), wrap_char(' ')); break; case CHARACTER: // TODO: Character identifiers are only defined at the end of the script, so we can't use them here @@ -1205,9 +1208,13 @@ text comp_rvalue_go(ast node, int context, ast test_side_effects, int outer_op) if (nb_children >= 4) { child3 = get_child(node, 3); } if (nb_children == 0) { - if (op == INTEGER || op == INTEGER_HEX || op == INTEGER_OCT) { - return wrap_in_condition_if_needed(context, test_side_effects, wrap_integer(1, node)); - } else if (op == CHARACTER) { + if (op == INTEGER) { + return wrap_in_condition_if_needed(context, test_side_effects, wrap_int(-get_val_(INTEGER, node))); + } else if (op == INTEGER_HEX || op == INTEGER_OCT) { + // We need to wrap in $(( ... )) to make sure the number is converted to base 10 when stored in a variable. 
+ return wrap_if_needed(false, context, test_side_effects, wrap_integer(1, node), outer_op, op); + } + else if (op == CHARACTER) { #ifdef SH_INLINE_CHAR_LITERAL return wrap_in_condition_if_needed(context, test_side_effects, wrap_int(get_val_(CHARACTER, node))); #else From 1e81b93c36e0a4a63846ee711b083b0e180d7781 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sat, 15 Feb 2025 22:55:09 -0500 Subject: [PATCH 79/89] Enable with PARSE_NUMERIC_LITERAL_WITH_BASE option --- pnut.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/pnut.c b/pnut.c index 4d4554e7..7904a267 100644 --- a/pnut.c +++ b/pnut.c @@ -88,6 +88,12 @@ #undef OPTIMIZE_LONG_LINES #endif +// Toggles parsing literals with their base (octal, decimal or hexadecimal). +// This is used by the shell code generator to output the literal in the correct base. +#ifdef sh +#define PARSE_NUMERIC_LITERAL_WITH_BASE +#endif + // Options that turns Pnut into a C preprocessor or some variant of it // DEBUG_GETCHAR: Read and print the input character by character. // DEBUG_CPP: Run preprocessor like gcc -E. This can be useful for debugging the preprocessor. 
@@ -162,7 +168,7 @@ enum { // Non-character operands INTEGER = 401, // Integer written in decimal -#ifdef sh +#ifdef PARSE_NUMERIC_LITERAL_WITH_BASE INTEGER_HEX = 402, // Integer written in hexadecimal INTEGER_OCT = 403, // Integer written in octal #endif @@ -1138,7 +1144,7 @@ int eval_constant(ast expr, bool if_macro) { switch (op) { case PARENS: return eval_constant(child0, if_macro); case INTEGER: -#ifdef sh +#ifdef PARSE_NUMERIC_LITERAL_WITH_BASE case INTEGER_HEX: case INTEGER_OCT: #endif @@ -1790,7 +1796,7 @@ void paste_tokens(int left_tok, int left_val) { if (right_tok == IDENTIFIER || right_tok == TYPE || right_tok == MACRO || right_tok <= WHILE_KW) { accum_string_string(right_val); } else if (right_tok == INTEGER -#ifdef sh +#ifdef PARSE_NUMERIC_LITERAL_WITH_BASE || right_tok == INTEGER_HEX || right_tok == INTEGER_OCT #endif ) { @@ -1805,12 +1811,12 @@ void paste_tokens(int left_tok, int left_val) { val = end_ident(); tok = heap[val+2]; // The kind of the identifier } else if (left_tok == INTEGER -#ifdef sh +#ifdef PARSE_NUMERIC_LITERAL_WITH_BASE || left_tok == INTEGER_HEX || left_tok == INTEGER_OCT #endif ) { if (right_tok == INTEGER -#ifdef sh +#ifdef PARSE_NUMERIC_LITERAL_WITH_BASE || right_tok == INTEGER_HEX || right_tok == INTEGER_OCT #endif ) { @@ -1967,7 +1973,7 @@ void get_tok() { tok = INTEGER; if (val == 0) { // val == 0 <=> ch == '0' if (ch == 'x' || ch == 'X') { -#ifdef sh +#ifdef PARSE_NUMERIC_LITERAL_WITH_BASE tok = INTEGER_HEX; #endif get_ch(); @@ -1979,7 +1985,7 @@ void get_tok() { } } else { while (accum_digit(8)); -#ifdef sh +#ifdef PARSE_NUMERIC_LITERAL_WITH_BASE // 0 is a valid octal number, but we don't want to mark it as octal since it's so common tok = val == 0 ? 
INTEGER : INTEGER_OCT; #endif @@ -2429,7 +2435,7 @@ ast parse_enum() { value = parse_assignment_expression(); if (value == 0) parse_error("Enum value must be a constant expression", tok); -#ifdef sh +#ifdef PARSE_NUMERIC_LITERAL_WITH_BASE // Preserve the type of integer literals (dec/hex/oct) by only creating // a new node if the value is not already a literal. We use the last // literal type to determine which type to use when creating a new node. @@ -3018,7 +3024,7 @@ ast parse_primary_expression() { ast tail; if (tok == IDENTIFIER || tok == CHARACTER || tok == INTEGER -#ifdef sh +#ifdef PARSE_NUMERIC_LITERAL_WITH_BASE || tok == INTEGER_HEX || tok == INTEGER_OCT #endif ) { From 289959c9a16f6e9257589e16832a56c6fe0a37a7 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sat, 15 Feb 2025 23:03:22 -0500 Subject: [PATCH 80/89] Remove redundant parens for -/~/! unary ops Expressions are responsible for adding parenthesis around themselves if the outer operation has a higher precedence than the inner operation. 
--- examples/compiled/sha256sum.sh | 2 +- sh.c | 12 ++---------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/examples/compiled/sha256sum.sh b/examples/compiled/sha256sum.sh index 2319c660..0055589c 100755 --- a/examples/compiled/sha256sum.sh +++ b/examples/compiled/sha256sum.sh @@ -123,7 +123,7 @@ _sha256_add_block() { let bytes $2 i=0 while [ $i -lt 64 ]; do s1=$(((((_$((_temp + 4)) >> 6) & (0x7fffffff >> (6 - 1))) | ((_$((_temp + 4)) << (32 - 6)) & 0xffffffff)) ^ (((_$((_temp + 4)) >> 11) & (0x7fffffff >> (11 - 1))) | ((_$((_temp + 4)) << (32 - 11)) & 0xffffffff)) ^ (((_$((_temp + 4)) >> 25) & (0x7fffffff >> (25 - 1))) | ((_$((_temp + 4)) << (32 - 25)) & 0xffffffff)))) - ch=$(((_$((_temp + 4)) & _$((_temp + 5))) ^ (~(_$((_temp + 4))) & _$((_temp + 6))))) + ch=$(((_$((_temp + 4)) & _$((_temp + 5))) ^ (~_$((_temp + 4)) & _$((_temp + 6))))) t1=$(((_$((_temp + 7)) + s1 + ch + _$((_k + i)) + _$((_w + i))) & 0xffffffff)) s0=$(((((_$((_temp + 0)) >> 2) & (0x7fffffff >> (2 - 1))) | ((_$((_temp + 0)) << (32 - 2)) & 0xffffffff)) ^ (((_$((_temp + 0)) >> 13) & (0x7fffffff >> (13 - 1))) | ((_$((_temp + 0)) << (32 - 13)) & 0xffffffff)) ^ (((_$((_temp + 0)) >> 22) & (0x7fffffff >> (22 - 1))) | ((_$((_temp + 0)) << (32 - 22)) & 0xffffffff)))) ma=$(((_$((_temp + 0)) & _$((_temp + 1))) ^ (_$((_temp + 0)) & _$((_temp + 2))) ^ (_$((_temp + 1)) & _$((_temp + 2))))) diff --git a/sh.c b/sh.c index bdb17892..85ddcdd2 100644 --- a/sh.c +++ b/sh.c @@ -1242,21 +1242,13 @@ text comp_rvalue_go(ast node, int context, ast test_side_effects, int outer_op) } else if (op == '+' || op == PARENS) { // +x is equivalent to x return comp_rvalue_go(child0, context, test_side_effects, outer_op); - } else if (op == '-') { - // Check if the rest of ast is a literal, if so directly return the negated value. - // Note: I think this can be simplified by not wrapped in () in the else case. 
+ } else if (op == '-' || op == '~' || op == '!') { if (get_op(child0) == INTEGER || op == INTEGER_HEX || op == INTEGER_OCT) { return wrap_in_condition_if_needed(context, test_side_effects, wrap_integer(-1, child0)); } else { sub1 = comp_rvalue_go(child0, RVALUE_CTX_ARITH_EXPANSION, 0, op); - return wrap_if_needed(false, context, test_side_effects, string_concat3(wrap_str_lit("-("), sub1, wrap_char(')')), outer_op, op); + return wrap_if_needed(false, context, test_side_effects, string_concat(wrap_char(op), sub1), outer_op, op); } - } else if (op == '~') { - sub1 = comp_rvalue_go(child0, RVALUE_CTX_ARITH_EXPANSION, 0, op); - return wrap_if_needed(false, context, test_side_effects, string_concat3(wrap_str_lit("~("), sub1, wrap_char(')')), outer_op, op); - } else if (op == '!') { - sub1 = comp_rvalue_go(child0, RVALUE_CTX_ARITH_EXPANSION, 0, op); - return wrap_if_needed(true, context, test_side_effects, string_concat(wrap_char('!'), sub1), outer_op, op); } else if (op == MINUS_MINUS_PRE) { sub1 = comp_lvalue(child0); return wrap_if_needed(true, context, test_side_effects, string_concat(sub1, wrap_str_lit(" -= 1")), outer_op, op); From 50878b0435caa8458b6758b1700bbc150615b650 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sat, 15 Feb 2025 23:13:37 -0500 Subject: [PATCH 81/89] Fix puthex_unsigned when int is wider than 32 bits --- sh.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sh.c b/sh.c index 85ddcdd2..5cb9cc76 100644 --- a/sh.c +++ b/sh.c @@ -209,13 +209,13 @@ text concatenate_strings_with(text t1, text t2, text sep) { // Output unsigned integer in hex void puthex_unsigned(int n) { // Because n is signed, we clear the upper bits after shifting in case n was negative - if (n & ~15) puthex_unsigned((n >> 4) & 0x0fffffff); + if ((n >> 4) & 0x0fffffff) puthex_unsigned((n >> 4) & 0x0fffffff); putchar("0123456789abcdef"[n & 15]); } void putoct_unsigned(int n) { // Because n is signed, we clear the upper bits after shifting in case n was 
negative - if (n & ~7) putoct_unsigned((n >> 3) & 0x1fffffff); + if ((n >> 3) & 0x1fffffff) putoct_unsigned((n >> 3) & 0x1fffffff); putchar('0' + (n & 7)); } From 7be496502cc525384d71370be574fecd76f0de16 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Fri, 14 Feb 2025 15:16:49 -0500 Subject: [PATCH 82/89] Support %l format specifier in inlined printf --- sh-runtime.c | 1 + sh.c | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/sh-runtime.c b/sh-runtime.c index a7d4875b..b52f7c6d 100644 --- a/sh-runtime.c +++ b/sh-runtime.c @@ -636,6 +636,7 @@ DEPENDS_ON(put_pstr) putstr(" printf \"%%\"\n"); putstr(" printf_reset\n"); putstr(" ;;\n"); + // TODO: Support %l format specifier putstr(" 'd'|'i'|'o'|'u'|'x'|'X')\n"); putstr(" printf \"%${__flags}${__width}${__precision}${__head_char}\" $1\n"); putstr(" shift\n"); diff --git a/sh.c b/sh.c index 5cb9cc76..341d6da9 100644 --- a/sh.c +++ b/sh.c @@ -1638,7 +1638,15 @@ void handle_printf_call(char *format_str, ast params) { break; // The following options are the same between the shell's printf and C's printf - case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': + case 'l': case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': + if (*format_str == 'l') { + while (*format_str == 'l') format_str += 1; // Skip the 'l' for long + if (*format_str != 'd' && *format_str != 'i' && *format_str != 'o' && *format_str != 'u' && *format_str != 'x' && *format_str != 'X') { + printf("*format_str=%c%c\n", *(format_str - 1), *format_str); + fatal_error("Invalid printf format: Unsupported long specifier"); + } + } + if (param == 0) fatal_error("Not enough parameters for printf"); params_text = concatenate_strings_with(params_text, width_text, wrap_char(' ')); // Add width param if needed params_text = concatenate_strings_with(params_text, precision_text, wrap_char(' ')); // Add precision param if needed From 6c04fec9386168e4cf61591d2fe2e82c2a6c7908 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau 
Date: Fri, 14 Feb 2025 20:14:21 -0500 Subject: [PATCH 83/89] Always track current file in fp_filepath --- pnut.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pnut.c b/pnut.c index 7904a267..a272a3a2 100644 --- a/pnut.c +++ b/pnut.c @@ -119,8 +119,8 @@ struct IncludeStack { FILE* fp; struct IncludeStack *next; char *dirname; // The base path of the file, used to resolve relative paths -#ifdef INCLUDE_LINE_NUMBER_ON_ERROR char *filepath; // The path of the file, used to print error messages +#ifdef INCLUDE_LINE_NUMBER_ON_ERROR int line_number; int column_number; #endif @@ -745,8 +745,8 @@ void get_ch() { include_stack2 = include_stack; include_stack = include_stack->next; fp = include_stack->fp; -#ifdef INCLUDE_LINE_NUMBER_ON_ERROR fp_filepath = include_stack->filepath; +#ifdef INCLUDE_LINE_NUMBER_ON_ERROR line_number = include_stack->line_number; column_number = include_stack->column_number; #endif @@ -856,8 +856,8 @@ void include_file(char *file_name, char *relative_to) { include_stack2->next = include_stack; include_stack2->fp = fp; include_stack2->dirname = file_parent_directory(fp_filepath); -#ifdef INCLUDE_LINE_NUMBER_ON_ERROR include_stack2->filepath = fp_filepath; +#ifdef INCLUDE_LINE_NUMBER_ON_ERROR include_stack2->line_number = 1; include_stack2->column_number = 0; // Save the current file position so we can return to it after the included file is done From 19b47934727a4c3aee017b91ff858025224ea11c Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sun, 16 Feb 2025 10:19:05 -0500 Subject: [PATCH 84/89] Add option to parse 64 bits literals This feature is controlled by the SUPPORT_64_BIT_LITERALS option, and will be enabled for pnut-exe. 
--- pnut.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 75 insertions(+), 9 deletions(-) diff --git a/pnut.c b/pnut.c index a272a3a2..8d810803 100644 --- a/pnut.c +++ b/pnut.c @@ -871,6 +871,61 @@ void include_file(char *file_name, char *relative_to) { include_stack = include_stack2; } +#ifdef SUPPORT_64_BIT_LITERALS +// Array used to accumulate 64 bit unsigned integers on 32 bit systems +int val_32[2]; + +// x = x * y +void u64_mul_u32(int *x, int y) { + + // Note, because we are using 32 bit **signed** integers, we need to clear the + // sign bit when shifting right to avoid sign extension. + #define I32_LOGICAL_RSHIFT_16(x) ((x >> 16) & 0xffff) + + int xlo = x[0] & 0xffff; + int xhi = I32_LOGICAL_RSHIFT_16(x[0]); + int ylo = y & 0xffff; + int yhi = I32_LOGICAL_RSHIFT_16(y); + int lo = xlo * ylo; /* 0 .. 0xfffe0001 */ + int m1 = xlo * yhi + (lo >> 16); /* 0 .. 0xfffeffff */ + int m2 = xhi * ylo; /* 0 .. 0xfffe0001 */ + int m3 = (m1 & 0xffff) + (m2 & 0xffff); /* 0 .. 0x1fffe */ + int hi = xhi * yhi + I32_LOGICAL_RSHIFT_16(m1) + I32_LOGICAL_RSHIFT_16(m2) + I32_LOGICAL_RSHIFT_16(m3); /* 0 .. 0xfffffffe */ + x[0] = ((m3 & 0xffff) << 16) + (lo & 0xffff); + x[1] = x[1] * y + hi; +} + +// x = x + y +void u64_add_u32(int *x, int y) { + int a = x[0]; int b = x[1]; + int lo = x[0] + y; + // Carry (using signed integers) + x[1] += (x[0] < 0 != lo < 0); + x[0] = lo; +} + +// Pack a 64 bit unsigned integer into an object. +// Because most integers are small and we want to save memory, we only store the +// large int object ("large ints") if it is larger than 31 bits. Otherwise, we +// store it as a regular integer. The sign bit is used to distinguish between +// large ints (positive) and regular ints (negative). 
+void u64_to_obj(int *x) { + if (x[0] >= 0 && x[1] == 0) { // "small int" + val = -x[0]; + } else { + val = alloc_obj(2); + heap[val ] = x[0]; + heap[val + 1] = x[1]; + } +} + +#define DIGIT_BYTE (val_32[0] % 256) +#define INIT_ACCUM_DIGIT() val_32[0] = 0; val_32[1] = 0; +#else +#define DIGIT_BYTE (-val % 256) +#define INIT_ACCUM_DIGIT() val = 0; +#endif + int accum_digit(int base) { int digit = 99; if ('0' <= ch && ch <= '9') { @@ -888,7 +943,12 @@ int accum_digit(int base) { // fatal_error("literal integer overflow"); // } +#ifdef SUPPORT_64_BIT_LITERALS + u64_mul_u32(val_32, base); + u64_add_u32(val_32, digit); +#else val = val * base - digit; +#endif get_ch(); return 1; } @@ -904,21 +964,21 @@ void get_string_char() { // Parse octal character, up to 3 digits. // Note that \1111 is parsed as '\111' followed by '1' // See https://en.wikipedia.org/wiki/Escape_sequences_in_C#Notes - val = 0; + INIT_ACCUM_DIGIT(); accum_digit(8); accum_digit(8); accum_digit(8); - val = -(val % 256); // keep low 8 bits, without overflowing + val = DIGIT_BYTE; // keep low 8 bits, without overflowing } else if (ch == 'x' || ch == 'X') { get_ch(); - val = 0; + INIT_ACCUM_DIGIT(); // Allow 1 or 2 hex digits. 
if (accum_digit(16)) { accum_digit(16); } else { syntax_error("invalid hex escape -- it must have at least one digit"); } - val = -(val % 256); // keep low 8 bits, without overflowing + val = DIGIT_BYTE; // keep low 8 bits, without overflowing } else { if (ch == 'a') { val = 7; @@ -1966,18 +2026,16 @@ void get_tok() { break; } else if ('0' <= ch && ch <= '9') { - val = '0' - ch; - - get_ch(); + INIT_ACCUM_DIGIT(); tok = INTEGER; - if (val == 0) { // val == 0 <=> ch == '0' + if (ch == '0') { // val == 0 <=> ch == '0' + get_ch(); if (ch == 'x' || ch == 'X') { #ifdef PARSE_NUMERIC_LITERAL_WITH_BASE tok = INTEGER_HEX; #endif get_ch(); - val = 0; if (accum_digit(16)) { while (accum_digit(16)); } else { @@ -1987,13 +2045,21 @@ void get_tok() { while (accum_digit(8)); #ifdef PARSE_NUMERIC_LITERAL_WITH_BASE // 0 is a valid octal number, but we don't want to mark it as octal since it's so common +#ifdef SUPPORT_64_BIT_LITERALS + tok = val_32[0] == 0 && val_32[1] == 0 ? INTEGER : INTEGER_OCT; +#else tok = val == 0 ? INTEGER : INTEGER_OCT; +#endif #endif } } else { while (accum_digit(10)); } +#ifdef SUPPORT_64_BIT_LITERALS + u64_to_obj(val_32); +#endif + break; From 4165bb3ad21d919d8628c9732980cc1908a883b3 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sat, 15 Feb 2025 11:00:21 -0500 Subject: [PATCH 85/89] Add checks in code that only support small ints Token pasting, constant expression evaluation and enum values are limited to small integers when SUPPORT_64_BIT_LITERALS is enabled. --- pnut.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/pnut.c b/pnut.c index 8d810803..a1cf9e2e 100644 --- a/pnut.c +++ b/pnut.c @@ -567,11 +567,17 @@ void accum_string_string(int string_probe) { } // Similar to accum_string_string, but writes an integer to the string pool +// Note that this function only supports small integers, represented as positive number. 
void accum_string_integer(int n) { +#ifdef SUPPORT_64_BIT_LITERALS + if (n < 0) fatal_error("accum_string_integer: Only small integers can be pasted"); +#else if (n < 0) { accum_string_char('-'); accum_string_integer(-n); - } else { + } else +#endif + { if (n > 9) accum_string_integer(n / 10); accum_string_char('0' + n % 10); } @@ -1207,6 +1213,10 @@ int eval_constant(ast expr, bool if_macro) { #ifdef PARSE_NUMERIC_LITERAL_WITH_BASE case INTEGER_HEX: case INTEGER_OCT: +#endif +#ifdef SUPPORT_64_BIT_LITERALS + // Disable large integers for now, hopefully they don't appear in TCC in enums and #if expressions + if (get_val(expr) > 0) fatal_error("constant expression too large"); #endif return -get_val(expr); case CHARACTER: return get_val_(CHARACTER, expr); @@ -1817,7 +1827,12 @@ void stringify() { } } +// Concatenates two non-negative integers into a single integer +// Note that this function only supports small integers, represented as positive integers. int paste_integers(int left_val, int right_val) { +#ifdef SUPPORT_64_BIT_LITERALS + if (left_val < 0 || right_val < 0) fatal_error("Only small integers can be pasted"); +#endif int result = left_val; int right_digits = right_val; while (right_digits > 0) { @@ -2511,7 +2526,7 @@ ast parse_enum() { } last_literal_type = get_op(value); #else - value = new_ast0(last_literal_type, -eval_constant(value, false)); + value = new_ast0(last_literal_type, -eval_constant(value, false)); // negative value to indicate it's a small integer #endif next_value = get_val(value) - 1; // Next value is the current value + 1, but val is negative } else { From 392369afd4c7873fc488def64164fa5dc6afd710 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sun, 16 Feb 2025 12:01:29 -0500 Subject: [PATCH 86/89] Support large 64-bit immediates in 64-bit pnut-exe --- exe.c | 36 +++++++++++++++++++++++++++++------- pnut.c | 11 +++++++++++ x86.c | 27 ++++++++++++++++++++++++++- 3 files changed, 66 insertions(+), 8 deletions(-) diff --git a/exe.c 
b/exe.c index 523abbcb..f57c54eb 100644 --- a/exe.c +++ b/exe.c @@ -38,16 +38,27 @@ void emit_i64_le(int n) { emit_i32_le(n >> 31); } -void emit_word_le(int n) { - if (word_size == 4) { - emit_i32_le(n); - } else if (word_size == 8) { - emit_i64_le(n); +#ifdef SUPPORT_64_BIT_LITERALS +void emit_i32_le_large_imm(int imm_obj) { + if (imm_obj <= 0) { + emit_i32_le(-imm_obj); } else { - fatal_error("emit_word_le: unknown word size"); + // Check that the number doesn't overflow 64 bits + if (heap[imm_obj + 1] != 0) fatal_error("emit_i32_le_large_imm: integer overflow"); + emit_i32_le(heap[imm_obj]); } } +void emit_i64_le_large_imm(int imm_obj) { + if (imm_obj <= 0) { + emit_i64_le(-imm_obj); + } else { + emit_i32_le(heap[imm_obj]); + emit_i32_le(heap[imm_obj + 1]); + } +} +#endif + void write_i8(int n) { putchar(n & 0xff); } @@ -82,7 +93,10 @@ const int reg_Z; const int reg_SP; const int reg_glo; -void mov_reg_imm(int dst, int imm); +void mov_reg_imm(int dst, int imm); // Move 32 bit immediate to register +#ifdef SUPPORT_64_BIT_LITERALS +void mov_reg_large_imm(int dst, int large_imm); // Move large immediate to register +#endif void mov_reg_reg(int dst, int src); void mov_mem_reg(int base, int offset, int src); void mov_mem8_reg(int base, int offset, int src); @@ -1215,7 +1229,11 @@ void codegen_rvalue(ast node) { if (nb_children == 0) { if (op == INTEGER) { +#ifdef SUPPORT_64_BIT_LITERALS + mov_reg_large_imm(reg_X, get_val_(INTEGER, node)); +#else mov_reg_imm(reg_X, -get_val_(INTEGER, node)); +#endif push_reg(reg_X); } else if (op == CHARACTER) { mov_reg_imm(reg_X, get_val_(CHARACTER, node)); @@ -1253,7 +1271,11 @@ void codegen_rvalue(ast node) { push_reg(reg_X); break; case BINDING_ENUM_CST: +#ifdef SUPPORT_64_BIT_LITERALS + mov_reg_large_imm(reg_X, get_val_(INTEGER, heap[binding+3])); +#else mov_reg_imm(reg_X, -get_val_(INTEGER, heap[binding+3])); +#endif push_reg(reg_X); break; diff --git a/pnut.c b/pnut.c index a1cf9e2e..57a4a13b 100644 --- a/pnut.c +++ b/pnut.c @@ 
-94,6 +94,11 @@ #define PARSE_NUMERIC_LITERAL_WITH_BASE #endif +// 64 bit literals are only supported on 64 bit platforms for now +#if defined(target_x86_64_linux) || defined(target_x86_64_mac) +#define SUPPORT_64_BIT_LITERALS +#endif + // Options that turns Pnut into a C preprocessor or some variant of it // DEBUG_GETCHAR: Read and print the input character by character. // DEBUG_CPP: Run preprocessor like gcc -E. This can be useful for debugging the preprocessor. @@ -1612,12 +1617,18 @@ void init_pnut_macros() { #if defined(sh) init_ident(MACRO, "PNUT_SH"); #elif defined(target_i386_linux) + init_ident(MACRO, "PNUT_EXE"); + init_ident(MACRO, "PNUT_EXE_32"); init_ident(MACRO, "PNUT_I386"); init_ident(MACRO, "PNUT_I386_LINUX"); #elif defined (target_x86_64_linux) + init_ident(MACRO, "PNUT_EXE"); + init_ident(MACRO, "PNUT_EXE_64"); init_ident(MACRO, "PNUT_X86_64"); init_ident(MACRO, "PNUT_X86_64_LINUX"); #elif defined (target_x86_64_mac) + init_ident(MACRO, "PNUT_EXE"); + init_ident(MACRO, "PNUT_EXE_64"); init_ident(MACRO, "PNUT_X86_64"); init_ident(MACRO, "PNUT_X86_64_MAC"); #endif diff --git a/x86.c b/x86.c index 8defa2d1..53273098 100644 --- a/x86.c +++ b/x86.c @@ -152,9 +152,34 @@ void mov_reg_imm(int dst, int imm) { rex_prefix(0, dst); emit_i8(0xb8 + (dst & 7)); - emit_word_le(imm); + if (word_size == 4) { + emit_i32_le(imm); + } else if (word_size == 8) { + emit_i64_le(imm); + } else { + fatal_error("mov_reg_imm: unknown word size"); + } } +#ifdef SUPPORT_64_BIT_LITERALS +void mov_reg_large_imm(int dst, int large_imm) { + + // MOV dst_reg, large_imm ;; Move 32 bit or 64 bit immediate value to register + // See: https://web.archive.org/web/20240407051903/https://www.felixcloutier.com/x86/mov + + rex_prefix(0, dst); + emit_i8(0xb8 + (dst & 7)); + + if (word_size == 4) { + emit_i32_le_large_imm(large_imm); + } else if (word_size == 8) { + emit_i64_le_large_imm(large_imm); + } else { + fatal_error("mov_reg_large_imm: unknown word size"); + } +} +#endif + void 
add_reg_imm(int dst, int imm) { // ADD dst_reg, imm ;; Add 32 bit immediate value to register From 4d78d6b6d9187a556cc2fb3ed9dc35af36ee8059 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Sun, 16 Feb 2025 12:09:26 -0500 Subject: [PATCH 87/89] Fix warnings --- pnut.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pnut.c b/pnut.c index 57a4a13b..9ed90e67 100644 --- a/pnut.c +++ b/pnut.c @@ -908,10 +908,9 @@ void u64_mul_u32(int *x, int y) { // x = x + y void u64_add_u32(int *x, int y) { - int a = x[0]; int b = x[1]; int lo = x[0] + y; // Carry (using signed integers) - x[1] += (x[0] < 0 != lo < 0); + x[1] += ((x[0] < 0) != (lo < 0)); x[0] = lo; } From 9df164378a1461e0c94d6b2e02656bed6eefc3fe Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Tue, 18 Feb 2025 09:33:13 -0500 Subject: [PATCH 88/89] Fix param list parser for void functions --- pnut.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pnut.c b/pnut.c index 9ed90e67..3b7418f1 100644 --- a/pnut.c +++ b/pnut.c @@ -2783,7 +2783,7 @@ int parse_param_list() { while (tok != ')' && tok != EOF) { if (is_type_starter(tok)) { decl = parse_declarator(true, parse_declaration_specifiers()); - if (get_op(decl) == VOID_KW) { + if (get_op(get_child_(DECL, decl, 1)) == VOID_KW) { if (tok != ')' || result != 0) parse_error("void must be the only parameter", tok); break; } From 2190ae3eb62a0f3be8c351701faf776ea0beba09 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Tue, 18 Feb 2025 09:34:23 -0500 Subject: [PATCH 89/89] Add test for void functions --- tests/_all/void_functions.c | 12 ++++++++++++ tests/_all/void_functions.golden | 1 + 2 files changed, 13 insertions(+) create mode 100644 tests/_all/void_functions.c create mode 100644 tests/_all/void_functions.golden diff --git a/tests/_all/void_functions.c b/tests/_all/void_functions.c new file mode 100644 index 00000000..4599b45b --- /dev/null +++ b/tests/_all/void_functions.c @@ -0,0 +1,12 @@ +#include + +int test(void); 
// Forward declaration + +int test(void) { + putchar('T'); putchar('\n'); + return 0; +} + +int main(void) { + return test(); +} diff --git a/tests/_all/void_functions.golden b/tests/_all/void_functions.golden new file mode 100644 index 00000000..62a6e3c9 --- /dev/null +++ b/tests/_all/void_functions.golden @@ -0,0 +1 @@ +T