From 23bb340373e88e6ee0136ecfa27cdc7cf3ce475a Mon Sep 17 00:00:00 2001 From: Kyle Lin Date: Fri, 6 Dec 2024 18:45:08 +0800 Subject: [PATCH] Support pointer data types for sizeof operator - Refactor test driver to make it capable of running different stages based on the supplied stage number. - Refactor make rule "check" for checking stage 0 and stage 2. - Replace constant value 4 with PTR_SIZE to give the pointer size of the corresponding target architecture. - Add sizeof test. - Introduce __SIZEOF_POINTER__ macro in lib/c.c. - Fix arm32 div/mod; this was caused by improper stack alignment previously calculated in reg-alloc.c. - Fix rv32 div/mod; this was caused by a missing check on the approximated value. --- Makefile | 12 ++++++++++-- README.md | 12 +++++++++++- lib/c.c | 2 ++ src/arm-codegen.c | 10 +++++++--- src/parser.c | 9 ++++++++- src/reg-alloc.c | 23 +++++++++++++++++----- src/riscv-codegen.c | 9 +++++---- tests/driver.sh | 47 +++++++++++++++++++++++++++++++++++++++++++-- 8 files changed, 106 insertions(+), 18 deletions(-) diff --git a/Makefile b/Makefile index a864783a..93afc674 100644 --- a/Makefile +++ b/Makefile @@ -52,10 +52,18 @@ $(OUT)/tests/%.elf: tests/%.c $(OUT)/$(STAGE0) chmod +x $@ ; $(PRINTF) "Running $@ ...\n" $(Q)$(TARGET_EXEC) $@ && $(call pass) -check: $(TESTBINS) tests/driver.sh - tests/driver.sh +check: check-stage0 check-stage2 + +check-stage0: $(OUT)/$(STAGE0) $(TESTBINS) tests/driver.sh + $(VECHO) " TEST STAGE 0\n" + tests/driver.sh 0 + +check-stage2: $(OUT)/$(STAGE2) $(TESTBINS) tests/driver.sh + $(VECHO) " TEST STAGE 2\n" + tests/driver.sh 2 check-snapshots: $(OUT)/$(STAGE0) $(SNAPSHOTS) tests/check-snapshots.sh + $(VECHO) " TEST SNAPSHOTS\n" tests/check-snapshots.sh $(OUT)/%.o: %.c diff --git a/README.md b/README.md index f22e636a..fced1a89 100644 --- a/README.md +++ b/README.md @@ -112,17 +112,27 @@ Verify that the emitted IRs are identical to the snapshots by specifying `check- $ make check-snapshots ``` -`shecc` comes with unit tests. 
To run the tests, give `check` as an argument: +`shecc` comes with unit tests consisting of stage 0 and stage 2. To run these tests, give `check` as an argument: ```shell $ make check ``` Reference output: ``` + TEST STAGE 0 ... int main(int argc, int argv) { exit(sizeof(char)); } => 1 int main(int argc, int argv) { int a; a = 0; switch (3) { case 0: return 2; case 3: a = 10; break; case 1: return 0; } exit(a); } => 10 int main(int argc, int argv) { int a; a = 0; switch (3) { case 0: return 2; default: a = 10; break; } exit(a); } => 10 +OK + TEST STAGE 2 +... +int main(int argc, int argv) { exit(sizeof(char*)); } +exit code => 4 +output => +int main(int argc, int argv) { exit(sizeof(int*)); } +exit code => 4 +output => OK ``` diff --git a/lib/c.c b/lib/c.c index 722d35bb..8627086f 100644 --- a/lib/c.c +++ b/lib/c.c @@ -14,6 +14,7 @@ #define false 0 #if defined(__arm__) +#define __SIZEOF_POINTER__ 4 #define __syscall_exit 1 #define __syscall_read 3 #define __syscall_write 4 @@ -23,6 +24,7 @@ #define __syscall_munmap 91 #elif defined(__riscv) +#define __SIZEOF_POINTER__ 4 #define __syscall_exit 93 #define __syscall_read 63 #define __syscall_write 64 diff --git a/src/arm-codegen.c b/src/arm-codegen.c index 58359619..4658a4b8 100644 --- a/src/arm-codegen.c +++ b/src/arm-codegen.c @@ -336,8 +336,11 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir) } interm = __r8; /* div/mod emulation */ - /* Preserve the values of the dividend and divisor */ - emit(__stmdb(__AL, 1, __sp, (1 << rn) | (1 << rm))); + /* Preserve the values of the dividend and divisor. This also keeps + * stack pointer to be aligned. + */ + emit(__stmdb(__AL, 1, __sp, + (1 << __r11) | (1 << __r12) | (1 << rn) | (1 << rm))); /* Obtain absolute values of the dividend and divisor */ emit(__srl_amt(__AL, 0, arith_rs, __r8, rn, 31)); emit(__add_r(__AL, rn, rn, __r8)); @@ -382,7 +385,8 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir) * in rd. 
*/ emit(__mov_r(__AL, __r9, rn)); - emit(__ldm(__AL, 1, __sp, (1 << rn) | (1 << rm))); + emit(__ldm(__AL, 1, __sp, + (1 << __r11) | (1 << __r12) | (1 << rn) | (1 << rm))); emit(__mov_r(__AL, rd, interm)); /* Handle the correct sign for the quotient or remainder */ emit(__cmp_i(__AL, __r10, 0)); diff --git a/src/parser.c b/src/parser.c index 484de1b4..41717251 100644 --- a/src/parser.c +++ b/src/parser.c @@ -844,7 +844,11 @@ void read_expr_operand(block_t *parent, basic_block_t **bb) read_ternary_operation(parent, bb); lex_expect(T_close_bracket); } else if (lex_accept(T_sizeof)) { + /* TODO: Use more generalized type grammar parsing function to handle + * type reading + */ char token[MAX_TYPE_LEN]; + int ptr_cnt = 0; lex_expect(T_open_bracket); int find_type_flag = lex_accept(T_struct) ? 2 : 1; @@ -853,9 +857,12 @@ void read_expr_operand(block_t *parent, basic_block_t **bb) if (!type) error("Unable to find type"); + while (lex_accept(T_asterisk)) + ptr_cnt++; + ph1_ir = add_ph1_ir(OP_load_constant); vd = require_var(parent); - vd->init_val = type->size; + vd->init_val = ptr_cnt ? PTR_SIZE : type->size; strcpy(vd->var_name, gen_name()); ph1_ir->dest = vd; opstack_push(vd); diff --git a/src/reg-alloc.c b/src/reg-alloc.c index fe0dc5de..f2cbfb5e 100644 --- a/src/reg-alloc.c +++ b/src/reg-alloc.c @@ -12,6 +12,18 @@ * dead variable and does NOT wrtie it back to the stack. */ +/* Aligns size to nearest multiple of 4, this meets + * ARMv7's alignment requirement. + * + * This function should + * be called whenever handling with user-defined type's + * size. + */ +int align_size(int i) +{ + return i <= 4 ? 
4 : (i + 3) & ~3; +} + bool check_live_out(basic_block_t *bb, var_t *var) { for (int i = 0; i < bb->live_out_idx; i++) { @@ -239,11 +251,11 @@ void reg_alloc() src0 = GLOBAL_FUNC.stack_size; if (global_insn->rd->is_ptr) GLOBAL_FUNC.stack_size += - (PTR_SIZE * global_insn->rd->array_size); + align_size(PTR_SIZE * global_insn->rd->array_size); else { type_t *type = find_type(global_insn->rd->type_name, 0); GLOBAL_FUNC.stack_size += - (global_insn->rd->array_size * type->size); + align_size(global_insn->rd->array_size * type->size); } dest = @@ -260,7 +272,7 @@ void reg_alloc() strcmp(global_insn->rd->type_name, "char") && strcmp(global_insn->rd->type_name, "_Bool")) { type_t *type = find_type(global_insn->rd->type_name, 0); - GLOBAL_FUNC.stack_size += type->size; + GLOBAL_FUNC.stack_size += align_size(type->size); } else /* 'char' is aligned to one byte for the convenience */ GLOBAL_FUNC.stack_size += 4; @@ -365,9 +377,10 @@ void reg_alloc() } if (insn->rd->array_size) - fn->func->stack_size += (insn->rd->array_size * sz); + fn->func->stack_size += + align_size(insn->rd->array_size * sz); else - fn->func->stack_size += sz; + fn->func->stack_size += align_size(sz); dest = prepare_dest(bb, insn->rd, -1, -1); ir = bb_add_ph2_ir(bb, OP_address_of); diff --git a/src/riscv-codegen.c b/src/riscv-codegen.c index 92329dfa..08587960 100644 --- a/src/riscv-codegen.c +++ b/src/riscv-codegen.c @@ -72,7 +72,7 @@ void update_elf_offset(ph2_ir_t *ph2_ir) if (hard_mul_div) elf_offset += 4; else - elf_offset += 104; + elf_offset += 108; return; case OP_load_data_address: case OP_neq: @@ -331,12 +331,13 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir) /* Unsigned integer division */ emit(__addi(__t0, __zero, 0)); emit(__addi(__t1, __zero, 1)); - emit(__beq(__t3, __zero, 48)); - emit(__beq(__t2, __zero, 44)); + emit(__beq(__t3, __zero, 52)); + emit(__beq(__t2, __zero, 48)); + emit(__beq(__t2, __t3, 20)); emit(__bltu(__t2, __t3, 16)); emit(__slli(__t3, __t3, 1)); emit(__slli(__t1, __t1, 1)); - 
emit(__jal(__zero, -12)); + emit(__jal(__zero, -16)); emit(__bltu(__t2, __t3, 12)); emit(__sub(__t2, __t2, __t3)); emit(__add(__t0, __t0, __t1)); diff --git a/tests/driver.sh b/tests/driver.sh index 2f318a64..3c2df090 100755 --- a/tests/driver.sh +++ b/tests/driver.sh @@ -2,7 +2,22 @@ set -u -readonly SHECC="$PWD/out/shecc" +if [ "$#" != 1 ]; then + echo "Usage: $0 " + exit 1 +fi + +case "$1" in + "0") + readonly SHECC="$PWD/out/shecc" ;; + "1") + readonly SHECC="$PWD/out/shecc-stage1.elf" ;; + "2") + readonly SHECC="$PWD/out/shecc-stage2.elf" ;; + *) + echo "$1 is not a valid stage" + exit 1 ;; +esac # try - test shecc with given code # Usage: @@ -405,8 +420,36 @@ items 5 "int a; a = 10; a -= 5; return a;" items 20 "int *p; int a[3]; a[0] = 10; a[1] = 20; a[2] = 30; p = a; p+=1; return p[0];" # sizeof -expr 4 "sizeof(int)"; +expr 0 "sizeof(void)"; +expr 1 "sizeof(_Bool)"; expr 1 "sizeof(char)"; +expr 4 "sizeof(int)"; +# sizeof pointers +expr 4 "sizeof(void*)"; +expr 4 "sizeof(_Bool*)"; +expr 4 "sizeof(char*)"; +expr 4 "sizeof(int*)"; +# sizeof multi-level pointer +expr 4 "sizeof(void**)"; +expr 4 "sizeof(_Bool**)"; +expr 4 "sizeof(char**)"; +expr 4 "sizeof(int**)"; +# sizeof struct +try_ 4 << EOF +typedef struct { + int a; + int b; +} struct_t; +int main() { return sizeof(struct_t*); } +EOF +# sizeof enum +try_ 4 << EOF +typedef enum { + A, + B +} enum_t; +int main() { return sizeof(enum_t*); } +EOF # switch-case items 10 "int a; a = 0; switch (3) { case 0: return 2; case 3: a = 10; break; case 1: return 0; } return a;"