From c1485573101783b5a010e8fc5b64121ef1f528c1 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Thu, 2 Jan 2025 17:48:50 -0500 Subject: [PATCH 1/9] Support __{FILE,LINE,DATE,TIME,TIMESTAMP}__ macros --- pnut.c | 53 ++++++++++++++++--- .../_all/preprocessor/macro/builtin-stubbed.c | 46 ++++++++++++++++ .../preprocessor/macro/builtin-stubbed.golden | 5 ++ tests/_all/preprocessor/macro/builtin.c | 3 ++ tests/_all/preprocessor/macro/builtin.golden | 5 ++ 5 files changed, 106 insertions(+), 6 deletions(-) create mode 100644 tests/_all/preprocessor/macro/builtin-stubbed.c create mode 100644 tests/_all/preprocessor/macro/builtin-stubbed.golden create mode 100644 tests/_all/preprocessor/macro/builtin.c create mode 100644 tests/_all/preprocessor/macro/builtin.golden diff --git a/pnut.c b/pnut.c index addb5eaa..a9377cef 100644 --- a/pnut.c +++ b/pnut.c @@ -866,6 +866,13 @@ int WRITE_ID; int OPEN_ID; int CLOSE_ID; +// Macros that are defined by the preprocessor +int FILE__ID; +int LINE__ID; +int DATE__ID; +int TIME__ID; +int TIMESTAMP__ID; + // When we parse a macro, we generally want the tokens as they are, without expanding them. void get_tok_macro() { bool prev_expand_macro = expand_macro; @@ -1260,8 +1267,7 @@ void get_ident() { tok = heap[val+2]; } -int init_ident(int tok, char *name) { - +int intern_str(char* name) { int i = 0; int prev_ch = ch; // The character may be important to the calling function, saving it @@ -1273,15 +1279,19 @@ int init_ident(int tok, char *name) { i += 1; } - i = end_ident(); - - heap[i+2] = tok; + i = end_string(); ch = prev_ch; return i; } +int init_ident(int tok, char *name) { + int i = intern_str(name); + heap[i+2] = tok; + return i; +} + void init_ident_table() { int i = 0; @@ -1364,8 +1374,28 @@ void init_ident_table() { NOT_SUPPORTED_ID = init_ident(IDENTIFIER, "NOT_SUPPORTED"); } +void init_builtin_string_macro(int macro_id, char* value) { + // Macro object shape: ([(tok, val)], arity). 
-1 arity means it's an object-like macro + heap[macro_id + 3] = cons(cons(cons(STRING, intern_str(value)), 0), -1); +} + +void init_builtin_int_macro(int macro_id, int value) { + heap[macro_id + 3] = cons(cons(cons(INTEGER, -value), 0), -1); +} + void init_pnut_macros() { init_ident(MACRO, "PNUT_CC"); + FILE__ID = init_ident(MACRO, "__FILE__"); + LINE__ID = init_ident(MACRO, "__LINE__"); + DATE__ID = init_ident(MACRO, "__DATE__"); + TIME__ID = init_ident(MACRO, "__TIME__"); + TIMESTAMP__ID = init_ident(MACRO, "__TIMESTAMP__"); + + init_builtin_string_macro(FILE__ID, ""); + init_builtin_int_macro (LINE__ID, 0); + init_builtin_string_macro(DATE__ID, "Jan 1 1970"); + init_builtin_string_macro(TIME__ID, "00:00:00"); + init_builtin_string_macro(TIMESTAMP__ID, "Jan 1 1970 00:00:00"); } // A macro argument is represented using a list of tokens. @@ -1487,7 +1517,18 @@ bool attempt_macro_expansion(int macro) { int tokens = car(heap[macro + 3]); macro = val; if (cdr(heap[macro + 3]) == -1) { // Object-like macro - play_macro(tokens, 0); + // Note: We don't check if the macro was redefined by the program + if (macro == FILE__ID) { + play_macro(cons(cons(STRING, intern_str(fp_filepath)), 0), 0); + } +#ifdef INCLUDE_LINE_NUMBER_ON_ERROR + else if (macro == LINE__ID) { + play_macro(cons(cons(INTEGER, -line_number), 0), 0); + } +#endif + else { + play_macro(tokens, 0); + } return true; } else { new_macro_args = get_macro_args_toks(macro); diff --git a/tests/_all/preprocessor/macro/builtin-stubbed.c b/tests/_all/preprocessor/macro/builtin-stubbed.c new file mode 100644 index 00000000..eb12a3e5 --- /dev/null +++ b/tests/_all/preprocessor/macro/builtin-stubbed.c @@ -0,0 +1,46 @@ +// tests for __FILE__, __LINE__, __DATE__, __TIME__, __TIMESTAMP__ built-in macros +#include <stdio.h> + +#ifndef __FILE__ +#error "__FILE__ is not defined" +#endif +#ifndef __LINE__ +#error "__LINE__ is not defined" +#endif +#ifndef __DATE__ +#error "__DATE__ is not defined" +#endif +#ifndef __TIME__ +#error 
"__TIME__ is not defined" +#endif +#ifndef __TIMESTAMP__ +#error "__TIMESTAMP__ is not defined" +#endif + +void putint(int n) { + if (n < 0) { + putchar('-'); + putint(-n); + } else if (n > 9) { + putint(n / 10); + putchar('0' + n % 10); + } else { + putchar('0' + n); + } +} + +void putstr(char *str) { + while (*str) { + putchar(*str); + str += 1; + } +} + +int main() { + putstr(__FILE__); putchar('\n'); + putint(__LINE__); putchar('\n'); + putstr(__DATE__); putchar('\n'); + putstr(__TIME__); putchar('\n'); + putstr(__TIMESTAMP__); putchar('\n'); + return 0; +} diff --git a/tests/_all/preprocessor/macro/builtin-stubbed.golden b/tests/_all/preprocessor/macro/builtin-stubbed.golden new file mode 100644 index 00000000..a89014fe --- /dev/null +++ b/tests/_all/preprocessor/macro/builtin-stubbed.golden @@ -0,0 +1,5 @@ +tests/_all/preprocessor/macro/builtin-stubbed.c +0 +Jan 1 1970 +00:00:00 +Jan 1 1970 00:00:00 diff --git a/tests/_all/preprocessor/macro/builtin.c b/tests/_all/preprocessor/macro/builtin.c new file mode 100644 index 00000000..91fda6ca --- /dev/null +++ b/tests/_all/preprocessor/macro/builtin.c @@ -0,0 +1,3 @@ +// tests for __FILE__, __LINE__, __DATE__, __TIME__, __TIMESTAMP__ built-in macros +// comp_pnut_opt: -DINCLUDE_LINE_NUMBER_ON_ERROR +#include "builtin-stubbed.c" diff --git a/tests/_all/preprocessor/macro/builtin.golden b/tests/_all/preprocessor/macro/builtin.golden new file mode 100644 index 00000000..0f1a8a20 --- /dev/null +++ b/tests/_all/preprocessor/macro/builtin.golden @@ -0,0 +1,5 @@ +tests/_all/preprocessor/macro/builtin-stubbed.c +41 +Jan 1 1970 +00:00:00 +Jan 1 1970 00:00:00 From 697e898133003c6d275aa504bedb5a7d1790128c Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Thu, 2 Jan 2025 19:33:24 -0500 Subject: [PATCH 2/9] Replace base64 with md5sum in run-tests.sh Debian woody doesn't include the base64 utility and it cannot be installed with apt-get. 
base64 was only used to generate a unique ID from the compilation options used, so it is replaced with md5sum which is available on woody. --- run-tests.sh | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/run-tests.sh b/run-tests.sh index bf8687ee..22d4c67f 100755 --- a/run-tests.sh +++ b/run-tests.sh @@ -51,9 +51,14 @@ esac compile_pnut() { # extra pnut compilation options: $1 pnut_source="pnut.c" extra_opts="$1" - extra_opts_id=$(printf "%s" $extra_opts | base64) - pnut_exe="./tests/pnut-by-gcc-$extra_opts_id.exe" - pnut_exe_backend="./tests/pnut-$extra_opts_id.$ext" + if [ -z "$extra_opts" ]; then + extra_opts_id="base" + else + extra_opts_id=$(printf "%s" "$extra_opts" | md5sum | cut -c 1-16) # 16 characters should be enough + fi + extra_opts_suffix=${extra_opts_id:+"-"}$extra_opts_id # Add a dash if there are extra options + pnut_exe="./tests/pnut-by-gcc${extra_opts_suffix}.exe" + pnut_exe_backend="./tests/pnut-$extra_opts_suffix.$ext" if [ ! -f "$pnut_exe" ]; then gcc "$pnut_source" $PNUT_EXE_OPTIONS $extra_opts -o "$pnut_exe" 2> /dev/null || fail "Error: Failed to compile $pnut_source with $backend" From 42234890fc0b5dcd11bf760b5473cfcb8ac2c4c1 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Thu, 2 Jan 2025 20:18:45 -0500 Subject: [PATCH 3/9] Mention #line directive in comment --- pnut.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pnut.c b/pnut.c index a9377cef..fd20dd14 100644 --- a/pnut.c +++ b/pnut.c @@ -1517,7 +1517,8 @@ bool attempt_macro_expansion(int macro) { int tokens = car(heap[macro + 3]); macro = val; if (cdr(heap[macro + 3]) == -1) { // Object-like macro - // Note: We don't check if the macro was redefined by the program + // Note: Redefining __{FILE,LINE}__ macros, either with the #define or #line + // directives is not supported. 
if (macro == FILE__ID) { play_macro(cons(cons(STRING, intern_str(fp_filepath)), 0), 0); } From d8f29c786c0777136d984e39ff610fd1904dbd4f Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Fri, 3 Jan 2025 10:37:51 -0500 Subject: [PATCH 4/9] Use env time instead of hardcoded path in scripts --- bootstrap-pnut-exe.sh | 2 +- bootstrap-pnut-sh.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bootstrap-pnut-exe.sh b/bootstrap-pnut-exe.sh index 2febf96c..295eb176 100755 --- a/bootstrap-pnut-exe.sh +++ b/bootstrap-pnut-exe.sh @@ -15,7 +15,7 @@ if [ ! -d "$TEMP_DIR" ]; then mkdir "$TEMP_DIR"; fi printf_timing() { msg=$1 cmd=$2 - real_time=`/usr/bin/time -p sh -c "$cmd" 2>&1 | grep '^real ' | sed 's/.* //'` + real_time=`env time -p sh -c "$cmd" 2>&1 | grep '^real ' | sed 's/.* //'` printf "%ss %s\n" $real_time "$msg" } diff --git a/bootstrap-pnut-sh.sh b/bootstrap-pnut-sh.sh index 20c196b1..717de18a 100755 --- a/bootstrap-pnut-sh.sh +++ b/bootstrap-pnut-sh.sh @@ -12,7 +12,7 @@ bootstrap_with_shell() { echo "Bootstrap with $1" - /usr/bin/time $1 "$TEMP_DIR/pnut-sh.sh" $PNUT_SH_OPTIONS "pnut.c" > "$TEMP_DIR/pnut-sh-twice-bootstrapped.sh" + env time $1 "$TEMP_DIR/pnut-sh.sh" $PNUT_SH_OPTIONS "pnut.c" > "$TEMP_DIR/pnut-sh-twice-bootstrapped.sh" diff "$TEMP_DIR/pnut-sh.sh" "$TEMP_DIR/pnut-sh-twice-bootstrapped.sh" From 79b4ab5b6aee2a13146c28dcfc338b4485826fc8 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Fri, 3 Jan 2025 11:17:20 -0500 Subject: [PATCH 5/9] Reuse temp variables between sides of &&/|| exprs After evaluating the left side of a && or || expression, the temporary variables can be reused for the right side. The reduction in temporary variables makes pnut around 2-4% faster. 
--- sh.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sh.c b/sh.c index a385c43d..ef017a07 100644 --- a/sh.c +++ b/sh.c @@ -909,6 +909,7 @@ ast handle_side_effects_go(ast node, bool executes_conditionally) { ast previous_conditional_fun_calls; ast left_conditional_fun_calls; ast right_conditional_fun_calls; + int start_gensym_ix = gensym_ix; if (nb_children == 0) { if (op == IDENTIFIER || op == IDENTIFIER_INTERNAL || op == IDENTIFIER_STRING || op == IDENTIFIER_DOLLAR || op == INTEGER || op == CHARACTER) { @@ -967,9 +968,11 @@ ast handle_side_effects_go(ast node, bool executes_conditionally) { // The left side is always executed, unless the whole expression is executed conditionally. // We could compile it as always executed, but it makes the Shell code less regular so we compile it conditionally. sub1 = handle_side_effects_go(get_child(node, 0), true); + gensym_ix = start_gensym_ix; // Reset gensym counter because the 2 sides are independent left_conditional_fun_calls = conditional_fun_calls; conditional_fun_calls = 0; sub2 = handle_side_effects_go(get_child(node, 1), true); + gensym_ix = start_gensym_ix; // Reset gensym counter because the 2 sides are independent right_conditional_fun_calls = conditional_fun_calls; conditional_fun_calls = previous_conditional_fun_calls; return new_ast4(op, sub1, sub2, left_conditional_fun_calls, right_conditional_fun_calls); From 208df3278339a53151269fa9227f1f890ca0989e Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 6 Jan 2025 21:34:45 -0500 Subject: [PATCH 6/9] Map putchar('{char}') calls to printf "{char}" This saves a few seconds on dash, and makes the code easier to read. It deviates slightly from our principle of generating "regular" code without any special cases, but the original code looked bad and was harder to read so I think it's worth it.
--- sh.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sh.c b/sh.c index ef017a07..5a582340 100644 --- a/sh.c +++ b/sh.c @@ -1445,15 +1445,21 @@ text fun_call_params(ast params) { #ifdef INCLUDE_COMP_PUTCHAR_INLINE text comp_putchar_inline(ast param) { - text res = comp_rvalue(param, RVALUE_CTX_ARITH_EXPANSION); + text res; ast ident; + if (get_op(param) == CHARACTER && get_val(param) >= 32 && get_val(param) <= 126) { // Printable ASCII characters + return string_concat3(wrap_str_lit("printf \""), escape_text(wrap_char(get_val(param)), true), wrap_char('\"')); + } + + res = comp_rvalue(param, RVALUE_CTX_ARITH_EXPANSION); + if (contains_side_effects) { ident = fresh_ident(); append_glo_decl(string_concat4(comp_lvalue(ident), wrap_str_lit("=$(("), res, wrap_str_lit("))"))); res = comp_lvalue(ident); } else if (get_op(param) != IDENTIFIER) { - res = string_concat3(wrap_char('('), res, wrap_char(')')); + res = string_concat3(wrap_char('('), res, wrap_char(')')); // Wrap in parentheses to avoid priority of operations issues } res = From 961931c91b2853e0a2bda2541ec18b70ca39616d Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 6 Jan 2025 21:49:08 -0500 Subject: [PATCH 7/9] Regenerate examples --- examples/compiled/base64.sh | 7 +++---- examples/compiled/sha256sum.sh | 5 ++--- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/examples/compiled/base64.sh b/examples/compiled/base64.sh index d633d243..ac22f57b 100755 --- a/examples/compiled/base64.sh +++ b/examples/compiled/base64.sh @@ -84,15 +84,15 @@ _encode() { printf \\$(((_$((_codes + (b1 >> 2))))/64))$(((_$((_codes + (b1 >> 2))))/8%8))$(((_$((_codes + (b1 >> 2))))%8)) if [ $b2 -lt 0 ] ; then printf \\$(((_$((_codes + (63 & (b1 << 4)))))/64))$(((_$((_codes + (63 & (b1 << 4)))))/8%8))$(((_$((_codes + (63 & (b1 << 4)))))%8)) - printf \\$(((__EQ__)/64))$(((__EQ__)/8%8))$(((__EQ__)%8)) - printf \\$(((__EQ__)/64))$(((__EQ__)/8%8))$(((__EQ__)%8)) + printf "=" + printf "=" 
break else printf \\$(((_$((_codes + (63 & ((b1 << 4) | (b2 >> 4))))))/64))$(((_$((_codes + (63 & ((b1 << 4) | (b2 >> 4))))))/8%8))$(((_$((_codes + (63 & ((b1 << 4) | (b2 >> 4))))))%8)) _getchar b3 if [ $b3 -lt 0 ] ; then printf \\$(((_$((_codes + (63 & (b2 << 2)))))/64))$(((_$((_codes + (63 & (b2 << 2)))))/8%8))$(((_$((_codes + (63 & (b2 << 2)))))%8)) - printf \\$(((__EQ__)/64))$(((__EQ__)/8%8))$(((__EQ__)%8)) + printf "=" break else printf \\$(((_$((_codes + (63 & ((b2 << 2) | (b3 >> 6))))))/64))$(((_$((_codes + (63 & ((b2 << 2) | (b3 >> 6))))))/8%8))$(((_$((_codes + (63 & ((b2 << 2) | (b3 >> 6))))))%8)) @@ -164,7 +164,6 @@ _main() { let argc $2; let myargv $3 readonly __NUL__=0 readonly __NEWLINE__=10 readonly __MINUS__=45 -readonly __EQ__=61 readonly __d__=100 # Runtime library __stdin_buf= diff --git a/examples/compiled/sha256sum.sh b/examples/compiled/sha256sum.sh index 606cccb0..9ced31d5 100755 --- a/examples/compiled/sha256sum.sh +++ b/examples/compiled/sha256sum.sh @@ -206,8 +206,8 @@ _process_file() { let filename $2 _hex __ $h : $((i += 1)) done - printf \\$(((__SPACE__)/64))$(((__SPACE__)/8%8))$(((__SPACE__)%8)) - printf \\$(((__SPACE__)/64))$(((__SPACE__)/8%8))$(((__SPACE__)%8)) + printf " " + printf " " while [ $((_$filename)) != 0 ]; do printf \\$(((_$filename)/64))$(((_$filename)/8%8))$(((_$filename)%8)) : $((filename += 1)) @@ -233,7 +233,6 @@ _main() { let argc $2; let myargv $3 # Character constants readonly __NEWLINE__=10 -readonly __SPACE__=32 # Runtime library unpack_escaped_string() { From d6c774f8b3a260cc39f6261a551b1ff2d1561cbf Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Mon, 6 Jan 2025 22:12:53 -0500 Subject: [PATCH 8/9] Put back all shells on bootstrap-pnut-sh CI check All shells except ksh were removed by accident a few days ago. 
--- .github/workflows/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 4668c5ef..426a3918 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -124,7 +124,7 @@ jobs: bootstrap-pnut-sh: strategy: matrix: - shell: ["ksh"] + shell: ["bash", "dash", "ksh", "mksh", "yash", "zsh"] runs-on: ubuntu-latest steps: - name: Checkout code @@ -138,7 +138,7 @@ jobs: - name: Bootstrap pnut-sh.sh on ${{ matrix.shell }} run: | set -e - ksh ./bootstrap-pnut-sh.sh --shell ${{ matrix.shell }} --fast + ./bootstrap-pnut-sh.sh --shell ${{ matrix.shell }} --fast bootstrap-pnut-exe: strategy: From 1d1e526f46099d8e9aa061e4e5f9288b27f3d345 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Tue, 7 Jan 2025 10:57:37 -0500 Subject: [PATCH 9/9] Make measure-file-size.sh compatible with __FILE__ --- analysis/measure-file-size.sh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/analysis/measure-file-size.sh b/analysis/measure-file-size.sh index c0a5ebc9..5899b5ad 100755 --- a/analysis/measure-file-size.sh +++ b/analysis/measure-file-size.sh @@ -36,8 +36,14 @@ expand_includes() { # $1 = output-name, $2 = options ./$TEMP_DIR/pnut-sh.exe pnut.c $2 > "$TEMP_DIR/$1.sh" ./$TEMP_DIR/pnut-sh.exe "$TEMP_DIR/$1.c" $2 > "$TEMP_DIR/$1-preincluded.sh" - diff -q "$TEMP_DIR/$1.sh" "$TEMP_DIR/$1-preincluded.sh" || \ - { echo "Error: $1.sh and $1-preincluded.sh differ"; exit 1; } + # Because we use the __FILE__ macro in pnut, the preincluded.sh file will have + # a different path than the original file. We need to replace the path in the + # preincluded file with the path of the original file. 
+ # Note: | is used as the delimiter because the path contains / + cat "$TEMP_DIR/$1-preincluded.sh" | sed "s|$TEMP_DIR/$1.c|pnut.c|" > "$TEMP_DIR/$1-preincluded-canonical.sh" + + diff -q "$TEMP_DIR/$1.sh" "$TEMP_DIR/$1-preincluded-canonical.sh" || \ + { echo "Error: $1.sh and $1-preincluded-canonical.sh differ"; exit 1; } } included_files() {