From c1485573101783b5a010e8fc5b64121ef1f528c1 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Thu, 2 Jan 2025 17:48:50 -0500 Subject: [PATCH 1/5] Support __{FILE,LINE,DATE,TIME,TIMESTAMP}__ macros --- pnut.c | 53 ++++++++++++++++--- .../_all/preprocessor/macro/builtin-stubbed.c | 46 ++++++++++++++++ .../preprocessor/macro/builtin-stubbed.golden | 5 ++ tests/_all/preprocessor/macro/builtin.c | 3 ++ tests/_all/preprocessor/macro/builtin.golden | 5 ++ 5 files changed, 106 insertions(+), 6 deletions(-) create mode 100644 tests/_all/preprocessor/macro/builtin-stubbed.c create mode 100644 tests/_all/preprocessor/macro/builtin-stubbed.golden create mode 100644 tests/_all/preprocessor/macro/builtin.c create mode 100644 tests/_all/preprocessor/macro/builtin.golden diff --git a/pnut.c b/pnut.c index addb5eaa..a9377cef 100644 --- a/pnut.c +++ b/pnut.c @@ -866,6 +866,13 @@ int WRITE_ID; int OPEN_ID; int CLOSE_ID; +// Macros that are defined by the preprocessor +int FILE__ID; +int LINE__ID; +int DATE__ID; +int TIME__ID; +int TIMESTAMP__ID; + // When we parse a macro, we generally want the tokens as they are, without expanding them. void get_tok_macro() { bool prev_expand_macro = expand_macro; @@ -1260,8 +1267,7 @@ void get_ident() { tok = heap[val+2]; } -int init_ident(int tok, char *name) { - +int intern_str(char* name) { int i = 0; int prev_ch = ch; // The character may be important to the calling function, saving it @@ -1273,15 +1279,19 @@ int init_ident(int tok, char *name) { i += 1; } - i = end_ident(); - - heap[i+2] = tok; + i = end_string(); ch = prev_ch; return i; } +int init_ident(int tok, char *name) { + int i = intern_str(name); + heap[i+2] = tok; + return i; +} + void init_ident_table() { int i = 0; @@ -1364,8 +1374,28 @@ void init_ident_table() { NOT_SUPPORTED_ID = init_ident(IDENTIFIER, "NOT_SUPPORTED"); } +void init_builtin_string_macro(int macro_id, char* value) { + // Macro object shape: ([(tok, val)], arity). 
-1 arity means it's an object-like macro + heap[macro_id + 3] = cons(cons(cons(STRING, intern_str(value)), 0), -1); +} + +void init_builtin_int_macro(int macro_id, int value) { + heap[macro_id + 3] = cons(cons(cons(INTEGER, -value), 0), -1); +} + void init_pnut_macros() { init_ident(MACRO, "PNUT_CC"); + FILE__ID = init_ident(MACRO, "__FILE__"); + LINE__ID = init_ident(MACRO, "__LINE__"); + DATE__ID = init_ident(MACRO, "__DATE__"); + TIME__ID = init_ident(MACRO, "__TIME__"); + TIMESTAMP__ID = init_ident(MACRO, "__TIMESTAMP__"); + + init_builtin_string_macro(FILE__ID, ""); + init_builtin_int_macro (LINE__ID, 0); + init_builtin_string_macro(DATE__ID, "Jan 1 1970"); + init_builtin_string_macro(TIME__ID, "00:00:00"); + init_builtin_string_macro(TIMESTAMP__ID, "Jan 1 1970 00:00:00"); } // A macro argument is represented using a list of tokens. @@ -1487,7 +1517,18 @@ bool attempt_macro_expansion(int macro) { int tokens = car(heap[macro + 3]); macro = val; if (cdr(heap[macro + 3]) == -1) { // Object-like macro - play_macro(tokens, 0); + // Note: We don't check if the macro was redefined by the program + if (macro == FILE__ID) { + play_macro(cons(cons(STRING, intern_str(fp_filepath)), 0), 0); + } +#ifdef INCLUDE_LINE_NUMBER_ON_ERROR + else if (macro == LINE__ID) { + play_macro(cons(cons(INTEGER, -line_number), 0), 0); + } +#endif + else { + play_macro(tokens, 0); + } return true; } else { new_macro_args = get_macro_args_toks(macro); diff --git a/tests/_all/preprocessor/macro/builtin-stubbed.c b/tests/_all/preprocessor/macro/builtin-stubbed.c new file mode 100644 index 00000000..eb12a3e5 --- /dev/null +++ b/tests/_all/preprocessor/macro/builtin-stubbed.c @@ -0,0 +1,46 @@ +// tests for __FILE__, __LINE__, __DATE__, __TIME__, __TIMESTAMP__ built-in macros +#include <stdio.h> + +#ifndef __FILE__ +#error "__FILE__ is not defined" +#endif +#ifndef __LINE__ +#error "__LINE__ is not defined" +#endif +#ifndef __DATE__ +#error "__DATE__ is not defined" +#endif +#ifndef __TIME__ +#error 
"__TIME__ is not defined" +#endif +#ifndef __TIMESTAMP__ +#error "__TIMESTAMP__ is not defined" +#endif + +void putint(int n) { + if (n < 0) { + putchar('-'); + putint(-n); + } else if (n > 9) { + putint(n / 10); + putchar('0' + n % 10); + } else { + putchar('0' + n); + } +} + +void putstr(char *str) { + while (*str) { + putchar(*str); + str += 1; + } +} + +int main() { + putstr(__FILE__); putchar('\n'); + putint(__LINE__); putchar('\n'); + putstr(__DATE__); putchar('\n'); + putstr(__TIME__); putchar('\n'); + putstr(__TIMESTAMP__); putchar('\n'); + return 0; +} diff --git a/tests/_all/preprocessor/macro/builtin-stubbed.golden b/tests/_all/preprocessor/macro/builtin-stubbed.golden new file mode 100644 index 00000000..a89014fe --- /dev/null +++ b/tests/_all/preprocessor/macro/builtin-stubbed.golden @@ -0,0 +1,5 @@ +tests/_all/preprocessor/macro/builtin-stubbed.c +0 +Jan 1 1970 +00:00:00 +Jan 1 1970 00:00:00 diff --git a/tests/_all/preprocessor/macro/builtin.c b/tests/_all/preprocessor/macro/builtin.c new file mode 100644 index 00000000..91fda6ca --- /dev/null +++ b/tests/_all/preprocessor/macro/builtin.c @@ -0,0 +1,3 @@ +// tests for __FILE__, __LINE__, __DATE__, __TIME__, __TIMESTAMP__ built-in macros +// comp_pnut_opt: -DINCLUDE_LINE_NUMBER_ON_ERROR +#include "builtin-stubbed.c" diff --git a/tests/_all/preprocessor/macro/builtin.golden b/tests/_all/preprocessor/macro/builtin.golden new file mode 100644 index 00000000..0f1a8a20 --- /dev/null +++ b/tests/_all/preprocessor/macro/builtin.golden @@ -0,0 +1,5 @@ +tests/_all/preprocessor/macro/builtin-stubbed.c +41 +Jan 1 1970 +00:00:00 +Jan 1 1970 00:00:00 From 697e898133003c6d275aa504bedb5a7d1790128c Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Thu, 2 Jan 2025 19:33:24 -0500 Subject: [PATCH 2/5] Replace base64 with md5sum in run-tests.sh Debian woody doesn't include the base64 utility and it cannot be installed with apt-get. 
base64 was only used to generate a unique ID from the compilation options used, so it is replaced with md5sum which is available on woody. --- run-tests.sh | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/run-tests.sh b/run-tests.sh index bf8687ee..22d4c67f 100755 --- a/run-tests.sh +++ b/run-tests.sh @@ -51,9 +51,14 @@ esac compile_pnut() { # extra pnut compilation options: $1 pnut_source="pnut.c" extra_opts="$1" - extra_opts_id=$(printf "%s" $extra_opts | base64) - pnut_exe="./tests/pnut-by-gcc-$extra_opts_id.exe" - pnut_exe_backend="./tests/pnut-$extra_opts_id.$ext" + if [ -z "$extra_opts" ]; then + extra_opts_id="base" + else + extra_opts_id=$(printf "%s" "$extra_opts" | md5sum | cut -c 1-16) # 16 characters should be enough + fi + extra_opts_suffix=${extra_opts_id:+"-"}$extra_opts_id # Add a dash if there are extra options + pnut_exe="./tests/pnut-by-gcc${extra_opts_suffix}.exe" + pnut_exe_backend="./tests/pnut-$extra_opts_suffix.$ext" if [ ! -f "$pnut_exe" ]; then gcc "$pnut_source" $PNUT_EXE_OPTIONS $extra_opts -o "$pnut_exe" 2> /dev/null || fail "Error: Failed to compile $pnut_source with $backend" From 42234890fc0b5dcd11bf760b5473cfcb8ac2c4c1 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Thu, 2 Jan 2025 20:18:45 -0500 Subject: [PATCH 3/5] Mention #line directive in comment --- pnut.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pnut.c b/pnut.c index a9377cef..fd20dd14 100644 --- a/pnut.c +++ b/pnut.c @@ -1517,7 +1517,8 @@ bool attempt_macro_expansion(int macro) { int tokens = car(heap[macro + 3]); macro = val; if (cdr(heap[macro + 3]) == -1) { // Object-like macro - // Note: We don't check if the macro was redefined by the program + // Note: Redefining __{FILE,LINE}__ macros, either with the #define or #line + // directives is not supported. 
if (macro == FILE__ID) { play_macro(cons(cons(STRING, intern_str(fp_filepath)), 0), 0); } From d8f29c786c0777136d984e39ff610fd1904dbd4f Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Fri, 3 Jan 2025 10:37:51 -0500 Subject: [PATCH 4/5] Use env time instead of hardcoded path in scripts --- bootstrap-pnut-exe.sh | 2 +- bootstrap-pnut-sh.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bootstrap-pnut-exe.sh b/bootstrap-pnut-exe.sh index 2febf96c..295eb176 100755 --- a/bootstrap-pnut-exe.sh +++ b/bootstrap-pnut-exe.sh @@ -15,7 +15,7 @@ if [ ! -d "$TEMP_DIR" ]; then mkdir "$TEMP_DIR"; fi printf_timing() { msg=$1 cmd=$2 - real_time=`/usr/bin/time -p sh -c "$cmd" 2>&1 | grep '^real ' | sed 's/.* //'` + real_time=`env time -p sh -c "$cmd" 2>&1 | grep '^real ' | sed 's/.* //'` printf "%ss %s\n" $real_time "$msg" } diff --git a/bootstrap-pnut-sh.sh b/bootstrap-pnut-sh.sh index 20c196b1..717de18a 100755 --- a/bootstrap-pnut-sh.sh +++ b/bootstrap-pnut-sh.sh @@ -12,7 +12,7 @@ bootstrap_with_shell() { echo "Bootstrap with $1" - /usr/bin/time $1 "$TEMP_DIR/pnut-sh.sh" $PNUT_SH_OPTIONS "pnut.c" > "$TEMP_DIR/pnut-sh-twice-bootstrapped.sh" + env time $1 "$TEMP_DIR/pnut-sh.sh" $PNUT_SH_OPTIONS "pnut.c" > "$TEMP_DIR/pnut-sh-twice-bootstrapped.sh" diff "$TEMP_DIR/pnut-sh.sh" "$TEMP_DIR/pnut-sh-twice-bootstrapped.sh" From 79b4ab5b6aee2a13146c28dcfc338b4485826fc8 Mon Sep 17 00:00:00 2001 From: Laurent Huberdeau Date: Fri, 3 Jan 2025 11:17:20 -0500 Subject: [PATCH 5/5] Reuse temp variables between sides of &&/|| exprs After evaluating the left side of a && or || expression, the temporary variables can be reused for the right side. The reduction in temporary variables makes pnut around 2-4% faster. 
--- sh.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sh.c b/sh.c index a385c43d..ef017a07 100644 --- a/sh.c +++ b/sh.c @@ -909,6 +909,7 @@ ast handle_side_effects_go(ast node, bool executes_conditionally) { ast previous_conditional_fun_calls; ast left_conditional_fun_calls; ast right_conditional_fun_calls; + int start_gensym_ix = gensym_ix; if (nb_children == 0) { if (op == IDENTIFIER || op == IDENTIFIER_INTERNAL || op == IDENTIFIER_STRING || op == IDENTIFIER_DOLLAR || op == INTEGER || op == CHARACTER) { @@ -967,9 +968,11 @@ ast handle_side_effects_go(ast node, bool executes_conditionally) { // The left side is always executed, unless the whole expression is executed conditionally. // We could compile it as always executed, but it makes the Shell code less regular so we compile it conditionally. sub1 = handle_side_effects_go(get_child(node, 0), true); + gensym_ix = start_gensym_ix; // Reset gensym counter because the 2 sides are independent left_conditional_fun_calls = conditional_fun_calls; conditional_fun_calls = 0; sub2 = handle_side_effects_go(get_child(node, 1), true); + gensym_ix = start_gensym_ix; // Reset gensym counter because the 2 sides are independent right_conditional_fun_calls = conditional_fun_calls; conditional_fun_calls = previous_conditional_fun_calls; return new_ast4(op, sub1, sub2, left_conditional_fun_calls, right_conditional_fun_calls);