From 64c883a32013ebfe2624fe6ec0ed0f5a9de3a716 Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Thu, 2 Nov 2023 05:58:47 -0300 Subject: [PATCH 01/20] vcscanf: move buffer variables to top of function This buffer is currently used only when decoding strings. Place it at the top level so other code can also use it. Ensure it is initialized to NULL. --- libc/stdio/vcscanf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libc/stdio/vcscanf.c b/libc/stdio/vcscanf.c index 80373886b00..50850cbbb0a 100644 --- a/libc/stdio/vcscanf.c +++ b/libc/stdio/vcscanf.c @@ -60,6 +60,8 @@ int __vcscanf(int callback(void *), // struct FreeMe *next; void *ptr; } *freeme = NULL; + void *buf = NULL; + size_t bufsize; const unsigned char *p = (const unsigned char *)fmt; int *n_ptr; int items = 0; @@ -84,8 +86,6 @@ int __vcscanf(int callback(void *), // break; case '%': { uint128_t number; - void *buf; - size_t bufsize; unsigned width = 0; unsigned char bits = 32; unsigned char charbytes = sizeof(char); From 3403b6c604f72a1911145af1215d882fa72013f1 Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Thu, 2 Nov 2023 06:05:37 -0300 Subject: [PATCH 02/20] vcscanf: change buf type to unsigned char * The data it points to will be accessed directly. Also remove casts which are now unnecessary. 
--- libc/stdio/vcscanf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libc/stdio/vcscanf.c b/libc/stdio/vcscanf.c index 50850cbbb0a..1fc4f44df5d 100644 --- a/libc/stdio/vcscanf.c +++ b/libc/stdio/vcscanf.c @@ -60,7 +60,7 @@ int __vcscanf(int callback(void *), // struct FreeMe *next; void *ptr; } *freeme = NULL; - void *buf = NULL; + unsigned char *buf = NULL; size_t bufsize; const unsigned char *p = (const unsigned char *)fmt; int *n_ptr; @@ -321,7 +321,7 @@ int __vcscanf(int callback(void *), // } if (c != -1 && j + !rawmode < bufsize && (rawmode || !isspace(c))) { if (charbytes == 1) { - ((unsigned char *)buf)[j++] = (unsigned char)c; + buf[j++] = (unsigned char)c; c = READ; } else if (tpdecodecb((wint_t *)&c, c, (void *)callback, arg) != -1) { @@ -343,7 +343,7 @@ int __vcscanf(int callback(void *), // goto Done; } else if (!rawmode && j < bufsize) { if (charbytes == sizeof(char)) { - ((unsigned char *)buf)[j] = '\0'; + buf[j] = '\0'; } else if (charbytes == sizeof(char16_t)) { ((char16_t *)buf)[j] = u'\0'; } else if (charbytes == sizeof(wchar_t)) { @@ -355,7 +355,7 @@ int __vcscanf(int callback(void *), // } ++items; if (ismalloc) { - *va_arg(va, char **) = buf; + *va_arg(va, char **) = (void *) buf; } } else { do { From 44cb3056bc7aa897b1a3dfeac9d0ff097cf310de Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Thu, 2 Nov 2023 06:07:13 -0300 Subject: [PATCH 03/20] vcscanf: add buffer cursor variable Keeps track of current buffer position. 
--- libc/stdio/vcscanf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libc/stdio/vcscanf.c b/libc/stdio/vcscanf.c index 1fc4f44df5d..e0b24bfd1de 100644 --- a/libc/stdio/vcscanf.c +++ b/libc/stdio/vcscanf.c @@ -62,6 +62,7 @@ int __vcscanf(int callback(void *), // } *freeme = NULL; unsigned char *buf = NULL; size_t bufsize; + size_t bufcur; const unsigned char *p = (const unsigned char *)fmt; int *n_ptr; int items = 0; From c16171349cff9798b113a4e17e65600496240626 Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Thu, 2 Nov 2023 06:07:44 -0300 Subject: [PATCH 04/20] vcscanf: define BUFFER macro This is similar to the READ macro. READs a character and places it in the buffer. Automatically maintains the NUL terminator. If the end is reached, reallocate the buffer. --- libc/stdio/vcscanf.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/libc/stdio/vcscanf.c b/libc/stdio/vcscanf.c index e0b24bfd1de..49f2bed5923 100644 --- a/libc/stdio/vcscanf.c +++ b/libc/stdio/vcscanf.c @@ -34,6 +34,21 @@ c; \ }) +#define BUFFER_GROW 48 +#define BUFFER \ + ({ \ + int c = READ; \ + if (bufcur >= bufsize - 1) { \ + bufsize = bufsize + BUFFER_GROW; \ + buf = realloc(buf, bufsize); \ + } \ + if (c != -1) { \ + buf[bufcur++] = c; \ + buf[bufcur] = '\0'; \ + } \ + c; \ + }) + /** * String / file / stream decoder. * From 0e7c6aa5e12eefc06d96b83316a787d0de5d8282 Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Thu, 2 Nov 2023 06:16:20 -0300 Subject: [PATCH 05/20] vcscanf: free buf before returning if not NULL This condition indicates that the floating point parser exited early. 
--- libc/stdio/vcscanf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libc/stdio/vcscanf.c b/libc/stdio/vcscanf.c index 49f2bed5923..5bec3efd88a 100644 --- a/libc/stdio/vcscanf.c +++ b/libc/stdio/vcscanf.c @@ -393,5 +393,6 @@ int __vcscanf(int callback(void *), // if (items == -1) free(entry->ptr); free(entry); } + if (buf) free(buf); return items; } From c8e941bb7908d99c9a9fdf67ce95883a2e342420 Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Thu, 2 Nov 2023 06:18:28 -0300 Subject: [PATCH 06/20] vcscanf: define floating point result variable Holds the parsed floating point value. --- libc/stdio/vcscanf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libc/stdio/vcscanf.c b/libc/stdio/vcscanf.c index 5bec3efd88a..b6ea002649e 100644 --- a/libc/stdio/vcscanf.c +++ b/libc/stdio/vcscanf.c @@ -102,6 +102,7 @@ int __vcscanf(int callback(void *), // break; case '%': { uint128_t number; + double fp; unsigned width = 0; unsigned char bits = 32; unsigned char charbytes = sizeof(char); From ec2b2720f5af091b8c588b95f530e16286004090 Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Thu, 2 Nov 2023 06:20:20 -0300 Subject: [PATCH 07/20] vcscanf: add floating point number scanner case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Handle all the documented format specifiers. Exit with error if given a length modifier other than "l". Skip spaces, initialize the buffer and then begin parsing. 
References: https://en.cppreference.com/w/c/io/fscanf --- libc/stdio/vcscanf.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/libc/stdio/vcscanf.c b/libc/stdio/vcscanf.c index b6ea002649e..d68c4a5eac3 100644 --- a/libc/stdio/vcscanf.c +++ b/libc/stdio/vcscanf.c @@ -225,6 +225,27 @@ int __vcscanf(int callback(void *), // base = 10; } goto DecodeNumber; + case 'a': + case 'A': + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': // floating point number + if (!(charbytes == sizeof(char) || charbytes == sizeof(wchar_t))) { + items = -1; + goto Done; + } + while (isspace(c)) { + c = READ; + } + bufsize = BUFFER_GROW; + buf = malloc(bufsize); + bufcur = 0; + buf[bufcur++] = c; + buf[bufcur] = '\0'; + goto ConsumeFloatingPointNumber; default: items = einval(); goto Done; From a2225a2d850d59427e0b9e8c869d22b64ff28629 Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Thu, 2 Nov 2023 06:26:44 -0300 Subject: [PATCH 08/20] vcscanf: include the gdtoa library The parsing of floating point numbers is quite complex. Reuse available libraries to accomplish the task. --- libc/stdio/vcscanf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libc/stdio/vcscanf.c b/libc/stdio/vcscanf.c index d68c4a5eac3..9bfdd8900de 100644 --- a/libc/stdio/vcscanf.c +++ b/libc/stdio/vcscanf.c @@ -26,6 +26,7 @@ #include "libc/str/tpdecodecb.internal.h" #include "libc/str/utf16.h" #include "libc/sysv/errfuns.h" +#include "third_party/gdtoa/gdtoa.h" #define READ \ ({ \ From 99c5006ea94ae0da532d112c79abba385b615c83 Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Thu, 2 Nov 2023 06:29:15 -0300 Subject: [PATCH 09/20] vcscanf: parse floating point numbers The vcscanf function attempts to recognize the floating point number and then uses the strtod function to calculate its numeric value. It begins by recognizing an optional plus or minus sign, followed by either hexadecimal notation or NaN and infinity constants. 
Then it looks for an optional integer part, a point or comma, a fractional part, and an optional exponent symbol. If it finds an exponent, it recognizes the optional sign and digits. Every time a character is accepted, it is also buffered. The accumulated buffer is passed to strtod when recognition is finished and the output is assigned to the output pointer of appropriate length. This only happens if not in discard mode. Finally, the buffer is freed and its state is reset. References: https://en.cppreference.com/w/c/string/byte/strtof --- libc/stdio/vcscanf.c | 144 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) diff --git a/libc/stdio/vcscanf.c b/libc/stdio/vcscanf.c index 9bfdd8900de..982b6752517 100644 --- a/libc/stdio/vcscanf.c +++ b/libc/stdio/vcscanf.c @@ -332,6 +332,150 @@ int __vcscanf(int callback(void *), // goto Done; } continue; + ConsumeFloatingPointNumber: + if (c == '+' || c == '-') { + c = BUFFER; + } + bool hexadecimal = false; + if (c == '0') { + c = BUFFER; + if (c == 'x' || c == 'X') { + c = BUFFER; + hexadecimal = true; + goto BufferFloatingPointNumber; + } else if (c == -1) { + fp = strtod((char *) buf, NULL); + goto GotFloatingPointNumber; + } else { + goto BufferFloatingPointNumber; + } + } else if (c == 'n' || c == 'N') { + c = BUFFER; + if (c == 'a' || c == 'A') { + c = BUFFER; + if (c == 'n' || c == 'N') { + c = BUFFER; + if (c == '(') { + c = BUFFER; + do { + bool isdigit = c >= '0' && c <= '9'; + bool isletter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); + if (!(c == '_' || isdigit || isletter)) { + goto Done; + } + } while ((c = BUFFER) != -1 && c != ')'); + fp = strtod((char *) buf, NULL); + goto GotFloatingPointNumber; + } else { + fp = strtod((char *) buf, NULL); + goto GotFloatingPointNumber; + } + } else { + goto Done; + } + } else { + goto Done; + } + } else if (c == 'i' || c == 'I') { + c = BUFFER; + if (c == 'n' || c == 'N') { + c = BUFFER; + if (c == 'f' || c == 'F') { + c = BUFFER; + if 
(c == 'i' || c == 'I') { + c = BUFFER; + if (c == 'n' || c == 'N') { + c = BUFFER; + if (c == 'i' || c == 'I') { + c = BUFFER; + if (c == 't' || c == 'T') { + c = BUFFER; + if (c == 'y' || c == 'Y') { + } else { + goto Done; + } + } else { + goto Done; + } + } else { + goto Done; + } + } else { + goto Done; + } + } else { + if (c != -1 && unget) { + unget(c, arg); + } + fp = strtod((char *) buf, NULL); + goto GotFloatingPointNumber; + } + } else { + goto Done; + } + } else { + goto Done; + } + } + BufferFloatingPointNumber: + enum { INTEGER, FRACTIONAL, SIGN, EXPONENT } state = INTEGER; + do { + bool isdecdigit = c >= '0' && c <= '9'; + bool ishexdigit = (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); + bool ispoint = c == '.' || c == ','; + bool isdecexp = c == 'e' || c == 'E'; + bool ishexp = c == 'p' || c == 'P'; + bool issign = c == '+' || c == '-'; + + switch (state) { + case INTEGER: + case FRACTIONAL: + if (isdecdigit || (hexadecimal && ishexdigit)) { + goto Continue; + } else if (state == INTEGER && ispoint) { + state = FRACTIONAL; + goto Continue; + } else if (isdecexp || (hexadecimal && ishexp)) { + state = SIGN; + goto Continue; + } else goto Break; + case SIGN: + if (issign) { + state = EXPONENT; + goto Continue; + } + state = EXPONENT; + // fallthrough + case EXPONENT: + if (isdecdigit) { + goto Continue; + } else goto Break; + default: + goto Break; + } + Continue: + continue; + Break: + if (c != -1 && unget) { + unget(c, arg); + } + break; + } while ((c = BUFFER) != -1); + fp = strtod((char *)buf, NULL); + GotFloatingPointNumber: + if (!discard) { + ++items; + void *out = va_arg(va, void *); + if (charbytes == sizeof(char)) { + *(float *)out = (float)fp; + } else { + *(double *)out = (double)fp; + } + } + free(buf); + buf = NULL; + bufcur = bufsize = 0; + continue; ReportConsumed: n_ptr = va_arg(va, int *); *n_ptr = consumed - 1; // minus lookahead From 758cdb72daf192bdcfae01f23ceebcc2f0d43754 Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins 
Moreira Date: Thu, 2 Nov 2023 06:42:28 -0300 Subject: [PATCH 10/20] examples: add a parsefloat example Essentially an expanded version of the basic test case posted in GitHub issue #456. Also includes examples from documentation. References: https://github.com/jart/cosmopolitan/issues/456 https://en.cppreference.com/w/c/string/byte/strtof --- examples/parsefloat.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 examples/parsefloat.c diff --git a/examples/parsefloat.c b/examples/parsefloat.c new file mode 100644 index 00000000000..ea0fb87d156 --- /dev/null +++ b/examples/parsefloat.c @@ -0,0 +1,31 @@ +#include <stdio.h> + +#define PARSE_AND_PRINT(type, scan_fmt, print_fmt, str) \ + do { \ + type val; int ret; \ + ret = sscanf(str, scan_fmt, &val); \ + printf("\"%s\" => " print_fmt " = %d\n", str, val, ret); \ + } while (0) + +int main() +{ + PARSE_AND_PRINT(float, "%f", "%f", "0.3715"); + PARSE_AND_PRINT(float, "%f", "%f", ".3715"); + PARSE_AND_PRINT(float, "%f", "%f", "3715"); + PARSE_AND_PRINT(float, "%f", "%f", "111.11"); + PARSE_AND_PRINT(float, "%f", "%f", "-2.22"); + PARSE_AND_PRINT(float, "%f", "%f", "Nan"); + PARSE_AND_PRINT(float, "%f", "%f", "nAn(2)"); + PARSE_AND_PRINT(float, "%f", "%f", "-NAN(_asdfZXCV1234_)"); + PARSE_AND_PRINT(float, "%f", "%f", "-nan"); + PARSE_AND_PRINT(float, "%f", "%f", "+nan"); + PARSE_AND_PRINT(float, "%f", "%f", "inF"); + PARSE_AND_PRINT(float, "%f", "%f", "iNfINiTy"); + PARSE_AND_PRINT(float, "%f", "%f", "+inf"); + PARSE_AND_PRINT(float, "%f", "%f", "-inf"); + PARSE_AND_PRINT(float, "%f", "%f", "0X1.BC70A3D70A3D7P+6"); + PARSE_AND_PRINT(float, "%f", "%f", "1.18973e+4932zzz"); + PARSE_AND_PRINT(float, "%f", "%.10f", " -0.0000000123junk"); + PARSE_AND_PRINT(float, "%f", "%f", "junk"); + return 0; +} From 90f19ed4485f67a9d9dc29155fbd2196988997e2 Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Thu, 2 Nov 2023 06:47:45 -0300 Subject: [PATCH 11/20] vcscanf: ensure buf is NULL after 
string decoding The presence of a pointer in buf could result in double free bugs when execution reaches the end of the function and it is freed. --- libc/stdio/vcscanf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libc/stdio/vcscanf.c b/libc/stdio/vcscanf.c index 982b6752517..eff623019be 100644 --- a/libc/stdio/vcscanf.c +++ b/libc/stdio/vcscanf.c @@ -540,6 +540,7 @@ int __vcscanf(int callback(void *), // if (ismalloc) { *va_arg(va, char **) = (void *) buf; } + buf = NULL; } else { do { if (isspace(c)) break; From bb0d4c2c150de53bd2238c88a34ca7a682958aff Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Thu, 2 Nov 2023 08:00:54 -0300 Subject: [PATCH 12/20] vcscanf: fix formatting errors Somehow I ended up introducing mixed tabs and spaces into the file. --- libc/stdio/vcscanf.c | 246 +++++++++++++++++++++---------------------- 1 file changed, 123 insertions(+), 123 deletions(-) diff --git a/libc/stdio/vcscanf.c b/libc/stdio/vcscanf.c index eff623019be..7032ceb5166 100644 --- a/libc/stdio/vcscanf.c +++ b/libc/stdio/vcscanf.c @@ -103,7 +103,7 @@ int __vcscanf(int callback(void *), // break; case '%': { uint128_t number; - double fp; + double fp; unsigned width = 0; unsigned char bits = 32; unsigned char charbytes = sizeof(char); @@ -336,145 +336,145 @@ int __vcscanf(int callback(void *), // if (c == '+' || c == '-') { c = BUFFER; } - bool hexadecimal = false; + bool hexadecimal = false; if (c == '0') { c = BUFFER; if (c == 'x' || c == 'X') { c = BUFFER; - hexadecimal = true; - goto BufferFloatingPointNumber; + hexadecimal = true; + goto BufferFloatingPointNumber; } else if (c == -1) { - fp = strtod((char *) buf, NULL); - goto GotFloatingPointNumber; + fp = strtod((char *) buf, NULL); + goto GotFloatingPointNumber; } else { - goto BufferFloatingPointNumber; - } + goto BufferFloatingPointNumber; + } } else if (c == 'n' || c == 'N') { - c = BUFFER; - if (c == 'a' || c == 'A') { - c = BUFFER; - if (c == 'n' || c == 'N') { - c = BUFFER; - if (c == 
'(') { - c = BUFFER; - do { - bool isdigit = c >= '0' && c <= '9'; - bool isletter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); - if (!(c == '_' || isdigit || isletter)) { - goto Done; - } - } while ((c = BUFFER) != -1 && c != ')'); - fp = strtod((char *) buf, NULL); - goto GotFloatingPointNumber; - } else { - fp = strtod((char *) buf, NULL); - goto GotFloatingPointNumber; - } - } else { - goto Done; - } - } else { - goto Done; - } + c = BUFFER; + if (c == 'a' || c == 'A') { + c = BUFFER; + if (c == 'n' || c == 'N') { + c = BUFFER; + if (c == '(') { + c = BUFFER; + do { + bool isdigit = c >= '0' && c <= '9'; + bool isletter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); + if (!(c == '_' || isdigit || isletter)) { + goto Done; + } + } while ((c = BUFFER) != -1 && c != ')'); + fp = strtod((char *) buf, NULL); + goto GotFloatingPointNumber; + } else { + fp = strtod((char *) buf, NULL); + goto GotFloatingPointNumber; + } + } else { + goto Done; + } + } else { + goto Done; + } } else if (c == 'i' || c == 'I') { - c = BUFFER; - if (c == 'n' || c == 'N') { - c = BUFFER; - if (c == 'f' || c == 'F') { - c = BUFFER; - if (c == 'i' || c == 'I') { - c = BUFFER; - if (c == 'n' || c == 'N') { - c = BUFFER; - if (c == 'i' || c == 'I') { - c = BUFFER; - if (c == 't' || c == 'T') { - c = BUFFER; - if (c == 'y' || c == 'Y') { + c = BUFFER; + if (c == 'n' || c == 'N') { + c = BUFFER; + if (c == 'f' || c == 'F') { + c = BUFFER; + if (c == 'i' || c == 'I') { + c = BUFFER; + if (c == 'n' || c == 'N') { + c = BUFFER; + if (c == 'i' || c == 'I') { + c = BUFFER; + if (c == 't' || c == 'T') { + c = BUFFER; + if (c == 'y' || c == 'Y') { } else { - goto Done; - } + goto Done; + } } else { - goto Done; - } + goto Done; + } } else { - goto Done; - } + goto Done; + } } else { - goto Done; - } - } else { - if (c != -1 && unget) { - unget(c, arg); - } - fp = strtod((char *) buf, NULL); - goto GotFloatingPointNumber; - } - } else { - goto Done; - } - } else { - goto Done; - } - } - 
BufferFloatingPointNumber: - enum { INTEGER, FRACTIONAL, SIGN, EXPONENT } state = INTEGER; - do { - bool isdecdigit = c >= '0' && c <= '9'; - bool ishexdigit = (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); - bool ispoint = c == '.' || c == ','; - bool isdecexp = c == 'e' || c == 'E'; - bool ishexp = c == 'p' || c == 'P'; - bool issign = c == '+' || c == '-'; + goto Done; + } + } else { + if (c != -1 && unget) { + unget(c, arg); + } + fp = strtod((char *) buf, NULL); + goto GotFloatingPointNumber; + } + } else { + goto Done; + } + } else { + goto Done; + } + } + BufferFloatingPointNumber: + enum { INTEGER, FRACTIONAL, SIGN, EXPONENT } state = INTEGER; + do { + bool isdecdigit = c >= '0' && c <= '9'; + bool ishexdigit = (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); + bool ispoint = c == '.' || c == ','; + bool isdecexp = c == 'e' || c == 'E'; + bool ishexp = c == 'p' || c == 'P'; + bool issign = c == '+' || c == '-'; switch (state) { - case INTEGER: - case FRACTIONAL: - if (isdecdigit || (hexadecimal && ishexdigit)) { - goto Continue; - } else if (state == INTEGER && ispoint) { - state = FRACTIONAL; - goto Continue; - } else if (isdecexp || (hexadecimal && ishexp)) { - state = SIGN; - goto Continue; - } else goto Break; - case SIGN: - if (issign) { - state = EXPONENT; - goto Continue; - } - state = EXPONENT; - // fallthrough - case EXPONENT: - if (isdecdigit) { - goto Continue; - } else goto Break; - default: - goto Break; + case INTEGER: + case FRACTIONAL: + if (isdecdigit || (hexadecimal && ishexdigit)) { + goto Continue; + } else if (state == INTEGER && ispoint) { + state = FRACTIONAL; + goto Continue; + } else if (isdecexp || (hexadecimal && ishexp)) { + state = SIGN; + goto Continue; + } else goto Break; + case SIGN: + if (issign) { + state = EXPONENT; + goto Continue; + } + state = EXPONENT; + // fallthrough + case EXPONENT: + if (isdecdigit) { + goto Continue; + } else goto Break; + default: + goto Break; } - Continue: - continue; - Break: - if (c != 
-1 && unget) { - unget(c, arg); - } - break; + Continue: + continue; + Break: + if (c != -1 && unget) { + unget(c, arg); + } + break; } while ((c = BUFFER) != -1); - fp = strtod((char *)buf, NULL); + fp = strtod((char *)buf, NULL); GotFloatingPointNumber: - if (!discard) { - ++items; + if (!discard) { + ++items; void *out = va_arg(va, void *); - if (charbytes == sizeof(char)) { - *(float *)out = (float)fp; - } else { - *(double *)out = (double)fp; - } - } - free(buf); - buf = NULL; - bufcur = bufsize = 0; + if (charbytes == sizeof(char)) { + *(float *)out = (float)fp; + } else { + *(double *)out = (double)fp; + } + } + free(buf); + buf = NULL; + bufcur = bufsize = 0; continue; ReportConsumed: n_ptr = va_arg(va, int *); @@ -540,7 +540,7 @@ int __vcscanf(int callback(void *), // if (ismalloc) { *va_arg(va, char **) = (void *) buf; } - buf = NULL; + buf = NULL; } else { do { if (isspace(c)) break; From cc79f6233d6625c811b63dd2ad19735398dae3fd Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Thu, 2 Nov 2023 08:03:16 -0300 Subject: [PATCH 13/20] vcscanf: add curly braces around else clause Makes the goto statement easier to read. Also consistent with the rest of the codebase. --- libc/stdio/vcscanf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libc/stdio/vcscanf.c b/libc/stdio/vcscanf.c index 7032ceb5166..62a2545b3a1 100644 --- a/libc/stdio/vcscanf.c +++ b/libc/stdio/vcscanf.c @@ -438,7 +438,9 @@ int __vcscanf(int callback(void *), // } else if (isdecexp || (hexadecimal && ishexp)) { state = SIGN; goto Continue; - } else goto Break; + } else { + goto Break; + } case SIGN: if (issign) { state = EXPONENT; From c38aec9949ef1989f87bcbc6820bd3966dd90980 Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Thu, 2 Nov 2023 19:30:06 -0300 Subject: [PATCH 14/20] test: add sscanf floating point parser tests Test that floating point numbers are correctly parsed from strings. 
--- test/libc/stdio/sscanf_test.c | 109 ++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/test/libc/stdio/sscanf_test.c b/test/libc/stdio/sscanf_test.c index e1408c09a3e..411f176b11f 100644 --- a/test/libc/stdio/sscanf_test.c +++ b/test/libc/stdio/sscanf_test.c @@ -21,6 +21,7 @@ #include "libc/intrin/bits.h" #include "libc/inttypes.h" #include "libc/limits.h" +#include "libc/math.h" #include "libc/mem/mem.h" #include "libc/runtime/runtime.h" #include "libc/stdio/internal.h" @@ -328,6 +329,114 @@ TEST(sscanf, flexdecimal_hex) { EXPECT_EQ(666, y); } +TEST(sscanf, floating_point_simple) { + float x = 666.666f, y = x, z = y; + EXPECT_EQ(3, sscanf("0.3715 .3715 3715", "%f %f %f", &x, &y, &z)); + EXPECT_EQ(0.3715f, x); + EXPECT_EQ(0.3715f, y); + EXPECT_EQ(3715.0f, z); +} + +TEST(sscanf, floating_point_simple_double_precision) { + double x = 666.666, y = x, z = y; + EXPECT_EQ(3, sscanf("0.3715 .3715 3715", "%lf %lf %lf", &x, &y, &z)); + EXPECT_EQ(0.3715, x); + EXPECT_EQ(0.3715, y); + EXPECT_EQ(3715.0, z); +} + +TEST(sscanf, floating_point_nan) { + float a = 666.666f, b = a, c = b, d = c, e = d, f = e; + EXPECT_EQ(4, sscanf("nan -NAN nAn NaN", "%f %f %f %f", &a, &b, &c, &d)); + EXPECT_EQ(2, sscanf("nan(2) -NaN(_ABCDzxcv1234_)", "%f %f", &e, &f)); + EXPECT_TRUE(isnan(a)); + EXPECT_TRUE(isnan(b)); + EXPECT_TRUE(isnan(c)); + EXPECT_TRUE(isnan(d)); + EXPECT_TRUE(isnan(e)); + EXPECT_TRUE(isnan(f)); +} + +TEST(sscanf, floating_point_nan_double_precision) { + double a = 666.666, b = a, c = b, d = c, e = d, f = e; + EXPECT_EQ(4, sscanf("nan -NAN nAn NaN", "%lf %lf %lf %lf", &a, &b, &c, &d)); + EXPECT_EQ(2, sscanf("nan(2) -NAN(_ABCDzxcv1234_)", "%lf %lf", &e, &f)); + EXPECT_TRUE(isnan(a)); + EXPECT_TRUE(isnan(b)); + EXPECT_TRUE(isnan(c)); + EXPECT_TRUE(isnan(d)); + EXPECT_TRUE(isnan(e)); + EXPECT_TRUE(isnan(f)); +} + +TEST(sscanf, floating_point_infinity) { + float a = 666.666f, b = a, c = b, d = c, e = d, f = e, g = f; + EXPECT_EQ(4, sscanf("inf 
+INF -iNf InF", "%f %f %f %f", &a, &b, &c, &d)); + EXPECT_EQ(3, sscanf("+infinity -INFINITY iNfInItY", "%f %f %f", &e, &f, &g)); + EXPECT_TRUE(isinf(a)); + EXPECT_TRUE(isinf(b)); + EXPECT_TRUE(isinf(c)); + EXPECT_TRUE(isinf(d)); + EXPECT_TRUE(isinf(e)); + EXPECT_TRUE(isinf(f)); + EXPECT_TRUE(isinf(g)); +} + +TEST(sscanf, floating_point_infinity_double_precision) { + double a = 666.666, b = a, c = b, d = c, e = d, f = e, g = f; + EXPECT_EQ(4, sscanf("inf +INF -iNf InF", "%lf %lf %lf %lf", &a, &b, &c, &d)); + EXPECT_EQ(3, sscanf("+infinity -INFINITY iNfInItY", "%lf %lf %lf", &e, &f, &g)); + EXPECT_TRUE(isinf(a)); + EXPECT_TRUE(isinf(b)); + EXPECT_TRUE(isinf(c)); + EXPECT_TRUE(isinf(d)); + EXPECT_TRUE(isinf(e)); + EXPECT_TRUE(isinf(f)); + EXPECT_TRUE(isinf(g)); +} + +TEST(sscanf, floating_point_documentation_examples) { + float a = 666.666f, b = a, c = b, d = c, e = d, f = e, g = f, h = g, i = h; + + EXPECT_EQ(2, sscanf("111.11 -2.22", "%f %f", &a, &b)); + EXPECT_EQ(2, sscanf("Nan nan(2)", "%f %f", &c, &d)); + EXPECT_EQ(1, sscanf("inF", "%f", &e)); + EXPECT_EQ(2, sscanf("0X1.BC70A3D70A3D7P+6 1.18973e+4932zzz", "%f %f", &f, &g)); + EXPECT_EQ(1, sscanf("-0.0000000123junk", "%f", &h)); + EXPECT_EQ(1, sscanf("junk", "%f", &i)); + + EXPECT_EQ(111.11f, a); + EXPECT_EQ(-2.22f, b); + EXPECT_TRUE(isnan(c)); + EXPECT_TRUE(isnan(d)); + EXPECT_TRUE(isinf(e)); + EXPECT_EQ(0X1.BC70A3D70A3D7P+6f, f); + EXPECT_TRUE(isinf(g)); + EXPECT_EQ(-0.0000000123f, h); + EXPECT_EQ(.0f, i); +} + +TEST(sscanf, floating_point_documentation_examples_double_precision) { + double a = 666.666f, b = a, c = b, d = c, e = d, f = e, g = f, h = g, i = h; + + EXPECT_EQ(2, sscanf("111.11 -2.22", "%lf %lf", &a, &b)); + EXPECT_EQ(2, sscanf("Nan nan(2)", "%lf %lf", &c, &d)); + EXPECT_EQ(1, sscanf("inF", "%lf", &e)); + EXPECT_EQ(2, sscanf("0X1.BC70A3D70A3D7P+6 1.18973e+4932zzz", "%lf %lf", &f, &g)); + EXPECT_EQ(1, sscanf("-0.0000000123junk", "%lf", &h)); + EXPECT_EQ(1, sscanf("junk", "%lf", &i)); + + 
EXPECT_EQ(111.11, a); + EXPECT_EQ(-2.22, b); + EXPECT_TRUE(isnan(c)); + EXPECT_TRUE(isnan(d)); + EXPECT_TRUE(isinf(e)); + EXPECT_EQ(0X1.BC70A3D70A3D7P+6, f); + EXPECT_TRUE(isinf(g)); + EXPECT_EQ(-0.0000000123, h); + EXPECT_EQ(.0, i); +} + TEST(sscanf, luplus) { long x = 666; EXPECT_EQ(1, sscanf("+123", "%lu", &x)); From ded97bf5afac6d7ea6398b0d458bbb9eabd18c7c Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Thu, 2 Nov 2023 20:05:41 -0300 Subject: [PATCH 15/20] vcscanf: add curly braces around another else Makes the goto statement easier to read. Also consistent with the rest of the codebase. --- libc/stdio/vcscanf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libc/stdio/vcscanf.c b/libc/stdio/vcscanf.c index 62a2545b3a1..d74b7006740 100644 --- a/libc/stdio/vcscanf.c +++ b/libc/stdio/vcscanf.c @@ -451,7 +451,9 @@ int __vcscanf(int callback(void *), // case EXPONENT: if (isdecdigit) { goto Continue; - } else goto Break; + } else { + goto Break; + } default: goto Break; } From c78ae967f8a5148b157efc67a130e8dc57c57198 Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Thu, 2 Nov 2023 20:34:04 -0300 Subject: [PATCH 16/20] vcscanf: buffer the last 'y' of "infinity" I don't know why I thought this wasn't necessary. --- libc/stdio/vcscanf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libc/stdio/vcscanf.c b/libc/stdio/vcscanf.c index d74b7006740..b2bcc1b9c86 100644 --- a/libc/stdio/vcscanf.c +++ b/libc/stdio/vcscanf.c @@ -391,6 +391,7 @@ int __vcscanf(int callback(void *), // if (c == 't' || c == 'T') { c = BUFFER; if (c == 'y' || c == 'Y') { + c = BUFFER; } else { goto Done; } From 2daad6ede8edcccfb6876dc100b4d736e191002d Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Thu, 2 Nov 2023 20:36:58 -0300 Subject: [PATCH 17/20] vcscanf: buffer the ')' of "nan(whatever)" For some reason I thought the do while (c = BUFFER ...) loop already did it for me. Apparently not! 
--- libc/stdio/vcscanf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libc/stdio/vcscanf.c b/libc/stdio/vcscanf.c index b2bcc1b9c86..ec88a0e0baf 100644 --- a/libc/stdio/vcscanf.c +++ b/libc/stdio/vcscanf.c @@ -364,6 +364,9 @@ int __vcscanf(int callback(void *), // goto Done; } } while ((c = BUFFER) != -1 && c != ')'); + if (c == ')') { + c = BUFFER; + } fp = strtod((char *) buf, NULL); goto GotFloatingPointNumber; } else { From 64d2b12639fc34aea98ac2517b5f20eb19ecf676 Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Thu, 2 Nov 2023 20:47:48 -0300 Subject: [PATCH 18/20] test: parse more floats at once I fixed a bug somewhat related to this. Make sure it doesn't come back. --- test/libc/stdio/sscanf_test.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/test/libc/stdio/sscanf_test.c b/test/libc/stdio/sscanf_test.c index 411f176b11f..94b3e772927 100644 --- a/test/libc/stdio/sscanf_test.c +++ b/test/libc/stdio/sscanf_test.c @@ -396,14 +396,11 @@ TEST(sscanf, floating_point_infinity_double_precision) { } TEST(sscanf, floating_point_documentation_examples) { - float a = 666.666f, b = a, c = b, d = c, e = d, f = e, g = f, h = g, i = h; + float a = 666.666f, b = a, c = b, d = c, e = d, f = e, g = f, h = g, i = h, j = i; EXPECT_EQ(2, sscanf("111.11 -2.22", "%f %f", &a, &b)); - EXPECT_EQ(2, sscanf("Nan nan(2)", "%f %f", &c, &d)); - EXPECT_EQ(1, sscanf("inF", "%f", &e)); - EXPECT_EQ(2, sscanf("0X1.BC70A3D70A3D7P+6 1.18973e+4932zzz", "%f %f", &f, &g)); - EXPECT_EQ(1, sscanf("-0.0000000123junk", "%f", &h)); - EXPECT_EQ(1, sscanf("junk", "%f", &i)); + EXPECT_EQ(3, sscanf("Nan nan(2) inF", "%f %f %f", &c, &d, &e)); + EXPECT_EQ(5, sscanf("0X1.BC70A3D70A3D7P+6 1.18973e+4932zzz -0.0000000123junk junk", "%f %f %f %f %f", &f, &g, &h, &i, &j)); EXPECT_EQ(111.11f, a); EXPECT_EQ(-2.22f, b); @@ -414,17 +411,15 @@ TEST(sscanf, floating_point_documentation_examples) { EXPECT_TRUE(isinf(g)); EXPECT_EQ(-0.0000000123f, h); 
EXPECT_EQ(.0f, i); + EXPECT_EQ(.0f, j); } TEST(sscanf, floating_point_documentation_examples_double_precision) { - double a = 666.666f, b = a, c = b, d = c, e = d, f = e, g = f, h = g, i = h; + double a = 666.666, b = a, c = b, d = c, e = d, f = e, g = f, h = g, i = h, j = i; EXPECT_EQ(2, sscanf("111.11 -2.22", "%lf %lf", &a, &b)); - EXPECT_EQ(2, sscanf("Nan nan(2)", "%lf %lf", &c, &d)); - EXPECT_EQ(1, sscanf("inF", "%lf", &e)); - EXPECT_EQ(2, sscanf("0X1.BC70A3D70A3D7P+6 1.18973e+4932zzz", "%lf %lf", &f, &g)); - EXPECT_EQ(1, sscanf("-0.0000000123junk", "%lf", &h)); - EXPECT_EQ(1, sscanf("junk", "%lf", &i)); + EXPECT_EQ(3, sscanf("Nan nan(2) inF", "%lf %lf %lf", &c, &d, &e)); + EXPECT_EQ(5, sscanf("0X1.BC70A3D70A3D7P+6 1.18973e+4932zzz -0.0000000123junk junk", "%lf %lf %lf %lf %lf", &f, &g, &h, &i, &j)); EXPECT_EQ(111.11, a); EXPECT_EQ(-2.22, b); @@ -435,6 +430,7 @@ TEST(sscanf, floating_point_documentation_examples_double_precision) { EXPECT_TRUE(isinf(g)); EXPECT_EQ(-0.0000000123, h); EXPECT_EQ(.0, i); + EXPECT_EQ(.0, j); } TEST(sscanf, luplus) { From 8f12a061ad03225180a8027bfdaa6ada076d1850 Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Fri, 3 Nov 2023 23:01:10 -0300 Subject: [PATCH 19/20] vcscanf: remove strtod call repetition Place it after the GotFloatingPointNumber label where it belongs. 
--- libc/stdio/vcscanf.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/libc/stdio/vcscanf.c b/libc/stdio/vcscanf.c index ec88a0e0baf..3e26bc9790d 100644 --- a/libc/stdio/vcscanf.c +++ b/libc/stdio/vcscanf.c @@ -344,7 +344,6 @@ int __vcscanf(int callback(void *), // hexadecimal = true; goto BufferFloatingPointNumber; } else if (c == -1) { - fp = strtod((char *) buf, NULL); goto GotFloatingPointNumber; } else { goto BufferFloatingPointNumber; @@ -367,10 +366,8 @@ int __vcscanf(int callback(void *), // if (c == ')') { c = BUFFER; } - fp = strtod((char *) buf, NULL); goto GotFloatingPointNumber; } else { - fp = strtod((char *) buf, NULL); goto GotFloatingPointNumber; } } else { @@ -411,7 +408,6 @@ int __vcscanf(int callback(void *), // if (c != -1 && unget) { unget(c, arg); } - fp = strtod((char *) buf, NULL); goto GotFloatingPointNumber; } } else { @@ -469,8 +465,8 @@ int __vcscanf(int callback(void *), // } break; } while ((c = BUFFER) != -1); - fp = strtod((char *)buf, NULL); GotFloatingPointNumber: + fp = strtod((char *)buf, NULL); if (!discard) { ++items; void *out = va_arg(va, void *); From 850ab9536956140c541f5a4683dbb1c67a3a572e Mon Sep 17 00:00:00 2001 From: Matheus Afonso Martins Moreira Date: Sun, 12 Nov 2023 23:01:14 -0300 Subject: [PATCH 20/20] vcscanf: refactor buffer usage Create a dedicated buffer for floating point parsing purposes and restore the original string decoding buffer just as it was. This fixes the segmentation fault in the tinylinux mode tests. I suspect the floating point buffer was ending up in the free list somehow which resulted in a double free when the function exited. 
--- libc/stdio/vcscanf.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/libc/stdio/vcscanf.c b/libc/stdio/vcscanf.c index 3e26bc9790d..686623aa0c0 100644 --- a/libc/stdio/vcscanf.c +++ b/libc/stdio/vcscanf.c @@ -35,17 +35,17 @@ c; \ }) -#define BUFFER_GROW 48 +#define FP_BUFFER_GROW 48 #define BUFFER \ ({ \ int c = READ; \ - if (bufcur >= bufsize - 1) { \ - bufsize = bufsize + BUFFER_GROW; \ - buf = realloc(buf, bufsize); \ + if (fpbufcur >= fpbufsize - 1) { \ + fpbufsize = fpbufsize + FP_BUFFER_GROW; \ + fpbuf = realloc(fpbuf, fpbufsize); \ } \ if (c != -1) { \ - buf[bufcur++] = c; \ - buf[bufcur] = '\0'; \ + fpbuf[fpbufcur++] = c; \ + fpbuf[fpbufcur] = '\0'; \ } \ c; \ }) @@ -76,9 +76,9 @@ int __vcscanf(int callback(void *), // struct FreeMe *next; void *ptr; } *freeme = NULL; - unsigned char *buf = NULL; - size_t bufsize; - size_t bufcur; + unsigned char *fpbuf = NULL; + size_t fpbufsize; + size_t fpbufcur; const unsigned char *p = (const unsigned char *)fmt; int *n_ptr; int items = 0; @@ -103,6 +103,8 @@ int __vcscanf(int callback(void *), // break; case '%': { uint128_t number; + unsigned char *buf; + size_t bufsize; double fp; unsigned width = 0; unsigned char bits = 32; @@ -241,11 +243,11 @@ int __vcscanf(int callback(void *), // while (isspace(c)) { c = READ; } - bufsize = BUFFER_GROW; - buf = malloc(bufsize); - bufcur = 0; - buf[bufcur++] = c; - buf[bufcur] = '\0'; + fpbufsize = FP_BUFFER_GROW; + fpbuf = malloc(fpbufsize); + fpbufcur = 0; + fpbuf[fpbufcur++] = c; + fpbuf[fpbufcur] = '\0'; goto ConsumeFloatingPointNumber; default: items = einval(); @@ -466,7 +468,7 @@ int __vcscanf(int callback(void *), // break; } while ((c = BUFFER) != -1); GotFloatingPointNumber: - fp = strtod((char *)buf, NULL); + fp = strtod((char *)fpbuf, NULL); if (!discard) { ++items; void *out = va_arg(va, void *); @@ -476,9 +478,9 @@ int __vcscanf(int callback(void *), // *(double *)out = (double)fp; } } - free(buf); - 
buf = NULL; - bufcur = bufsize = 0; + free(fpbuf); + fpbuf = NULL; + fpbufcur = fpbufsize = 0; continue; ReportConsumed: n_ptr = va_arg(va, int *); @@ -565,6 +567,6 @@ int __vcscanf(int callback(void *), // if (items == -1) free(entry->ptr); free(entry); } - if (buf) free(buf); + if (fpbuf) free(fpbuf); return items; }