From 7faaac612a42f5ea4113b3c833fe5dfff386dc88 Mon Sep 17 00:00:00 2001 From: Joshua James Venter Date: Wed, 10 Jul 2024 17:49:51 +0200 Subject: [PATCH] atol bug-fix for leading underscores - Support leading underscores (bug fix) for prefixed literal - Added relevant tests Signed-off-by: Joshua James Venter --- docs/changelog.md | 7 +++++ stdlib/src/builtin/string.mojo | 12 +++++--- stdlib/test/builtin/test_string.mojo | 41 ++++++++++++++++++++++++---- 3 files changed, 51 insertions(+), 9 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 3cb11dc845..6c2e95c5ea 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -471,6 +471,13 @@ future and `StringSlice.__len__` now does return the Unicode codepoints length. - `SIMD.load/store` now supports `UnsafePointer` overloads. +- The `atol` function now correctly supports leading underscores, + (e.g.`atol("0x_ff", 0)`), when the appropriate base is specified or inferred + (base 0). non-base-10 integer literals as per Python's [Integer Literals](\ + ). + ([PR #3180](https://github.com/modularml/mojo/pull/3180) + by [@jjvraw](https://github.com/jjvraw)) + ### ❌ Removed - It is no longer possible to cast (implicitly or explicitly) from `Reference` diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo index 5483d65ca0..e3a3c9b353 100644 --- a/stdlib/src/builtin/string.mojo +++ b/stdlib/src/builtin/string.mojo @@ -228,6 +228,7 @@ fn _atol(str_ref: StringRef, base: Int = 10) raises -> Int: var ord_letter_max = (-1, -1) var result = 0 var is_negative: Bool = False + var has_prefix: Bool = False var start: Int = 0 var str_len = len(str_ref) var buff = str_ref.unsafe_ptr() @@ -250,14 +251,17 @@ fn _atol(str_ref: StringRef, base: Int = 10) raises -> Int: str_ref[start + 1] == "b" or str_ref[start + 1] == "B" ): start += 2 + has_prefix = True elif base == 8 and ( str_ref[start + 1] == "o" or str_ref[start + 1] == "O" ): start += 2 + has_prefix = True elif base == 16 and ( str_ref[start + 1] == "x" or str_ref[start + 1] == "X" ): start += 2 + has_prefix = True alias ord_0 = ord("0") # FIXME: @@ -269,6 +273,7 @@ fn _atol(str_ref: StringRef, base: Int = 10) raises -> Int: var real_base_new_start = _identify_base(str_ref, start) real_base = real_base_new_start[0] start = real_base_new_start[1] + has_prefix = real_base != 10 if real_base == -1: raise Error(_atol_error(base, str_ref)) else: @@ -285,10 +290,9 @@ fn _atol(str_ref: StringRef, base: Int = 10) raises -> Int: var found_valid_chars_after_start = False var has_space_after_number = False - # single underscores are only allowed between digits - # starting "was_last_digit_undescore" to true such that - # if the first digit is an undesrcore an error is raised - var was_last_digit_undescore = True + # Prefixed integer literals with real_base 2, 8, 16 may begin with leading + # underscores under the conditions they have a prefix + var was_last_digit_undescore = not (real_base in (2, 8, 16) and has_prefix) for pos in range(start, str_len): var ord_current = int(buff[pos]) if ord_current == ord_underscore: diff --git a/stdlib/test/builtin/test_string.mojo b/stdlib/test/builtin/test_string.mojo index 1a0fe0c470..025db4956f 100644 --- a/stdlib/test/builtin/test_string.mojo +++ b/stdlib/test/builtin/test_string.mojo @@ -365,6 +365,9 @@ def test_atol(): assert_equal(10, atol("0o12", 8)) assert_equal(10, atol("0O12", 8)) assert_equal(35, atol("Z", 36)) + assert_equal(255, atol("0x_00_ff", 16)) + assert_equal(18, atol("0b0001_0010", 2)) + assert_equal(18, atol("0b_000_1001_0", 2)) # Negative cases with assert_raises( @@ -398,12 +401,37 @@ def test_atol(): ): _ = atol("5", 5) + with assert_raises( + contains="String is not convertible to integer with base 10: '0x_ff'" + ): + _ = atol("0x_ff") + + with assert_raises( + contains="String is not convertible to integer with base 3: '_12'" + ): + _ = atol("_12", 3) + with assert_raises(contains="Base must be >= 2 and <= 36, or 0."): _ = atol("0", 1) with assert_raises(contains="Base must be >= 2 and <= 36, or 0."): _ = atol("0", 37) + with assert_raises( + contains="String is not convertible to integer with base 16: '_ff'" + ): + _ = atol("_ff", base=16) + + with assert_raises( + contains="String is not convertible to integer with base 2: ' _01'" + ): + _ = atol(" _01", base=2) + + with assert_raises( + contains="String is not convertible to integer with base 10: '0x_ff'" + ): + _ = atol("0x_ff") + with assert_raises( contains="String is not convertible to integer with base 10: ''" ): @@ -433,6 +461,14 @@ def test_atol_base_0(): assert_equal(0, atol("0X0", base=0)) + assert_equal(255, atol("0x_00_ff", base=0)) + + assert_equal(18, atol("0b_0001_0010", base=0)) + assert_equal(18, atol("0b000_1001_0", base=0)) + + assert_equal(10, atol("0o_000_12", base=0)) + assert_equal(10, atol("0o00_12", base=0)) + with assert_raises( contains="String is not convertible to integer with base 0: ' 0x'" ): @@ -453,11 +489,6 @@ def test_atol_base_0(): ): _ = atol("0r100", base=0) - with assert_raises( - contains="String is not convertible to integer with base 0: '0b_0'" - ): - _ = atol("0b_0", base=0) - with assert_raises( contains="String is not convertible to integer with base 0: '0xf__f'" ):