From 5e0085b0b90abea029268a8cf2872cee010372cf Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Wed, 5 Jun 2024 15:50:44 -0400
Subject: [PATCH 01/27] start fixing isspace and str len

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 install_id                         |   1 +
 stdlib/src/builtin/string.mojo     | 120 +++++++++++++----------------
 stdlib/src/utils/string_slice.mojo |  42 ++++++++++
 3 files changed, 98 insertions(+), 65 deletions(-)
 create mode 100644 install_id

diff --git a/install_id b/install_id
new file mode 100644
index 0000000000..d9bca3b7f2
--- /dev/null
+++ b/install_id
@@ -0,0 +1 @@
+vj3s8KOW8jfIJiUFVSYOasUNvswv4KZ_oz0P2TsU_FU
\ No newline at end of file
diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo
index 5972fe35ae..c3c336246c 100644
--- a/stdlib/src/builtin/string.mojo
+++ b/stdlib/src/builtin/string.mojo
@@ -705,7 +705,7 @@ struct _StringIter[
         self.length = length
         self.continuation_bytes = 0
         for i in range(length):
-            if _utf8_byte_type(int(unsafe_pointer[i])) == 1:
+            if _utf8_byte_type(unsafe_pointer[i]) == 1:
                 self.continuation_bytes += 1
 
     fn __iter__(self) -> Self:
@@ -716,7 +716,7 @@ struct _StringIter[
         if forward:
             var byte_len = 1
             if self.continuation_bytes > 0:
-                var byte_type = _utf8_byte_type(int(self.ptr[self.index]))
+                var byte_type = _utf8_byte_type(self.ptr[self.index])
                 if byte_type != 0:
                     byte_len = int(byte_type)
                     self.continuation_bytes -= byte_len - 1
@@ -728,11 +728,11 @@ struct _StringIter[
         else:
             var byte_len = 1
             if self.continuation_bytes > 0:
-                var byte_type = _utf8_byte_type(int(self.ptr[self.index - 1]))
+                var byte_type = _utf8_byte_type(self.ptr[self.index - 1])
                 if byte_type != 0:
                     while byte_type == 1:
                         byte_len += 1
-                        var b = int(self.ptr[self.index - byte_len])
+                        var b = self.ptr[self.index - byte_len]
                         byte_type = _utf8_byte_type(b)
                     self.continuation_bytes -= byte_len - 1
             self.index -= byte_len
@@ -1198,7 +1198,7 @@ struct String(
             An iterator of references to the string elements.
         """
         return _StringIter[__lifetime_of(self)](
-            unsafe_pointer=self.unsafe_ptr(), length=len(self)
+            unsafe_pointer=self.unsafe_ptr(), length=self.byte_length()
         )
 
     fn __reversed__(ref [_]self) -> _StringIter[__lifetime_of(self), False]:
@@ -1208,7 +1208,7 @@ struct String(
             A reversed iterator of references to the string elements.
         """
         return _StringIter[__lifetime_of(self), forward=False](
-            unsafe_pointer=self.unsafe_ptr(), length=len(self)
+            unsafe_pointer=self.unsafe_ptr(), length=self.byte_length()
         )
 
     # ===------------------------------------------------------------------=== #
@@ -1222,7 +1222,7 @@ struct String(
         Returns:
             True if the string length is greater than zero, and False otherwise.
         """
-        return len(self) > 0
+        return self.byte_length() > 0
 
     @always_inline
     fn __len__(self) -> Int:
@@ -1231,12 +1231,15 @@ struct String(
         Returns:
             The string byte length.
         """
-        # Avoid returning -1 if the buffer is not initialized
-        if not self.unsafe_ptr():
-            return 0
+        # TODO: everything uses this method assuming it's byte length
+        # var unicode_length = self.byte_length()
 
-        # The negative 1 is to account for the terminator.
-        return len(self._buffer) - 1
+        # for i in range(unicode_length):
+        #     if _utf8_byte_type(self._buffer[i]) == 1:
+        #         unicode_length -= 1
+
+        # return unicode_length
+        return self.byte_length()
 
     @always_inline
     fn __str__(self) -> String:
@@ -1267,6 +1270,19 @@ struct String(
     # Methods
     # ===------------------------------------------------------------------=== #
 
+    fn byte_length(self) -> Int:
+        """Returns the string byte length.
+
+        Returns:
+            The string byte length.
+        """
+        # Avoid returning -1 if the buffer is not initialized
+        if not self.unsafe_ptr():
+            return 0
+
+        # The negative 1 is to account for the terminator.
+        return len(self._buffer) - 1
+
     @always_inline
     fn _adjust_span(self, span: Slice) -> Slice:
         """Adjusts the span based on the string length."""
@@ -1552,49 +1568,22 @@ struct String(
         )
 
     fn isspace(self) -> Bool:
-        """Determines whether the given String is a python
-        whitespace String. This corresponds to Python's
+        """Determines whether every character in the given String is a
+        python whitespace String. This corresponds to Python's
         [universal separators](
             https://docs.python.org/3/library/stdtypes.html#str.splitlines)
         `" \\t\\n\\r\\f\\v\\x1c\\x1e\\x85\\u2028\\u2029"`.
 
         Returns:
-            True if the String is one of the whitespace characters
+            True if the whole String is made up of whitespace characters
                 listed above, otherwise False.
         """
-        # TODO add line and paragraph separator as stringliteral
-        # once unicode escape secuences are accepted
-        var next_line = List[UInt8](0xC2, 0x85)
-        """TODO: \\x85"""
-        var unicode_line_sep = List[UInt8](0xE2, 0x80, 0xA8)
-        """TODO: \\u2028"""
-        var unicode_paragraph_sep = List[UInt8](0xE2, 0x80, 0xA9)
-        """TODO: \\u2029"""
-
-        @always_inline
-        fn _compare(
-            item1: UnsafePointer[UInt8], item2: UnsafePointer[UInt8], amnt: Int
-        ) -> Bool:
-            var ptr1 = DTypePointer(item1)
-            var ptr2 = DTypePointer(item2)
-            return memcmp(ptr1, ptr2, amnt) == 0
 
         if len(self) == 0:
             return False
 
         for s in self:
-            var no_null_len = len(s)
-            var ptr = s.unsafe_ptr()
-            if no_null_len == 1 and not _isspace(ptr[0]):
-                return False
-            elif no_null_len == 2 and not _compare(
-                ptr, next_line.unsafe_ptr(), 2
-            ):
-                return False
-            elif no_null_len == 3 and not (
-                _compare(ptr, unicode_line_sep.unsafe_ptr(), 3)
-                or _compare(ptr, unicode_paragraph_sep.unsafe_ptr(), 3)
-            ):
+            if not s.isspace():
                 return False
         return True
 
@@ -1623,7 +1612,7 @@ struct String(
         """
         var output = List[String]()
 
-        var str_iter_len = len(self) - 1
+        var str_byte_len = len(self) - 1
         var lhs = 0
         var rhs = 0
         var items = 0
@@ -1631,7 +1620,7 @@ struct String(
         if sep_len == 0:
             raise Error("ValueError: empty separator")
 
-        while lhs <= str_iter_len:
+        while lhs <= str_byte_len:
             rhs = self.find(sep, lhs)
             if rhs == -1:
                 output.append(self[lhs:])
@@ -1650,12 +1639,13 @@ struct String(
             output.append("")
         return output
 
-    fn split(self, *, maxsplit: Int = -1) -> List[String]:
+    fn split(self, sep: NoneType = None, maxsplit: Int = -1) -> List[String]:
         """Split the string by every Whitespace separator.
 
         Currently only uses C style separators.
 
         Args:
+            sep: None.
             maxsplit: The maximum amount of items to split from String. Defaults
                 to unlimited.
 
@@ -1671,41 +1661,39 @@ struct String(
 
         # Splitting a string with leading, trailing, and middle whitespaces
         _ = String("      hello    world     ").split() # ["hello", "world"]
+        # Splitting adjacent universal newlines:
+        _ = String(
+            "hello \\t\\n\\r\\f\\v\\x1c\\x1e\\x85\\u2028\\u2029world"
+        ).split()  # ["hello", "world"]
         ```
         .
         """
-        # TODO: implement and document splitting adjacent universal newlines:
-        # _ = String(
-        #     "hello \\t\\n\\r\\f\\v\\x1c\\x1e\\x85\\u2028\\u2029world"
-        # ).split()  # ["hello", "world"]
 
         var output = List[String]()
 
-        var str_iter_len = len(self) - 1
+        var str_byte_len = len(self) - 1
         var lhs = 0
         var rhs = 0
         var items = 0
-        # FIXME: this should iterate and build unicode strings
-        # and use self.isspace()
-        while lhs <= str_iter_len:
+        for substr in self:
             # Python adds all "whitespace chars" as one separator
             # if no separator was specified
-            while lhs <= str_iter_len:
-                if not _isspace(self._buffer.unsafe_get(lhs)[]):
+            while lhs <= str_byte_len:
+                if not substr.isspace():
                     break
                 lhs += 1
             # if it went until the end of the String, then
             # it should be sliced up until the original
             # start of the whitespace which was already appended
-            if lhs - 1 == str_iter_len:
+            if lhs - 1 == str_byte_len:
                 break
-            elif lhs == str_iter_len:
+            elif lhs == str_byte_len:
                 # if the last char is not whitespace
-                output.append(self[str_iter_len])
+                output.append(self[str_byte_len])
                 break
             rhs = lhs + 1
-            while rhs <= str_iter_len:
-                if _isspace(self._buffer.unsafe_get(rhs)[]):
+            while rhs <= str_byte_len:
+                if substr.isspace():
                     break
                 rhs += 1
 
@@ -1860,9 +1848,10 @@ struct String(
         Returns:
             A copy of the string with no trailing whitespaces.
         """
-        # TODO: should use self.__iter__ and self.isspace()
         var r_idx = len(self)
-        while r_idx > 0 and _isspace(self._buffer.unsafe_get(r_idx - 1)[]):
+        for s in self.__reversed__():
+            if not s.isspace():
+                break
             r_idx -= 1
         return self[:r_idx]
 
@@ -1888,9 +1877,10 @@ struct String(
         Returns:
             A copy of the string with no leading whitespaces.
         """
-        # TODO: should use self.__iter__ and self.isspace()
         var l_idx = 0
-        while l_idx < len(self) and _isspace(self._buffer.unsafe_get(l_idx)[]):
+        for s in self:
+            if not s.isspace():
+                break
             l_idx += 1
         return self[l_idx:]
 
diff --git a/stdlib/src/utils/string_slice.mojo b/stdlib/src/utils/string_slice.mojo
index 6922066129..12298eb8d5 100644
--- a/stdlib/src/utils/string_slice.mojo
+++ b/stdlib/src/utils/string_slice.mojo
@@ -21,6 +21,7 @@ from utils import StringSlice
 """
 
 from utils import Span
+from builtin.string import _isspace
 
 
 struct StringSlice[
@@ -180,3 +181,44 @@ struct StringSlice[
         without the string getting deallocated early.
         """
         pass
+
+    fn isspace(self) -> Bool:
+        """Determines whether the given StringSlice is a python
+        whitespace String. This corresponds to Python's
+        [universal separators](
+            https://docs.python.org/3/library/stdtypes.html#str.splitlines)
+        `" \\t\\n\\r\\f\\v\\x1c\\x1e\\x85\\u2028\\u2029"`.
+
+        Returns:
+            True if the String is one of the whitespace characters
+                listed above, otherwise False.
+        """
+        # TODO add line and paragraph separator as stringliteral
+        # once unicode escape secuences are accepted
+        var next_line = List[UInt8](0xC2, 0x85)
+        """TODO: \\x85"""
+        var unicode_line_sep = List[UInt8](0xE2, 0x80, 0xA8)
+        """TODO: \\u2028"""
+        var unicode_paragraph_sep = List[UInt8](0xE2, 0x80, 0xA9)
+        """TODO: \\u2029"""
+
+        @always_inline
+        fn _compare(
+            item1: UnsafePointer[UInt8], item2: UnsafePointer[UInt8], amnt: Int
+        ) -> Bool:
+            var ptr1 = DTypePointer(item1)
+            var ptr2 = DTypePointer(item2)
+            return memcmp(ptr1, ptr2, amnt) == 0
+
+        var no_null_len = len(self)
+        var ptr = self.unsafe_ptr()
+        if no_null_len == 1 and not _isspace(ptr[0]):
+            return False
+        elif no_null_len == 2 and not _compare(ptr, next_line.unsafe_ptr(), 2):
+            return False
+        elif no_null_len == 3 and not (
+            _compare(ptr, unicode_line_sep.unsafe_ptr(), 3)
+            or _compare(ptr, unicode_paragraph_sep.unsafe_ptr(), 3)
+        ):
+            return False
+        return False

From 954d5ab8216f77632827969539f6e2d7b160f682 Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Wed, 5 Jun 2024 16:04:08 -0400
Subject: [PATCH 02/27] remove accidentally committed install_id

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 install_id | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 install_id

diff --git a/install_id b/install_id
deleted file mode 100644
index d9bca3b7f2..0000000000
--- a/install_id
+++ /dev/null
@@ -1 +0,0 @@
-vj3s8KOW8jfIJiUFVSYOasUNvswv4KZ_oz0P2TsU_FU
\ No newline at end of file

From 7e8a33d2c6d867849669041ff52678c7da3742d8 Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Tue, 11 Jun 2024 09:55:14 -0400
Subject: [PATCH 03/27] fix StringSlice.isspace return values and split() iteration

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 .gitignore                         |  2 ++
 stdlib/src/builtin/string.mojo     | 34 ++++++++++++++++++------------
 stdlib/src/utils/string_slice.mojo | 13 ++++++------
 3 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/.gitignore b/.gitignore
index 7beaae8a60..c7d479dafc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,3 +19,5 @@ venv.bak/
 
 # MacOS
 .DS_Store
+
+install_id
\ No newline at end of file
diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo
index d2e6af8048..01eff2d3d6 100644
--- a/stdlib/src/builtin/string.mojo
+++ b/stdlib/src/builtin/string.mojo
@@ -1656,17 +1656,17 @@ struct String(
 
         var output = List[String]()
 
-        var str_byte_len = len(self) - 1
+        var str_byte_len = self.byte_length() - 1
         var lhs = 0
         var rhs = 0
         var items = 0
-        for substr in self:
+        while lhs <= str_byte_len:
             # Python adds all "whitespace chars" as one separator
             # if no separator was specified
-            while lhs <= str_byte_len:
-                if not substr.isspace():
+            for s in self[lhs:]:
+                if not s.isspace():
                     break
-                lhs += 1
+                lhs += len(s)
             # if it went until the end of the String, then
             # it should be sliced up until the original
             # start of the whitespace which was already appended
@@ -1677,10 +1677,10 @@ struct String(
                 output.append(self[str_byte_len])
                 break
             rhs = lhs + 1
-            while rhs <= str_byte_len:
-                if substr.isspace():
+            for s in self[lhs + 1 :]:
+                if s.isspace():
                     break
-                rhs += 1
+                rhs += len(s)
 
             if maxsplit > -1:
                 if items == maxsplit:
@@ -1834,9 +1834,12 @@ struct String(
             A copy of the string with no trailing whitespaces.
         """
         var r_idx = len(self)
-        for s in self.__reversed__():
-            if not s.isspace():
-                break
+        # TODO: should use this once llvm intrinsics can be used at comp time
+        # for s in self.__reversed__():
+        #     if not s.isspace():
+        #         break
+        #     r_idx -= 1
+        while r_idx > 0 and _isspace(self._buffer.unsafe_get(r_idx - 1)[]):
             r_idx -= 1
         return self[:r_idx]
 
@@ -1863,9 +1866,12 @@ struct String(
             A copy of the string with no leading whitespaces.
         """
         var l_idx = 0
-        for s in self:
-            if not s.isspace():
-                break
+        # TODO: should use this once llvm intrinsics can be used at comp time
+        # for s in self:
+        #     if not s.isspace():
+        #         break
+        #     l_idx += 1
+        while l_idx < len(self) and _isspace(self._buffer.unsafe_get(l_idx)[]):
             l_idx += 1
         return self[l_idx:]
 
diff --git a/stdlib/src/utils/string_slice.mojo b/stdlib/src/utils/string_slice.mojo
index 12298eb8d5..54e7d097f0 100644
--- a/stdlib/src/utils/string_slice.mojo
+++ b/stdlib/src/utils/string_slice.mojo
@@ -212,13 +212,14 @@ struct StringSlice[
 
         var no_null_len = len(self)
         var ptr = self.unsafe_ptr()
-        if no_null_len == 1 and not _isspace(ptr[0]):
-            return False
-        elif no_null_len == 2 and not _compare(ptr, next_line.unsafe_ptr(), 2):
-            return False
-        elif no_null_len == 3 and not (
+        if no_null_len == 1 and _isspace(ptr[0]):
+            return True
+        elif no_null_len == 2 and _compare(ptr, next_line.unsafe_ptr(), 2):
+            return True
+        elif no_null_len == 3 and (
             _compare(ptr, unicode_line_sep.unsafe_ptr(), 3)
             or _compare(ptr, unicode_paragraph_sep.unsafe_ptr(), 3)
         ):
-            return False
+            return True
+        _ = next_line, unicode_line_sep, unicode_paragraph_sep
         return False

From 944d8516aba27f487fd55cd0a1c3d46ab5109ab4 Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Tue, 11 Jun 2024 10:09:26 -0400
Subject: [PATCH 04/27] have String.__len__ return a unicode_length variable

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 stdlib/src/builtin/string.mojo | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo
index 01eff2d3d6..81b06db539 100644
--- a/stdlib/src/builtin/string.mojo
+++ b/stdlib/src/builtin/string.mojo
@@ -1236,15 +1236,14 @@ struct String(
         Returns:
             The string byte length.
         """
-        # TODO: everything uses this method assuming it's byte length
-        # var unicode_length = self.byte_length()
+        var unicode_length = self.byte_length()
 
+        # TODO: everything uses this method assuming it's byte length
         # for i in range(unicode_length):
         #     if _utf8_byte_type(self._buffer[i]) == 1:
         #         unicode_length -= 1
 
-        # return unicode_length
-        return self.byte_length()
+        return unicode_length
 
     @always_inline
     fn __str__(self) -> String:

From 968a3e3d0594267145e5087606d92fdd3157489b Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Tue, 11 Jun 2024 10:25:20 -0400
Subject: [PATCH 05/27] use byte_length() in String.isspace and split(sep)

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 stdlib/src/builtin/string.mojo | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo
index 81b06db539..9ceeeb939e 100644
--- a/stdlib/src/builtin/string.mojo
+++ b/stdlib/src/builtin/string.mojo
@@ -1563,7 +1563,7 @@ struct String(
                 listed above, otherwise False.
         """
 
-        if len(self) == 0:
+        if self.byte_length() == 0:
             return False
 
         for s in self:
@@ -1596,11 +1596,11 @@ struct String(
         """
         var output = List[String]()
 
-        var str_byte_len = len(self) - 1
+        var str_byte_len = self.byte_length() - 1
         var lhs = 0
         var rhs = 0
         var items = 0
-        var sep_len = len(sep)
+        var sep_len = sep.byte_length()
         if sep_len == 0:
             raise Error("ValueError: empty separator")
 

From 43d8b1005af6e0b8f74fcea90f19c305a1605c75 Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Tue, 11 Jun 2024 10:30:50 -0400
Subject: [PATCH 06/27] test split() against all universal separators

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 stdlib/test/builtin/test_string.mojo | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/stdlib/test/builtin/test_string.mojo b/stdlib/test/builtin/test_string.mojo
index 5eb1b077c8..73c62e0dce 100644
--- a/stdlib/test/builtin/test_string.mojo
+++ b/stdlib/test/builtin/test_string.mojo
@@ -628,8 +628,31 @@ fn test_split() raises:
     assert_true(d[0] == "hello \t" and d[1] == "" and d[2] == "\v\fworld")
 
     # Should add all whitespace-like chars as one
-    alias utf8_spaces = String(" \t\n\r\v\f")
-    var s = utf8_spaces + "hello" + utf8_spaces + "world" + utf8_spaces
+    # test all unicode separators
+    # 0 is to build a String with null terminator
+    alias next_line = List[UInt8](0xC2, 0x85, 0)
+    """TODO: \\x85"""
+    alias unicode_line_sep = List[UInt8](0xE2, 0x80, 0xA8, 0)
+    """TODO: \\u2028"""
+    alias unicode_paragraph_sep = List[UInt8](0xE2, 0x80, 0xA9, 0)
+    """TODO: \\u2029"""
+    # TODO add line and paragraph separator as stringliteral once unicode
+    # escape secuences are accepted
+    var univ_sep_var = (
+        String(" ")
+        + String("\t")
+        + String("\n")
+        + String("\r")
+        + String("\v")
+        + String("\f")
+        + String("\x1c")
+        + String("\x1d")
+        + String("\x1e")
+        + String(next_line)
+        + String(unicode_line_sep)
+        + String(unicode_paragraph_sep)
+    )
+    var s = univ_sep_var + "hello" + univ_sep_var + "world" + univ_sep_var
     d = s.split()
     assert_true(len(d) == 2)
     assert_true(d[0] == "hello" and d[1] == "world")

From b1f305539c8a4caba9b16947d6e89024f0877cad Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Tue, 11 Jun 2024 19:04:29 -0400
Subject: [PATCH 07/27] add \x1d to isspace docstrings

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 stdlib/src/builtin/string.mojo     | 2 +-
 stdlib/src/utils/string_slice.mojo | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo
index 9ceeeb939e..832673e9c3 100644
--- a/stdlib/src/builtin/string.mojo
+++ b/stdlib/src/builtin/string.mojo
@@ -1556,7 +1556,7 @@ struct String(
         python whitespace String. This corresponds to Python's
         [universal separators](
             https://docs.python.org/3/library/stdtypes.html#str.splitlines)
-        `" \\t\\n\\r\\f\\v\\x1c\\x1e\\x85\\u2028\\u2029"`.
+        `" \\t\\n\\r\\f\\v\\x1c\\x1d\\x1e\\x85\\u2028\\u2029"`.
 
         Returns:
             True if the whole String is made up of whitespace characters
diff --git a/stdlib/src/utils/string_slice.mojo b/stdlib/src/utils/string_slice.mojo
index 54e7d097f0..9254ce63c5 100644
--- a/stdlib/src/utils/string_slice.mojo
+++ b/stdlib/src/utils/string_slice.mojo
@@ -187,7 +187,7 @@ struct StringSlice[
         whitespace String. This corresponds to Python's
         [universal separators](
             https://docs.python.org/3/library/stdtypes.html#str.splitlines)
-        `" \\t\\n\\r\\f\\v\\x1c\\x1e\\x85\\u2028\\u2029"`.
+        `" \\t\\n\\r\\f\\v\\x1c\\x1d\\x1e\\x85\\u2028\\u2029"`.
 
         Returns:
             True if the String is one of the whitespace characters

From 8e5c0ca9aa72aba8cdc22618349f54f096e2cc62 Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Tue, 11 Jun 2024 19:07:10 -0400
Subject: [PATCH 08/27] drop "C style separators" note from split() docstring

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 stdlib/src/builtin/string.mojo | 2 --
 1 file changed, 2 deletions(-)

diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo
index 832673e9c3..76f2d42fb3 100644
--- a/stdlib/src/builtin/string.mojo
+++ b/stdlib/src/builtin/string.mojo
@@ -1626,8 +1626,6 @@ struct String(
     fn split(self, sep: NoneType = None, maxsplit: Int = -1) -> List[String]:
         """Split the string by every Whitespace separator.
 
-        Currently only uses C style separators.
-
         Args:
             sep: None.
             maxsplit: The maximum amount of items to split from String. Defaults

From cbe54cdf51bd28a4effd28f0c3613dff5a4c1a92 Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Tue, 11 Jun 2024 19:07:45 -0400
Subject: [PATCH 09/27] explicitly discard unused sep argument in split()

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 stdlib/src/builtin/string.mojo | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo
index 76f2d42fb3..6b59db0964 100644
--- a/stdlib/src/builtin/string.mojo
+++ b/stdlib/src/builtin/string.mojo
@@ -1651,8 +1651,8 @@ struct String(
         .
         """
 
+        _ = sep
         var output = List[String]()
-
         var str_byte_len = self.byte_length() - 1
         var lhs = 0
         var rhs = 0

From 6e7754f56351000ab21cb8af15efda204e62a5c0 Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Tue, 11 Jun 2024 20:37:41 -0400
Subject: [PATCH 10/27] add \x1d to split() docstring example

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 stdlib/src/builtin/string.mojo | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo
index 6b59db0964..efdbbf120b 100644
--- a/stdlib/src/builtin/string.mojo
+++ b/stdlib/src/builtin/string.mojo
@@ -1645,7 +1645,7 @@ struct String(
         _ = String("      hello    world     ").split() # ["hello", "world"]
         # Splitting adjacent universal newlines:
         _ = String(
-            "hello \\t\\n\\r\\f\\v\\x1c\\x1e\\x85\\u2028\\u2029world"
+            "hello \\t\\n\\r\\f\\v\\x1c\\x1d\\x1e\\x85\\u2028\\u2029world"
         ).split()  # ["hello", "world"]
         ```
         .

From 64645eecf0a16efeab5cb61fffa7f555d94820aa Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Wed, 12 Jun 2024 11:02:19 -0400
Subject: [PATCH 11/27] document that byte_length() excludes the null terminator

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 stdlib/src/builtin/string.mojo | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo
index efdbbf120b..50ad8c0c85 100644
--- a/stdlib/src/builtin/string.mojo
+++ b/stdlib/src/builtin/string.mojo
@@ -1275,10 +1275,10 @@ struct String(
     # ===------------------------------------------------------------------=== #
 
     fn byte_length(self) -> Int:
-        """Returns the string byte length.
+        """Returns the string byte length without null terminator.
 
         Returns:
-            The string byte length.
+            The string byte length without null terminator.
         """
         # Avoid returning -1 if the buffer is not initialized
         if not self.unsafe_ptr():

From 2fd95ceabe4e8cbdf03a4991835f77d946b243ca Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Tue, 2 Jul 2024 11:54:47 -0400
Subject: [PATCH 12/27] fix suggestions

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 .gitignore                     | 2 --
 stdlib/src/builtin/string.mojo | 6 +++---
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/.gitignore b/.gitignore
index c7d479dafc..7beaae8a60 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,5 +19,3 @@ venv.bak/
 
 # MacOS
 .DS_Store
-
-install_id
\ No newline at end of file
diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo
index f42240ff89..b799c4cd6c 100644
--- a/stdlib/src/builtin/string.mojo
+++ b/stdlib/src/builtin/string.mojo
@@ -1056,8 +1056,9 @@ struct String(
 
         ```mojo
         var string = String.format_sequence(1, ", ", 2.0, ", ", "three")
-
-        assert_equal(string, "1, 2.0, three")
+        print(string) # "1, 2.0, three"
+        %# from testing import assert_equal
+        %# assert_equal(string, "1, 2.0, three")
         ```
         .
         """
@@ -1780,7 +1781,6 @@ struct String(
         .
         """
 
-        _ = sep
         var output = List[String]()
         var str_byte_len = self.byte_length() - 1
         var lhs = 0

From 2cb088066a535923879aa9ef3cf07b954d6ce4dc Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Tue, 2 Jul 2024 11:58:36 -0400
Subject: [PATCH 13/27] add llvm intrinsics issue #933

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 stdlib/src/builtin/string.mojo | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo
index c690f79814..a4f80f725d 100644
--- a/stdlib/src/builtin/string.mojo
+++ b/stdlib/src/builtin/string.mojo
@@ -1981,7 +1981,7 @@ struct String(
             A copy of the string with no trailing whitespaces.
         """
         var r_idx = len(self)
-        # TODO: should use this once llvm intrinsics can be used at comp time
+        # TODO (#933): should use this once llvm intrinsics can be used at comp time
         # for s in self.__reversed__():
         #     if not s.isspace():
         #         break
@@ -2013,7 +2013,7 @@ struct String(
             A copy of the string with no leading whitespaces.
         """
         var l_idx = 0
-        # TODO: should use this once llvm intrinsics can be used at comp time
+        # TODO (#933): should use this once llvm intrinsics can be used at comp time
         # for s in self:
         #     if not s.isspace():
         #         break

From 051e6ebcfc64fb67f19684a358622834533c16d8 Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Tue, 2 Jul 2024 12:04:46 -0400
Subject: [PATCH 14/27] remove duplicate assert_equal import from docstring example

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 stdlib/src/builtin/string.mojo | 2 --
 1 file changed, 2 deletions(-)

diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo
index a4f80f725d..e374a130b6 100644
--- a/stdlib/src/builtin/string.mojo
+++ b/stdlib/src/builtin/string.mojo
@@ -1074,8 +1074,6 @@ struct String(
         Construct a String from several `Formattable` arguments:
 
         ```mojo
-        from testing import assert_equal
-
         var string = String.format_sequence(1, ", ", 2.0, ", ", "three")
         print(string) # "1, 2.0, three"
         %# from testing import assert_equal

From b32daf3e4616e451099c2c166b240d5add96419c Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Mon, 8 Jul 2024 14:46:37 -0400
Subject: [PATCH 15/27] move isspace and stringiter impl to stringslice

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 stdlib/src/builtin/string.mojo         | 136 ++++-----------------
 stdlib/src/builtin/string_literal.mojo |   9 +-
 stdlib/src/utils/string_slice.mojo     | 163 ++++++++++++++++++++-----
 3 files changed, 157 insertions(+), 151 deletions(-)

diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo
index e374a130b6..77be761554 100644
--- a/stdlib/src/builtin/string.mojo
+++ b/stdlib/src/builtin/string.mojo
@@ -25,6 +25,7 @@ from memory import DTypePointer, LegacyPointer, UnsafePointer, memcmp, memcpy
 
 from utils import Span, StaticIntTuple, StringRef, StringSlice
 from utils._format import Formattable, Formatter, ToFormatter
+from utils.string_slice import _StringSliceIter
 
 # ===----------------------------------------------------------------------=== #
 # ord
@@ -69,11 +70,11 @@ fn ord(s: StringSlice) -> Int:
     var p = s.unsafe_ptr().bitcast[UInt8]()
     var b1 = p[]
     if (b1 >> 7) == 0:  # This is 1 byte ASCII char
-        debug_assert(s._byte_length() == 1, "input string length must be 1")
+        debug_assert(s.byte_length() == 1, "input string length must be 1")
         return int(b1)
     var num_bytes = countl_zero(~b1)
     debug_assert(
-        s._byte_length() == int(num_bytes), "input string must be one character"
+        s.byte_length() == int(num_bytes), "input string must be one character"
     )
     debug_assert(
         1 < int(num_bytes) < 5, "invalid UTF-8 byte " + str(b1) + " at index 0"
@@ -782,76 +783,6 @@ fn _utf8_byte_type(b: UInt8) -> UInt8:
     return countl_zero(~(b & 0b1111_0000))
 
 
-@value
-struct _StringIter[
-    is_mutable: Bool, //,
-    lifetime: AnyLifetime[is_mutable].type,
-    forward: Bool = True,
-]:
-    """Iterator for String.
-
-    Parameters:
-        is_mutable: Whether the slice is mutable.
-        lifetime: The lifetime of the underlying string data.
-        forward: The iteration direction. `False` is backwards.
-    """
-
-    var index: Int
-    var continuation_bytes: Int
-    var ptr: UnsafePointer[UInt8]
-    var length: Int
-
-    fn __init__(
-        inout self, *, unsafe_pointer: UnsafePointer[UInt8], length: Int
-    ):
-        self.index = 0 if forward else length
-        self.ptr = unsafe_pointer
-        self.length = length
-        self.continuation_bytes = 0
-        for i in range(length):
-            if _utf8_byte_type(unsafe_pointer[i]) == 1:
-                self.continuation_bytes += 1
-
-    fn __iter__(self) -> Self:
-        return self
-
-    fn __next__(inout self) -> StringSlice[lifetime]:
-        @parameter
-        if forward:
-            var byte_len = 1
-            if self.continuation_bytes > 0:
-                var byte_type = _utf8_byte_type(self.ptr[self.index])
-                if byte_type != 0:
-                    byte_len = int(byte_type)
-                    self.continuation_bytes -= byte_len - 1
-            self.index += byte_len
-            return StringSlice[lifetime](
-                unsafe_from_utf8_ptr=self.ptr + (self.index - byte_len),
-                len=byte_len,
-            )
-        else:
-            var byte_len = 1
-            if self.continuation_bytes > 0:
-                var byte_type = _utf8_byte_type(self.ptr[self.index - 1])
-                if byte_type != 0:
-                    while byte_type == 1:
-                        byte_len += 1
-                        var b = self.ptr[self.index - byte_len]
-                        byte_type = _utf8_byte_type(b)
-                    self.continuation_bytes -= byte_len - 1
-            self.index -= byte_len
-            return StringSlice[lifetime](
-                unsafe_from_utf8_ptr=self.ptr + self.index, len=byte_len
-            )
-
-    fn __len__(self) -> Int:
-        @parameter
-        if forward:
-            return self.length - self.index - self.continuation_bytes
-        else:
-            return self.index - self.continuation_bytes
-
-
 struct String(
     Sized,
     Stringable,
@@ -1314,23 +1245,25 @@ struct String(
             count=other_len + 1,
         )
 
-    fn __iter__(ref [_]self) -> _StringIter[__lifetime_of(self)]:
+    fn __iter__(ref [_]self) -> _StringSliceIter[__lifetime_of(self)]:
         """Iterate over elements of the string, returning immutable references.
 
         Returns:
             An iterator of references to the string elements.
         """
-        return _StringIter[__lifetime_of(self)](
+        return _StringSliceIter[__lifetime_of(self)](
             unsafe_pointer=self.unsafe_ptr(), length=self.byte_length()
         )
 
-    fn __reversed__(ref [_]self) -> _StringIter[__lifetime_of(self), False]:
+    fn __reversed__(
+        ref [_]self,
+    ) -> _StringSliceIter[__lifetime_of(self), False]:
         """Iterate backwards over the string, returning immutable references.
 
         Returns:
             A reversed iterator of references to the string elements.
         """
-        return _StringIter[__lifetime_of(self), forward=False](
+        return _StringSliceIter[__lifetime_of(self), forward=False](
             unsafe_pointer=self.unsafe_ptr(), length=self.byte_length()
         )
 
@@ -1400,19 +1333,6 @@ struct String(
     # Methods
     # ===------------------------------------------------------------------=== #
 
-    fn byte_length(self) -> Int:
-        """Returns the string byte length without null terminator.
-
-        Returns:
-            The string byte length without null terminator.
-        """
-        # Avoid returning -1 if the buffer is not initialized
-        if not self.unsafe_ptr():
-            return 0
-
-        # The negative 1 is to account for the terminator.
-        return len(self._buffer) - 1
-
     fn format_to(self, inout writer: Formatter):
         """
         Formats this string to the provided formatter.
@@ -1572,19 +1492,17 @@ struct String(
 
     @always_inline
     fn as_bytes_slice(ref [_]self) -> Span[UInt8, __lifetime_of(self)]:
-        """
-        Returns a contiguous slice of the bytes owned by this string.
-
-        This does not include the trailing null terminator.
-
+        """Returns a contiguous slice of the bytes owned by this string.
         Returns:
             A contiguous slice pointing to the bytes owned by this string.
+
+        Notes:
+            This does not include the trailing null terminator.
         """
 
+        # Does NOT include the NUL terminator.
         return Span[UInt8, __lifetime_of(self)](
-            unsafe_ptr=self._buffer.unsafe_ptr(),
-            # Does NOT include the NUL terminator.
-            len=self._byte_length(),
+            unsafe_ptr=self._buffer.unsafe_ptr(), len=self.byte_length()
         )
 
     @always_inline
@@ -1599,21 +1517,16 @@ struct String(
         #   guaranteed to be valid.
         return StringSlice(unsafe_from_utf8=self.as_bytes_slice())
 
-    fn _byte_length(self) -> Int:
+    fn byte_length(self) -> Int:
         """Get the string length in bytes.
 
-        This does not include the trailing null terminator in the count.
-
         Returns:
             The length of this string in bytes, excluding null terminator.
-        """
-
-        var buffer_len = len(self._buffer)
 
-        if buffer_len > 0:
-            return buffer_len - 1
-        else:
-            return buffer_len
+        Notes:
+            This does not include the trailing null terminator in the count.
+        """
+        return max(len(self._buffer) - 1, 0)
 
     fn _steal_ptr(inout self) -> UnsafePointer[UInt8]:
         """Transfer ownership of pointer to the underlying memory.
@@ -1711,14 +1624,7 @@ struct String(
             True if the whole String is made up of whitespace characters
                 listed above, otherwise False.
         """
-
-        if self.byte_length() == 0:
-            return False
-
-        for s in self:
-            if not s.isspace():
-                return False
-        return True
+        return self.as_string_slice().isspace()
 
     fn split(self, sep: String, maxsplit: Int = -1) raises -> List[String]:
         """Split the string by a separator.
diff --git a/stdlib/src/builtin/string_literal.mojo b/stdlib/src/builtin/string_literal.mojo
index c3d0ab1128..3f0f587a2d 100644
--- a/stdlib/src/builtin/string_literal.mojo
+++ b/stdlib/src/builtin/string_literal.mojo
@@ -191,7 +191,7 @@ struct StringLiteral(
         # TODO(MSTDL-160):
         #   Properly count Unicode codepoints instead of returning this length
         #   in bytes.
-        return self._byte_length()
+        return self.byte_length()
 
     @always_inline("nodebug")
     fn __bool__(self) -> Bool:
@@ -262,11 +262,14 @@ struct StringLiteral(
     # ===-------------------------------------------------------------------===#
 
     @always_inline
-    fn _byte_length(self) -> Int:
+    fn byte_length(self) -> Int:
         """Get the string length in bytes.
 
         Returns:
             The length of this StringLiteral in bytes.
+
+        Notes:
+            This does not include the trailing null terminator in the count.
         """
         return __mlir_op.`pop.string.size`(self.value)
 
@@ -333,7 +336,7 @@ struct StringLiteral(
 
         return Span[UInt8, ImmutableStaticLifetime](
             unsafe_ptr=ptr,
-            len=self._byte_length(),
+            len=self.byte_length(),
         )
 
     fn format_to(self, inout writer: Formatter):
diff --git a/stdlib/src/utils/string_slice.mojo b/stdlib/src/utils/string_slice.mojo
index 0357fa0ac9..90ef29eceb 100644
--- a/stdlib/src/utils/string_slice.mojo
+++ b/stdlib/src/utils/string_slice.mojo
@@ -21,12 +21,82 @@ from utils import StringSlice
 """
 
 from utils import Span
-from builtin.string import _isspace
+from builtin.string import _isspace, _utf8_byte_type
 
 alias StaticString = StringSlice[ImmutableStaticLifetime]
 """An immutable static string slice."""
 
 
+@value
+struct _StringSliceIter[
+    is_mutable: Bool, //,
+    lifetime: AnyLifetime[is_mutable].type,
+    forward: Bool = True,
+]:
+    """Iterator for String.
+
+    Parameters:
+        is_mutable: Whether the slice is mutable.
+        lifetime: The lifetime of the underlying string data.
+        forward: The iteration direction. `False` is backwards.
+    """
+
+    var index: Int
+    var continuation_bytes: Int
+    var ptr: UnsafePointer[UInt8]
+    var length: Int
+
+    fn __init__(
+        inout self, *, unsafe_pointer: UnsafePointer[UInt8], length: Int
+    ):
+        self.index = 0 if forward else length
+        self.ptr = unsafe_pointer
+        self.length = length
+        self.continuation_bytes = 0
+        for i in range(length):
+            if _utf8_byte_type(unsafe_pointer[i]) == 1:
+                self.continuation_bytes += 1
+
+    fn __iter__(self) -> Self:
+        return self
+
+    fn __next__(inout self) -> StringSlice[lifetime]:
+        @parameter
+        if forward:
+            var byte_len = 1
+            if self.continuation_bytes > 0:
+                var byte_type = _utf8_byte_type(self.ptr[self.index])
+                if byte_type != 0:
+                    byte_len = int(byte_type)
+                    self.continuation_bytes -= byte_len - 1
+            self.index += byte_len
+            return StringSlice[lifetime](
+                unsafe_from_utf8_ptr=self.ptr + (self.index - byte_len),
+                len=byte_len,
+            )
+        else:
+            var byte_len = 1
+            if self.continuation_bytes > 0:
+                var byte_type = _utf8_byte_type(self.ptr[self.index - 1])
+                if byte_type != 0:
+                    while byte_type == 1:
+                        byte_len += 1
+                        var b = self.ptr[self.index - byte_len]
+                        byte_type = _utf8_byte_type(b)
+                    self.continuation_bytes -= byte_len - 1
+            self.index -= byte_len
+            return StringSlice[lifetime](
+                unsafe_from_utf8_ptr=self.ptr + self.index, len=byte_len
+            )
+
+    fn __len__(self) -> Int:
+        @parameter
+        if forward:
+            return self.length - self.index - self.continuation_bytes
+        else:
+            return self.index - self.continuation_bytes
+
+
 struct StringSlice[
     is_mutable: Bool, //,
     lifetime: AnyLifetime[is_mutable].type,
@@ -69,8 +139,7 @@ struct StringSlice[
         # FIXME(MSTDL-160):
         #   Ensure StringLiteral _actually_ always uses UTF-8 encoding.
         self = StringSlice[lifetime](
-            unsafe_from_utf8_ptr=literal.unsafe_ptr(),
-            len=literal._byte_length(),
+            unsafe_from_utf8_ptr=literal.unsafe_ptr(), len=literal.byte_length()
         )
 
     @always_inline
@@ -156,9 +225,13 @@ struct StringSlice[
         Returns:
             The length in Unicode codepoints.
         """
-        # FIXME(MSTDL-160):
-        #   Actually perform UTF-8 decoding here to count the codepoints.
-        return len(self._slice)
+        var unicode_length = self.byte_length()
+
+        for i in range(unicode_length):
+            if _utf8_byte_type(self._slice[i]) == 1:
+                unicode_length -= 1
+
+        return unicode_length
 
     fn format_to(self, inout writer: Formatter):
         """
@@ -258,14 +331,35 @@ struct StringSlice[
         """
         return not self == rhs
 
+    fn __iter__(ref [_]self) -> _StringSliceIter[__lifetime_of(self)]:
+        """Iterate over elements of the string, returning immutable references.
+
+        Returns:
+            An iterator of references to the string elements.
+        """
+        return _StringSliceIter[__lifetime_of(self)](
+            unsafe_pointer=self.unsafe_ptr(), length=self.byte_length()
+        )
+
+    fn __reversed__(
+        ref [_]self,
+    ) -> _StringSliceIter[__lifetime_of(self), False]:
+        """Iterate backwards over the string, returning immutable references.
+
+        Returns:
+            A reversed iterator of references to the string elements.
+        """
+        return _StringSliceIter[__lifetime_of(self), forward=False](
+            unsafe_pointer=self.unsafe_ptr(), length=self.byte_length()
+        )
+
     # ===------------------------------------------------------------------===#
     # Methods
     # ===------------------------------------------------------------------===#
 
     @always_inline
     fn as_bytes_slice(self) -> Span[UInt8, lifetime]:
-        """
-        Get the sequence of encoded bytes as a slice of the underlying string.
+        """Get the sequence of encoded bytes as a slice of the underlying string.
 
         Returns:
             A slice containing the underlying sequence of encoded bytes.
@@ -274,8 +368,7 @@ struct StringSlice[
 
     @always_inline
     fn unsafe_ptr(self) -> UnsafePointer[UInt8]:
-        """
-        Gets a pointer to the first element of this string slice.
+        """Gets a pointer to the first element of this string slice.
 
         Returns:
             A pointer pointing at the first element of this string slice.
@@ -284,9 +377,8 @@ struct StringSlice[
         return self._slice.unsafe_ptr()
 
     @always_inline
-    fn _byte_length(self) -> Int:
-        """
-        Get the length of this string slice in bytes.
+    fn byte_length(self) -> Int:
+        """Get the length of this string slice in bytes.
 
         Returns:
             The length of this string slice in bytes.
@@ -295,8 +387,7 @@ struct StringSlice[
         return len(self.as_bytes_slice())
 
     fn _strref_dangerous(self) -> StringRef:
-        """
-        Returns an inner pointer to the string as a StringRef.
+        """Returns an inner pointer to the string as a StringRef.
 
         Safety:
             This functionality is extremely dangerous because Mojo eagerly
@@ -304,27 +395,30 @@ struct StringSlice[
             _strref_keepalive() method to keep the underlying string alive long
             enough.
         """
-        return StringRef(self.unsafe_ptr(), self._byte_length())
+        return StringRef(self.unsafe_ptr(), self.byte_length())
 
     fn _strref_keepalive(self):
-        """
-        A no-op that keeps `self` alive through the call.  This
+        """A no-op that keeps `self` alive through the call.  This
         can be carefully used with `_strref_dangerous()` to wield inner pointers
         without the string getting deallocated early.
         """
         pass
 
     fn isspace(self) -> Bool:
-        """Determines whether the given StringSlice is a python
-        whitespace String. This corresponds to Python's
+        """Determines whether every character in the given StringSlice is a
+        python whitespace String. This corresponds to Python's
         [universal separators](
             https://docs.python.org/3/library/stdtypes.html#str.splitlines)
         `" \\t\\n\\r\\f\\v\\x1c\\x1d\\x1e\\x85\\u2028\\u2029"`.
 
         Returns:
-            True if the String is one of the whitespace characters
+            True if the whole StringSlice is made up of whitespace characters
                 listed above, otherwise False.
         """
+
+        if self.byte_length() == 0:
+            return False
+
         # TODO add line and paragraph separator as stringliteral
         # once unicode escape secuences are accepted
         var next_line = List[UInt8](0xC2, 0x85)
@@ -342,16 +436,19 @@ struct StringSlice[
             var ptr2 = DTypePointer(item2)
             return memcmp(ptr1, ptr2, amnt) == 0
 
-        var no_null_len = len(self)
-        var ptr = self.unsafe_ptr()
-        if no_null_len == 1 and _isspace(ptr[0]):
-            return True
-        elif no_null_len == 2 and _compare(ptr, next_line.unsafe_ptr(), 2):
-            return True
-        elif no_null_len == 3 and (
-            _compare(ptr, unicode_line_sep.unsafe_ptr(), 3)
-            or _compare(ptr, unicode_paragraph_sep.unsafe_ptr(), 3)
-        ):
-            return True
+        for s in self:
+            var no_null_len = s.byte_length()
+            var ptr = s.unsafe_ptr()
+            if no_null_len == 1 and _isspace(ptr[0]):
+                continue
+            elif no_null_len == 2 and _compare(ptr, next_line.unsafe_ptr(), 2):
+                continue
+            elif no_null_len == 3 and (
+                _compare(ptr, unicode_line_sep.unsafe_ptr(), 3)
+                or _compare(ptr, unicode_paragraph_sep.unsafe_ptr(), 3)
+            ):
+                continue
+            else:
+                return False
         _ = next_line, unicode_line_sep, unicode_paragraph_sep
-        return False
+        return True

From 056902551f55affbdf183643631cf98ca249ac74 Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Mon, 8 Jul 2024 14:58:09 -0400
Subject: [PATCH 16/27] replace _byte_length() callers with byte_length()

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 stdlib/src/builtin/io.mojo             |  4 ++--
 stdlib/src/builtin/string.mojo         | 12 +++++++++---
 stdlib/src/builtin/string_literal.mojo |  2 +-
 stdlib/src/sys/ffi.mojo                |  2 +-
 stdlib/src/utils/inline_string.mojo    | 10 +++++-----
 5 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/stdlib/src/builtin/io.mojo b/stdlib/src/builtin/io.mojo
index 788253ad03..a6f445a6ff 100644
--- a/stdlib/src/builtin/io.mojo
+++ b/stdlib/src/builtin/io.mojo
@@ -320,7 +320,7 @@ fn _put(x: DType, file: FileDescriptor = stdout):
 @no_inline
 fn _put(x: StringSlice, file: FileDescriptor = stdout):
     # Avoid printing "(null)" for an empty/default constructed `String`
-    var str_len = x._byte_length()
+    var str_len = x.byte_length()
 
     if not str_len:
         return
@@ -341,7 +341,7 @@ fn _put(x: StringSlice, file: FileDescriptor = stdout):
 
         # The string can be printed, so that's fine.
         if str_len < MAX_STR_LEN:
-            _printf["%.*s"](x._byte_length(), x.unsafe_ptr(), file=file)
+            _printf["%.*s"](x.byte_length(), x.unsafe_ptr(), file=file)
             return
 
         # The string is large, then we need to chunk it.
diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo
index d65c95957a..7b7e6e571e 100644
--- a/stdlib/src/builtin/string.mojo
+++ b/stdlib/src/builtin/string.mojo
@@ -1213,11 +1213,17 @@ struct String(
         """
         return self.byte_length() > 0
 
+    @deprecated(
+        "string length, in bytes (for now) PREFER: String.byte_length(), a"
+        " future version will make this method return Unicode codepoints."
+    )
     fn __len__(self) -> Int:
-        """Gets the string length, in bytes.
+        """Gets the string length, in bytes (for now) PREFER:
+        String.byte_length(), a future version will make this method return
+        Unicode codepoints.
 
         Returns:
-            The string length, in bytes.
+            The string length, in bytes (for now).
         """
         var unicode_length = self.byte_length()
 
@@ -2136,7 +2142,7 @@ struct String(
             return _is_ascii_uppercase(c) or _is_ascii_lowercase(c)
 
         for c in self:
-            debug_assert(c._byte_length() == 1, "only implemented for ASCII")
+            debug_assert(c.byte_length() == 1, "only implemented for ASCII")
             if is_ascii_cased(ord(c)):
 
                 @parameter
diff --git a/stdlib/src/builtin/string_literal.mojo b/stdlib/src/builtin/string_literal.mojo
index 1085db984b..0dcdb0d4c9 100644
--- a/stdlib/src/builtin/string_literal.mojo
+++ b/stdlib/src/builtin/string_literal.mojo
@@ -221,7 +221,7 @@ struct StringLiteral(
             A new string.
         """
         var string = String()
-        var length = self._byte_length()
+        var length = self.byte_length()
         var buffer = String._buffer_type()
         var new_capacity = length + 1
         buffer._realloc(new_capacity)
diff --git a/stdlib/src/sys/ffi.mojo b/stdlib/src/sys/ffi.mojo
index 3c65863f0d..fd2d3b8d50 100644
--- a/stdlib/src/sys/ffi.mojo
+++ b/stdlib/src/sys/ffi.mojo
@@ -231,7 +231,7 @@ fn _get_global[
 fn _get_global_or_null[name: StringLiteral]() -> UnsafePointer[NoneType]:
     return external_call[
         "KGEN_CompilerRT_GetGlobalOrNull", UnsafePointer[NoneType]
-    ](name.unsafe_ptr(), name._byte_length())
+    ](name.unsafe_ptr(), name.byte_length())
 
 
 @always_inline
diff --git a/stdlib/src/utils/inline_string.mojo b/stdlib/src/utils/inline_string.mojo
index d5481cd223..db93624194 100644
--- a/stdlib/src/utils/inline_string.mojo
+++ b/stdlib/src/utils/inline_string.mojo
@@ -123,7 +123,7 @@ struct InlineString(Sized, Stringable, CollectionElement, CollectionElementNew):
         Args:
             str_slice: The string to append.
         """
-        var total_len = len(self) + str_slice._byte_length()
+        var total_len = len(self) + str_slice.byte_length()
 
         # NOTE: Not guaranteed that we're in the small layout even if our
         #       length is shorter than the small capacity.
@@ -157,7 +157,7 @@ struct InlineString(Sized, Stringable, CollectionElement, CollectionElementNew):
             memcpy(
                 dest=buffer.unsafe_ptr() + len(self),
                 src=str_slice.unsafe_ptr(),
-                count=str_slice._byte_length(),
+                count=str_slice.byte_length(),
             )
 
             # Record that we've initialized `total_len` count of elements
@@ -441,14 +441,14 @@ struct _FixedString[CAP: Int](
         inout self,
         str_slice: StringSlice[_],
     ) -> Optional[Error]:
-        var total_len = len(self) + str_slice._byte_length()
+        var total_len = len(self) + str_slice.byte_length()
 
         # Ensure there is sufficient capacity to append `str_slice`
         if total_len > CAP:
             return Optional(
                 Error(
                     "Insufficient capacity to append len="
-                    + str(str_slice._byte_length())
+                    + str(str_slice.byte_length())
                     + " string to len="
                     + str(len(self))
                     + " FixedString with capacity="
@@ -460,7 +460,7 @@ struct _FixedString[CAP: Int](
         memcpy(
             dest=self.buffer.unsafe_ptr() + len(self),
             src=str_slice.unsafe_ptr(),
-            count=str_slice._byte_length(),
+            count=str_slice.byte_length(),
         )
 
         self.size = total_len

From 1a91e5f51e85ddf2234daf4124afe9f7acb66fd4 Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Mon, 8 Jul 2024 15:12:24 -0400
Subject: [PATCH 17/27] fix byte_length usage in string.mojo and tests

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 stdlib/src/builtin/string.mojo       | 4 ++--
 stdlib/test/builtin/test_string.mojo | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo
index 7b7e6e571e..71123005a8 100644
--- a/stdlib/src/builtin/string.mojo
+++ b/stdlib/src/builtin/string.mojo
@@ -1662,7 +1662,7 @@ struct String(
             for s in self[lhs:]:
                 if not s.isspace():
                     break
-                lhs += len(s)
+                lhs += s.byte_length()
             # if it went until the end of the String, then
             # it should be sliced up until the original
             # start of the whitespace which was already appended
@@ -1676,7 +1676,7 @@ struct String(
             for s in self[lhs + 1 :]:
                 if s.isspace():
                     break
-                rhs += len(s)
+                rhs += s.byte_length()
 
             if maxsplit > -1:
                 if items == maxsplit:
diff --git a/stdlib/test/builtin/test_string.mojo b/stdlib/test/builtin/test_string.mojo
index 6932cbd39c..2007ee6f75 100644
--- a/stdlib/test/builtin/test_string.mojo
+++ b/stdlib/test/builtin/test_string.mojo
@@ -1274,7 +1274,7 @@ def test_string_iter():
         var utf8_sequence_len = 0
         var byte_idx = 0
         for v in item:
-            var byte_len = len(v)
+            var byte_len = v.byte_length()
             assert_equal(item[byte_idx : byte_idx + byte_len], v)
             byte_idx += byte_len
             utf8_sequence_len += 1

From 01e54502cc241644b004d4805bdf8feaa9e3d245 Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Mon, 8 Jul 2024 15:17:58 -0400
Subject: [PATCH 18/27] remove deprecation warning

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 stdlib/src/builtin/string.mojo | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo
index 71123005a8..6e0f143652 100644
--- a/stdlib/src/builtin/string.mojo
+++ b/stdlib/src/builtin/string.mojo
@@ -1213,10 +1213,6 @@ struct String(
         """
         return self.byte_length() > 0
 
-    @deprecated(
-        "string length, in bytes (for now) PREFER: String.byte_length(), a"
-        " future version will make this method return Unicode codepoints."
-    )
     fn __len__(self) -> Int:
         """Gets the string length, in bytes (for now) PREFER:
         String.byte_length(), a future version will make this method return

From cbebc06d8678e03b7ffec4c96a5474104eb79bd1 Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Mon, 8 Jul 2024 15:27:18 -0400
Subject: [PATCH 19/27] add deprecation warning, fix docstring

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 stdlib/src/builtin/string.mojo | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo
index 6e0f143652..4cc4068674 100644
--- a/stdlib/src/builtin/string.mojo
+++ b/stdlib/src/builtin/string.mojo
@@ -1213,6 +1213,10 @@ struct String(
         """
         return self.byte_length() > 0
 
+    @deprecated(
+        "A future version will make this method return Unicode codepoints "
+        "PREFER: String.byte_length()"
+    )
     fn __len__(self) -> Int:
         """Gets the string length, in bytes (for now) PREFER:
         String.byte_length(), a future version will make this method return
@@ -1434,6 +1438,7 @@ struct String(
     @always_inline
     fn as_bytes_slice(ref [_]self) -> Span[UInt8, __lifetime_of(self)]:
         """Returns a contiguous slice of the bytes owned by this string.
+
         Returns:
             A contiguous slice pointing to the bytes owned by this string.
 

From 05d33ac96757b5a583603c678cf6b140f77cae5d Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Mon, 8 Jul 2024 16:14:55 -0400
Subject: [PATCH 20/27] fix uses of String.__len__

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 stdlib/src/base64/base64.mojo     |  8 +--
 stdlib/src/builtin/error.mojo     |  2 +-
 stdlib/src/builtin/file.mojo      |  8 +--
 stdlib/src/builtin/string.mojo    | 96 ++++++++++++++++++++-----------
 stdlib/src/tempfile/tempfile.mojo |  4 +-
 5 files changed, 75 insertions(+), 43 deletions(-)

diff --git a/stdlib/src/base64/base64.mojo b/stdlib/src/base64/base64.mojo
index d042c27e21..62e021d38f 100644
--- a/stdlib/src/base64/base64.mojo
+++ b/stdlib/src/base64/base64.mojo
@@ -72,7 +72,7 @@ fn b64encode(str: String) -> String:
     alias lookup = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
     var b64chars = lookup.unsafe_ptr()
 
-    var length = len(str)
+    var length = str.byte_length()
     var out = String._buffer_type(capacity=length + 1)
 
     @parameter
@@ -121,7 +121,7 @@ fn b64decode(str: String) -> String:
     Returns:
       The decoded string.
     """
-    var n = len(str)
+    var n = str.byte_length()
     debug_assert(n % 4 == 0, "Input length must be divisible by 4")
 
     var p = String._buffer_type(capacity=n + 1)
@@ -170,7 +170,7 @@ fn b16encode(str: String) -> String:
     alias lookup = "0123456789ABCDEF"
     var b16chars = lookup.unsafe_ptr()
 
-    var length = len(str)
+    var length = str.byte_length()
     var out = List[UInt8](capacity=length * 2 + 1)
 
     @parameter
@@ -221,7 +221,7 @@ fn b16decode(str: String) -> String:
 
         return -1
 
-    var n = len(str)
+    var n = str.byte_length()
     debug_assert(n % 2 == 0, "Input length must be divisible by 2")
 
     var p = List[UInt8](capacity=n // 2 + 1)
diff --git a/stdlib/src/builtin/error.mojo b/stdlib/src/builtin/error.mojo
index 59b05566bf..ba8fd223b5 100644
--- a/stdlib/src/builtin/error.mojo
+++ b/stdlib/src/builtin/error.mojo
@@ -80,7 +80,7 @@ struct Error(
         Returns:
             The constructed Error object.
         """
-        var length = len(src)
+        var length = src.byte_length()
         var dest = UnsafePointer[UInt8].alloc(length + 1)
         memcpy(
             dest=dest,
diff --git a/stdlib/src/builtin/file.mojo b/stdlib/src/builtin/file.mojo
index 143ba314df..faa7e73653 100644
--- a/stdlib/src/builtin/file.mojo
+++ b/stdlib/src/builtin/file.mojo
@@ -239,7 +239,7 @@ struct FileHandle:
         var bytes = file.read(ptr, 8)
         print("bytes read", bytes)
 
-        var first_element = ptr.load(0)
+        var first_element = ptr[0]
         print(first_element)
 
         # Skip 2 elements
@@ -374,7 +374,7 @@ struct FileHandle:
         ```mojo
         import os
         var f = open("/tmp/example.txt", "r")
-        f.seek(os.SEEK_CUR, 32)
+        _ = f.seek(32, os.SEEK_CUR)
         ```
 
         Start from 32 bytes from the end of the file:
@@ -382,7 +382,7 @@ struct FileHandle:
         ```mojo
         import os
         var f = open("/tmp/example.txt", "r")
-        f.seek(os.SEEK_END, -32)
+        _ = f.seek(-32, os.SEEK_END)
         ```
         .
         """
@@ -409,7 +409,7 @@ struct FileHandle:
         Args:
           data: The data to write to the file.
         """
-        self._write(data.unsafe_ptr(), len(data))
+        self._write(data.unsafe_ptr(), data.byte_length())
 
     fn write(self, data: Span[UInt8, _]) raises:
         """Write a borrowed sequence of data to the file.
diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo
index 4cc4068674..d630cbdd33 100644
--- a/stdlib/src/builtin/string.mojo
+++ b/stdlib/src/builtin/string.mojo
@@ -1006,6 +1006,17 @@ struct String(
         Returns:
             A new string containing the character at the specified position.
         """
+        # FIXME: this should work with unicode codepoints, but this doesn't yet
+        # work at compile time because of issue #933
+        # var i = 0
+        # var buf = Self._buffer_type(capacity=5)
+        # for s in self:
+        #     if i != idx:
+        #         continue
+        #     for j in range(s.byte_length()):
+        #         buf[j] = self._buffer[i + j]
+        # buf.append(0)
+        # buf.resize(len(buf))
         var normalized_idx = normalize_index["String"](idx, self)
         var buf = Self._buffer_type(capacity=1)
         buf.append(self._buffer[normalized_idx])
@@ -1024,13 +1035,27 @@ struct String(
         var start: Int
         var end: Int
         var step: Int
-        start, end, step = span.indices(len(self))
+        # FIXME: this should work with unicode codepoints, but this doesn't yet
+        # work at compile time because of issue #933
+        # var idx = 0
+        # var start = span.start.value() if span.start else 0
+        # var end = span.end.value() if span.end else 0
+        # var buffer = Self._buffer_type()
+        # for s in self:
+        #     var amnt_bytes = s.byte_length()
+        #     if not (start <= idx < end) or idx % span.step != 0:
+        #         idx += amnt_bytes
+        #         continue
+        #     for i in range(amnt_bytes):
+        #         buffer[idx + i] = self._buffer[idx + i]
+        # buffer.append(0)
+        # buffer.resize(len(buffer))
+        # return Self(buffer^)
+
+        start, end, step = span.indices(self.byte_length())
         var r = range(start, end, step)
         if step == 1:
-            return StringRef(
-                self._buffer.data + start,
-                len(r),
-            )
+            return StringRef(self._buffer.data + start, len(r))
 
         var buffer = Self._buffer_type()
         var result_len = len(r)
@@ -1127,8 +1152,8 @@ struct String(
             return other
         if not other:
             return self
-        var self_len = len(self)
-        var other_len = len(other)
+        var self_len = self.byte_length()
+        var other_len = other.byte_length()
         var total_len = self_len + other_len
         var buffer = Self._buffer_type()
         buffer.resize(total_len + 1, 0)
@@ -1167,8 +1192,8 @@ struct String(
             return
         if not other:
             return
-        var self_len = len(self)
-        var other_len = len(other)
+        var self_len = self.byte_length()
+        var other_len = other.byte_length()
         var total_len = self_len + other_len
         self._buffer.resize(total_len + 1, 0)
         # Copy the data alongside the terminator.
@@ -1387,7 +1412,7 @@ struct String(
         strings.  Using this requires the use of the _strref_keepalive() method
         to keep the underlying string alive long enough.
         """
-        return StringRef(self.unsafe_ptr(), len(self))
+        return StringRef(self.unsafe_ptr(), self.byte_length())
 
     fn _strref_keepalive(self):
         """
@@ -1512,7 +1537,7 @@ struct String(
                 break
             res += 1
 
-            offset = pos + len(substr)
+            offset = pos + substr.byte_length()
 
         return res
 
@@ -1700,7 +1725,7 @@ struct String(
             A List of Strings containing the input split by line boundaries.
         """
         var output = List[String]()
-        var length = len(self)
+        var length = self.byte_length()
         var current_offset = 0
 
         while current_offset < length:
@@ -1751,9 +1776,9 @@ struct String(
         var self_ptr = self.unsafe_ptr()
         var new_ptr = new.unsafe_ptr()
 
-        var self_len = len(self)
-        var old_len = len(old)
-        var new_len = len(new)
+        var self_len = self.byte_length()
+        var old_len = old.byte_length()
+        var new_len = new.byte_length()
 
         var res = List[UInt8]()
         res.reserve(self_len + (old_len - new_len) * occurrences + 1)
@@ -1818,7 +1843,7 @@ struct String(
             A copy of the string with no trailing characters.
         """
 
-        var r_idx = len(self)
+        var r_idx = self.byte_length()
         while r_idx > 0 and self[r_idx - 1] in chars:
             r_idx -= 1
 
@@ -1830,7 +1855,7 @@ struct String(
         Returns:
             A copy of the string with no trailing whitespaces.
         """
-        var r_idx = len(self)
+        var r_idx = self.byte_length()
         # TODO (#933): should use this once llvm intrinsics can be used at comp time
         # for s in self.__reversed__():
         #     if not s.isspace():
@@ -1851,7 +1876,7 @@ struct String(
         """
 
         var l_idx = 0
-        while l_idx < len(self) and self[l_idx] in chars:
+        while l_idx < self.byte_length() and self[l_idx] in chars:
             l_idx += 1
 
         return self[l_idx:]
@@ -1868,7 +1893,9 @@ struct String(
         #     if not s.isspace():
         #         break
         #     l_idx += 1
-        while l_idx < len(self) and _isspace(self._buffer.unsafe_get(l_idx)):
+        while l_idx < self.byte_length() and _isspace(
+            self._buffer.unsafe_get(l_idx)
+        ):
             l_idx += 1
         return self[l_idx:]
 
@@ -1886,9 +1913,9 @@ struct String(
         var res = List[UInt8]()
         var val_ptr = val.unsafe_ptr()
         var self_ptr = self.unsafe_ptr()
-        res.reserve(len(val) * len(self) + 1)
-        for i in range(len(self)):
-            for j in range(len(val)):
+        res.reserve(val.byte_length() * self.byte_length() + 1)
+        for i in range(self.byte_length()):
+            for j in range(val.byte_length()):
                 res.append(val_ptr[j])
             res.append(self_ptr[i])
         res.append(0)
@@ -1925,7 +1952,7 @@ struct String(
 
         var char_ptr = copy.unsafe_ptr()
 
-        for i in range(len(self)):
+        for i in range(self.byte_length()):
             var char: UInt8 = char_ptr[i]
             if check_case(char):
                 var lower = _toggle_ascii_case(char)
@@ -1947,7 +1974,7 @@ struct String(
         """
         if end == -1:
             return StringRef(
-                self.unsafe_ptr() + start, len(self) - start
+                self.unsafe_ptr() + start, self.byte_length() - start
             ).startswith(prefix._strref_dangerous())
 
         return StringRef(self.unsafe_ptr() + start, end - start).startswith(
@@ -1968,7 +1995,7 @@ struct String(
         """
         if end == -1:
             return StringRef(
-                self.unsafe_ptr() + start, len(self) - start
+                self.unsafe_ptr() + start, self.byte_length() - start
             ).endswith(suffix._strref_dangerous())
 
         return StringRef(self.unsafe_ptr() + start, end - start).endswith(
@@ -1995,7 +2022,7 @@ struct String(
             or a copy of the original string otherwise.
         """
         if self.startswith(prefix):
-            return self[len(prefix) :]
+            return self[prefix.byte_length() :]
         return self
 
     fn removesuffix(self, suffix: String, /) -> String:
@@ -2018,7 +2045,7 @@ struct String(
             or a copy of the original string otherwise.
         """
         if suffix and self.endswith(suffix):
-            return self[: -len(suffix)]
+            return self[: -suffix.byte_length()]
         return self
 
     fn __int__(self) raises -> Int:
@@ -2044,7 +2071,7 @@ struct String(
         """
         if n <= 0:
             return ""
-        var len_self = len(self)
+        var len_self = self.byte_length()
         var count = len_self * n + 1
         var buf = Self._buffer_type(capacity=count)
         buf.resize(count, 0)
@@ -2097,7 +2124,10 @@ struct String(
 
         var current_automatic_arg_index = 0
         for e in entries:
-            debug_assert(pos_in_self < len(self), "pos_in_self >= len(self)")
+            debug_assert(
+                pos_in_self < self.byte_length(),
+                "pos_in_self >= self.byte_length()",
+            )
             res += self[pos_in_self : e[].first_curly]
 
             if e[].is_escaped_brace():
@@ -2120,8 +2150,8 @@ struct String(
 
             pos_in_self = e[].last_curly + 1
 
-        if pos_in_self < len(self):
-            res += self[pos_in_self : len(self)]
+        if pos_in_self < self.byte_length():
+            res += self[pos_in_self : self.byte_length()]
 
         return res^
 
@@ -2396,7 +2426,7 @@ struct _FormatCurlyEntry(CollectionElement, CollectionElementNew):
         var entries = List[Self]()
         var start = Optional[Int](None)
         var skip_next = False
-        for i in range(len(format_src)):
+        for i in range(format_src.byte_length()):
             if skip_next:
                 skip_next = False
                 continue
@@ -2453,7 +2483,7 @@ struct _FormatCurlyEntry(CollectionElement, CollectionElementNew):
                     start = None
                 else:
                     # python escapes double curlies
-                    if (i + 1) < len(format_src):
+                    if (i + 1) < format_src.byte_length():
                         if format_src[i + 1] == "}":
                             var curren_entry = Self(
                                 first_curly=i, last_curly=i + 1, field=True
diff --git a/stdlib/src/tempfile/tempfile.mojo b/stdlib/src/tempfile/tempfile.mojo
index 9864ba1b5f..bd3c9d69f2 100644
--- a/stdlib/src/tempfile/tempfile.mojo
+++ b/stdlib/src/tempfile/tempfile.mojo
@@ -31,7 +31,9 @@ fn _get_random_name(size: Int = 8) -> String:
     alias characters = String("abcdefghijklmnopqrstuvwxyz0123456789_")
     var name_list = List[UInt8](capacity=size + 1)
     for _ in range(size):
-        var rand_index = int(random.random_ui64(0, len(characters) - 1))
+        var rand_index = int(
+            random.random_ui64(0, characters.byte_length() - 1)
+        )
         name_list.append(ord(characters[rand_index]))
     name_list.append(0)
     return String(name_list^)

From b10fbc128d7eb8515e61ea775addd6e6a14d7ad3 Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Mon, 8 Jul 2024 16:27:49 -0400
Subject: [PATCH 21/27] fix uses of String.__len__ and remove deprecation
 warning

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 stdlib/src/builtin/string.mojo | 4 ----
 stdlib/src/pathlib/path.mojo   | 2 +-
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo
index d630cbdd33..909e8c6475 100644
--- a/stdlib/src/builtin/string.mojo
+++ b/stdlib/src/builtin/string.mojo
@@ -1238,10 +1238,6 @@ struct String(
         """
         return self.byte_length() > 0
 
-    @deprecated(
-        "A future version will make this method return Unicode codepoints "
-        "PREFER: String.byte_length()"
-    )
     fn __len__(self) -> Int:
         """Gets the string length, in bytes (for now) PREFER:
         String.byte_length(), a future version will make this method return
diff --git a/stdlib/src/pathlib/path.mojo b/stdlib/src/pathlib/path.mojo
index 9d67ddf916..43976519c7 100644
--- a/stdlib/src/pathlib/path.mojo
+++ b/stdlib/src/pathlib/path.mojo
@@ -162,7 +162,7 @@ struct Path(
         Returns:
             True if the path length is greater than zero, and False otherwise.
         """
-        return len(self.path) > 0
+        return self.path.byte_length() > 0
 
     fn format_to(self, inout writer: Formatter):
         """

From d579222f19ce89dc2886e83899998049a1e62f38 Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Mon, 8 Jul 2024 17:48:18 -0400
Subject: [PATCH 22/27] add _byte_length with deprecation warning

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 stdlib/src/builtin/string.mojo         | 14 ++++++++++++++
 stdlib/src/builtin/string_literal.mojo | 13 +++++++++++++
 stdlib/src/utils/string_slice.mojo     | 11 +++++++++++
 3 files changed, 38 insertions(+)

diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo
index 909e8c6475..d96c7b5a11 100644
--- a/stdlib/src/builtin/string.mojo
+++ b/stdlib/src/builtin/string.mojo
@@ -1484,6 +1484,7 @@ struct String(
         #   guaranteed to be valid.
         return StringSlice(unsafe_from_utf8=self.as_bytes_slice())
 
+    @always_inline
     fn byte_length(self) -> Int:
         """Get the string length in bytes.
 
@@ -1495,6 +1496,19 @@ struct String(
         """
         return max(len(self._buffer) - 1, 0)
 
+    @always_inline
+    @deprecated("use byte_length() instead")
+    fn _byte_length(self) -> Int:
+        """Get the string length in bytes.
+
+        Returns:
+            The length of this string in bytes, excluding null terminator.
+
+        Notes:
+            This does not include the trailing null terminator in the count.
+        """
+        return max(len(self._buffer) - 1, 0)
+
     fn _steal_ptr(inout self) -> UnsafePointer[UInt8]:
         """Transfer ownership of pointer to the underlying memory.
         The caller is responsible for freeing up the memory.
diff --git a/stdlib/src/builtin/string_literal.mojo b/stdlib/src/builtin/string_literal.mojo
index 0dcdb0d4c9..650f075b2e 100644
--- a/stdlib/src/builtin/string_literal.mojo
+++ b/stdlib/src/builtin/string_literal.mojo
@@ -276,6 +276,19 @@ struct StringLiteral(
         """
         return __mlir_op.`pop.string.size`(self.value)
 
+    @always_inline
+    @deprecated("use byte_length() instead")
+    fn _byte_length(self) -> Int:
+        """Get the string length in bytes.
+
+        Returns:
+            The length of this StringLiteral in bytes.
+
+        Notes:
+            This does not include the trailing null terminator in the count.
+        """
+        return __mlir_op.`pop.string.size`(self.value)
+
     @always_inline("nodebug")
     fn unsafe_ptr(self) -> UnsafePointer[UInt8]:
         """Get raw pointer to the underlying data.
diff --git a/stdlib/src/utils/string_slice.mojo b/stdlib/src/utils/string_slice.mojo
index 90ef29eceb..d6b9663786 100644
--- a/stdlib/src/utils/string_slice.mojo
+++ b/stdlib/src/utils/string_slice.mojo
@@ -386,6 +386,17 @@ struct StringSlice[
 
         return len(self.as_bytes_slice())
 
+    @always_inline
+    @deprecated("use byte_length() instead")
+    fn _byte_length(self) -> Int:
+        """Get the length of this string slice in bytes.
+
+        Returns:
+            The length of this string slice in bytes.
+        """
+
+        return len(self.as_bytes_slice())
+
     fn _strref_dangerous(self) -> StringRef:
         """Returns an inner pointer to the string as a StringRef.
 

From e51df82a0da9e2f92ffaf502a858b49925265fc3 Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Mon, 8 Jul 2024 21:32:56 -0400
Subject: [PATCH 23/27] split isspace and iter into another PR

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 stdlib/src/builtin/string.mojo     | 124 ++++++++++++++++++++++---
 stdlib/src/utils/string_slice.mojo | 141 -----------------------------
 2 files changed, 113 insertions(+), 152 deletions(-)

diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo
index d96c7b5a11..9d97fd5268 100644
--- a/stdlib/src/builtin/string.mojo
+++ b/stdlib/src/builtin/string.mojo
@@ -720,6 +720,76 @@ fn _utf8_byte_type(b: UInt8) -> UInt8:
     return countl_zero(~(b & 0b1111_0000))
 
 
+@value
+struct _StringIter[
+    is_mutable: Bool, //,
+    lifetime: AnyLifetime[is_mutable].type,
+    forward: Bool = True,
+]:
+    """Iterator for String.
+
+    Parameters:
+        is_mutable: Whether the slice is mutable.
+        lifetime: The lifetime of the underlying string data.
+        forward: The iteration direction. `False` is backwards.
+    """
+
+    var index: Int
+    var continuation_bytes: Int
+    var ptr: UnsafePointer[UInt8]
+    var length: Int
+
+    fn __init__(
+        inout self, *, unsafe_pointer: UnsafePointer[UInt8], length: Int
+    ):
+        self.index = 0 if forward else length
+        self.ptr = unsafe_pointer
+        self.length = length
+        self.continuation_bytes = 0
+        for i in range(length):
+            if _utf8_byte_type(int(unsafe_pointer[i])) == 1:
+                self.continuation_bytes += 1
+
+    fn __iter__(self) -> Self:
+        return self
+
+    fn __next__(inout self) -> StringSlice[lifetime]:
+        @parameter
+        if forward:
+            var byte_len = 1
+            if self.continuation_bytes > 0:
+                var byte_type = _utf8_byte_type(int(self.ptr[self.index]))
+                if byte_type != 0:
+                    byte_len = int(byte_type)
+                    self.continuation_bytes -= byte_len - 1
+            self.index += byte_len
+            return StringSlice[lifetime](
+                unsafe_from_utf8_ptr=self.ptr + (self.index - byte_len),
+                len=byte_len,
+            )
+        else:
+            var byte_len = 1
+            if self.continuation_bytes > 0:
+                var byte_type = _utf8_byte_type(int(self.ptr[self.index - 1]))
+                if byte_type != 0:
+                    while byte_type == 1:
+                        byte_len += 1
+                        var b = int(self.ptr[self.index - byte_len])
+                        byte_type = _utf8_byte_type(b)
+                    self.continuation_bytes -= byte_len - 1
+            self.index -= byte_len
+            return StringSlice[lifetime](
+                unsafe_from_utf8_ptr=self.ptr + self.index, len=byte_len
+            )
+
+    fn __len__(self) -> Int:
+        @parameter
+        if forward:
+            return self.length - self.index - self.continuation_bytes
+        else:
+            return self.index - self.continuation_bytes
+
+
 struct String(
     Sized,
     Stringable,
@@ -1203,25 +1273,23 @@ struct String(
             count=other_len + 1,
         )
 
-    fn __iter__(ref [_]self) -> _StringSliceIter[__lifetime_of(self)]:
+    fn __iter__(ref [_]self) -> _StringIter[__lifetime_of(self)]:
         """Iterate over elements of the string, returning immutable references.
 
         Returns:
             An iterator of references to the string elements.
         """
-        return _StringSliceIter[__lifetime_of(self)](
+        return _StringIter[__lifetime_of(self)](
             unsafe_pointer=self.unsafe_ptr(), length=self.byte_length()
         )
 
-    fn __reversed__(
-        ref [_]self,
-    ) -> _StringSliceIter[__lifetime_of(self), False]:
+    fn __reversed__(ref [_]self) -> _StringIter[__lifetime_of(self), False]:
         """Iterate backwards over the string, returning immutable references.
 
         Returns:
             A reversed iterator of references to the string elements.
         """
-        return _StringSliceIter[__lifetime_of(self), forward=False](
+        return _StringIter[__lifetime_of(self), forward=False](
             unsafe_pointer=self.unsafe_ptr(), length=self.byte_length()
         )
 
@@ -1595,17 +1663,51 @@ struct String(
         )
 
     fn isspace(self) -> Bool:
-        """Determines whether every character in the given String is a
-        python whitespace String. This corresponds to Python's
+        """Determines whether the String is non-empty and consists only
+        of python whitespace characters. This corresponds to Python's
         [universal separators](
             https://docs.python.org/3/library/stdtypes.html#str.splitlines)
         `" \\t\\n\\r\\f\\v\\x1c\\x1d\\x1e\\x85\\u2028\\u2029"`.
 
         Returns:
-            True if the whole String is made up of whitespace characters
+            True if every character of the String is one of the characters
                 listed above, otherwise False.
         """
-        return self.as_string_slice().isspace()
+        # TODO add line and paragraph separator as stringliteral
+        # once unicode escape sequences are accepted
+        var next_line = List[UInt8](0xC2, 0x85)
+        """TODO: \\x85"""
+        var unicode_line_sep = List[UInt8](0xE2, 0x80, 0xA8)
+        """TODO: \\u2028"""
+        var unicode_paragraph_sep = List[UInt8](0xE2, 0x80, 0xA9)
+        """TODO: \\u2029"""
+
+        @always_inline
+        fn _compare(
+            item1: UnsafePointer[UInt8], item2: UnsafePointer[UInt8], amnt: Int
+        ) -> Bool:
+            var ptr1 = DTypePointer(item1)
+            var ptr2 = DTypePointer(item2)
+            return memcmp(ptr1, ptr2, amnt) == 0
+
+        if len(self) == 0:
+            return False
+
+        for s in self:
+            var no_null_len = len(s)
+            var ptr = s.unsafe_ptr()
+            if no_null_len == 1 and not _isspace(ptr[0]):
+                return False
+            elif no_null_len == 2 and not _compare(
+                ptr, next_line.unsafe_ptr(), 2
+            ):
+                return False
+            elif no_null_len > 3 or (no_null_len == 3 and not (
+                _compare(ptr, unicode_line_sep.unsafe_ptr(), 3)
+                or _compare(ptr, unicode_paragraph_sep.unsafe_ptr(), 3)
+            )):  # no 4-byte UTF-8 sequence is a whitespace character
+                return False
+        return True
 
     fn split(self, sep: String, maxsplit: Int = -1) raises -> List[String]:
         """Split the string by a separator.
diff --git a/stdlib/src/utils/string_slice.mojo b/stdlib/src/utils/string_slice.mojo
index d6b9663786..ca15404743 100644
--- a/stdlib/src/utils/string_slice.mojo
+++ b/stdlib/src/utils/string_slice.mojo
@@ -27,76 +27,6 @@ alias StaticString = StringSlice[ImmutableStaticLifetime]
 """An immutable static string slice."""
 
 
-@value
-struct _StringSliceIter[
-    is_mutable: Bool, //,
-    lifetime: AnyLifetime[is_mutable].type,
-    forward: Bool = True,
-]:
-    """Iterator for String.
-
-    Parameters:
-        is_mutable: Whether the slice is mutable.
-        lifetime: The lifetime of the underlying string data.
-        forward: The iteration direction. `False` is backwards.
-    """
-
-    var index: Int
-    var continuation_bytes: Int
-    var ptr: UnsafePointer[UInt8]
-    var length: Int
-
-    fn __init__(
-        inout self, *, unsafe_pointer: UnsafePointer[UInt8], length: Int
-    ):
-        self.index = 0 if forward else length
-        self.ptr = unsafe_pointer
-        self.length = length
-        self.continuation_bytes = 0
-        for i in range(length):
-            if _utf8_byte_type(unsafe_pointer[i]) == 1:
-                self.continuation_bytes += 1
-
-    fn __iter__(self) -> Self:
-        return self
-
-    fn __next__(inout self) -> StringSlice[lifetime]:
-        @parameter
-        if forward:
-            var byte_len = 1
-            if self.continuation_bytes > 0:
-                var byte_type = _utf8_byte_type(self.ptr[self.index])
-                if byte_type != 0:
-                    byte_len = int(byte_type)
-                    self.continuation_bytes -= byte_len - 1
-            self.index += byte_len
-            return StringSlice[lifetime](
-                unsafe_from_utf8_ptr=self.ptr + (self.index - byte_len),
-                len=byte_len,
-            )
-        else:
-            var byte_len = 1
-            if self.continuation_bytes > 0:
-                var byte_type = _utf8_byte_type(self.ptr[self.index - 1])
-                if byte_type != 0:
-                    while byte_type == 1:
-                        byte_len += 1
-                        var b = self.ptr[self.index - byte_len]
-                        byte_type = _utf8_byte_type(b)
-                    self.continuation_bytes -= byte_len - 1
-            self.index -= byte_len
-            return StringSlice[lifetime](
-                unsafe_from_utf8_ptr=self.ptr + self.index, len=byte_len
-            )
-
-    fn __len__(self) -> Int:
-        @parameter
-        if forward:
-            return self.length - self.index - self.continuation_bytes
-        else:
-            return self.index - self.continuation_bytes
-
-
 struct StringSlice[
     is_mutable: Bool, //,
     lifetime: AnyLifetime[is_mutable].type,
@@ -331,28 +261,6 @@ struct StringSlice[
         """
         return not self == rhs
 
-    fn __iter__(ref [_]self) -> _StringSliceIter[__lifetime_of(self)]:
-        """Iterate over elements of the string, returning immutable references.
-
-        Returns:
-            An iterator of references to the string elements.
-        """
-        return _StringSliceIter[__lifetime_of(self)](
-            unsafe_pointer=self.unsafe_ptr(), length=self.byte_length()
-        )
-
-    fn __reversed__(
-        ref [_]self,
-    ) -> _StringSliceIter[__lifetime_of(self), False]:
-        """Iterate backwards over the string, returning immutable references.
-
-        Returns:
-            A reversed iterator of references to the string elements.
-        """
-        return _StringSliceIter[__lifetime_of(self), forward=False](
-            unsafe_pointer=self.unsafe_ptr(), length=self.byte_length()
-        )
-
     # ===------------------------------------------------------------------===#
     # Methods
     # ===------------------------------------------------------------------===#
@@ -414,52 +322,3 @@ struct StringSlice[
         without the string getting deallocated early.
         """
         pass
-
-    fn isspace(self) -> Bool:
-        """Determines whether every character in the given StringSlice is a
-        python whitespace String. This corresponds to Python's
-        [universal separators](
-            https://docs.python.org/3/library/stdtypes.html#str.splitlines)
-        `" \\t\\n\\r\\f\\v\\x1c\\x1d\\x1e\\x85\\u2028\\u2029"`.
-
-        Returns:
-            True if the whole StringSlice is made up of whitespace characters
-                listed above, otherwise False.
-        """
-
-        if self.byte_length() == 0:
-            return False
-
-        # TODO add line and paragraph separator as stringliteral
-        # once unicode escape secuences are accepted
-        var next_line = List[UInt8](0xC2, 0x85)
-        """TODO: \\x85"""
-        var unicode_line_sep = List[UInt8](0xE2, 0x80, 0xA8)
-        """TODO: \\u2028"""
-        var unicode_paragraph_sep = List[UInt8](0xE2, 0x80, 0xA9)
-        """TODO: \\u2029"""
-
-        @always_inline
-        fn _compare(
-            item1: UnsafePointer[UInt8], item2: UnsafePointer[UInt8], amnt: Int
-        ) -> Bool:
-            var ptr1 = DTypePointer(item1)
-            var ptr2 = DTypePointer(item2)
-            return memcmp(ptr1, ptr2, amnt) == 0
-
-        for s in self:
-            var no_null_len = s.byte_length()
-            var ptr = s.unsafe_ptr()
-            if no_null_len == 1 and _isspace(ptr[0]):
-                continue
-            elif no_null_len == 2 and _compare(ptr, next_line.unsafe_ptr(), 2):
-                continue
-            elif no_null_len == 3 and (
-                _compare(ptr, unicode_line_sep.unsafe_ptr(), 3)
-                or _compare(ptr, unicode_paragraph_sep.unsafe_ptr(), 3)
-            ):
-                continue
-            else:
-                return False
-        _ = next_line, unicode_line_sep, unicode_paragraph_sep
-        return True

From 1c952c4c64ad3433c8e585fa2149c0de443baab0 Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Mon, 8 Jul 2024 21:35:25 -0400
Subject: [PATCH 24/27] fix dangling import and isspace use

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 stdlib/src/builtin/string.mojo | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo
index 9d97fd5268..e8a848fa74 100644
--- a/stdlib/src/builtin/string.mojo
+++ b/stdlib/src/builtin/string.mojo
@@ -25,7 +25,6 @@ from memory import DTypePointer, LegacyPointer, UnsafePointer, memcmp, memcpy
 
 from utils import Span, StaticIntTuple, StringRef, StringSlice
 from utils._format import Formattable, Formatter, ToFormatter
-from utils.string_slice import _StringSliceIter
 
 # ===----------------------------------------------------------------------=== #
 # ord
@@ -1798,7 +1797,7 @@ struct String(
             # Python adds all "whitespace chars" as one separator
             # if no separator was specified
             for s in self[lhs:]:
-                if not s.isspace():
+                if not str(s).isspace():  # TODO: with StringSlice.isspace()
                     break
                 lhs += s.byte_length()
             # if it went until the end of the String, then
@@ -1812,7 +1811,7 @@ struct String(
                 break
             rhs = lhs + 1
             for s in self[lhs + 1 :]:
-                if s.isspace():
+                if str(s).isspace():  # TODO: with StringSlice.isspace()
                     break
                 rhs += s.byte_length()
 

From 633ba241916fa7babcb66459e05e569b4a67d66a Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Mon, 8 Jul 2024 21:41:16 -0400
Subject: [PATCH 25/27] fix isspace

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 stdlib/src/builtin/string.mojo | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo
index e8a848fa74..e0cace5f62 100644
--- a/stdlib/src/builtin/string.mojo
+++ b/stdlib/src/builtin/string.mojo
@@ -1689,11 +1689,11 @@ struct String(
             var ptr2 = DTypePointer(item2)
             return memcmp(ptr1, ptr2, amnt) == 0
 
-        if len(self) == 0:
+        if self.byte_length() == 0:
             return False
 
         for s in self:
-            var no_null_len = len(s)
+            var no_null_len = s.byte_length()
             var ptr = s.unsafe_ptr()
             if no_null_len == 1 and not _isspace(ptr[0]):
                 return False

From b7630b677adc26757253691ecc8226136dc1bd05 Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Tue, 9 Jul 2024 11:45:23 -0400
Subject: [PATCH 26/27] add suggestions

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 docs/changelog.md              |  5 +++++
 stdlib/src/builtin/string.mojo | 29 ++---------------------------
 2 files changed, 7 insertions(+), 27 deletions(-)

diff --git a/docs/changelog.md b/docs/changelog.md
index 961c586dee..30aadc4f97 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -139,6 +139,11 @@ what we publish.
 
 - Added `StringSlice(..)` initializer from a `StringLiteral`.
 
+- Added a `byte_length()` method to `String`, `StringSlice`, and `StringLiteral`
+and deprecated their private `_byte_length()` methods. Added a warning to
+`String.__len__` method that it will return length in Unicode codepoints in the
+future and `StringSlice.__len__` now does return the Unicode codepoints length.
+
 - Added new `StaticString` type alias. This can be used in place of
   `StringLiteral` for runtime string arguments.
 
diff --git a/stdlib/src/builtin/string.mojo b/stdlib/src/builtin/string.mojo
index e0cace5f62..8c514bdfb8 100644
--- a/stdlib/src/builtin/string.mojo
+++ b/stdlib/src/builtin/string.mojo
@@ -1075,17 +1075,7 @@ struct String(
         Returns:
             A new string containing the character at the specified position.
         """
-        # FIXME: this should work with unicode codepoints, but this doesn't yet
-        # work at compile time because of issue #933
-        # var i = 0
-        # var buf = Self._buffer_type(capacity=5)
-        # for s in self:
-        #     if i != idx:
-        #         continue
-        #     for j in range(s.byte_length()):
-        #         buf[j] = self._buffer[i + j]
-        # buf.append(0)
-        # buf.resize(len(buf))
+        # TODO(#933): implement this for unicode when we support llvm intrinsic evaluation at compile time
         var normalized_idx = normalize_index["String"](idx, self)
         var buf = Self._buffer_type(capacity=1)
         buf.append(self._buffer[normalized_idx])
@@ -1104,22 +1094,7 @@ struct String(
         var start: Int
         var end: Int
         var step: Int
-        # FIXME: this should work with unicode codepoints, but this doesn't yet
-        # work at compile time because of issue #933
-        # var idx = 0
-        # var start = span.start.value() if span.start else 0
-        # var end = span.end.value() if span.end else 0
-        # var buffer = Self._buffer_type()
-        # for s in self:
-        #     var amnt_bytes = s.byte_length()
-        #     if not (start <= idx < end) or idx % span.step != 0:
-        #         idx += amnt_bytes
-        #         continue
-        #     for i in range(amnt_bytes):
-        #         buffer[idx + i] = self._buffer[idx + i]
-        # buffer.append(0)
-        # buffer.resize(len(buffer))
-        # return Self(buffer^)
+        # TODO(#933): implement this for unicode when we support llvm intrinsic evaluation at compile time
 
         start, end, step = span.indices(self.byte_length())
         var r = range(start, end, step)

From 10d88ecf3b08832ba7435b05f9457fa81e14592e Mon Sep 17 00:00:00 2001
From: martinvuyk <martin.vuyklop@gmail.com>
Date: Tue, 9 Jul 2024 12:09:59 -0400
Subject: [PATCH 27/27] add suggestions

Signed-off-by: martinvuyk <martin.vuyklop@gmail.com>
---
 docs/changelog.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/changelog.md b/docs/changelog.md
index 30aadc4f97..7759de019b 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -143,6 +143,7 @@ what we publish.
 and deprecated their private `_byte_length()` methods. Added a warning to
 `String.__len__` method that it will return length in Unicode codepoints in the
 future and `StringSlice.__len__` now does return the Unicode codepoints length.
+([PR #2960](https://github.com/modularml/mojo/pull/2960) by [@martinvuyk](https://github.com/martinvuyk))
 
 - Added new `StaticString` type alias. This can be used in place of
   `StringLiteral` for runtime string arguments.