Skip to content

Commit

Permalink
Add Stringlike .split() tests
Browse files Browse the repository at this point in the history
Signed-off-by: martinvuyk <[email protected]>
  • Loading branch information
martinvuyk committed Dec 16, 2024
1 parent 9f4544a commit 7c7c22e
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 167 deletions.
121 changes: 72 additions & 49 deletions stdlib/test/builtin/test_string_literal.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -369,64 +369,87 @@ def test_center():


def test_split():
var d = "hello world".split()
assert_true(len(d) == 2)
assert_true(d[0] == "hello")
assert_true(d[1] == "world")
d = "hello \t\n\n\v\fworld".split("\n")
assert_true(len(d) == 3)
assert_true(d[0] == "hello \t" and d[1] == "" and d[2] == "\v\fworld")
fn st(value: StringLiteral) -> StringLiteral:
return value

# Runs of consecutive whitespace-like chars should act as a single separator
# Test all Unicode separators
# 0 is to build a String with null terminator
# alias next_line = List[UInt8](0xC2, 0x85, 0)
# """TODO: \\x85"""
# alias unicode_line_sep = List[UInt8](0xE2, 0x80, 0xA8, 0)
# """TODO: \\u2028"""
# alias unicode_paragraph_sep = List[UInt8](0xE2, 0x80, 0xA9, 0)
# """TODO: \\u2029"""
# TODO add line and paragraph separator as StringLiteral once unicode
# escape sequences are accepted
univ_sep_var = (
st(" ")
+ st("\t")
+ st("\n")
+ st("\r")
+ st("\v")
+ st("\f")
+ st("\x1c")
+ st("\x1d")
+ st("\x1e")
# + st(next_line)
# + st(unicode_line_sep)
# + st(unicode_paragraph_sep)
)
s = univ_sep_var + "hello" + univ_sep_var + "world" + univ_sep_var
assert_equal(s.split(), List[String]("hello", "world"))

# should split into empty strings between separators
d = "1,,,3".split(",")
assert_true(len(d) == 4)
assert_true(d[0] == "1" and d[1] == "" and d[2] == "" and d[3] == "3")
d = "abababaaba".split("aba")
assert_true(len(d) == 4)
assert_true(d[0] == "" and d[1] == "b" and d[2] == "" and d[3] == "")
assert_equal(st("1,,,3").split(","), List[String]("1", "", "", "3"))
assert_equal(st(",,,").split(","), List[String]("", "", "", ""))
assert_equal(st(" a b ").split(" "), List[String]("", "a", "b", ""))
assert_equal(st("abababaaba").split("aba"), List[String]("", "b", "", ""))
assert_true(len(st("").split()) == 0)
assert_true(len(st(" ").split()) == 0)
assert_true(len(st("").split(" ")) == 1)
assert_true(len(st(",").split(",")) == 2)
assert_true(len(st(" ").split(" ")) == 2)
assert_true(len(st("").split("")) == 2)
assert_true(len(st("  ").split(" ")) == 3)
assert_true(len(st("   ").split(" ")) == 4)

# should split into maxsplit + 1 items
d = "1,2,3".split(",", 0)
assert_true(len(d) == 1)
assert_true(d[0] == "1,2,3")
d = "1,2,3".split(",", 1)
assert_true(len(d) == 2)
assert_true(d[0] == "1" and d[1] == "2,3")

assert_true(len("".split()) == 0)
assert_true(len(" ".split()) == 0)
assert_true(len("".split(" ")) == 1)
assert_true(len(" ".split(" ")) == 2)
assert_true(len("  ".split(" ")) == 3)
assert_true(len("   ".split(" ")) == 4)
assert_equal(st("1,2,3").split(",", 0), List[String]("1,2,3"))
assert_equal(st("1,2,3").split(",", 1), List[String]("1", "2,3"))

with assert_raises():
_ = "".split("")

# Multiple matches should each be split properly
var d2 = " "
var in2 = "modcon is coming soon"
var res2 = in2.split(d2)
assert_equal(len(res2), 4)
assert_equal(res2[0], "modcon")
assert_equal(res2[1], "is")
assert_equal(res2[2], "coming")
assert_equal(res2[3], "soon")
# Split in middle
assert_equal(st("faang").split(st("n")), List[String]("faa", "g"))

# No match from the delimiter
var d3 = "x"
var in3 = "hello world"
var res3 = in3.split(d3)
assert_equal(len(res3), 1)
assert_equal(res3[0], "hello world")
assert_equal(st("hello world").split(st("x")), List[String]("hello world"))

# Multiple character delimiter
var d4 = "ll"
var in4 = "hello"
var res4 = in4.split(d4)
assert_equal(len(res4), 2)
assert_equal(res4[0], "he")
assert_equal(res4[1], "o")
assert_equal(st("hello").split(st("ll")), List[String]("he", "o"))

res = List[String]("", "bb", "", "", "", "bbb", "")
assert_equal(st("abbaaaabbba").split("a"), res)
assert_equal(st("abbaaaabbba").split("a", 8), res)
s1 = st("abbaaaabbba").split("a", 5)
assert_equal(s1, List[String]("", "bb", "", "", "", "bbba"))
assert_equal(st("aaa").split("a", 0), List[String]("aaa"))
assert_equal(st("a").split("a"), List[String]("", ""))
assert_equal(st("1,2,3").split("3", 0), List[String]("1,2,3"))
assert_equal(st("1,2,3").split("3", 1), List[String]("1,2,", ""))
assert_equal(st("1,2,3,3").split("3", 2), List[String]("1,2,", ",", ""))
assert_equal(st("1,2,3,3,3").split("3", 2), List[String]("1,2,", ",", ",3"))

assert_equal(st("Hello 🔥!").split(), List[String]("Hello", "🔥!"))

s2 = st("Лорем ипсум долор сит амет").split(" ")
assert_equal(s2, List[String]("Лорем", "ипсум", "долор", "сит", "амет"))
s3 = st("Лорем ипсум долор сит амет").split("м")
assert_equal(s3, List[String]("Лоре", " ипсу", " долор сит а", "ет"))

assert_equal(st("123").split(""), List[String]("", "1", "2", "3", ""))
assert_equal("".join(st("123").split("")), "123")
assert_equal(st(",1,2,3,").split(","), st("123").split(""))
assert_equal(",".join(st("123").split("")), ",1,2,3,")


def test_splitlines():
Expand Down
173 changes: 55 additions & 118 deletions stdlib/test/collections/test_string.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -702,14 +702,8 @@ def test_rfind():


def test_split():
# empty separators default to whitespace
var d = String("hello world").split()
assert_true(len(d) == 2)
assert_true(d[0] == "hello")
assert_true(d[1] == "world")
d = String("hello \t\n\n\v\fworld").split("\n")
assert_true(len(d) == 3)
assert_true(d[0] == "hello \t" and d[1] == "" and d[2] == "\v\fworld")
fn st(value: StringLiteral) -> String:
return value

# Runs of consecutive whitespace-like chars should act as a single separator
# Test all Unicode separators
Expand All @@ -722,130 +716,73 @@ def test_split():
"""TODO: \\u2029"""
# TODO add line and paragraph separator as StringLiteral once unicode
# escape sequences are accepted
var univ_sep_var = (
String(" ")
+ String("\t")
+ String("\n")
+ String("\r")
+ String("\v")
+ String("\f")
+ String("\x1c")
+ String("\x1d")
+ String("\x1e")
univ_sep_var = (
st(" ")
+ st("\t")
+ st("\n")
+ st("\r")
+ st("\v")
+ st("\f")
+ st("\x1c")
+ st("\x1d")
+ st("\x1e")
+ String(next_line)
+ String(unicode_line_sep)
+ String(unicode_paragraph_sep)
)
var s = univ_sep_var + "hello" + univ_sep_var + "world" + univ_sep_var
d = s.split()
assert_true(len(d) == 2)
assert_true(d[0] == "hello" and d[1] == "world")
s = univ_sep_var + "hello" + univ_sep_var + "world" + univ_sep_var
assert_equal(s.split(), List[String]("hello", "world"))

# should split into empty strings between separators
d = String("1,,,3").split(",")
assert_true(len(d) == 4)
assert_true(d[0] == "1" and d[1] == "" and d[2] == "" and d[3] == "3")
d = String(",,,").split(",")
assert_true(len(d) == 4)
assert_true(d[0] == "" and d[1] == "" and d[2] == "" and d[3] == "")
d = String(" a b ").split(" ")
assert_true(len(d) == 4)
assert_true(d[0] == "" and d[1] == "a" and d[2] == "b" and d[3] == "")
d = String("abababaaba").split("aba")
assert_true(len(d) == 4)
assert_true(d[0] == "" and d[1] == "b" and d[2] == "" and d[3] == "")
assert_equal(st("1,,,3").split(","), List[String]("1", "", "", "3"))
assert_equal(st(",,,").split(","), List[String]("", "", "", ""))
assert_equal(st(" a b ").split(" "), List[String]("", "a", "b", ""))
assert_equal(st("abababaaba").split("aba"), List[String]("", "b", "", ""))
assert_true(len(st("").split()) == 0)
assert_true(len(st(" ").split()) == 0)
assert_true(len(st("").split(" ")) == 1)
assert_true(len(st(",").split(",")) == 2)
assert_true(len(st(" ").split(" ")) == 2)
assert_true(len(st("").split("")) == 2)
assert_true(len(st("  ").split(" ")) == 3)
assert_true(len(st("   ").split(" ")) == 4)

# should split into maxsplit + 1 items
d = String("1,2,3").split(",", 0)
assert_true(len(d) == 1)
assert_true(d[0] == "1,2,3")
d = String("1,2,3").split(",", 1)
assert_true(len(d) == 2)
assert_true(d[0] == "1" and d[1] == "2,3")

assert_true(len(String("").split()) == 0)
assert_true(len(String(" ").split()) == 0)
assert_true(len(String("").split(" ")) == 1)
assert_true(len(String(" ").split(" ")) == 2)
assert_true(len(String("  ").split(" ")) == 3)
assert_true(len(String("   ").split(" ")) == 4)

with assert_raises():
_ = String("").split("")
assert_equal(st("1,2,3").split(",", 0), List[String]("1,2,3"))
assert_equal(st("1,2,3").split(",", 1), List[String]("1", "2,3"))

# Split in middle
var d1 = String("n")
var in1 = String("faang")
var res1 = in1.split(d1)
assert_equal(len(res1), 2)
assert_equal(res1[0], "faa")
assert_equal(res1[1], "g")

# Multiple matches should each be split properly
var d2 = String(" ")
var in2 = String("modcon is coming soon")
var res2 = in2.split(d2)
assert_equal(len(res2), 4)
assert_equal(res2[0], "modcon")
assert_equal(res2[1], "is")
assert_equal(res2[2], "coming")
assert_equal(res2[3], "soon")
assert_equal(st("faang").split(st("n")), List[String]("faa", "g"))

# No match from the delimiter
var d3 = String("x")
var in3 = String("hello world")
var res3 = in3.split(d3)
assert_equal(len(res3), 1)
assert_equal(res3[0], "hello world")
assert_equal(st("hello world").split(st("x")), List[String]("hello world"))

# Multiple character delimiter
var d4 = String("ll")
var in4 = String("hello")
var res4 = in4.split(d4)
assert_equal(len(res4), 2)
assert_equal(res4[0], "he")
assert_equal(res4[1], "o")

# related to #2879
# TODO: replace string comparison when __eq__ is implemented for List
assert_equal(
String("abbaaaabbba").split("a").__str__(),
"['', 'bb', '', '', '', 'bbb', '']",
)
assert_equal(
String("abbaaaabbba").split("a", 8).__str__(),
"['', 'bb', '', '', '', 'bbb', '']",
)
assert_equal(
String("abbaaaabbba").split("a", 5).__str__(),
"['', 'bb', '', '', '', 'bbba']",
)
assert_equal(String("aaa").split("a", 0).__str__(), "['aaa']")
assert_equal(String("a").split("a").__str__(), "['', '']")
assert_equal(String("1,2,3").split("3", 0).__str__(), "['1,2,3']")
assert_equal(String("1,2,3").split("3", 1).__str__(), "['1,2,', '']")
assert_equal(String("1,2,3,3").split("3", 2).__str__(), "['1,2,', ',', '']")
assert_equal(
String("1,2,3,3,3").split("3", 2).__str__(), "['1,2,', ',', ',3']"
)

var in5 = String("Hello 🔥!")
var res5 = in5.split()
assert_equal(len(res5), 2)
assert_equal(res5[0], "Hello")
assert_equal(res5[1], "🔥!")

var in6 = String("Лорем ипсум долор сит амет")
var res6 = in6.split(" ")
assert_equal(len(res6), 5)
assert_equal(res6[0], "Лорем")
assert_equal(res6[1], "ипсум")
assert_equal(res6[2], "долор")
assert_equal(res6[3], "сит")
assert_equal(res6[4], "амет")

with assert_raises(contains="Separator cannot be empty."):
_ = String("1, 2, 3").split("")
assert_equal(st("hello").split(st("ll")), List[String]("he", "o"))

res = List[String]("", "bb", "", "", "", "bbb", "")
assert_equal(st("abbaaaabbba").split("a"), res)
assert_equal(st("abbaaaabbba").split("a", 8), res)
s1 = st("abbaaaabbba").split("a", 5)
assert_equal(s1, List[String]("", "bb", "", "", "", "bbba"))
assert_equal(st("aaa").split("a", 0), List[String]("aaa"))
assert_equal(st("a").split("a"), List[String]("", ""))
assert_equal(st("1,2,3").split("3", 0), List[String]("1,2,3"))
assert_equal(st("1,2,3").split("3", 1), List[String]("1,2,", ""))
assert_equal(st("1,2,3,3").split("3", 2), List[String]("1,2,", ",", ""))
assert_equal(st("1,2,3,3,3").split("3", 2), List[String]("1,2,", ",", ",3"))

assert_equal(st("Hello 🔥!").split(), List[String]("Hello", "🔥!"))

s2 = st("Лорем ипсум долор сит амет").split(" ")
assert_equal(s2, List[String]("Лорем", "ипсум", "долор", "сит", "амет"))
s3 = st("Лорем ипсум долор сит амет").split("м")
assert_equal(s3, List[String]("Лоре", " ипсу", " долор сит а", "ет"))

assert_equal(st("123").split(""), List[String]("", "1", "2", "3", ""))
assert_equal("".join(st("123").split("")), "123")
assert_equal(st(",1,2,3,").split(","), st("123").split(""))
assert_equal(",".join(st("123").split("")), ",1,2,3,")


def test_splitlines():
Expand Down

0 comments on commit 7c7c22e

Please sign in to comment.