From d051c2aabf69e0b07482097a724e299f56503c17 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Richard=20B=C3=B6hme?=
Date: Sun, 20 Mar 2022 23:22:24 +0100
Subject: [PATCH] Implement String#[]= [#217]

---
 include/natalie/string_object.hpp    |   1 +
 lib/natalie/compiler/binding_gen.rb  |   1 +
 spec/core/string/element_set_spec.rb | 623 +++++++++++++++++++++++++++
 src/string_object.cpp                | 120 ++++++
 test/natalie/string_test.rb          |   6 +
 5 files changed, 751 insertions(+)
 create mode 100644 spec/core/string/element_set_spec.rb

diff --git a/include/natalie/string_object.hpp b/include/natalie/string_object.hpp
index db7f43cee3..bb335b033c 100644
--- a/include/natalie/string_object.hpp
+++ b/include/natalie/string_object.hpp
@@ -179,6 +179,7 @@ class StringObject : public Object {
     Value ord(Env *);
     Value prepend(Env *, size_t, Value *);
     Value ref(Env *, Value);
+    Value refeq(Env *, Value, Value, Value);
     Value reverse(Env *);
     Value reverse_in_place(Env *);
     Value rstrip(Env *) const;
diff --git a/lib/natalie/compiler/binding_gen.rb b/lib/natalie/compiler/binding_gen.rb
index 1b75dd6abb..6a6d8b4d30 100644
--- a/lib/natalie/compiler/binding_gen.rb
+++ b/lib/natalie/compiler/binding_gen.rb
@@ -841,6 +841,7 @@ def generate_name
 gen.binding('String', '===', 'StringObject', 'eq', argc: 1, pass_env: true, pass_block: false, return_type: :bool)
 gen.binding('String', '=~', 'StringObject', 'eqtilde', argc: 1, pass_env: true, pass_block: false, return_type: :Object)
 gen.binding('String', '[]', 'StringObject', 'ref', argc: 1, pass_env: true, pass_block: false, return_type: :Object)
+gen.binding('String', '[]=', 'StringObject', 'refeq', argc: 2..3, pass_env: true, pass_block: false, return_type: :Object)
 gen.binding('String', 'b', 'StringObject', 'b', argc: 0, pass_env: true, pass_block: true, return_type: :Object)
 gen.binding('String', 'bytes', 'StringObject', 'bytes', argc: 0, pass_env: true, pass_block: true, return_type: :Object)
 gen.binding('String', 'bytesize', 'StringObject', 'bytesize', argc: 0,
pass_env: false, pass_block: false, return_type: :size_t) diff --git a/spec/core/string/element_set_spec.rb b/spec/core/string/element_set_spec.rb new file mode 100644 index 0000000000..28c1f13f25 --- /dev/null +++ b/spec/core/string/element_set_spec.rb @@ -0,0 +1,623 @@ +# -*- encoding: utf-8 -*- +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +# TODO: Add missing String#[]= specs: +# String#[re, idx] = obj + +describe "String#[]= with Integer index" do + it "replaces the char at idx with other_str" do + a = "hello" + a[0] = "bam" + a.should == "bamello" + a[-2] = "" + a.should == "bamelo" + end + + ruby_version_is ''...'2.7' do + it "taints self if other_str is tainted" do + a = "hello" + a[0] = "".taint + a.should.tainted? + + a = "hello" + a[0] = "x".taint + a.should.tainted? + end + end + + it "raises an IndexError without changing self if idx is outside of self" do + str = "hello" + + -> { str[20] = "bam" }.should raise_error(IndexError) + str.should == "hello" + + -> { str[-20] = "bam" }.should raise_error(IndexError) + str.should == "hello" + + -> { ""[-1] = "bam" }.should raise_error(IndexError) + end + + # Behaviour is verified by matz in + # http://redmine.ruby-lang.org/issues/show/1750 + it "allows assignment to the zero'th element of an empty String" do + str = "" + str[0] = "bam" + str.should == "bam" + end + + it "raises IndexError if the string index doesn't match a position in the string" do + str = "hello" + -> { str['y'] = "bam" }.should raise_error(IndexError) + str.should == "hello" + end + + it "raises a FrozenError when self is frozen" do + a = "hello" + a.freeze + + -> { a[0] = "bam" }.should raise_error(FrozenError) + end + + it "calls to_int on index" do + str = "hello" + str[0.5] = "hi " + str.should == "hi ello" + + obj = mock('-1') + obj.should_receive(:to_int).and_return(-1) + str[obj] = "!" + str.should == "hi ell!" 
+ end + + it "calls #to_str to convert other to a String" do + other_str = mock('-test-') + other_str.should_receive(:to_str).and_return("-test-") + + a = "abc" + a[1] = other_str + a.should == "a-test-c" + end + + it "raises a TypeError if other_str can't be converted to a String" do + -> { "test"[1] = [] }.should raise_error(TypeError) + -> { "test"[1] = mock('x') }.should raise_error(TypeError) + -> { "test"[1] = nil }.should raise_error(TypeError) + end + + it "raises a TypeError if passed an Integer replacement" do + -> { "abc"[1] = 65 }.should raise_error(TypeError) + end + + it "raises an IndexError if the index is greater than character size" do + -> { "あれ"[4] = "a" }.should raise_error(IndexError) + end + + it "calls #to_int to convert the index" do + index = mock("string element set") + index.should_receive(:to_int).and_return(1) + + str = "あれ" + str[index] = "a" + str.should == "あa" + end + + it "raises a TypeError if #to_int does not return an Integer" do + index = mock("string element set") + index.should_receive(:to_int).and_return('1') + + -> { "abc"[index] = "d" }.should raise_error(TypeError) + end + + it "raises an IndexError if #to_int returns a value out of range" do + index = mock("string element set") + index.should_receive(:to_int).and_return(4) + + -> { "ab"[index] = "c" }.should raise_error(IndexError) + end + + it "replaces a character with a multibyte character" do + str = "ありがとu" + str[4] = "う" + str.should == "ありがとう" + end + + it "replaces a multibyte character with a character" do + str = "ありがとう" + str[4] = "u" + str.should == "ありがとu" + end + + it "replaces a multibyte character with a multibyte character" do + str = "ありがとお" + str[4] = "う" + str.should == "ありがとう" + end + + # NATFIXME: Implement Encoding::US_ASCII + xit "encodes the String in an encoding compatible with the replacement" do + str = " ".force_encoding Encoding::US_ASCII + rep = [160].pack('C').force_encoding Encoding::BINARY + str[0] = rep + str.encoding.should 
equal(Encoding::BINARY) + end + + # NATFIXME: Implement encoding compatibility (Encoding::compatible?) + xit "updates the string to a compatible encoding" do + str = " " + str[1] = [0xB9].pack("C*") + str.encoding.should == Encoding::ASCII_8BIT + end + + # NATFIXME: Implement encoding compatibility (Encoding::compatible?) + xit "raises an Encoding::CompatibilityError if the replacement encoding is incompatible" do + str = "あれ" + rep = "が".encode Encoding::EUC_JP + -> { str[0] = rep }.should raise_error(Encoding::CompatibilityError) + end +end + +describe "String#[]= with String index" do + it "replaces fewer characters with more characters" do + str = "abcde" + str["cd"] = "ghi" + str.should == "abghie" + end + + it "replaces more characters with fewer characters" do + str = "abcde" + str["bcd"] = "f" + str.should == "afe" + end + + it "replaces characters with no characters" do + str = "abcde" + str["cd"] = "" + str.should == "abe" + end + + it "raises an IndexError if the search String is not found" do + str = "abcde" + -> { str["g"] = "h" }.should raise_error(IndexError) + end + + it "replaces characters with a multibyte character" do + str = "ありgaとう" + str["ga"] = "が" + str.should == "ありがとう" + end + + it "replaces multibyte characters with characters" do + str = "ありがとう" + str["が"] = "ga" + str.should == "ありgaとう" + end + + it "replaces multibyte characters with multibyte characters" do + str = "ありがとう" + str["が"] = "か" + str.should == "ありかとう" + end + + # NATFIXME: Implement encoding compatibility (Encoding::compatible?) + xit "encodes the String in an encoding compatible with the replacement" do + str = " ".force_encoding Encoding::US_ASCII + rep = [160].pack('C').force_encoding Encoding::BINARY + str[" "] = rep + str.encoding.should equal(Encoding::BINARY) + end + + # NATFIXME: Implement encoding compatibility (Encoding::compatible?) 
+ xit "raises an Encoding::CompatibilityError if the replacement encoding is incompatible" do + str = "あれ" + rep = "が".encode Encoding::EUC_JP + -> { str["れ"] = rep }.should raise_error(Encoding::CompatibilityError) + end +end + +describe "String#[]= with a Regexp index" do + it "replaces the matched text with the rhs" do + str = "hello" + str[/lo/] = "x" + str.should == "helx" + end + + it "raises IndexError if the regexp index doesn't match a position in the string" do + str = "hello" + -> { str[/y/] = "bam" }.should raise_error(IndexError) + str.should == "hello" + end + + it "calls #to_str to convert the replacement" do + rep = mock("string element set regexp") + rep.should_receive(:to_str).and_return("b") + + str = "abc" + str[/ab/] = rep + str.should == "bc" + end + + it "checks the match before calling #to_str to convert the replacement" do + rep = mock("string element set regexp") + rep.should_not_receive(:to_str) + + -> { "abc"[/def/] = rep }.should raise_error(IndexError) + end + + describe "with 3 arguments" do + it "calls #to_int to convert the second object" do + ref = mock("string element set regexp ref") + ref.should_receive(:to_int).and_return(1) + + str = "abc" + str[/a(b)/, ref] = "x" + str.should == "axc" + end + + it "raises a TypeError if #to_int does not return an Integer" do + ref = mock("string element set regexp ref") + ref.should_receive(:to_int).and_return(nil) + + -> { "abc"[/a(b)/, ref] = "x" }.should raise_error(TypeError) + end + + it "uses the 2nd of 3 arguments as which capture should be replaced" do + str = "aaa bbb ccc" + str[/a (bbb) c/, 1] = "ddd" + str.should == "aaa ddd ccc" + end + + it "allows the specified capture to be negative and count from the end" do + str = "abcd" + str[/(a)(b)(c)(d)/, -2] = "e" + str.should == "abed" + end + + it "checks the match index before calling #to_str to convert the replacement" do + rep = mock("string element set regexp") + rep.should_not_receive(:to_str) + + -> { "abc"[/a(b)/, 2] = rep 
}.should raise_error(IndexError) + end + + it "raises IndexError if the specified capture isn't available" do + str = "aaa bbb ccc" + -> { str[/a (bbb) c/, 2] = "ddd" }.should raise_error(IndexError) + -> { str[/a (bbb) c/, -2] = "ddd" }.should raise_error(IndexError) + end + + describe "when the optional capture does not match" do + it "raises an IndexError before setting the replacement" do + str1 = "a b c" + str2 = str1.dup + -> { str2[/a (b) (Z)?/, 2] = "d" }.should raise_error(IndexError) + str2.should == str1 + end + end + end + + it "replaces characters with a multibyte character" do + str = "ありgaとう" + str[/ga/] = "が" + str.should == "ありがとう" + end + + it "replaces multibyte characters with characters" do + str = "ありがとう" + str[/が/] = "ga" + str.should == "ありgaとう" + end + + it "replaces multibyte characters with multibyte characters" do + str = "ありがとう" + str[/が/] = "か" + str.should == "ありかとう" + end + + # NATFIXME: Implement Encoding::US_ASCII + xit "encodes the String in an encoding compatible with the replacement" do + str = " ".force_encoding Encoding::US_ASCII + rep = [160].pack('C').force_encoding Encoding::BINARY + str[/ /] = rep + str.encoding.should equal(Encoding::BINARY) + end + + # NATFIXME: Implement encoding compatibility (Encoding::compatible?) 
+ xit "raises an Encoding::CompatibilityError if the replacement encoding is incompatible" do + str = "あれ" + rep = "が".encode Encoding::EUC_JP + -> { str[/れ/] = rep }.should raise_error(Encoding::CompatibilityError) + end +end + +describe "String#[]= with a Range index" do + describe "with an empty replacement" do + it "does not replace a character with a zero-index, zero exclude-end range" do + str = "abc" + str[0...0] = "" + str.should == "abc" + end + + it "does not replace a character with a zero exclude-end range" do + str = "abc" + str[1...1] = "" + str.should == "abc" + end + + it "replaces a character with zero-index, zero non-exclude-end range" do + str = "abc" + str[0..0] = "" + str.should == "bc" + end + + it "replaces a character with a zero non-exclude-end range" do + str = "abc" + str[1..1] = "" + str.should == "ac" + end + end + + it "replaces the contents with a shorter String" do + str = "abcde" + str[0..-1] = "hg" + str.should == "hg" + end + + it "replaces the contents with a longer String" do + str = "abc" + str[0...4] = "uvwxyz" + str.should == "uvwxyz" + end + + it "replaces a partial string" do + str = "abcde" + str[1..3] = "B" + str.should == "aBe" + end + + it "raises a RangeError if negative Range begin is out of range" do + -> { "abc"[-4..-2] = "x" }.should raise_error(RangeError) + end + + it "raises a RangeError if positive Range begin is greater than String size" do + -> { "abc"[4..2] = "x" }.should raise_error(RangeError) + end + + it "uses the Range end as an index rather than a count" do + str = "abcdefg" + str[-5..3] = "xyz" + str.should == "abxyzefg" + end + + it "treats a negative out-of-range Range end with a positive Range begin as a zero count" do + str = "abc" + str[1..-4] = "x" + str.should == "axbc" + end + + it "treats a negative out-of-range Range end with a negative Range begin as a zero count" do + str = "abcd" + str[-1..-4] = "x" + str.should == "abcxd" + end + + it "replaces characters with a multibyte character" do + 
str = "ありgaとう" + str[2..3] = "が" + str.should == "ありがとう" + end + + it "replaces multibyte characters with characters" do + str = "ありがとう" + str[2...3] = "ga" + str.should == "ありgaとう" + end + + it "replaces multibyte characters by negative indexes" do + str = "ありがとう" + str[-3...-2] = "ga" + str.should == "ありgaとう" + end + + it "replaces multibyte characters with multibyte characters" do + str = "ありがとう" + str[2..2] = "か" + str.should == "ありかとう" + end + + it "deletes a multibyte character" do + str = "ありとう" + str[2..3] = "" + str.should == "あり" + end + + it "inserts a multibyte character" do + str = "ありとう" + str[2...2] = "が" + str.should == "ありがとう" + end + + # NATFIXME: Implement Encoding::US_ASCII + xit "encodes the String in an encoding compatible with the replacement" do + str = " ".force_encoding Encoding::US_ASCII + rep = [160].pack('C').force_encoding Encoding::BINARY + str[0..1] = rep + str.encoding.should equal(Encoding::BINARY) + end + + # NATFIXME: Implement encoding compatibility (Encoding::compatible?) 
+ xit "raises an Encoding::CompatibilityError if the replacement encoding is incompatible" do + str = "あれ" + rep = "が".encode Encoding::EUC_JP + -> { str[0..1] = rep }.should raise_error(Encoding::CompatibilityError) + end +end + +describe "String#[]= with Integer index, count" do + it "starts at idx and overwrites count characters before inserting the rest of other_str" do + a = "hello" + a[0, 2] = "xx" + a.should == "xxllo" + a = "hello" + a[0, 2] = "jello" + a.should == "jellollo" + end + + it "counts negative idx values from end of the string" do + a = "hello" + a[-1, 0] = "bob" + a.should == "hellbobo" + a = "hello" + a[-5, 0] = "bob" + a.should == "bobhello" + end + + it "overwrites and deletes characters if count is more than the length of other_str" do + a = "hello" + a[0, 4] = "x" + a.should == "xo" + a = "hello" + a[0, 5] = "x" + a.should == "x" + end + + it "deletes characters if other_str is an empty string" do + a = "hello" + a[0, 2] = "" + a.should == "llo" + end + + it "deletes characters up to the maximum length of the existing string" do + a = "hello" + a[0, 6] = "x" + a.should == "x" + a = "hello" + a[0, 100] = "" + a.should == "" + end + + it "appends other_str to the end of the string if idx == the length of the string" do + a = "hello" + a[5, 0] = "bob" + a.should == "hellobob" + end + + ruby_version_is ''...'2.7' do + it "taints self if other_str is tainted" do + a = "hello" + a[0, 0] = "".taint + a.should.tainted? + + a = "hello" + a[1, 4] = "x".taint + a.should.tainted? 
+ end + end + + it "calls #to_int to convert the index and count objects" do + index = mock("string element set index") + index.should_receive(:to_int).and_return(-4) + + count = mock("string element set count") + count.should_receive(:to_int).and_return(2) + + str = "abcde" + str[index, count] = "xyz" + str.should == "axyzde" + end + + it "raises a TypeError if #to_int for index does not return an Integer" do + index = mock("string element set index") + index.should_receive(:to_int).and_return("1") + + -> { "abc"[index, 2] = "xyz" }.should raise_error(TypeError) + end + + it "raises a TypeError if #to_int for count does not return an Integer" do + count = mock("string element set count") + count.should_receive(:to_int).and_return("1") + + -> { "abc"[1, count] = "xyz" }.should raise_error(TypeError) + end + + it "calls #to_str to convert the replacement object" do + r = mock("string element set replacement") + r.should_receive(:to_str).and_return("xyz") + + str = "abcde" + str[2, 2] = r + str.should == "abxyze" + end + + it "raises a TypeError of #to_str does not return a String" do + r = mock("string element set replacement") + r.should_receive(:to_str).and_return(nil) + + -> { "abc"[1, 1] = r }.should raise_error(TypeError) + end + + it "raises an IndexError if |idx| is greater than the length of the string" do + -> { "hello"[6, 0] = "bob" }.should raise_error(IndexError) + -> { "hello"[-6, 0] = "bob" }.should raise_error(IndexError) + end + + it "raises an IndexError if count < 0" do + -> { "hello"[0, -1] = "bob" }.should raise_error(IndexError) + -> { "hello"[1, -1] = "bob" }.should raise_error(IndexError) + end + + it "raises a TypeError if other_str is a type other than String" do + -> { "hello"[0, 2] = nil }.should raise_error(TypeError) + -> { "hello"[0, 2] = [] }.should raise_error(TypeError) + -> { "hello"[0, 2] = 33 }.should raise_error(TypeError) + end + + it "replaces characters with a multibyte character" do + str = "ありgaとう" + str[2, 2] = "が" + 
str.should == "ありがとう" + end + + it "replaces multibyte characters with characters" do + str = "ありがとう" + str[2, 1] = "ga" + str.should == "ありgaとう" + end + + it "replaces multibyte characters with multibyte characters" do + str = "ありがとう" + str[2, 1] = "か" + str.should == "ありかとう" + end + + it "deletes a multibyte character" do + str = "ありとう" + str[2, 2] = "" + str.should == "あり" + end + + it "inserts a multibyte character" do + str = "ありとう" + str[2, 0] = "が" + str.should == "ありがとう" + end + + it "raises an IndexError if the character index is out of range of a multibyte String" do + -> { "あれ"[3, 0] = "り" }.should raise_error(IndexError) + end + + # NATFIXME: Implement Encoding::US_ASCII + xit "encodes the String in an encoding compatible with the replacement" do + str = " ".force_encoding Encoding::US_ASCII + rep = [160].pack('C').force_encoding Encoding::BINARY + str[0, 1] = rep + str.encoding.should equal(Encoding::BINARY) + end + + # NATFIXME: Implement encoding compatibility (Encoding::compatible?) 
+  xit "raises an Encoding::CompatibilityError if the replacement encoding is incompatible" do
+    str = "あれ"
+    rep = "が".encode Encoding::EUC_JP
+    -> { str[0, 1] = rep }.should raise_error(Encoding::CompatibilityError)
+  end
+end
diff --git a/src/string_object.cpp b/src/string_object.cpp
index 74c8103285..eada2693b3 100644
--- a/src/string_object.cpp
+++ b/src/string_object.cpp
@@ -616,6 +616,135 @@ size_t StringObject::byte_index_to_char_index(ArrayObject *chars, size_t byte_in
     return char_index;
 }
 
+// Implements String#[]=: replaces part of this string with `value`.
+//
+// arg1 selects the part to replace: an Integer index (optionally with a
+// length in arg2), a Range, a Regexp (optionally with a capture index in
+// arg2) or a String to search for. With only two arguments the replacement
+// arrives in arg2, so it is moved into `value` first.
+//
+// begin/end are used as character positions into chars(env); the regexp
+// branch assumes MatchData#offset reports character offsets (TODO confirm).
+// Raises on invalid arguments; returns the replacement that was passed in.
+Value StringObject::refeq(Env *env, Value arg1, Value arg2, Value value) {
+    assert_not_frozen(env);
+
+    if (value == nullptr) {
+        value = arg2;
+        arg2 = nullptr;
+    }
+    if (value == nullptr)
+        env->raise("ArgumentError", "wrong number of arguments (given 1, expected 2..3)");
+
+    auto chars = this->chars(env);
+    const auto char_count = (nat_int_t)chars->size();
+
+    auto process_begin = [char_count, env](nat_int_t begin) -> nat_int_t {
+        nat_int_t start = begin;
+        if (begin < 0)
+            start += char_count;
+
+        if (start < 0 || start > char_count)
+            env->raise("IndexError", "index {} out of string", begin);
+
+        return start;
+    };
+
+    auto get_end_by_length = [env](nat_int_t begin, Value length_argument) -> nat_int_t {
+        if (!length_argument)
+            return begin + 1;
+
+        auto length = IntegerObject::convert_to_nat_int_t(env, length_argument);
+        if (length < 0)
+            env->raise("IndexError", "negative length {}", length);
+        return begin + length;
+    };
+
+    nat_int_t begin;
+    nat_int_t end = -1;
+    if (arg1.is_fast_integer()) {
+        begin = process_begin(arg1.get_fast_integer());
+        end = get_end_by_length(begin, arg2);
+    } else if (arg1->is_range()) {
+        if (arg2)
+            env->raise("TypeError", "no implicit conversion of Range into Integer");
+        auto range = arg1->as_range();
+        begin = IntegerObject::convert_to_nat_int_t(env, range->begin());
+
+        // begin may be anywhere in -char_count..char_count; begin == char_count
+        // appends and begin == -char_count addresses the first character
+        if (begin < -char_count || begin > char_count)
+            env->raise("RangeError", "{} out of range", arg1->inspect_str(env));
+        if (begin < 0)
+            begin += char_count;
+
+        end = IntegerObject::convert_to_nat_int_t(env, range->end());
+        if (end < 0)
+            end += char_count;
+        if (!range->exclude_end())
+            ++end;
+
+        // an end in front of begin is a zero count (pure insertion at begin)
+        if (end < begin)
+            end = begin;
+    } else if (arg1->is_regexp()) {
+        auto regexp = arg1->as_regexp();
+        auto match_result_value = regexp->match(env, this);
+        if (match_result_value->is_nil())
+            env->raise("IndexError", "regexp not matched");
+        auto match_result = match_result_value->as_match_data();
+
+        nat_int_t match_index_argument = 0;
+        if (arg2)
+            match_index_argument = IntegerObject::convert_to_nat_int_t(env, arg2);
+
+        if (::abs(match_index_argument) >= (nat_int_t)match_result->size())
+            env->raise("IndexError", "index {} out of regexp", match_index_argument);
+
+        nat_int_t match_index = match_index_argument;
+        if (match_index_argument < 0)
+            match_index += match_result->size();
+
+        auto offset = match_result->offset(env, Value::integer(match_index))->as_array();
+        if (offset->at(0)->is_nil())
+            env->raise("IndexError", "regexp group {} not matched", match_index);
+
+        begin = IntegerObject::convert_to_nat_int_t(env, offset->at(0));
+        end = IntegerObject::convert_to_nat_int_t(env, offset->at(1));
+    } else if (arg1->is_string()) {
+        if (arg2)
+            env->raise("TypeError", "no implicit conversion of String into Integer");
+        auto query = arg1->as_string()->string();
+        begin = m_string.find(query);
+        if (begin == -1)
+            env->raise("IndexError", "string not matched");
+        begin = byte_index_to_char_index(chars, (size_t)begin);
+        end = begin + arg1->as_string()->chars(env)->size();
+    } else {
+        begin = process_begin(IntegerObject::convert_to_nat_int_t(env, arg1));
+        end = get_end_by_length(begin, arg2);
+    }
+
+    // never remove past the end of the string, whatever length/range was given
+    if (end > char_count)
+        end = char_count;
+
+    auto string = value->to_str(env);
+    auto arg_chars = string->chars(env);
+
+    // rebuild: characters before begin, the replacement, characters from end on
+    StringObject result;
+    for (size_t i = 0; i < (size_t)begin; ++i)
+        result.append(env, (*chars)[i]);
+    for (size_t i = 0; i < arg_chars->size(); ++i)
+        result.append(env, (*arg_chars)[i]);
+    for (size_t i = (size_t)end; i < chars->size(); ++i)
+        result.append(env, (*chars)[i]);
+    m_string = result.string();
+
+    return value;
+}
+
 Value StringObject::sub(Env *env, Value find, Value replacement_value, Block *block) {
     if (!block && !replacement_value)
         env->raise("ArgumentError", "wrong number of arguments (given 1, expected 2)");
diff --git a/test/natalie/string_test.rb b/test/natalie/string_test.rb
index 4e557bcb91..ec692b669d 100644
--- a/test/natalie/string_test.rb
+++ b/test/natalie/string_test.rb
@@ -214,6 +214,12 @@
     end
   end
 
+  describe '#[]=' do
+    it 'returns passed string' do
+      ("abc"[1] = "x").should == "x"
+    end
+  end
+
   describe '#succ' do
     context 'given a single character' do
       it 'returns the next character' do