oracle · nirvdrum · Jun 17, 2023 · Jun 20, 2023 · Jun 21, 2023 · Aug 8, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,7 +6,7 @@ New features:
 Bug fixes:
 
 * Fix `Dir.glob` returning blank string entry with leading `**/` in glob and `base:` argument (@rwstauner).
-* Fix class lookup after an object's class has been replaced by `IO#reopen` (@itarato, @eregon).
+* Fix class lookup after an object's class has been replaced by `IO#reopen` (@itarato, @nirvdrum, @eregon).
 * Fix `Marshal.load` and raise `ArgumentError` when dump is broken and is too short (#3108, @andrykonchin).
 * Fix `super` method lookup for unbounded attached methods (#3131,  @itarato).
 * Fix `Module#define_method(name, Method)` to respect `module_function` visibility (#3181, @andrykonchin).
@@ -41,6 +41,7 @@ Compatibility:
 Performance:
 
 * Improve `Truffle::FeatureLoader.loaded_feature_path` by removing expensive string ops from a loop. Speeds up feature lookup time (#3010, @itarato).
+* Improve `String#-@` performance by reducing unnecessary data copying and supporting substring lookups (@nirvdrum).
 
 Changes:
 

diff --git a/doc/user/truffleruby-additions.md b/doc/user/truffleruby-additions.md
@@ -60,7 +60,7 @@ TruffleRuby provides these non-standard methods and classes that provide additio
 
 ### Concurrent Maps
 
-`TruffleRuby::ConcurrentMap` is a key-value data structure, like a `Hash` and using `#hash` and `#eql?` to compare keys and identity to compare values. Unlike `Hash` it is unordered. All methods on `TruffleRuby::ConcurrentMap` are thread-safe but should have higher concurrency than a fully syncronized implementation. It is intended to be used by gems such as [`concurrent-ruby`](https://github.com/ruby-concurrency/concurrent-ruby) - please use via this gem rather than using directly.
+`TruffleRuby::ConcurrentMap` is a key-value data structure, like a `Hash` and using `#hash` and `#eql?` to compare keys and identity to compare values. Unlike `Hash` it is unordered. All methods on `TruffleRuby::ConcurrentMap` are thread-safe but should have higher concurrency than a fully synchronized implementation. It is intended to be used by gems such as [`concurrent-ruby`](https://github.com/ruby-concurrency/concurrent-ruby) - please use via this gem rather than using directly.
 
 * `map = TruffleRuby::ConcurrentMap.new([initial_capacity: ...], [load_factor: ...])`
 

diff --git a/spec/ruby/core/io/new_spec.rb b/spec/ruby/core/io/new_spec.rb
@@ -1,7 +1,7 @@
 require_relative '../../spec_helper'
 require_relative 'shared/new'
 
-# NOTE: should be syncronized with library/stringio/initialize_spec.rb
+# NOTE: should be synchronized with library/stringio/initialize_spec.rb
 
 describe "IO.new" do
   it_behaves_like :io_new, :new

diff --git a/spec/ruby/core/io/shared/new.rb b/spec/ruby/core/io/shared/new.rb
@@ -1,6 +1,6 @@
 require_relative '../fixtures/classes'
 
-# NOTE: should be syncronized with library/stringio/initialize_spec.rb
+# NOTE: should be synchronized with library/stringio/initialize_spec.rb
 
 # This group of specs may ONLY contain specs that do successfully create
 # an IO instance from the file descriptor returned by #new_fd helper.

diff --git a/src/main/java/org/truffleruby/RubyLanguage.java b/src/main/java/org/truffleruby/RubyLanguage.java
@@ -33,6 +33,7 @@
 import com.oracle.truffle.api.source.Source;
 import com.oracle.truffle.api.source.SourceSection;
 import com.oracle.truffle.api.strings.AbstractTruffleString;
+import com.oracle.truffle.api.strings.InternalByteArray;
 import com.oracle.truffle.api.strings.TruffleString;
 import org.graalvm.options.OptionDescriptors;
 import org.truffleruby.annotations.SuppressFBWarnings;
@@ -788,6 +789,11 @@ public ImmutableRubyString getFrozenStringLiteral(TruffleString tstring, RubyEnc
         return frozenStringLiterals.getFrozenStringLiteral(tstring, encoding);
     }
 
+    public ImmutableRubyString getFrozenStringLiteral(InternalByteArray byteArray, boolean isImmutable,
+            RubyEncoding encoding) {
+        return frozenStringLiterals.getFrozenStringLiteral(byteArray, isImmutable, encoding);
+    }
+
     public long getNextObjectID() {
         final long id = nextObjectID.getAndAdd(ObjectSpaceManager.OBJECT_ID_INCREMENT_BY);
 

diff --git a/src/main/java/org/truffleruby/core/encoding/TStringUtils.java b/src/main/java/org/truffleruby/core/encoding/TStringUtils.java
@@ -44,9 +44,13 @@ public static TruffleString.Encoding jcodingToTEncoding(Encoding jcoding) {
     }
 
     public static TruffleString fromByteArray(byte[] bytes, TruffleString.Encoding tencoding) {
+        return fromByteArray(bytes, 0, bytes.length, tencoding);
+    }
+
+    public static TruffleString fromByteArray(byte[] bytes, int offset, int length, TruffleString.Encoding tencoding) {
         CompilerAsserts.neverPartOfCompilation(
                 "Use createString(TruffleString.FromByteArrayNode, byte[], RubyEncoding) instead");
-        return TruffleString.fromByteArrayUncached(bytes, 0, bytes.length, tencoding, false);
+        return TruffleString.fromByteArrayUncached(bytes, offset, length, tencoding, false);
     }
 
     public static TruffleString fromByteArray(byte[] bytes, RubyEncoding rubyEncoding) {
@@ -75,8 +79,7 @@ public static TruffleString fromJavaString(String javaString, RubyEncoding encod
     public static byte[] getBytesOrCopy(AbstractTruffleString tstring, RubyEncoding encoding) {
         CompilerAsserts.neverPartOfCompilation("uncached");
         var bytes = tstring.getInternalByteArrayUncached(encoding.tencoding);
-        if (tstring instanceof TruffleString && bytes.getOffset() == 0 &&
-                bytes.getLength() == bytes.getArray().length) {
+        if (tstring.isImmutable() && bytes.getOffset() == 0 && bytes.getLength() == bytes.getArray().length) {
             return bytes.getArray();
         } else {
             return ArrayUtils.extractRange(bytes.getArray(), bytes.getOffset(), bytes.getEnd());
@@ -88,8 +91,8 @@ public static byte[] getBytesOrCopy(Node node, AbstractTruffleString tstring, Tr
             TruffleString.GetInternalByteArrayNode getInternalByteArrayNode,
             InlinedConditionProfile noCopyProfile) {
         var bytes = getInternalByteArrayNode.execute(tstring, encoding);
-        if (noCopyProfile.profile(node, tstring instanceof TruffleString && bytes.getOffset() == 0 &&
-                bytes.getLength() == bytes.getArray().length)) {
+        if (noCopyProfile.profile(node,
+                tstring.isImmutable() && bytes.getOffset() == 0 && bytes.getLength() == bytes.getArray().length)) {
             return bytes.getArray();
         } else {
             return ArrayUtils.extractRange(bytes.getArray(), bytes.getOffset(), bytes.getEnd());
@@ -149,4 +152,10 @@ public static String toJavaStringOrThrow(AbstractTruffleString tstring, RubyEnco
             return tstring.toJavaStringUncached();
         }
     }
+
+    /** Returns {@code true} when the given string is either immutable or native. Callers use the result to
+     * decide whether the string's internal byte array may be retained as-is or must be copied first. */
+    public static boolean hasImmutableInternalByteArray(AbstractTruffleString string) {
+        // Immutable strings trivially have immutable byte arrays.
+        // Native strings also have immutable byte arrays because we need to copy the data into Java.
+        return string.isImmutable() || string.isNative();
+    }
 }
diff --git a/src/main/java/org/truffleruby/core/string/FrozenStringLiterals.java b/src/main/java/org/truffleruby/core/string/FrozenStringLiterals.java
@@ -11,6 +11,7 @@
 
 import com.oracle.truffle.api.CompilerDirectives;
 import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
+import com.oracle.truffle.api.strings.InternalByteArray;
 import com.oracle.truffle.api.strings.TruffleString;
 import org.truffleruby.collections.WeakValueCache;
 import org.truffleruby.core.encoding.RubyEncoding;
@@ -37,25 +38,23 @@ public FrozenStringLiterals(TStringCache tStringCache) {
 
     @TruffleBoundary
     public ImmutableRubyString getFrozenStringLiteral(TruffleString tstring, RubyEncoding encoding) {
-        if (tstring.isNative()) {
-            throw CompilerDirectives.shouldNotReachHere();
-        }
-
-        return getFrozenStringLiteral(TStringUtils.getBytesOrCopy(tstring, encoding), encoding);
+        return getFrozenStringLiteral(tstring.getInternalByteArrayUncached(encoding.tencoding),
+                TStringUtils.hasImmutableInternalByteArray(tstring),
+                encoding);
     }
 
     @TruffleBoundary
-    public ImmutableRubyString getFrozenStringLiteral(byte[] bytes, RubyEncoding encoding) {
+    public ImmutableRubyString getFrozenStringLiteral(InternalByteArray byteArray, boolean isImmutable,
+            RubyEncoding encoding) {
         // Ensure all ImmutableRubyString have a TruffleString from the TStringCache
-        var cachedTString = tstringCache.getTString(bytes, encoding);
+        var cachedTString = tstringCache.getTString(byteArray, isImmutable, encoding);
         var tstringWithEncoding = new TStringWithEncoding(cachedTString, encoding);
 
         final ImmutableRubyString string = values.get(tstringWithEncoding);
         if (string != null) {
             return string;
         } else {
-            return values.addInCacheIfAbsent(tstringWithEncoding,
-                    new ImmutableRubyString(cachedTString, encoding));
+            return values.addInCacheIfAbsent(tstringWithEncoding, new ImmutableRubyString(cachedTString, encoding));
         }
     }
 

diff --git a/src/main/java/org/truffleruby/core/string/StringNodes.java b/src/main/java/org/truffleruby/core/string/StringNodes.java
@@ -4357,10 +4357,11 @@ public abstract static class InternNode extends PrimitiveArrayArgumentsNode {
         @Specialization
         protected ImmutableRubyString internString(RubyString string,
                 @Cached RubyStringLibrary libString,
-                @Cached TruffleString.AsManagedNode asManagedNode) {
+                @Cached TruffleString.GetInternalByteArrayNode getInternalByteArrayNode) {
             var encoding = libString.getEncoding(string);
-            TruffleString immutableManagedString = asManagedNode.execute(string.tstring, encoding.tencoding);
-            return getLanguage().getFrozenStringLiteral(immutableManagedString, encoding);
+            var byteArray = getInternalByteArrayNode.execute(string.tstring, encoding.tencoding);
+            return getLanguage().getFrozenStringLiteral(byteArray,
+                    TStringUtils.hasImmutableInternalByteArray(string.tstring), encoding);
         }
     }
 

diff --git a/src/main/java/org/truffleruby/core/string/TBytesKey.java b/src/main/java/org/truffleruby/core/string/TBytesKey.java
@@ -12,19 +12,44 @@
 import java.util.Arrays;
 import java.util.Objects;
 
+import com.oracle.truffle.api.strings.InternalByteArray;
 import com.oracle.truffle.api.strings.TruffleString;
+import org.truffleruby.core.array.ArrayUtils;
 import org.truffleruby.core.encoding.RubyEncoding;
+import org.truffleruby.core.encoding.TStringUtils;
 
 public final class TBytesKey {
 
     private final byte[] bytes;
+    private final int offset;
+    private final int length;
     private RubyEncoding encoding;
     private final int bytesHashCode;
 
-    public TBytesKey(byte[] bytes, RubyEncoding encoding) {
+    public TBytesKey(
+            byte[] bytes,
+            int offset,
+            int length,
+            int bytesHashCode,
+            RubyEncoding encoding) {
         this.bytes = bytes;
+        this.offset = offset;
+        this.length = length;
+        this.bytesHashCode = bytesHashCode;
         this.encoding = encoding;
-        this.bytesHashCode = Arrays.hashCode(bytes);
+    }
+
+    public TBytesKey(byte[] bytes, RubyEncoding encoding) {
+        this(bytes, 0, bytes.length, Arrays.hashCode(bytes), encoding);
+    }
+
+    public TBytesKey(InternalByteArray byteArray, RubyEncoding encoding) {
+        this(
+                byteArray.getArray(),
+                byteArray.getOffset(),
+                byteArray.getLength(),
+                hashCode(byteArray),
+                encoding);
     }
 
     @Override
@@ -37,15 +62,15 @@ public boolean equals(Object o) {
         if (o instanceof TBytesKey) {
             final TBytesKey other = (TBytesKey) o;
             if (encoding == null) {
-                if (Arrays.equals(bytes, other.bytes)) {
+                if (equalBytes(this, other)) {
                     // For getMatchedEncoding()
                     this.encoding = Objects.requireNonNull(other.encoding);
                     return true;
                 } else {
                     return false;
                 }
             } else {
-                return encoding == other.encoding && Arrays.equals(bytes, other.bytes);
+                return encoding == other.encoding && equalBytes(this, other);
             }
         }
 
@@ -62,4 +87,51 @@ public String toString() {
         return TruffleString.fromByteArrayUncached(bytes, encoding, false).toString();
     }
 
+    private static int hashCode(InternalByteArray byteArray) {
+        return hashCode(byteArray.getArray(), byteArray.getOffset(), byteArray.getLength());
+    }
+
+    // A variant of <code>Arrays.hashCode</code> that allows for selecting a range within the array.
+    // It uses the same seed (1), multiplier (31) and null result (0) as Arrays.hashCode(byte[]), so a range
+    // covering the whole array hashes identically to Arrays.hashCode on that array.
+    private static int hashCode(byte[] bytes, int offset, int length) {
+        if (bytes == null) {
+            return 0;
+        }
+
+        int result = 1;
+        // Only the bytes in [offset, offset + length) contribute to the hash.
+        for (int i = offset; i < offset + length; i++) {
+            result = 31 * result + bytes[i];
+        }
+
+        return result;
+    }
+
+    /** Compares the byte ranges referenced by two keys, ignoring their encodings. Declared {@code static}
+     * because it reads only its two arguments and never the receiver. */
+    private static boolean equalBytes(TBytesKey a, TBytesKey b) {
+        // Fast path: both keys span their entire backing array, so the arrays can be compared directly.
+        if (a.isPerfectFit() && b.isPerfectFit()) {
+            return Arrays.equals(a.bytes, b.bytes);
+        }
+
+        // General case: compare only the [offset, offset + length) window of each backing array.
+        return Arrays.equals(a.bytes, a.offset, a.offset + a.length, b.bytes, b.offset, b.offset + b.length);
+    }
+
+    /** Returns {@code true} when this key's range covers its whole backing array, i.e. it starts at index 0
+     * and its length equals the array length. */
+    private boolean isPerfectFit() {
+        return offset == 0 && length == bytes.length;
+    }
+
+    /** Returns a key that is safe to store long-term in a cache.
+     *
+     * @param isImmutable whether the backing byte array is guaranteed not to change
+     * @return a new key sharing the backing array when it is immutable and fully covered by this key,
+     *         otherwise a new key over a private copy of just the referenced range */
+    public TBytesKey makeCacheable(boolean isImmutable) {
+        if (isImmutable && isPerfectFit()) {
+            // The bytes cannot change and the range is the whole array, so the hash already stored in this key
+            // is still valid; reuse it instead of re-hashing the entire array.
+            // NOTE(review): assumes bytesHashCode was computed over this key's range, as both convenience
+            // constructors do.
+            return new TBytesKey(bytes, offset, length, bytesHashCode, encoding);
+        }
+
+        // Copy only the referenced range so the cached key neither pins a larger backing array nor sees later
+        // mutation. The hash is deliberately recomputed from the copy rather than reused, because a mutable
+        // source may have changed since this key was hashed.
+        var simplified = ArrayUtils.extractRange(this.bytes, this.offset, this.offset + this.length);
+        return new TBytesKey(simplified, encoding);
+    }
+
+    /** Returns a new key over the same backing array, range and precomputed hash as this one, but carrying
+     * the given encoding (which may be {@code null}). No bytes are copied or re-hashed. */
+    public TBytesKey withNewEncoding(RubyEncoding encoding) {
+        return new TBytesKey(bytes, offset, length, bytesHashCode, encoding);
+    }
+
+    /** Builds a TruffleString from this key's byte range using the key's encoding. Dereferences
+     * {@code encoding}, so it must not be called on a key whose encoding is {@code null}. */
+    public TruffleString toTruffleString() {
+        return TStringUtils.fromByteArray(bytes, offset, length, encoding.tencoding);
+    }
+
 }
diff --git a/src/main/java/org/truffleruby/core/string/TStringCache.java b/src/main/java/org/truffleruby/core/string/TStringCache.java
@@ -9,6 +9,7 @@
  */
 package org.truffleruby.core.string;
 
+import com.oracle.truffle.api.strings.InternalByteArray;
 import com.oracle.truffle.api.strings.TruffleString;
 import org.truffleruby.collections.WeakValueCache;
 import org.truffleruby.core.encoding.Encodings;
@@ -69,20 +70,38 @@ private void register(TruffleString tstring, RubyEncoding encoding) {
         }
     }
 
-    public TruffleString getTString(TruffleString string, RubyEncoding encoding) {
-        return getTString(TStringUtils.getBytesOrCopy(string, encoding), encoding);
+    /** Returns the cached TruffleString for the bytes of {@code string} in {@code rubyEncoding}. The lookup key
+     * is built directly over the string's internal byte array, so no copy is made just to perform the lookup. */
+    @TruffleBoundary
+    public TruffleString getTString(TruffleString string, RubyEncoding rubyEncoding) {
+        assert rubyEncoding != null;
+
+        var byteArray = string.getInternalByteArrayUncached(rubyEncoding.tencoding);
+        final TBytesKey key = new TBytesKey(byteArray, rubyEncoding);
+
+        // The immutability flag is passed along with the key to the shared lookup.
+        return getTString(key, TStringUtils.hasImmutableInternalByteArray(string));
+    }
+
+    /** Returns the cached TruffleString for the given byte range in {@code rubyEncoding}.
+     *
+     * @param byteArray the array, offset and length describing the bytes to look up
+     * @param isImmutable whether the backing array of {@code byteArray} is guaranteed not to change
+     * @param rubyEncoding the encoding of the bytes; must not be {@code null} */
+    @TruffleBoundary
+    public TruffleString getTString(InternalByteArray byteArray, boolean isImmutable, RubyEncoding rubyEncoding) {
+        assert rubyEncoding != null;
+
+        return getTString(new TBytesKey(byteArray, rubyEncoding), isImmutable);
     }
 
     @TruffleBoundary
     public TruffleString getTString(byte[] bytes, RubyEncoding rubyEncoding) {
         assert rubyEncoding != null;
 
-        final TBytesKey key = new TBytesKey(bytes, rubyEncoding);
+        return getTString(new TBytesKey(bytes, rubyEncoding), true);
+    }
+
+    @TruffleBoundary
+    private TruffleString getTString(TBytesKey lookupKey, boolean isLookupKeyImmutable) {
+        final TruffleString tstring = bytesToTString.get(lookupKey);
+        var rubyEncoding = lookupKey.getMatchedEncoding();
 
-        final TruffleString tstring = bytesToTString.get(key);
         if (tstring != null) {
             ++tstringsReusedCount;
-            tstringBytesSaved += tstring.byteLength(rubyEncoding.tencoding);
+            tstringBytesSaved += tstring.byteLength(lookupKey.getMatchedEncoding().tencoding);
 
             return tstring;
         }
@@ -92,7 +111,7 @@ public TruffleString getTString(byte[] bytes, RubyEncoding rubyEncoding) {
         // reference equality optimizations. So, do another search but with a marker encoding. The only guarantee
         // we can make about the resulting TruffleString is that it would have the same logical byte[], but that's good enough
         // for our purposes.
-        TBytesKey keyNoEncoding = new TBytesKey(bytes, null);
+        TBytesKey keyNoEncoding = lookupKey.withNewEncoding(null);
         final TruffleString tstringWithSameBytesButDifferentEncoding = bytesToTString.get(keyNoEncoding);
 
         final TruffleString newTString;
@@ -104,12 +123,11 @@ public TruffleString getTString(byte[] bytes, RubyEncoding rubyEncoding) {
             ++byteArrayReusedCount;
             tstringBytesSaved += newTString.byteLength(rubyEncoding.tencoding);
         } else {
-            newTString = TStringUtils.fromByteArray(bytes, rubyEncoding);
+            newTString = lookupKey.toTruffleString();
         }
 
         // Use the new TruffleString bytes in the cache, so we do not keep bytes alive unnecessarily.
-        final TBytesKey newKey = new TBytesKey(TStringUtils.getBytesOrCopy(newTString, rubyEncoding), rubyEncoding);
-        return bytesToTString.addInCacheIfAbsent(newKey, newTString);
+        return bytesToTString.addInCacheIfAbsent(lookupKey.makeCacheable(isLookupKeyImmutable), newTString);
     }
 
     public boolean contains(TruffleString string, RubyEncoding encoding) {