From c28bec4d3d63849c9e60dee1e7174b9a180a7e55 Mon Sep 17 00:00:00 2001 From: Andrew J Said Date: Mon, 11 Dec 2023 08:56:10 +0000 Subject: [PATCH] Add an additional length check to default FrozenDictionary and FrozenSet (#92546) * Add an additional length check to FrozenDictionary and FrozenSet On construction of the collection, we compute an unsigned long which is effectively 64 boolean flags, each representing the presence of a key string of a particular length (mod 64). When reading from the collection, we can exit early if the key being tested does not map to a bit which has been switched on by the original computation. I believe this has similarities to how Bloom Filters work. This adds a relatively small cost on creation of the collection and a small cost to each read operation. However, it can speed up reads with certain data patterns, especially when the difference between the maximum and minimum key length is large but there aren't many different lengths. --- .../Collections/Frozen/FrozenDictionary.cs | 8 ++++--- .../System/Collections/Frozen/FrozenSet.cs | 8 ++++--- .../String/OrdinalStringFrozenDictionary.cs | 22 +++++++++++-------- .../OrdinalStringFrozenDictionary_Full.cs | 7 +++++- ...ingFrozenDictionary_FullCaseInsensitive.cs | 7 +++++- ...ozenDictionary_FullCaseInsensitiveAscii.cs | 7 +++++- .../Frozen/String/OrdinalStringFrozenSet.cs | 22 +++++++++++-------- .../String/OrdinalStringFrozenSet_Full.cs | 7 +++++- ...inalStringFrozenSet_FullCaseInsensitive.cs | 7 +++++- ...tringFrozenSet_FullCaseInsensitiveAscii.cs | 7 +++++- 10 files changed, 72 insertions(+), 30 deletions(-) diff --git a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/FrozenDictionary.cs b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/FrozenDictionary.cs index e4fbdcef00b3c..dc06ca0cd9287 100644 --- a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/FrozenDictionary.cs +++ 
b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/FrozenDictionary.cs @@ -161,10 +161,12 @@ private static FrozenDictionary CreateFromDictionary // Calculate the minimum and maximum lengths of the strings in the dictionary. Several of the analyses need this. int minLength = int.MaxValue, maxLength = 0; + ulong lengthFilter = 0; foreach (string key in keys) { if (key.Length < minLength) minLength = key.Length; if (key.Length > maxLength) maxLength = key.Length; + lengthFilter |= (1UL << (key.Length % 64)); } Debug.Assert(minLength >= 0 && maxLength >= minLength); @@ -215,12 +217,12 @@ private static FrozenDictionary CreateFromDictionary if (analysis.IgnoreCase) { frozenDictionary = analysis.AllAsciiIfIgnoreCase - ? new OrdinalStringFrozenDictionary_FullCaseInsensitiveAscii(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff) - : new OrdinalStringFrozenDictionary_FullCaseInsensitive(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff); + ? 
new OrdinalStringFrozenDictionary_FullCaseInsensitiveAscii(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter) + : new OrdinalStringFrozenDictionary_FullCaseInsensitive(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter); } else { - frozenDictionary = new OrdinalStringFrozenDictionary_Full(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff); + frozenDictionary = new OrdinalStringFrozenDictionary_Full(keys, values, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter); } } diff --git a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/FrozenSet.cs b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/FrozenSet.cs index 8c315f214fe03..50bbdeb18aad6 100644 --- a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/FrozenSet.cs +++ b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/FrozenSet.cs @@ -109,10 +109,12 @@ private static FrozenSet CreateFromSet(HashSet source) // Calculate the minimum and maximum lengths of the strings in the set. Several of the analyses need this. int minLength = int.MaxValue, maxLength = 0; + ulong lengthFilter = 0; foreach (string s in entries) { if (s.Length < minLength) minLength = s.Length; if (s.Length > maxLength) maxLength = s.Length; + lengthFilter |= (1UL << (s.Length % 64)); } Debug.Assert(minLength >= 0 && maxLength >= minLength); @@ -163,12 +165,12 @@ private static FrozenSet CreateFromSet(HashSet source) if (analysis.IgnoreCase) { frozenSet = analysis.AllAsciiIfIgnoreCase - ? new OrdinalStringFrozenSet_FullCaseInsensitiveAscii(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff) - : new OrdinalStringFrozenSet_FullCaseInsensitive(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff); + ? 
new OrdinalStringFrozenSet_FullCaseInsensitiveAscii(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter) + : new OrdinalStringFrozenSet_FullCaseInsensitive(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter); } else { - frozenSet = new OrdinalStringFrozenSet_Full(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff); + frozenSet = new OrdinalStringFrozenSet_Full(entries, stringComparer, analysis.MinimumLength, analysis.MaximumLengthDiff, lengthFilter); } } diff --git a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenDictionary.cs b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenDictionary.cs index e510954f7c333..acc20a1b66ca7 100644 --- a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenDictionary.cs +++ b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenDictionary.cs @@ -64,6 +64,7 @@ internal OrdinalStringFrozenDictionary( private protected int HashCount { get; } private protected abstract bool Equals(string? x, string? 
y); private protected abstract int GetHashCode(string s); + private protected virtual bool CheckLengthQuick(string key) => true; private protected override string[] KeysCore => _keys; private protected override TValue[] ValuesCore => _values; private protected override Enumerator GetEnumeratorCore() => new Enumerator(_keys, _values); @@ -74,20 +75,23 @@ private protected override ref readonly TValue GetValueRefOrNullRefCore(string k { if ((uint)(key.Length - _minimumLength) <= (uint)_maximumLengthDiff) { - int hashCode = GetHashCode(key); - _hashTable.FindMatchingEntries(hashCode, out int index, out int endIndex); - - while (index <= endIndex) + if (CheckLengthQuick(key)) { - if (hashCode == _hashTable.HashCodes[index]) + int hashCode = GetHashCode(key); + _hashTable.FindMatchingEntries(hashCode, out int index, out int endIndex); + + while (index <= endIndex) { - if (Equals(key, _keys[index])) + if (hashCode == _hashTable.HashCodes[index]) { - return ref _values[index]; + if (Equals(key, _keys[index])) + { + return ref _values[index]; + } } - } - index++; + index++; + } } } diff --git a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenDictionary_Full.cs b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenDictionary_Full.cs index 56ce7ff720bd5..03da234352f32 100644 --- a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenDictionary_Full.cs +++ b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenDictionary_Full.cs @@ -7,14 +7,18 @@ namespace System.Collections.Frozen { internal sealed class OrdinalStringFrozenDictionary_Full : OrdinalStringFrozenDictionary { + private readonly ulong _lengthFilter; + internal OrdinalStringFrozenDictionary_Full( string[] keys, TValue[] values, IEqualityComparer comparer, int minimumLength, - int maximumLengthDiff) + int maximumLengthDiff, + 
ulong lengthFilter) : base(keys, values, comparer, minimumLength, maximumLengthDiff) { + _lengthFilter = lengthFilter; } // This override is necessary to force the jit to emit the code in such a way that it @@ -24,5 +28,6 @@ internal OrdinalStringFrozenDictionary_Full( private protected override bool Equals(string? x, string? y) => string.Equals(x, y); private protected override int GetHashCode(string s) => Hashing.GetHashCodeOrdinal(s.AsSpan()); + private protected override bool CheckLengthQuick(string key) => (_lengthFilter & (1UL << (key.Length % 64))) > 0; } } diff --git a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenDictionary_FullCaseInsensitive.cs b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenDictionary_FullCaseInsensitive.cs index 3f09ba59dc7ee..9280d82f05d77 100644 --- a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenDictionary_FullCaseInsensitive.cs +++ b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenDictionary_FullCaseInsensitive.cs @@ -7,14 +7,18 @@ namespace System.Collections.Frozen { internal sealed class OrdinalStringFrozenDictionary_FullCaseInsensitive : OrdinalStringFrozenDictionary { + private readonly ulong _lengthFilter; + internal OrdinalStringFrozenDictionary_FullCaseInsensitive( string[] keys, TValue[] values, IEqualityComparer comparer, int minimumLength, - int maximumLengthDiff) + int maximumLengthDiff, + ulong lengthFilter) : base(keys, values, comparer, minimumLength, maximumLengthDiff) { + _lengthFilter = lengthFilter; } // This override is necessary to force the jit to emit the code in such a way that it @@ -24,5 +28,6 @@ internal OrdinalStringFrozenDictionary_FullCaseInsensitive( private protected override bool Equals(string? x, string? 
y) => StringComparer.OrdinalIgnoreCase.Equals(x, y); private protected override int GetHashCode(string s) => Hashing.GetHashCodeOrdinalIgnoreCase(s.AsSpan()); + private protected override bool CheckLengthQuick(string key) => (_lengthFilter & (1UL << (key.Length % 64))) > 0; } } diff --git a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenDictionary_FullCaseInsensitiveAscii.cs b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenDictionary_FullCaseInsensitiveAscii.cs index b029567243ced..f32a7c64fdd8e 100644 --- a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenDictionary_FullCaseInsensitiveAscii.cs +++ b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenDictionary_FullCaseInsensitiveAscii.cs @@ -7,14 +7,18 @@ namespace System.Collections.Frozen { internal sealed class OrdinalStringFrozenDictionary_FullCaseInsensitiveAscii : OrdinalStringFrozenDictionary { + private readonly ulong _lengthFilter; + internal OrdinalStringFrozenDictionary_FullCaseInsensitiveAscii( string[] keys, TValue[] values, IEqualityComparer comparer, int minimumLength, - int maximumLengthDiff) + int maximumLengthDiff, + ulong lengthFilter) : base(keys, values, comparer, minimumLength, maximumLengthDiff) { + _lengthFilter = lengthFilter; } // This override is necessary to force the jit to emit the code in such a way that it @@ -24,5 +28,6 @@ internal OrdinalStringFrozenDictionary_FullCaseInsensitiveAscii( private protected override bool Equals(string? x, string? 
y) => StringComparer.OrdinalIgnoreCase.Equals(x, y); private protected override int GetHashCode(string s) => Hashing.GetHashCodeOrdinalIgnoreCaseAscii(s.AsSpan()); + private protected override bool CheckLengthQuick(string key) => (_lengthFilter & (1UL << (key.Length % 64))) > 0; } } diff --git a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenSet.cs b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenSet.cs index 62ce56ee3472e..278d1ee231b8c 100644 --- a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenSet.cs +++ b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenSet.cs @@ -54,6 +54,7 @@ internal OrdinalStringFrozenSet( private protected int HashCount { get; } private protected abstract bool Equals(string? x, string? y); private protected abstract int GetHashCode(string s); + private protected virtual bool CheckLengthQuick(string key) => true; private protected override string[] ItemsCore => _items; private protected override Enumerator GetEnumeratorCore() => new Enumerator(_items); private protected override int CountCore => _hashTable.Count; @@ -64,20 +65,23 @@ private protected override int FindItemIndex(string item) if (item is not null && // this implementation won't be used for null values (uint)(item.Length - _minimumLength) <= (uint)_maximumLengthDiff) { - int hashCode = GetHashCode(item); - _hashTable.FindMatchingEntries(hashCode, out int index, out int endIndex); - - while (index <= endIndex) + if (CheckLengthQuick(item)) { - if (hashCode == _hashTable.HashCodes[index]) + int hashCode = GetHashCode(item); + _hashTable.FindMatchingEntries(hashCode, out int index, out int endIndex); + + while (index <= endIndex) { - if (Equals(item, _items[index])) + if (hashCode == _hashTable.HashCodes[index]) { - return index; + if (Equals(item, _items[index])) + { + return 
index; + } } - } - index++; + index++; + } } } diff --git a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenSet_Full.cs b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenSet_Full.cs index 9c10bb2dd9b74..098b0ca0d35f8 100644 --- a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenSet_Full.cs +++ b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenSet_Full.cs @@ -7,13 +7,17 @@ namespace System.Collections.Frozen { internal sealed class OrdinalStringFrozenSet_Full : OrdinalStringFrozenSet { + private readonly ulong _lengthFilter; + internal OrdinalStringFrozenSet_Full( string[] entries, IEqualityComparer comparer, int minimumLength, - int maximumLengthDiff) + int maximumLengthDiff, + ulong lengthFilter) : base(entries, comparer, minimumLength, maximumLengthDiff) { + _lengthFilter = lengthFilter; } // This override is necessary to force the jit to emit the code in such a way that it @@ -23,5 +27,6 @@ internal OrdinalStringFrozenSet_Full( private protected override bool Equals(string? x, string? 
y) => string.Equals(x, y); private protected override int GetHashCode(string s) => Hashing.GetHashCodeOrdinal(s.AsSpan()); + private protected override bool CheckLengthQuick(string key) => (_lengthFilter & (1UL << (key.Length % 64))) > 0; } } diff --git a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenSet_FullCaseInsensitive.cs b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenSet_FullCaseInsensitive.cs index 345121f202159..6c9e7b0645c54 100644 --- a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenSet_FullCaseInsensitive.cs +++ b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenSet_FullCaseInsensitive.cs @@ -7,13 +7,17 @@ namespace System.Collections.Frozen { internal sealed class OrdinalStringFrozenSet_FullCaseInsensitive : OrdinalStringFrozenSet { + private readonly ulong _lengthFilter; + internal OrdinalStringFrozenSet_FullCaseInsensitive( string[] entries, IEqualityComparer comparer, int minimumLength, - int maximumLengthDiff) + int maximumLengthDiff, + ulong lengthFilter) : base(entries, comparer, minimumLength, maximumLengthDiff) { + _lengthFilter = lengthFilter; } // This override is necessary to force the jit to emit the code in such a way that it @@ -23,5 +27,6 @@ internal OrdinalStringFrozenSet_FullCaseInsensitive( private protected override bool Equals(string? x, string? 
y) => StringComparer.OrdinalIgnoreCase.Equals(x, y); private protected override int GetHashCode(string s) => Hashing.GetHashCodeOrdinalIgnoreCase(s.AsSpan()); + private protected override bool CheckLengthQuick(string key) => (_lengthFilter & (1UL << (key.Length % 64))) > 0; } } diff --git a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenSet_FullCaseInsensitiveAscii.cs b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenSet_FullCaseInsensitiveAscii.cs index b032da0e7778c..462e4a7eea75b 100644 --- a/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenSet_FullCaseInsensitiveAscii.cs +++ b/src/libraries/System.Collections.Immutable/src/System/Collections/Frozen/String/OrdinalStringFrozenSet_FullCaseInsensitiveAscii.cs @@ -7,13 +7,17 @@ namespace System.Collections.Frozen { internal sealed class OrdinalStringFrozenSet_FullCaseInsensitiveAscii : OrdinalStringFrozenSet { + private readonly ulong _lengthFilter; + internal OrdinalStringFrozenSet_FullCaseInsensitiveAscii( string[] entries, IEqualityComparer comparer, int minimumLength, - int maximumLengthDiff) + int maximumLengthDiff, + ulong lengthFilter) : base(entries, comparer, minimumLength, maximumLengthDiff) { + _lengthFilter = lengthFilter; } // This override is necessary to force the jit to emit the code in such a way that it @@ -23,5 +27,6 @@ internal OrdinalStringFrozenSet_FullCaseInsensitiveAscii( private protected override bool Equals(string? x, string? y) => StringComparer.OrdinalIgnoreCase.Equals(x, y); private protected override int GetHashCode(string s) => Hashing.GetHashCodeOrdinalIgnoreCaseAscii(s.AsSpan()); + private protected override bool CheckLengthQuick(string key) => (_lengthFilter & (1UL << (key.Length % 64))) > 0; } }