Skip to content

Commit dd17b6d

Browse files
authored
Optimize ImmutableHashSet<T>.SetEquals to avoid unnecessary allocations (#126309)
Fixes #90986, Part of #127279 ### Summary `ImmutableHashSet<T>.SetEquals` always creates a new intermediate `HashSet<T>` for the `other` collection, leading to avoidable allocations and GC pressure, especially for large datasets. ### Optimization Logic * **O(1) Pre-Scan**: Immediately returns `false` if `other` is an `ICollection` with a smaller `Count`, avoiding any overhead. * **Fast-Path Pattern Matching**: Detects `ImmutableHashSet<T>` and `HashSet<T>` to bypass intermediate allocations. * **Comparer Guard**: Validates `EqualityComparer` compatibility before triggering fast paths to ensure logical consistency. * **Short-Circuit Validation**: Re-validates `Count` within specialized paths for an immediate exit before $O(n)$ enumeration. * **Reverse-Lookup Strategy**: An architectural shift in which the `ImmutableHashSet<T>` (the source) is enumerated and each element is looked up in the other collection when that collection is a `HashSet<T>`. This leverages the O(1) lookup of `HashSet<T>` instead of the O(log N) lookup of the immutable tree. * **Zero-Allocation Execution**: Direct iteration over compatible collections, eliminating the costly `new HashSet<T>(other)` fallback. * **Deferred fallback**: Reserves the expensive allocation solely for general `IEnumerable` types. 
<details> <summary><b>Click to expand Benchmark Source Code</b></summary> ```csharp using BenchmarkDotNet.Attributes; using BenchmarkDotNet.Order; using BenchmarkDotNet.Running; using System; using System.Collections.Generic; using System.Collections.Immutable; using System.Linq; namespace ImmutableHashSetBenchmarks { [MemoryDiagnoser] [Orderer(SummaryOrderPolicy.FastestToSlowest)] [RankColumn] public class ImmutableHashSetSetEqualsBenchmark_Int { private ImmutableHashSet<int> _sourceSet = null!; private ImmutableHashSet<int> _immutableHashSetEqual = null!; private HashSet<int> _bclHashSetEqual = null!; private List<int> _listEqual = null!; private IEnumerable<int> _linqSelectEqual = null!; private int[] _arrayEqual = null!; private List<int> _listLastDiff = null!; private List<int> _listSmaller = null!; private ImmutableHashSet<int> _immutableLarger = null!; private int[] _smallerArray = null!; private HashSet<int> _smallerHashSetDiffComparer = null!; // Worst case: same count, last element different private ImmutableHashSet<int> _immutableHashSetLastDiff = null!; private HashSet<int> _bclHashSetLastDiff = null!; private List<int> _listWithDuplicates = null!; private List<int> _listWithDuplicatesMatch = null!; // Different comparers (fallback path) private HashSet<int> _bclHashSetDiffComparer = null!; // Count mismatch early exit private ImmutableHashSet<int> _immutableHashSetSmaller = null!; private HashSet<int> _bclHashSetSmaller = null!; // Lazy enumerable for worst case private IEnumerable<int> _lazyEnumerableLastDiff = null!; [Params(100000)] public int Size { get; set; } [GlobalSetup] public void Setup() { var elements = Enumerable.Range(0, Size).ToList(); var elementsWithLastDiff = Enumerable.Range(0, Size - 1).Concat(new[] { Size + 1000 }).ToList(); var smallerElements = Enumerable.Range(0, Size / 2).ToList(); var duplicates = Enumerable.Repeat(1, Size).ToList(); var smallerList = new List<int>(); for(int i = 0; i < Size - 1; i++) smallerList.Add(i); 
_sourceSet = ImmutableHashSet.CreateRange(elements); _immutableHashSetEqual = ImmutableHashSet.CreateRange(elements); _bclHashSetEqual = new HashSet<int>(elements); _listEqual = elements; _linqSelectEqual = elements.Select(x => x); // Lazy LINQ enumerable _arrayEqual = elements.ToArray(); _immutableHashSetLastDiff = ImmutableHashSet.CreateRange(elementsWithLastDiff); _bclHashSetLastDiff = new HashSet<int>(elementsWithLastDiff); _listLastDiff = elementsWithLastDiff; _bclHashSetDiffComparer = new HashSet<int>(elements, new ReverseComparer<int>()); _immutableHashSetSmaller = ImmutableHashSet.CreateRange(smallerElements); _bclHashSetSmaller = new HashSet<int>(smallerElements); _lazyEnumerableLastDiff = elementsWithLastDiff.Select(x => x); _immutableLarger = ImmutableHashSet.CreateRange(elements.Concat(new[] { -1 })); _listWithDuplicates = duplicates; _listWithDuplicatesMatch = elements.Concat(elements).ToList(); // Matches source but with duplicates _listSmaller = smallerList; _smallerArray = Enumerable.Range(0, Size - 1).ToArray(); _smallerHashSetDiffComparer = new HashSet<int>(_listSmaller, new ReverseComparer<int>()); } #region Fast Path: Same Type and Comparer (Optimized) [Benchmark(Description = "ImmutableHashSet (Match - Same Comparer)")] public bool Case_ImmutableHashSet_Match() => _sourceSet.SetEquals(_immutableHashSetEqual); [Benchmark(Description = "BCL HashSet (Match - Same Comparer)")] public bool Case_BclHashSet_Match() => _sourceSet.SetEquals(_bclHashSetEqual); [Benchmark(Description = "ImmutableHashSet (Mismatch - Same Count)")] public bool Case_ImmutableHashSet_LastDiff() => _sourceSet.SetEquals(_immutableHashSetLastDiff); [Benchmark(Description = "Case 04: BCL HashSet (Mismatch - Same Count)")] public bool Case_BclHashSet_LastDiff() => _sourceSet.SetEquals(_bclHashSetLastDiff); #endregion #region Early Exit: Count Mismatch [Benchmark(Description = "ImmutableHashSet (Smaller Count)")] public bool Case_ImmutableHashSet_SmallerCount() => 
_sourceSet.SetEquals(_immutableHashSetSmaller); [Benchmark(Description = "BCL HashSet (Smaller Count)")] public bool Case_BclHashSet_SmallerCount() => _sourceSet.SetEquals(_bclHashSetSmaller); [Benchmark(Description = "Array (Smaller Count)")] public bool Case_SmallerCollection_EarlyExit() { return _sourceSet.SetEquals(_smallerArray); } #endregion #region Fallback Path: Different Comparer [Benchmark(Description = "HashSet (Different Comparer)")] public bool Case_HashSet_DifferentComparer() => _sourceSet.SetEquals(_bclHashSetDiffComparer); [Benchmark(Description = "HashSet (Smaller Count - Different Comparer)")] public bool Case_HashSet_SmallerCount_DiffComparer() => _sourceSet.SetEquals(_smallerHashSetDiffComparer); #endregion #region Fallback Path: Non-Set Collections (IEnumerable/ICollection) [Benchmark(Description = "List (Match - Fallback)")] public bool Case_List_Match() => _sourceSet.SetEquals(_listEqual); [Benchmark(Description = "LINQ (Mismatch - Lazy IEnumerable)")] public bool Case_LazyEnumerable_LastDiff() => _sourceSet.SetEquals(_lazyEnumerableLastDiff); [Benchmark(Description = "LINQ (Match - Lazy IEnumerable)")] public bool Case_LazyEnumerable_Match() => _sourceSet.SetEquals(_linqSelectEqual); [Benchmark(Description = "List (Last Diff - Fallback)")] public bool Case_List_LastDiff() => _sourceSet.SetEquals(_listLastDiff); [Benchmark(Description = "Array (Match - Fallback)")] public bool Case_Array_Match() => _sourceSet.SetEquals(_arrayEqual); [Benchmark(Description = "ImmutableHashSet (Larger Count)")] public bool Case_LargerCount() => _sourceSet.SetEquals(_immutableLarger); #endregion #region Handling Duplicates (Fallback Path) [Benchmark(Description = "List with Duplicates (Mismatch)")] public bool Case_List_Duplicates_Mismatch() => _sourceSet.SetEquals(_listWithDuplicates); [Benchmark(Description = "List with Duplicates (Match)")] public bool Case_List_Duplicates_Match() => _sourceSet.SetEquals(_listWithDuplicatesMatch); #endregion } public class 
ReverseComparer<T> : IEqualityComparer<T> where T : IComparable<T> { public bool Equals(T? x, T? y) { if (x is null && y is null) return true; if (x is null || y is null) return false; return x.CompareTo(y) == 0; } public int GetHashCode(T? obj) { return obj?.GetHashCode() ?? 0; } } public class Program { public static void Main(string[] args) { BenchmarkRunner.Run<ImmutableHashSetSetEqualsBenchmark_Int>(); } } } ``` </details> <details> <summary><b>Click to expand Benchmark Results</b></summary> ### Benchmark Results (Before Optimization) | Method | Size | Mean | Error | StdDev | Rank | Gen0 | Gen1 | Gen2 | Allocated | |:--- |:---:|---:|---:|---:|---:|---:|---:|---:|---:| | 'BCL HashSet (Smaller Count)' | 100000 | 313.8 us | 6.01 us | 6.43 us | 1 | 15.6250 | 15.6250 | 15.6250 | 818.33 KB | | 'Array (Smaller Count)' | 100000 | 647.9 us | 11.20 us | 11.50 us | 2 | 26.3672 | 26.3672 | 26.3672 | 1697.7 KB | | 'List with Duplicates (Mismatch)' | 100000 | 954.1 us | 18.77 us | 41.60 us | 3 | 31.2500 | 31.2500 | 31.2500 | 1697.77 KB | | ' HashSet (Smaller Count - Different Comparer)' | 100000 | 1,449.3 us | 28.65 us | 74.46 us | 4 | 41.0156 | 41.0156 | 41.0156 | 1697.8 KB | | ' ImmutableHashSet (Smaller Count)' | 100000 | 4,733.2 us | 74.18 us | 69.39 us | 5 | 23.4375 | 23.4375 | 23.4375 | 818.58 KB | | ' BCL HashSet (Match - Same Comparer)' | 100000 | 7,084.0 us | 65.02 us | 57.64 us | 6 | 54.6875 | 54.6875 | 54.6875 | 1697.9 KB | | 'Array (Match - Fallback)' | 100000 | 7,821.7 us | 30.71 us | 27.23 us | 7 | 46.8750 | 46.8750 | 46.8750 | 1697.86 KB | | 'List (Match - Fallback)' | 100000 | 8,428.4 us | 30.82 us | 28.83 us | 8 | 46.8750 | 46.8750 | 46.8750 | 1697.9 KB | | 'BCL HashSet (Mismatch - Same Count)' | 100000 | 8,636.3 us | 52.37 us | 46.42 us | 8 | 46.8750 | 46.8750 | 46.8750 | 1697.86 KB | | 'List (Last Diff - Fallback)' | 100000 | 9,172.5 us | 35.85 us | 33.54 us | 9 | 46.8750 | 46.8750 | 46.8750 | 1697.9 KB | | 'List with Duplicates (Match)' | 100000 | 
9,310.2 us | 128.11 us | 119.83 us | 9 | 109.3750 | 109.3750 | 109.3750 | 3521.42 KB | | ' ImmutableHashSet (Larger Count)' | 100000 | 9,477.3 us | 141.55 us | 125.48 us | 9 | 46.8750 | 46.8750 | 46.8750 | 1697.89 KB | | ' HashSet (Different Comparer)' | 100000 | 9,839.2 us | 99.14 us | 87.88 us | 9 | 46.8750 | 46.8750 | 46.8750 | 1697.79 KB | | 'LINQ (Mismatch - Lazy IEnumerable)' | 100000 | 11,274.4 us | 63.77 us | 56.53 us | 10 | 296.8750 | 156.2500 | 156.2500 | 4717.23 KB | | 'LINQ (Match - Lazy IEnumerable)' | 100000 | 11,341.5 us | 69.37 us | 61.49 us | 10 | 296.8750 | 156.2500 | 156.2500 | 4717.23 KB | | 'ImmutableHashSet (Mismatch - Same Count)' | 100000 | 17,015.5 us | 170.03 us | 150.73 us | 11 | 31.2500 | 31.2500 | 31.2500 | 1697.88 KB | | 'ImmutableHashSet (Match - Same Comparer)' | 100000 | 17,410.2 us | 334.48 us | 312.87 us | 11 | 31.2500 | 31.2500 | 31.2500 | 1697.87 KB | --- ### Benchmark Results (After Optimization) | Method | Size | Mean | Error | StdDev | Rank | Gen0 | Gen1 | Gen2 | Allocated | |:--- |:--- |---:|---:|---:|:---:|---:|---:|---:|---:| | 'ImmutableHashSet (Smaller Count)' | 100000 | 2.300 ns | 0.0478 ns | 0.0447 ns | 1 | - | - | - | - | | 'ImmutableHashSet (Larger Count)' | 100000 | 2.328 ns | 0.0650 ns | 0.0576 ns | 1 | - | - | - | - | | 'BCL HashSet (Smaller Count)' | 100000 | 2.595 ns | 0.0524 ns | 0.0491 ns | 2 | - | - | - | - | | 'HashSet (Smaller Count - Different Comparer)' | 100000 | 2.644 ns | 0.0464 ns | 0.0411 ns | 2 | - | - | - | - | | 'Array (Smaller Count)' | 100000 | 2.711 ns | 0.0568 ns | 0.0504 ns | 2 | - | - | - | - | | 'List with Duplicates (Mismatch)' | 100000 | 794,876.698 ns | 15,781.0452 ns | 35,941.4284 ns | 3 | 31.2500 | 31.2500 | 31.2500 | 1738498 B | | 'List (Last Diff - Fallback)' | 100000 | 4,722,211.915 ns | 55,323.2393 ns | 51,749.3924 ns | 4 | 54.6875 | 54.6875 | 54.6875 | 1738698 B | | 'List (Match - Fallback)' | 100000 | 4,778,905.952 ns | 33,894.4095 ns | 28,303.3670 ns | 4 | 54.6875 | 54.6875 | 
54.6875 | 1738688 B | | 'List with Duplicates (Match)' | 100000 | 5,517,422.167 ns | 110,159.9473 ns | 171,505.7803 ns | 5 | 93.7500 | 93.7500 | 93.7500 | 3605853 B | | 'BCL HashSet (Match - Same Comparer)' | 100000 | 5,576,721.937 ns | 45,754.5403 ns | 38,207.1134 ns | 5 | - | - | - | - | | 'Case 04: BCL HashSet (Mismatch - Same Count)' | 100000 | 5,640,651.163 ns | 64,526.5199 ns | 60,358.1468 ns | 5 | - | - | - | - | | 'LINQ (Mismatch - Lazy IEnumerable)' | 100000 | 6,406,188.227 ns | 132,260.6999 ns | 379,480.7689 ns | 6 | 281.2500 | 140.6250 | 140.6250 | 4830429 B | | 'LINQ (Match - Lazy IEnumerable)' | 100000 | 6,784,385.648 ns | 135,159.5121 ns | 290,945.1304 ns | 7 | 250.0000 | 125.0000 | 125.0000 | 4830439 B | | 'Array (Match - Fallback)' | 100000 | 6,812,793.701 ns | 40,732.0373 ns | 36,107.8901 ns | 7 | 54.6875 | 54.6875 | 54.6875 | 1738653 B | | 'HashSet (Different Comparer)' | 100000 | 7,497,254.730 ns | 80,339.5419 ns | 75,149.6574 ns | 8 | 62.5000 | 62.5000 | 62.5000 | 1738753 B | | 'ImmutableHashSet (Mismatch - Same Count)' | 100000 | 12,946,989.847 ns | 94,279.9494 ns | 83,576.7194 ns | 9 | - | - | - | - | | 'ImmutableHashSet (Match - Same Comparer)' | 100000 | 13,615,905.022 ns | 57,544.4439 ns | 48,052.2169 ns | 10 | - | - | - | - | </details> ### Performance Analysis Summary (100,000 Elements) | Case / Method | Before (ns) | After (ns) | Speedup Ratio | Memory Improvement | |:---|---:|---:|---:|:---| | **ImmutableHashSet (Larger Count)** | 9,477,300 | 2.328 | **~4,071,005x** | **Zero Alloc** | | **ImmutableHashSet (Smaller Count)** | 4,733,200 | 2.300 | **~2,057,913x** | **Zero Alloc** | | **HashSet (Smaller - Diff Comparer)** | 1,449,300 | 2.644 | **~548,146x** | **Zero Alloc** | | **Array (Smaller Count)** | 647,900 | 2.711 | **~238,989x** | **Zero Alloc** | | **BCL HashSet (Smaller Count)** | 313,800 | 2.595 | **~120,924x** | **Zero Alloc** | | **HashSet (Different Comparer)** | 9,839,200 | 7,497,254 | **1.31x** | Stable (~1.7 MB) | | **LINQ 
(Match/Mismatch)** | 11,341,500 | 6,406,188 | **1.77x** | Stable (~4.8 MB) | | **BCL HashSet (Mismatch - Same Count)**| 8,636,300 | 5,640,651 | **1.53x** | **Zero Alloc** | | **ImmutableHashSet (Match)** | 17,410,200 | 13,615,905 | **1.28x** | **Zero Alloc** | | **ImmutableHashSet (Mismatch)** | 17,015,500 | 12,946,989 | **1.31x** | **Zero Alloc** | | **List (Match/Diff - Fallback)** | 9,172,500 | 4,722,211 | **1.94x** | Stable (~1.7 MB) | | **BCL HashSet (Match - Same Comp)** | 7,084,000 | 5,576,721 | **1.27x** | **Zero Alloc** | | **List (Duplicates - Mismatch)** | 954,100 | 794,876 | **1.20x** | Stable (~1.7 MB) | | **List (Duplicates - Match)** | 9,310,200 | 5,517,422 | **1.69x** | Stable (~3.6 MB) | | **Array (Match - Fallback)** | 7,821,700 | 6,812,793 | **1.15x** | Stable (~1.7 MB) |
1 parent da9be99 commit dd17b6d

1 file changed

Lines changed: 70 additions & 6 deletions

File tree

src/libraries/System.Collections.Immutable/src/System/Collections/Immutable/ImmutableHashSet_1.cs

Lines changed: 70 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -736,26 +736,90 @@ private static bool Overlaps(IEnumerable<T> other, MutationInput origin)
736736
return false;
737737
}
738738

739-
/// <summary>
740-
/// Performs the set operation on a given data structure.
741-
/// </summary>
742739
private static bool SetEquals(IEnumerable<T> other, MutationInput origin)
743740
{
744741
Requires.NotNull(other, nameof(other));
745742

743+
switch (other)
744+
{
745+
case ImmutableHashSet<T> otherAsImmutableHashSet:
746+
if (otherAsImmutableHashSet.Count != origin.Count)
747+
{
748+
return false;
749+
}
750+
751+
if (EqualityComparer<IEqualityComparer<T>>.Default.Equals(origin.EqualityComparer, otherAsImmutableHashSet.KeyComparer))
752+
{
753+
return SetEqualsWithImmutableHashset(otherAsImmutableHashSet, origin);
754+
}
755+
break;
756+
757+
case HashSet<T> otherAsHashset:
758+
if (otherAsHashset.Count != origin.Count)
759+
{
760+
return false;
761+
}
762+
763+
if (EqualityComparer<IEqualityComparer<T>>.Default.Equals(origin.EqualityComparer, otherAsHashset.Comparer))
764+
{
765+
return SetEqualsWithHashset(otherAsHashset, origin);
766+
}
767+
break;
768+
769+
case ICollection<T> otherAsICollectionGeneric:
770+
// We check for < instead of != because other is not guaranteed to be a set, it could be a collection with duplicates.
771+
if (otherAsICollectionGeneric.Count < origin.Count)
772+
{
773+
return false;
774+
}
775+
break;
776+
777+
case ICollection otherAsICollection:
778+
if (otherAsICollection.Count < origin.Count)
779+
{
780+
return false;
781+
}
782+
break;
783+
}
784+
746785
var otherSet = new HashSet<T>(other, origin.EqualityComparer);
747-
if (origin.Count != otherSet.Count)
786+
if (otherSet.Count != origin.Count)
748787
{
749788
return false;
750789
}
751790

752-
foreach (T item in otherSet)
791+
return SetEqualsWithHashset(otherSet, origin);
792+
}
793+
794+
private static bool SetEqualsWithImmutableHashset(ImmutableHashSet<T> other, MutationInput origin)
795+
{
796+
Requires.NotNull(other, nameof(other));
797+
798+
using var e = new ImmutableHashSet<T>.Enumerator(origin.Root);
799+
while (e.MoveNext())
753800
{
754-
if (!Contains(item, origin))
801+
if (!other.Contains(e.Current))
755802
{
756803
return false;
757804
}
758805
}
806+
807+
return true;
808+
}
809+
810+
private static bool SetEqualsWithHashset(HashSet<T> other, MutationInput origin)
811+
{
812+
Requires.NotNull(other, nameof(other));
813+
814+
using var e = new ImmutableHashSet<T>.Enumerator(origin.Root);
815+
while (e.MoveNext())
816+
{
817+
if (!other.Contains(e.Current))
818+
{
819+
return false;
820+
}
821+
}
822+
759823
return true;
760824
}
761825

0 commit comments

Comments
 (0)