Skip to content

Commit 8b3e6fa

Browse files
committed
optimize case-insensitive performance
1 parent 23db1ea commit 8b3e6fa

19 files changed

Lines changed: 381 additions & 472 deletions

File tree

eng/StackExchange.Redis.Build/AsciiHash.md

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,15 @@ static partial class bin
2828
{
2929
public const int Length = 3;
3030
public const long HashCS = ...
31-
public const long HashCI = ...
31+
public const long HashUC = ...
3232
public static ReadOnlySpan<byte> U8 => @"bin"u8;
3333
public static string Text => @"bin";
34-
public static bool IsCI(long hash, in RawResult value) => ...
35-
public static bool IsCS(long hash, in ReadOnlySpan<byte> value) => ...
34+
public static bool IsCS(in ReadOnlySpan<byte> value, long cs) => ...
35+
public static bool IsCI(in RawResult value, long uc) => ...
36+
3637
}
3738
```
38-
The `CS` and `CI` are case-sensitive and case-insensitive tools, respectively.
39+
The `CS` and `UC` suffixes denote the case-sensitive and case-insensitive (upper-cased) hashes, respectively; `IsCS` and `IsCI` are the corresponding case-sensitive and case-insensitive tests.
3940

4041
(this API is strictly an internal implementation detail, and can change at any time)
4142

@@ -46,18 +47,18 @@ var key = ...
4647
var hash = key.HashCS();
4748
switch (key.Length)
4849
{
49-
case bin.Length when bin.Is(hash, key):
50+
case bin.Length when bin.Is(key, hash):
5051
// handle bin
5152
break;
52-
case f32.Length when f32.Is(hash, key):
53+
case f32.Length when f32.Is(key, hash):
5354
// handle f32
5455
break;
5556
}
5657
```
5758

5859
The switch on the `Length` is optional, but recommended - these low values can often be implemented (by the compiler)
5960
as a simple jump-table, which is very fast. However, switching on the hash itself is also valid. All hash matches
60-
must also perform a sequence equality check - the `Is(hash, value)` convenience method validates both hash and equality.
61+
must also perform a sequence equality check - the `Is(value, hash)` convenience method validates both hash and equality.
6162

6263
Note that `switch` requires `const` values, which is why we use generated *types* rather than partial-properties
6364
that emit an instance with the known values. Also, the `"..."u8` syntax emits a span which is awkward to store, but
@@ -81,6 +82,13 @@ Now, `bin.Hash` can be supplied to a caller that takes an `AsciiHash` instance (
8182
which then has *instance* methods for case-sensitive and case-insensitive matching; the instance already knows
8283
the target hash and payload values.
8384

85+
The `AsciiHash` returned implements `IEquatable<AsciiHash>` with case-sensitive equality; there are
86+
also independent case-sensitive and case-insensitive comparers available via the static
87+
`CaseSensitiveEqualityComparer` and `CaseInsensitiveEqualityComparer` properties respectively.
88+
89+
Comparison values can be constructed on the fly on top of transient buffers using the constructors **that take
90+
arrays**. Note that the other constructors may allocate on a per-usage basis.
91+
8492
## Enum parsing (part 1)
8593

8694
When identifying multiple values, an `enum` may be more convenient. Consider:

eng/StackExchange.Redis.Build/AsciiHashGenerator.cs

Lines changed: 16 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -490,7 +490,7 @@ private void BuildEnumParsers(
490490
}
491491
else
492492
{
493-
NewLine().Append("global::RESPite.AsciiHash.Hash(").Append(method.From.Name).Append(", out var hashCS, out var hashCI);");
493+
NewLine().Append("global::RESPite.AsciiHash.Hash(").Append(method.From.Name).Append(", out var hashCS, out var hashUC);");
494494
}
495495

496496
if (string.IsNullOrEmpty(method.CaseSensitive.Name))
@@ -544,68 +544,36 @@ void Write(bool caseSensitive)
544544
.ThenBy(x => x.ParseText))
545545
{
546546
var len = member.ParseText.Length;
547-
AsciiHash.Hash(member.ParseText, out var hashCS, out var hashCI);
547+
AsciiHash.Hash(member.ParseText, out var hashCS, out var hashUC);
548548

549549
bool valueCaseSensitive = caseSensitive || !HasCaseSensitiveCharacters(member.ParseText);
550550

551551
line = NewLine().Append(len);
552552
if (valueCaseSensitive)
553553
{
554554
line.Append(" when hashCS is ").Append(hashCS);
555-
if (len > AsciiHash.MaxBytesHashIsEqualityCS)
556-
{
557-
line.Append(" && ");
558-
WriteValueTest(member.ParseText, true);
559-
}
560555
}
561556
else
562557
{
563-
// optimize for "all_lower" or "ALL_UPPER" matches; "Mixed_Match" comes last
564-
var ucText = member.ParseText.ToUpperInvariant();
565-
var lcText = member.ParseText.ToLowerInvariant();
566-
long hashUC = AsciiHash.HashCS(ucText), hashLC = AsciiHash.HashCS(lcText);
567-
568-
if (len <= AsciiHash.MaxBytesHashIsEqualityCS)
569-
{
570-
// note we know the lc and uc hash must be different
571-
line.Append(" when (hashCS is ").Append(hashUC).Append(" or ").Append(hashLC)
572-
.Append(") || (hashCI is ").Append(hashCI).Append(" && ");
573-
WriteValueTest(member.ParseText, false);
574-
line.Append(")");
575-
}
576-
else if (hashLC == hashCS && hashUC == hashCS)
577-
{
578-
// there are alphas, but not in the hashed portion
579-
line.Append(" when hashCS is ").Append(hashLC).Append(" && ");
580-
WriteValueTest(member.ParseText, false);
581-
}
582-
else
583-
{
584-
line.Append(" when (hashCS is ").Append(hashLC).Append(" && ");
585-
WriteValueTest(lcText, true);
586-
line.Append(") || (hashCS is ").Append(hashUC).Append(" && ");
587-
WriteValueTest(ucText, true);
588-
line.Append(") || (hashCI is ").Append(hashCI).Append(" && ");
589-
WriteValueTest(member.ParseText, false);
590-
line.Append(")");
591-
}
558+
line.Append(" when hashUC is ").Append(hashUC);
592559
}
593-
line.Append(" => ").Append(method.To.Type).Append(".").Append(member.EnumMember).Append(",");
594-
595-
void WriteValueTest(string value, bool testCS)
560+
if (len > AsciiHash.MaxBytesHashed)
596561
{
562+
line.Append(" && ");
597563
var csValue = SyntaxFactory
598564
.LiteralExpression(
599565
SyntaxKind.StringLiteralExpression,
600-
SyntaxFactory.Literal(value))
566+
SyntaxFactory.Literal(member.ParseText))
601567
.ToFullString();
602568

603569
line.Append("global::RESPite.AsciiHash.")
604-
.Append(testCS ? nameof(AsciiHash.SequenceEqualsCS) : nameof(AsciiHash.SequenceEqualsCI))
570+
.Append(valueCaseSensitive ? nameof(AsciiHash.SequenceEqualsCS) : nameof(AsciiHash.SequenceEqualsCI))
605571
.Append("(").Append(method.From.Name).Append(", ").Append(csValue);
606572
if (method.From.IsBytes) line.Append("u8");
607573
line.Append(")");
608574
}
575+
576+
line.Append(" => ").Append(method.To.Type).Append(".").Append(member.EnumMember).Append(",");
609577
}
610578

611579
NewLine().Append("_ => (").Append(method.To.Type).Append(")").Append(method.DefaultValue)
@@ -717,29 +685,29 @@ private static void BuildTypeImplementations(
717685
.LiteralExpression(SyntaxKind.StringLiteralExpression, SyntaxFactory.Literal(literal.Value))
718686
.ToFullString();
719687

720-
AsciiHash.Hash(literal.Value, out var hashCS, out var hashCI);
688+
AsciiHash.Hash(literal.Value, out var hashCS, out var hashUC);
721689
NewLine().Append("static partial class ").Append(literal.Name);
722690
NewLine().Append("{");
723691
indent++;
724692
NewLine().Append("public const int Length = ").Append(literal.Value.Length).Append(';');
725693
NewLine().Append("public const long HashCS = ").Append(hashCS).Append(';');
726-
NewLine().Append("public const long HashCI = ").Append(hashCI).Append(';');
694+
NewLine().Append("public const long HashUC = ").Append(hashUC).Append(';');
727695
NewLine().Append("public static ReadOnlySpan<byte> U8 => ").Append(csValue).Append("u8;");
728696
NewLine().Append("public const string Text = ").Append(csValue).Append(';');
729-
if (literal.Value.Length <= AsciiHash.MaxBytesHashIsEqualityCS)
697+
if (literal.Value.Length <= AsciiHash.MaxBytesHashed)
730698
{
731699
// the case-sensitive hash enforces all the values
732700
NewLine().Append(
733-
"public static bool IsCS(long hash, ReadOnlySpan<byte> value) => hash == HashCS & value.Length == Length;");
701+
"public static bool IsCS(ReadOnlySpan<byte> value, long cs) => cs == HashCS & value.Length == Length;");
734702
NewLine().Append(
735-
"public static bool IsCI(long hash, ReadOnlySpan<byte> value) => (hash == HashCI & value.Length == Length) && (global::RESPite.AsciiHash.HashCS(value) == HashCS || global::RESPite.AsciiHash.EqualsCI(value, U8));");
703+
"public static bool IsCI(ReadOnlySpan<byte> value, long uc) => uc == HashUC & value.Length == Length;");
736704
}
737705
else
738706
{
739707
NewLine().Append(
740-
"public static bool IsCS(long hash, ReadOnlySpan<byte> value) => hash == HashCS && value.SequenceEqual(U8);");
708+
"public static bool IsCS(ReadOnlySpan<byte> value, long cs) => cs == HashCS && value.SequenceEqual(U8);");
741709
NewLine().Append(
742-
"public static bool IsCI(long hash, ReadOnlySpan<byte> value) => (hash == HashCI & value.Length == Length) && global::RESPite.AsciiHash.EqualsCI(value, U8);");
710+
"public static bool IsCI(ReadOnlySpan<byte> value, long uc) => uc == HashUC && global::RESPite.AsciiHash.SequenceEqualsCI(value, U8);");
743711
}
744712

745713
indent--;

src/RESPite/PublicAPI/PublicAPI.Unshipped.txt

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,7 @@
1212
[SER004]RESPite.AsciiHash.AsciiHash(System.ReadOnlySpan<byte> value) -> void
1313
[SER004]RESPite.AsciiHash.BufferLength.get -> int
1414
[SER004]RESPite.AsciiHash.Equals(in RESPite.AsciiHash other) -> bool
15-
[SER004]RESPite.AsciiHash.IsCI(long hash, System.ReadOnlySpan<byte> value) -> bool
1615
[SER004]RESPite.AsciiHash.IsCI(System.ReadOnlySpan<byte> value) -> bool
17-
[SER004]RESPite.AsciiHash.IsCS(long hash, System.ReadOnlySpan<byte> value) -> bool
1816
[SER004]RESPite.AsciiHash.IsCS(System.ReadOnlySpan<byte> value) -> bool
1917
[SER004]RESPite.AsciiHash.Length.get -> int
2018
[SER004]RESPite.AsciiHash.Span.get -> System.ReadOnlySpan<byte>
@@ -59,13 +57,12 @@
5957
[SER004]static RESPite.AsciiHash.EqualsCI(System.ReadOnlySpan<char> first, System.ReadOnlySpan<char> second) -> bool
6058
[SER004]static RESPite.AsciiHash.EqualsCS(System.ReadOnlySpan<byte> first, System.ReadOnlySpan<byte> second) -> bool
6159
[SER004]static RESPite.AsciiHash.EqualsCS(System.ReadOnlySpan<char> first, System.ReadOnlySpan<char> second) -> bool
62-
[SER004]static RESPite.AsciiHash.Hash(scoped System.ReadOnlySpan<byte> value, out long cs, out long ci) -> void
63-
[SER004]static RESPite.AsciiHash.Hash(scoped System.ReadOnlySpan<char> value, out long cs, out long ci) -> void
64-
[SER004]static RESPite.AsciiHash.HashCI(scoped System.ReadOnlySpan<byte> value) -> long
65-
[SER004]static RESPite.AsciiHash.HashCI(scoped System.ReadOnlySpan<char> value) -> long
66-
[SER004]static RESPite.AsciiHash.HashCS(in System.Buffers.ReadOnlySequence<byte> value) -> long
60+
[SER004]static RESPite.AsciiHash.Hash(scoped System.ReadOnlySpan<byte> value, out long cs, out long uc) -> void
61+
[SER004]static RESPite.AsciiHash.Hash(scoped System.ReadOnlySpan<char> value, out long cs, out long uc) -> void
6762
[SER004]static RESPite.AsciiHash.HashCS(scoped System.ReadOnlySpan<byte> value) -> long
6863
[SER004]static RESPite.AsciiHash.HashCS(scoped System.ReadOnlySpan<char> value) -> long
64+
[SER004]static RESPite.AsciiHash.HashUC(scoped System.ReadOnlySpan<byte> value) -> long
65+
[SER004]static RESPite.AsciiHash.HashUC(scoped System.ReadOnlySpan<char> value) -> long
6966
[SER004]static RESPite.AsciiHash.SequenceEqualsCI(System.ReadOnlySpan<byte> first, System.ReadOnlySpan<byte> second) -> bool
7067
[SER004]static RESPite.AsciiHash.SequenceEqualsCI(System.ReadOnlySpan<char> first, System.ReadOnlySpan<char> second) -> bool
7168
[SER004]static RESPite.AsciiHash.SequenceEqualsCS(System.ReadOnlySpan<byte> first, System.ReadOnlySpan<byte> second) -> bool

src/RESPite/Shared/AsciiHash.Comparers.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ public bool Equals(AsciiHash x, AsciiHash y)
1414
{
1515
var len = x.Length;
1616
return (len == y.Length & x._hashCS == y._hashCS)
17-
&& (len <= MaxBytesHashIsEqualityCS || x.Span.SequenceEqual(y.Span));
17+
&& (len <= MaxBytesHashed || x.Span.SequenceEqual(y.Span));
1818
}
1919

2020
public int GetHashCode(AsciiHash obj) => obj._hashCS.GetHashCode();
@@ -28,10 +28,10 @@ private CaseInsensitiveComparer() { }
2828
public bool Equals(AsciiHash x, AsciiHash y)
2929
{
3030
var len = x.Length;
31-
return (len == y.Length & x._hashLC == y._hashLC)
32-
&& (len <= MaxBytesHashIsEqualityCS || SequenceEqualsCI(x.Span, y.Span));
31+
return (len == y.Length & x._hashUC == y._hashUC)
32+
&& (len <= MaxBytesHashed || SequenceEqualsCI(x.Span, y.Span));
3333
}
3434

35-
public int GetHashCode(AsciiHash obj) => obj._hashLC.GetHashCode();
35+
public int GetHashCode(AsciiHash obj) => obj._hashUC.GetHashCode();
3636
}
3737
}
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
using System.Buffers.Binary;
2+
using System.Diagnostics.CodeAnalysis;
3+
using System.Text;
4+
5+
namespace RESPite;
6+
7+
public readonly partial struct AsciiHash : IEquatable<AsciiHash>
8+
{
9+
// ReSharper disable InconsistentNaming
10+
private readonly long _hashCS, _hashUC;
11+
// ReSharper restore InconsistentNaming
12+
private readonly int _index, _length;
13+
private readonly byte[] _arr;
14+
15+
public int Length => _length;
16+
17+
/// <summary>
18+
/// The optimal buffer length (with padding) to use for this value.
19+
/// </summary>
20+
public int BufferLength => (Length + 1 + 7) & ~7; // an extra byte, then round up to word-size
21+
22+
public ReadOnlySpan<byte> Span => new(_arr ?? [], _index, _length);
23+
24+
public AsciiHash(ReadOnlySpan<byte> value) : this(value.ToArray(), 0, value.Length) { }
25+
public AsciiHash(string value) : this(Encoding.ASCII.GetBytes(value)) { }
26+
27+
/// <inheritdoc/>
28+
public override int GetHashCode() => _hashCS.GetHashCode();
29+
30+
/// <inheritdoc/>
31+
public override string ToString() => _length == 0 ? "" : Encoding.ASCII.GetString(_arr, _index, _length);
32+
33+
/// <inheritdoc/>
34+
public override bool Equals(object? other) => other is AsciiHash hash && Equals(hash);
35+
36+
/// <inheritdoc cref="Equals(object)" />
37+
public bool Equals(in AsciiHash other)
38+
{
39+
return (_length == other.Length & _hashCS == other._hashCS)
40+
&& (_length <= MaxBytesHashed || Span.SequenceEqual(other.Span));
41+
}
42+
43+
bool IEquatable<AsciiHash>.Equals(AsciiHash other) => Equals(other);
44+
45+
public AsciiHash(byte[] arr) : this(arr, 0, -1) { }
46+
47+
public AsciiHash(byte[] arr, int index, int length)
48+
{
49+
_arr = arr ?? [];
50+
_index = index;
51+
_length = length < 0 ? (_arr.Length - index) : length;
52+
53+
var span = new ReadOnlySpan<byte>(_arr, _index, _length);
54+
Hash(span, out _hashCS, out _hashUC);
55+
}
56+
57+
public bool IsCS(ReadOnlySpan<byte> value)
58+
{
59+
var cs = HashCS(value);
60+
var len = _length;
61+
if (cs != _hashCS | value.Length != len) return false;
62+
return len <= MaxBytesHashed || Span.SequenceEqual(value);
63+
}
64+
65+
public bool IsCI(ReadOnlySpan<byte> value)
66+
{
67+
var uc = HashUC(value);
68+
var len = _length;
69+
if (uc != _hashUC | value.Length != len) return false;
70+
return len <= MaxBytesHashed || SequenceEqualsCI(Span, value);
71+
}
72+
}

0 commit comments

Comments
 (0)