Skip to content

Commit ca9fdb9

Browse files
perf(core): optimize polymorphic serialization with per-instance caching
Major performance improvements to polymorphic type handling:

- Move caching from static fields to Reader/Writer instances to eliminate thread-safety overhead (Volatile/Interlocked)
- Add fast path for single subtype scenarios (common case optimization)
- Use Unsafe.As<T, TSub>() for reference types instead of pattern matching
- Remove unnecessary System.Threading dependency

Additional improvements:

- Fix FastMap nullability warnings (default! → default)
- Increase CI benchmark timeout to 180 minutes
- Improve benchmark result detection with dynamic file discovery

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 7e39f0b commit ca9fdb9

7 files changed

Lines changed: 78 additions & 135 deletions

File tree

.github/workflows/ci.yml

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -120,7 +120,7 @@ jobs:
120120
runs-on: ubuntu-latest
121121
needs: [ build-test ]
122122
if: github.event_name == 'pull_request'
123-
timeout-minutes: 60
123+
timeout-minutes: 180
124124
permissions:
125125
contents: read
126126
pull-requests: write
@@ -165,10 +165,10 @@ jobs:
165165
dotnet run -c Release --no-build
166166
167167
# Verify benchmark results exist (check for any github markdown file)
168-
MARKDOWN_COUNT=$(find BenchmarkDotNet.Artifacts/results -name "*-report-github.md" -type f 2>/dev/null | wc -l)
169-
if [[ $MARKDOWN_COUNT -gt 0 ]]; then
170-
echo "✅ Benchmark completed successfully - found $MARKDOWN_COUNT report file(s)"
171-
find BenchmarkDotNet.Artifacts/results -name "*-report-github.md" -type f
168+
MARKDOWN_FILE=$(find BenchmarkDotNet.Artifacts/results -name "*-report-github.md" -type f 2>/dev/null | head -n1)
169+
if [[ -n "$MARKDOWN_FILE" && -f "$MARKDOWN_FILE" ]]; then
170+
echo "✅ Benchmark completed successfully - found report file:"
171+
echo "$MARKDOWN_FILE"
172172
else
173173
echo "❌ Benchmark results not found"
174174
echo "Looking for markdown files in results directory..."

.github/workflows/report.yml

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ jobs:
3030
benchmark:
3131
name: Run Performance Benchmarks
3232
runs-on: ubuntu-latest
33-
timeout-minutes: 60
33+
timeout-minutes: 180
3434

3535
steps:
3636
- name: Determine target tag/ref
@@ -111,12 +111,13 @@ jobs:
111111
echo "Running benchmarks for ${{ env.tag_name }}..."
112112
dotnet run -c Release --no-build
113113
114-
# Verify benchmark results exist (we're in src/Nino.Benchmark directory)
115-
if [[ -f "BenchmarkDotNet.Artifacts/results/Nino.Benchmark.SimpleTest-report-github.md" ]]; then
114+
# Verify benchmark results exist (check for any github markdown file)
115+
MARKDOWN_FILE=$(find BenchmarkDotNet.Artifacts/results -name "*-report-github.md" -type f 2>/dev/null | head -n1)
116+
if [[ -n "$MARKDOWN_FILE" && -f "$MARKDOWN_FILE" ]]; then
116117
echo "✅ Benchmark completed successfully"
118+
echo "Found: $MARKDOWN_FILE"
117119
else
118120
echo "❌ Benchmark results not found in $(pwd)"
119-
echo "Looking for: BenchmarkDotNet.Artifacts/results/Nino.Benchmark.SimpleTest-report-github.md"
120121
echo "Available files in results directory:"
121122
if [[ -d "BenchmarkDotNet.Artifacts/results/" ]]; then
122123
ls -la BenchmarkDotNet.Artifacts/results/

src/Nino.Core/FastMap.cs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -70,8 +70,8 @@ public void Remove(TKey key)
7070
Array.Copy(_values, index + 1, _values, index, _count - index - 1);
7171
}
7272

73-
_keys[--_count] = default!; // Clear the last element
74-
_values[_count] = default!;
73+
_keys[--_count] = default; // Clear the last element
74+
_values[_count] = default;
7575
}
7676

7777
private void Grow()
@@ -267,4 +267,4 @@ private int LinearSearch(TKey key)
267267
return -1;
268268
}
269269
}
270-
}
270+
}

src/Nino.Core/NinoDeserializer.cs

Lines changed: 40 additions & 82 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
using System;
22
using System.Diagnostics.CodeAnalysis;
33
using System.Runtime.CompilerServices;
4-
using System.Threading;
54

65
namespace Nino.Core
76
{
@@ -148,6 +147,9 @@ public static class CachedDeserializer<T>
148147
private static DeserializeDelegateRef<T> _deserializerRef;
149148
private static readonly FastMap<int, DeserializeDelegate<T>> SubTypeDeserializers = new();
150149
private static readonly FastMap<int, DeserializeDelegateRef<T>> SubTypeDeserializerRefs = new();
150+
private static int _singleSubTypeId = int.MinValue;
151+
private static DeserializeDelegate<T> _singleSubTypeDeserializer;
152+
private static DeserializeDelegateRef<T> _singleSubTypeDeserializerRef;
151153

152154
// Inline cache for polymorphic deserialization (separate caches for out/ref)
153155
// Cache expensive type checks
@@ -160,12 +162,6 @@ public static class CachedDeserializer<T>
160162
// ReSharper disable once StaticMemberInGenericType
161163
private static readonly bool IsSealed = typeof(T).IsSealed || typeof(T).IsValueType;
162164

163-
// Shared cache for polymorphic deserialization (Volatile for thread-safety)
164-
private static int _cachedTypeId;
165-
private static int _cachedTypeIdRef;
166-
private static DeserializeDelegate<T> _cachedDeserializer;
167-
private static DeserializeDelegateRef<T> _cachedDeserializerRef;
168-
169165
// ULTIMATE: JIT-eliminated constants for maximum performance
170166
// ReSharper disable once StaticMemberInGenericType
171167
internal static readonly bool IsSimpleType = !IsReferenceOrContainsReferences && !HasBaseType;
@@ -178,6 +174,9 @@ public static void SetDeserializer(int typeId, DeserializeDelegate<T> deserializ
178174
_deserializerRef = deserializerRef;
179175
SubTypeDeserializers.Add(typeId, _deserializer);
180176
SubTypeDeserializerRefs.Add(typeId, _deserializerRef);
177+
_singleSubTypeId = int.MinValue;
178+
_singleSubTypeDeserializer = null;
179+
_singleSubTypeDeserializerRef = null;
181180
}
182181

183182
public static void AddSubTypeDeserializer<TSub>(int subTypeId,
@@ -189,13 +188,27 @@ public static void AddSubTypeDeserializer<TSub>(int subTypeId,
189188
SubTypeDeserializerWrapper<TSub>.RefDeserializer = deserializerRef;
190189
SubTypeDeserializers.Add(subTypeId, SubTypeDeserializerWrapper<TSub>.DeserializeOutWrapper);
191190
SubTypeDeserializerRefs.Add(subTypeId, SubTypeDeserializerWrapper<TSub>.DeserializeRefWrapper);
191+
192+
if (SubTypeDeserializers.Count == 2)
193+
{
194+
_singleSubTypeId = subTypeId;
195+
_singleSubTypeDeserializer = SubTypeDeserializerWrapper<TSub>.DeserializeOutWrapper;
196+
_singleSubTypeDeserializerRef = SubTypeDeserializerWrapper<TSub>.DeserializeRefWrapper;
197+
}
198+
else
199+
{
200+
_singleSubTypeId = int.MinValue;
201+
_singleSubTypeDeserializer = null;
202+
_singleSubTypeDeserializerRef = null;
203+
}
192204
}
193205

194206
// Static wrapper class per TSub - allows better inlining than lambda
195207
private static class SubTypeDeserializerWrapper<TSub> where TSub : T
196208
{
197209
public static DeserializeDelegate<TSub> OutDeserializer;
198210
public static DeserializeDelegateRef<TSub> RefDeserializer;
211+
private static readonly bool IsValueType = typeof(TSub).IsValueType;
199212

200213
[MethodImpl(MethodImplOptions.AggressiveInlining)]
201214
public static void DeserializeOutWrapper(out T value, ref Reader reader)
@@ -207,13 +220,15 @@ public static void DeserializeOutWrapper(out T value, ref Reader reader)
207220
[MethodImpl(MethodImplOptions.AggressiveInlining)]
208221
public static void DeserializeRefWrapper(ref T value, ref Reader reader)
209222
{
210-
if (value is TSub val)
223+
if (IsValueType)
211224
{
212-
RefDeserializer(ref val, ref reader);
225+
TSub temp = default;
226+
RefDeserializer(ref temp, ref reader);
227+
value = temp;
213228
}
214229
else
215230
{
216-
ThrowInvalidCast(value?.GetType());
231+
RefDeserializer(ref Unsafe.As<T, TSub>(ref value), ref reader);
217232
}
218233
}
219234
}
@@ -271,10 +286,6 @@ public static void Deserialize(out T value, ref Reader reader)
271286
// DIRECT DELEGATE: Generated code path - no polymorphism possible
272287
_deserializer(out value, ref reader);
273288
}
274-
else if (SubTypeDeserializers.Count == 1)
275-
{
276-
SubTypeDeserializers.Values[0](out value, ref reader);
277-
}
278289
else
279290
{
280291
DeserializePolymorphic(out value, ref reader);
@@ -325,53 +336,26 @@ private static void DeserializePolymorphic(out T value, ref Reader reader)
325336
_deserializer!(out value, ref reader);
326337
return;
327338
}
328-
339+
329340
// FAST PATH: Exact type match for single subtype
330-
if (SubTypeDeserializers.Count == 2)
341+
if (typeId == _singleSubTypeId && _singleSubTypeDeserializer is not null)
331342
{
332-
var keys = SubTypeDeserializers.Keys;
333-
if (typeId == keys[0])
334-
{
335-
SubTypeDeserializers.Values[0](out value, ref reader);
336-
return;
337-
}
338-
339-
SubTypeDeserializers.Values[1](out value, ref reader);
343+
_singleSubTypeDeserializer(out value, ref reader);
340344
return;
341345
}
342-
346+
343347
// FAST PATH: Cache hit (optimized for monomorphic arrays)
344-
#if NET5_0_OR_GREATER && !UNITY_WEBGL
345-
// On 64-bit platforms, Volatile is atomic and faster (~1-2 cycles)
346-
var cachedId = Volatile.Read(ref _cachedTypeId);
347-
if (typeId == cachedId)
348-
{
349-
var cachedDeser = Volatile.Read(ref _cachedDeserializer);
350-
cachedDeser!(out value, ref reader);
351-
return;
352-
}
353-
#else
354-
// On 32-bit platforms and WebGL, use Interlocked for atomicity (~10-20 cycles)
355-
var cachedId = Interlocked.CompareExchange(ref _cachedTypeId, 0, 0);
356-
if (typeId == cachedId)
348+
if (typeId == reader.CachedTypeId && reader.CachedDeserializer is DeserializeDelegate<T> cachedDeserializer)
357349
{
358-
var cachedDeser = Interlocked.CompareExchange(ref _cachedDeserializer, null, null);
359-
cachedDeser!(out value, ref reader);
350+
cachedDeserializer(out value, ref reader);
360351
return;
361352
}
362-
#endif
363353

364354
// SLOW PATH: Full lookup in subtype map and update cache
365355
if (SubTypeDeserializers.TryGetValue(typeId, out var subTypeDeserializer))
366356
{
367-
#if NET5_0_OR_GREATER && !UNITY_WEBGL
368-
// Update cache for subsequent elements
369-
Volatile.Write(ref _cachedTypeId, typeId);
370-
Volatile.Write(ref _cachedDeserializer, subTypeDeserializer);
371-
#else
372-
Interlocked.Exchange(ref _cachedTypeId, typeId);
373-
Interlocked.Exchange(ref _cachedDeserializer, subTypeDeserializer);
374-
#endif
357+
reader.CachedTypeId = typeId;
358+
reader.CachedDeserializer = subTypeDeserializer;
375359
subTypeDeserializer(out value, ref reader);
376360
return;
377361
}
@@ -400,51 +384,25 @@ private static void DeserializeRefPolymorphic(ref T value, ref Reader reader)
400384
}
401385

402386
// FAST PATH: Exact type match for single subtype
403-
if (SubTypeDeserializerRefs.Count == 2)
387+
if (typeId == _singleSubTypeId && _singleSubTypeDeserializerRef is not null)
404388
{
405-
var keys = SubTypeDeserializerRefs.Keys;
406-
if (typeId == keys[0])
407-
{
408-
SubTypeDeserializerRefs.Values[0](ref value, ref reader);
409-
return;
410-
}
411-
412-
SubTypeDeserializerRefs.Values[1](ref value, ref reader);
389+
_singleSubTypeDeserializerRef(ref value, ref reader);
413390
return;
414391
}
415392

416393
// FAST PATH: Cache hit (optimized for monomorphic arrays)
417-
#if NET5_0_OR_GREATER && !UNITY_WEBGL
418-
// On 64-bit platforms, Volatile is atomic and faster (~1-2 cycles)
419-
var cachedIdRef = Volatile.Read(ref _cachedTypeIdRef);
420-
if (typeId == cachedIdRef)
421-
{
422-
var cachedDeserRef = Volatile.Read(ref _cachedDeserializerRef);
423-
cachedDeserRef!(ref value, ref reader);
424-
return;
425-
}
426-
#else
427-
// On 32-bit platforms and WebGL, use Interlocked for atomicity (~10-20 cycles)
428-
var cachedIdRef = Interlocked.CompareExchange(ref _cachedTypeIdRef, 0, 0);
429-
if (typeId == cachedIdRef)
394+
if (typeId == reader.CachedTypeIdRef &&
395+
reader.CachedDeserializerRef is DeserializeDelegateRef<T> cachedDeserializerRef)
430396
{
431-
var cachedDeserRef = Interlocked.CompareExchange(ref _cachedDeserializerRef, null, null);
432-
cachedDeserRef!(ref value, ref reader);
397+
cachedDeserializerRef(ref value, ref reader);
433398
return;
434399
}
435-
#endif
436400

437401
// SLOW PATH: Full lookup in subtype map and update cache
438402
if (SubTypeDeserializerRefs.TryGetValue(typeId, out var subTypeDeserializer))
439403
{
440-
#if NET5_0_OR_GREATER && !UNITY_WEBGL
441-
// Update cache for subsequent elements
442-
Volatile.Write(ref _cachedTypeIdRef, typeId);
443-
Volatile.Write(ref _cachedDeserializerRef, subTypeDeserializer);
444-
#else
445-
Interlocked.Exchange(ref _cachedTypeIdRef, typeId);
446-
Interlocked.Exchange(ref _cachedDeserializerRef, subTypeDeserializer);
447-
#endif
404+
reader.CachedTypeIdRef = typeId;
405+
reader.CachedDeserializerRef = subTypeDeserializer;
448406
subTypeDeserializer(ref value, ref reader);
449407
return;
450408
}

src/Nino.Core/NinoSerializer.cs

Lines changed: 10 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -182,37 +182,30 @@ public static class CachedSerializer<T>
182182
// ReSharper disable once StaticMemberInGenericType
183183
internal static readonly bool IsSimpleType = !IsReferenceOrContainsReferences && !HasBaseType;
184184

185-
[MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.NoOptimization)] // Cold exception path
186-
private static void ThrowInvalidCast(Type actualType) =>
187-
throw new InvalidCastException($"Cannot cast {actualType?.FullName ?? "null"} to {typeof(T).FullName}");
188-
189185
public static void AddSubTypeSerializer<TSub>(SerializeDelegate<TSub> serializer) where TSub : T
190186
{
191187
// Use a static generic helper class to create an inlineable wrapper
192188
SubTypeSerializerWrapper<TSub>.SubSerializer = serializer;
193189
SubTypeSerializers.Add(typeof(TSub).TypeHandle.Value, SubTypeSerializerWrapper<TSub>.SerializeWrapper);
194190
}
195191

196-
// Shared cache for polymorphic serialization (Interlocked for thread-safety)
197-
private static IntPtr _cachedTypeHandle;
198-
private static SerializeDelegate<T> _cachedSerializer;
199-
200192
// Static wrapper class per TSub - allows better inlining than lambda
201193
private static class SubTypeSerializerWrapper<TSub> where TSub : T
202194
{
203195
public static SerializeDelegate<TSub> SubSerializer;
196+
private static readonly bool IsValueType = typeof(TSub).IsValueType;
204197

205198
[MethodImpl(MethodImplOptions.AggressiveInlining)]
206199
public static void SerializeWrapper(T val, ref Writer writer)
207200
{
208-
if (val is TSub sub)
201+
if (IsValueType)
209202
{
210-
// This can be inlined by JIT since it's a static method call with known target
211-
SubSerializer(sub, ref writer);
203+
SubSerializer((TSub)(object)val!, ref writer);
212204
}
213205
else
214206
{
215-
ThrowInvalidCast(val?.GetType());
207+
// Runtime handle check already guaranteed T is TSub so skip casts for reference types.
208+
SubSerializer(Unsafe.As<T, TSub>(ref val), ref writer);
216209
}
217210
}
218211
}
@@ -277,45 +270,23 @@ private static unsafe void SerializePolymorphic(T val, ref Writer writer)
277270
return;
278271
}
279272

280-
// FAST PATH: 1 subtype (common for simple polymorphic usage)
281273
if (SubTypeSerializers.Count == 1)
282274
{
283275
SubTypeSerializers.Values[0](val, ref writer);
284276
return;
285277
}
286278

287-
// FAST PATH: Cache hit (optimized for monomorphic arrays)
288-
#if NET5_0_OR_GREATER && !UNITY_WEBGL
289-
// On 64-bit platforms, Volatile is atomic and faster (~1-2 cycles)
290-
var cachedHandle = Volatile.Read(ref _cachedTypeHandle);
291-
if (actualTypeHandle == cachedHandle)
279+
if (actualTypeHandle == writer.CachedTypeHandle &&
280+
writer.CachedSerializer is SerializeDelegate<T> cachedSer)
292281
{
293-
var cachedSer = Volatile.Read(ref _cachedSerializer);
294-
cachedSer!(val, ref writer);
282+
cachedSer(val, ref writer);
295283
return;
296284
}
297-
#else
298-
// On 32-bit platforms and WebGL, use Interlocked for atomicity (~10-20 cycles)
299-
var cachedHandle = Interlocked.CompareExchange(ref _cachedTypeHandle, IntPtr.Zero, IntPtr.Zero);
300-
if (actualTypeHandle == cachedHandle)
301-
{
302-
var cachedSer = Interlocked.CompareExchange(ref _cachedSerializer, null, null);
303-
cachedSer!(val, ref writer);
304-
return;
305-
}
306-
#endif
307285

308-
// SLOW PATH: Full lookup in subtype map and update cache
309286
if (SubTypeSerializers.TryGetValue(actualTypeHandle, out var subTypeSerializer))
310287
{
311-
#if NET5_0_OR_GREATER && !UNITY_WEBGL
312-
// Update cache for subsequent elements
313-
Volatile.Write(ref _cachedTypeHandle, actualTypeHandle);
314-
Volatile.Write(ref _cachedSerializer, subTypeSerializer);
315-
#else
316-
Interlocked.Exchange(ref _cachedTypeHandle, actualTypeHandle);
317-
Interlocked.Exchange(ref _cachedSerializer, subTypeSerializer);
318-
#endif
288+
writer.CachedTypeHandle = actualTypeHandle;
289+
writer.CachedSerializer = subTypeSerializer;
319290
subTypeSerializer(val, ref writer);
320291
return;
321292
}

0 commit comments

Comments
 (0)