|
| 1 | +using System.Runtime.CompilerServices; |
| 2 | +using System.Text; |
| 3 | +using System.Text.Json; |
| 4 | + |
| 5 | +namespace CosmoSQLClient.MsSql; |
| 6 | + |
/// <summary>
/// Assembles fragmented JSON chunks (as produced by SQL Server's <c>FOR JSON</c> queries)
/// into complete, independently-parsed <see cref="JsonElement"/> objects.
/// </summary>
/// <remarks>
/// <para>
/// SQL Server's <c>FOR JSON PATH</c> / <c>FOR JSON AUTO</c> does not respect JSON value
/// boundaries when splitting output across rows — a single row may contain half a property
/// name, and the next row may start mid-value. Naively streaming row-by-row therefore
/// yields unusable partial JSON.
/// </para>
/// <para>
/// This class solves that by feeding chunks through .NET's <see cref="Utf8JsonReader"/>
/// with <see cref="JsonReaderState"/> persistence. The reader correctly handles escaped
/// strings, nested objects and arrays, and resumes from exactly the right position when
/// more data arrives — so <c>{</c> characters inside string values are never misread as
/// object boundaries.
/// </para>
/// <para>
/// For a <c>FOR JSON</c> result like <c>[{"Id":1},{"Id":2},…]</c>, each top-level array
/// element that is itself an object or array is yielded as a separate
/// <see cref="JsonElement"/> once its closing token has been received, without waiting
/// for the entire array to arrive. Scalar elements of the top-level array are skipped,
/// and input whose top-level value is not an array yields nothing.
/// </para>
/// </remarks>
public static class JsonChunkAssembler
{
    /// <summary>Initial capacity, in bytes, of the reassembly buffer.</summary>
    private const int InitialBufferCapacity = 32768;

    // ── Public API ────────────────────────────────────────────────────────────

    /// <summary>
    /// Stream individual JSON objects from a sequence of arbitrary-length string chunks.
    /// Designed for SQL Server <c>FOR JSON</c> output but works with any JSON array of objects.
    /// </summary>
    /// <param name="chunks">
    /// Raw JSON fragments — e.g. each row's text from a <c>FOR JSON</c> result set.
    /// Null or empty fragments are ignored.
    /// </param>
    /// <param name="ct">Optional cancellation token.</param>
    /// <returns>
    /// One <see cref="JsonElement"/> per top-level array element (object or array) once it is
    /// complete. Only the bytes of the element currently being assembled are retained between
    /// chunks. An element that is still incomplete when <paramref name="chunks"/> ends is not
    /// yielded.
    /// </returns>
    /// <exception cref="JsonException">The concatenated chunks are not valid JSON.</exception>
    public static async IAsyncEnumerable<JsonElement> AssembleJsonObjectsAsync(
        IAsyncEnumerable<string> chunks,
        [EnumeratorCancellation] CancellationToken ct = default)
    {
        // Holds the bytes of the in-progress element plus any bytes the reader has not consumed yet.
        using var buf = new MemoryStream(InitialBufferCapacity);

        // Stateful encoder: a surrogate pair split across two chunks must be encoded as one
        // code point, which a per-chunk Encoding.UTF8.GetBytes call cannot do.
        var encoder = Encoding.UTF8.GetEncoder();

        var state = default(JsonReaderState);
        int parsedTo = 0;       // buffer offset at which the reader resumes
        int depth = 0;          // JSON nesting depth (1 = inside the top-level array)
        int elemStart = -1;     // buffer offset of the open element's first byte, or -1
        bool inArray = false;

        await foreach (var chunk in chunks.WithCancellation(ct))
        {
            if (string.IsNullOrEmpty(chunk))
            {
                continue;
            }

            AppendChunk(buf, encoder, chunk);

            // Process the buffer SYNCHRONOUSLY (Utf8JsonReader is a ref struct and
            // cannot cross yield/await boundaries — so we collect results then yield).
            var result = ProcessBuffer(
                buf.GetBuffer(), parsedTo, (int)buf.Length, state,
                depth, elemStart, inArray);

            state = result.State;
            depth = result.Depth;
            inArray = result.InArray;

            // Compact, but never past the start of an element that is still open: its
            // leading bytes are needed to parse it once the closing token arrives.
            int keepFrom = result.ElemStart >= 0 ? result.ElemStart : result.ConsumedTo;
            DiscardPrefix(buf, keepFrom);

            // Rebase the offsets onto the compacted buffer.
            parsedTo = result.ConsumedTo - keepFrom;
            elemStart = result.ElemStart >= 0 ? 0 : -1;

            // Yield complete elements OUTSIDE the Utf8JsonReader scope.
            foreach (var elem in result.Elements)
            {
                yield return elem;
            }
        }
    }

    /// <summary>
    /// Concatenate all chunks into a single JSON string.
    /// Suitable when the total JSON is small enough to fit in memory.
    /// </summary>
    /// <param name="chunks">Raw JSON fragments, appended in the order received.</param>
    /// <param name="ct">Optional cancellation token.</param>
    /// <returns>The concatenation of every chunk; no JSON validation is performed.</returns>
    public static async Task<string> BufferJsonAsync(
        IAsyncEnumerable<string> chunks,
        CancellationToken ct = default)
    {
        var sb = new StringBuilder();
        await foreach (var chunk in chunks.WithCancellation(ct))
        {
            sb.Append(chunk);
        }

        return sb.ToString();
    }

    // ── Synchronous processing (no async/yield — safe for ref struct) ─────────

    /// <summary>
    /// Outcome of one <see cref="ProcessBuffer"/> pass. A plain struct (not a ref struct) so it
    /// can be held as a local inside the async iterator on any supported language version.
    /// </summary>
    private readonly struct BufferResult
    {
        /// <summary>Elements completed during this pass, in document order.</summary>
        public IReadOnlyList<JsonElement> Elements { get; init; }

        /// <summary>Reader state to resume from on the next pass.</summary>
        public JsonReaderState State { get; init; }

        /// <summary>Nesting depth after the last token read.</summary>
        public int Depth { get; init; }

        /// <summary>Buffer offset of the still-open element's first byte, or -1 if none is open.</summary>
        public int ElemStart { get; init; }

        /// <summary>Whether the reader is currently inside the top-level array.</summary>
        public bool InArray { get; init; }

        /// <summary>Buffer offset just past the last byte the reader consumed.</summary>
        public int ConsumedTo { get; init; }
    }

    /// <summary>
    /// Reads every complete token in <c>rawBuf[parsedTo..bufLength)</c> and collects each
    /// top-level array element (object or array) that closes during this pass.
    /// </summary>
    /// <param name="rawBuf">Backing array of the reassembly buffer.</param>
    /// <param name="parsedTo">Offset at which the reader resumes.</param>
    /// <param name="bufLength">Number of valid bytes in <paramref name="rawBuf"/>.</param>
    /// <param name="state">Reader state returned by the previous pass.</param>
    /// <param name="depth">Nesting depth carried over from the previous pass.</param>
    /// <param name="elemStart">Offset of the open element's first byte, or -1.</param>
    /// <param name="inArray">Whether the top-level array has been entered.</param>
    private static BufferResult ProcessBuffer(
        byte[] rawBuf, int parsedTo, int bufLength,
        JsonReaderState state, int depth, int elemStart, bool inArray)
    {
        var elements = new List<JsonElement>();

        int available = bufLength - parsedTo;
        if (available <= 0)
        {
            return new BufferResult
            {
                Elements = elements, State = state, Depth = depth,
                ElemStart = elemStart, InArray = inArray, ConsumedTo = parsedTo,
            };
        }

        var span = new ReadOnlySpan<byte>(rawBuf, parsedTo, available);
        var reader = new Utf8JsonReader(span, isFinalBlock: false, state);

        // Read() returns false once the remaining bytes do not form a complete token;
        // those bytes stay in the buffer and are re-read on the next pass.
        while (reader.Read())
        {
            var tt = reader.TokenType;

            if (tt == JsonTokenType.StartArray && !inArray && depth == 0)
            {
                inArray = true;
                depth = 1;
                continue;
            }

            if (!inArray)
            {
                continue;
            }

            if (tt is JsonTokenType.StartObject or JsonTokenType.StartArray)
            {
                if (depth == 1)
                {
                    // TokenStartIndex is relative to the span, so rebase it onto the buffer.
                    elemStart = parsedTo + (int)reader.TokenStartIndex;
                }

                depth++;
            }
            else if (tt is JsonTokenType.EndObject or JsonTokenType.EndArray)
            {
                depth--;

                if (depth == 1 && elemStart >= 0)
                {
                    // Complete element: parse its bytes in place, then Clone() so the
                    // returned element owns its data independently of the reusable buffer.
                    int end = parsedTo + (int)reader.BytesConsumed;
                    var elemBytes = new ReadOnlyMemory<byte>(rawBuf, elemStart, end - elemStart);

                    using var doc = JsonDocument.Parse(elemBytes);
                    elements.Add(doc.RootElement.Clone());

                    elemStart = -1;
                }
                else if (depth == 0)
                {
                    inArray = false;
                }
            }
        }

        return new BufferResult
        {
            Elements = elements,
            State = reader.CurrentState,
            Depth = depth,
            ElemStart = elemStart,
            InArray = inArray,
            ConsumedTo = parsedTo + (int)reader.BytesConsumed,
        };
    }

    /// <summary>
    /// UTF-8 encodes <paramref name="chunk"/> onto the end of <paramref name="buf"/> using
    /// <paramref name="encoder"/>, which carries a trailing high surrogate over to the next call.
    /// </summary>
    private static void AppendChunk(MemoryStream buf, Encoder encoder, string chunk)
    {
        ReadOnlySpan<char> chars = chunk.AsSpan();

        int byteCount = encoder.GetByteCount(chars, flush: false);
        if (byteCount == 0)
        {
            // The chunk was only a high surrogate; GetBytes must still run so the encoder keeps it.
            encoder.GetBytes(chars, Span<byte>.Empty, flush: false);
            return;
        }

        int oldLength = (int)buf.Length;
        buf.SetLength((long)oldLength + byteCount);

        // Fetch the backing array only after SetLength: growing may reallocate it.
        encoder.GetBytes(chars, buf.GetBuffer().AsSpan(oldLength, byteCount), flush: false);
    }

    /// <summary>
    /// Discard the first <paramref name="count"/> bytes of <paramref name="buf"/>, shifting the
    /// remainder to offset 0, to cap memory usage.
    /// </summary>
    private static void DiscardPrefix(MemoryStream buf, int count)
    {
        if (count <= 0)
        {
            return;
        }

        int remaining = (int)buf.Length - count;
        if (remaining > 0)
        {
            byte[] raw = buf.GetBuffer();
            Buffer.BlockCopy(raw, count, raw, 0, remaining);
        }

        buf.SetLength(remaining);
    }
}
| 219 | + |
| 220 | + |
| 221 | +/// <summary> |
| 222 | +/// Assembles fragmented JSON chunks (as produced by SQL Server's <c>FOR JSON</c> queries) |
| 223 | +/// into complete, independently-parsed <see cref="JsonElement"/> objects. |
| 224 | +/// </summary> |
| 225 | +/// <remarks> |
| 226 | +/// <para> |
| 227 | +/// SQL Server's <c>FOR JSON PATH</c> / <c>FOR JSON AUTO</c> does not respect JSON value |
| 228 | +/// boundaries when splitting output across rows — a single row may contain half a property |
| 229 | +/// name, and the next row may start mid-value. Naively streaming row-by-row therefore |
| 230 | +/// yields unusable partial JSON. |
| 231 | +/// </para> |
| 232 | +/// <para> |
| 233 | +/// This class solves that by feeding chunks through .NET's <see cref="Utf8JsonReader"/> |
| 234 | +/// with <see cref="JsonReaderState"/> persistence. The reader correctly handles escaped |
| 235 | +/// strings, nested objects and arrays, and resumes from exactly the right position when |
| 236 | +/// more data arrives — so <c>{</c> characters inside string values are never misread as |
| 237 | +/// object boundaries. |
| 238 | +/// </para> |
| 239 | +/// <para> |
| 240 | +/// For a <c>FOR JSON</c> result like <c>[{"Id":1},{"Id":2},…]</c>, each top-level array |
| 241 | +/// element is yielded as a separate <see cref="JsonElement"/> the moment the closing <c>}</c> |
| 242 | +/// of that element is received, without waiting for the entire array to arrive. |
| 243 | +/// </para> |
0 commit comments