Skip to content

Commit fbd7f2b

Browse files
C# ProtoJSON: Avoid N^2 behavior in nested Any-of-Any where @type is not first.
PiperOrigin-RevId: 903421745
1 parent a298282 commit fbd7f2b

3 files changed

Lines changed: 132 additions & 9 deletions

File tree

csharp/src/Google.Protobuf.Test/JsonParserTest.cs

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -883,6 +883,59 @@ public void Any_Nested()
883883
Assert.AreEqual(message, parser.Parse(json, TestWellKnownTypes.Descriptor));
884884
}
885885

886+
// -----------------------------------------------------------------------------------------
887+
// Regression tests for O(N²) memory growth when @type appears last in nested Any values
888+
// (https://github.com/protocolbuffers/protobuf/pull/26851).
889+
// -----------------------------------------------------------------------------------------
890+
891+
/// <summary>
/// Builds JSON for <paramref name="depth"/> nested Any wrappers in which the "@type" property
/// always follows the "value" property. The innermost value is the empty object "{}".
/// </summary>
/// <param name="depth">Number of Any wrappers to emit; zero or a negative value yields "{}".</param>
/// <returns>The generated JSON text.</returns>
private static string BuildNestedAnyTypeLastJson(int depth)
{
    const string prefix = "{\"value\":";
    const string suffix = ",\"@type\":\"type.googleapis.com/google.protobuf.Any\"}";

    // Each wrapper adds a fixed prefix before and a fixed suffix after the innermost "{}",
    // so the text can be emitted in one linear pass instead of re-copying the whole
    // accumulated string on every level.
    var sb = new System.Text.StringBuilder();
    for (int i = 0; i < depth; i++)
    {
        sb.Append(prefix);
    }
    sb.Append("{}");
    for (int i = 0; i < depth; i++)
    {
        sb.Append(suffix);
    }
    return sb.ToString();
}
904+
905+
// Parses deeply nested Any values whose "@type" property comes last, at two different
// depths, and checks that the outermost message reports the Any type URL each time.
[Test]
public void Any_TypeUrlLast_DeepNesting()
{
    const string expectedTypeUrl = "type.googleapis.com/google.protobuf.Any";
    var settings = new JsonParser.Settings(1000, TypeRegistry.FromMessages(Any.Descriptor));
    var parser = new JsonParser(settings);

    foreach (int depth in new[] { 200, 400 })
    {
        string json = BuildNestedAnyTypeLastJson(depth);
        Any parsed = parser.Parse<Any>(json);
        Assert.AreEqual(expectedTypeUrl, parsed.TypeUrl);
    }
}
917+
918+
// Parses an Any whose body carries a 10,000-element repeated field ahead of a trailing
// "@type" property, then checks that every element is present after unpacking.
[Test]
public void Any_TypeUrlLast_ManyFields()
{
    const int fieldCount = 10_000;

    var json = new System.Text.StringBuilder("{\"repeatedInt32\":[");
    string separator = "";
    for (int value = 0; value < fieldCount; value++)
    {
        // The separator is empty for the first element and a comma for all later ones.
        json.Append(separator).Append(value);
        separator = ",";
    }
    json.Append("],\"@type\":\"type.googleapis.com/protobuf_unittest3.TestAllTypes\"}");

    var settings = new JsonParser.Settings(10, TypeRegistry.FromMessages(TestAllTypes.Descriptor));
    var parser = new JsonParser(settings);
    TestAllTypes unpacked = parser.Parse<Any>(json.ToString()).Unpack<TestAllTypes>();

    Assert.AreEqual(fieldCount, unpacked.RepeatedInt32.Count);
}
938+
886939
[Test]
887940
public void DataAfterObject()
888941
{

csharp/src/Google.Protobuf/JsonParser.cs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -514,7 +514,7 @@ private void MergeAny(IMessage message, JsonTokenizer tokenizer)
514514
{
515515
// Record the token stream until we see the @type property. At that point, we can take the value, consult
516516
// the type registry for the relevant message, and replay the stream, omitting the @type property.
517-
var tokens = new List<JsonToken>();
517+
tokenizer.StartRecording();
518518

519519
var token = tokenizer.Next();
520520
if (token.Type != JsonToken.TokenType.StartObject)
@@ -529,7 +529,6 @@ private void MergeAny(IMessage message, JsonTokenizer tokenizer)
529529
token.StringValue != JsonFormatter.AnyTypeUrlField ||
530530
tokenizer.ObjectDepth != typeUrlObjectDepth)
531531
{
532-
tokens.Add(token);
533532
token = tokenizer.Next();
534533

535534
// If we get to the end of the object and haven't seen a type URL, just return.
@@ -538,11 +537,14 @@ private void MergeAny(IMessage message, JsonTokenizer tokenizer)
538537
// other properties but no type URL.
539538
if (tokenizer.ObjectDepth < typeUrlObjectDepth)
540539
{
540+
tokenizer.StopRecording();
541541
return;
542542
}
543543
}
544544

545545
// Don't add the @type property or its value to the recorded token list
546+
tokenizer.StopRecording();
547+
546548
token = tokenizer.Next();
547549
if (token.Type != JsonToken.TokenType.StringValue)
548550
{
@@ -567,8 +569,9 @@ private void MergeAny(IMessage message, JsonTokenizer tokenizer)
567569
}
568570

569571
// Now replay the token stream we've already read and anything that remains of the object, just parsing it
570-
// as normal. Our original tokenizer should end up at the end of the object.
571-
var replay = JsonTokenizer.FromReplayedTokens(tokens, tokenizer);
572+
// as normal.
573+
JsonTokenizer replay = tokenizer.GetReplayTokenizer(tokenizer);
574+
572575
var body = descriptor.Parser.CreateTemplate();
573576
if (descriptor.IsWellKnownType)
574577
{

csharp/src/Google.Protobuf/JsonTokenizer.cs

Lines changed: 72 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ namespace Google.Protobuf
3030
/// </remarks>
3131
internal abstract class JsonTokenizer
3232
{
33-
private JsonToken bufferedToken;
33+
protected JsonToken bufferedToken;
3434

3535
/// <summary>
3636
/// Creates a tokenizer that reads from the given text reader.
@@ -58,6 +58,10 @@ internal static JsonTokenizer FromReplayedTokens(IList<JsonToken> tokens, JsonTo
5858
/// </summary>
5959
internal int RecursionDepth { get; set; }
6060

61+
/// <summary>
/// Starts recording the tokens this tokenizer hands out so that they can later be replayed
/// through <see cref="GetReplayTokenizer"/>.
/// </summary>
internal abstract void StartRecording();
62+
/// <summary>
/// Marks the end of the range begun by <see cref="StartRecording"/>.
/// </summary>
/// <remarks>
/// NOTE(review): the two implementations in this file differ. The replay tokenizer stops
/// counting tokens, while the text tokenizer's implementation is empty. Confirm that
/// callers do not rely on tokens read after this call being excluded.
/// </remarks>
internal abstract void StopRecording();
63+
/// <summary>
/// Creates a tokenizer that replays the recorded tokens.
/// </summary>
/// <param name="continuation">
/// Tokenizer passed to the replay tokenizer as its fallback source once the recorded
/// tokens have been handed out.
/// </param>
/// <returns>A tokenizer over the recorded tokens followed by <paramref name="continuation"/>.</returns>
internal abstract JsonTokenizer GetReplayTokenizer(JsonTokenizer continuation);
64+
6165
/// <summary>
6266
/// Returns the depth of the stack, purely in objects (not collections).
6367
/// Informally, this is the number of remaining unclosed '{' characters we have.
@@ -153,26 +157,62 @@ internal void SkipValue()
153157
/// <summary>
154158
/// Tokenizer which first exhausts a list of tokens, then consults another tokenizer.
155159
/// </summary>
156-
private class JsonReplayTokenizer : JsonTokenizer
160+
internal class JsonReplayTokenizer : JsonTokenizer
{
    // Token list being replayed; it may be shared with the tokenizer that produced it.
    private readonly IList<JsonToken> tokens;
    // Source consulted once the replay range has been handed out.
    private readonly JsonTokenizer nextTokenizer;
    // Exclusive upper bound of the replay range; a negative value means "to the end of the list".
    private readonly int endIndex;
    // Index of the next token to hand out from the list.
    private int nextTokenIndex;

    // State for recording a sub-range of the list for a further, nested replay.
    private int recordStartIndex;
    private int recordCount;
    private bool recording;

    /// <summary>
    /// Creates a tokenizer that replays the whole of <paramref name="tokens"/> and then
    /// continues with <paramref name="nextTokenizer"/>.
    /// </summary>
    internal JsonReplayTokenizer(IList<JsonToken> tokens, JsonTokenizer nextTokenizer)
        : this(tokens, 0, -1, nextTokenizer)
    {
    }

    /// <summary>
    /// Creates a tokenizer that replays <paramref name="tokens"/> from
    /// <paramref name="startIndex"/> up to (but excluding) <paramref name="endIndex"/>, and
    /// then continues with <paramref name="nextTokenizer"/>. A negative
    /// <paramref name="endIndex"/> replays to the end of the list.
    /// </summary>
    internal JsonReplayTokenizer(IList<JsonToken> tokens, int startIndex, int endIndex, JsonTokenizer nextTokenizer)
    {
        this.tokens = tokens;
        this.nextTokenizer = nextTokenizer;
        this.endIndex = endIndex;
        nextTokenIndex = startIndex;
    }

    /// <summary>
    /// Begins a recorded range at the current replay position, or one position earlier
    /// when a token is currently buffered, and resets the recorded-token count.
    /// </summary>
    internal override void StartRecording()
    {
        // Presumably the step back makes the buffered token part of the range - confirm
        // against the base class's buffering logic.
        recordStartIndex = nextTokenIndex;
        if (bufferedToken != null)
        {
            recordStartIndex--;
        }
        recordCount = 0;
        recording = true;
    }

    /// <summary>
    /// Stops counting tokens toward the recorded range.
    /// </summary>
    internal override void StopRecording() => recording = false;

    /// <summary>
    /// Creates a replay tokenizer over the recorded range of the same token list (no copy is
    /// made), falling back to <paramref name="continuation"/> once that range is used up.
    /// </summary>
    internal override JsonTokenizer GetReplayTokenizer(JsonTokenizer continuation)
    {
        int replayEnd = recordStartIndex + recordCount;
        return new JsonReplayTokenizer(tokens, recordStartIndex, replayEnd, continuation);
    }

    /// <summary>
    /// Returns the next token of the replay range, or delegates to the fallback tokenizer
    /// once the range is exhausted.
    /// </summary>
    protected override JsonToken NextImpl()
    {
        int limit = endIndex >= 0 ? endIndex : tokens.Count;
        if (nextTokenIndex < limit)
        {
            JsonToken replayed = tokens[nextTokenIndex++];
            if (recording)
            {
                recordCount++;
            }
            return replayed;
        }

        // Tokens obtained from the fallback tokenizer are not counted toward the recorded range.
        return nextTokenizer.Next();
    }
}
178218

@@ -187,6 +227,23 @@ private sealed class JsonTextTokenizer : JsonTokenizer
187227
private readonly Stack<ContainerType> containerStack = new Stack<ContainerType>();
188228
private readonly PushBackReader reader;
189229
private State state;
230+
private List<JsonToken> recordedTokens;
231+
232+
/// <summary>
/// Starts a fresh recording by replacing the recorded-token list with a new, empty one.
/// </summary>
internal override void StartRecording() => recordedTokens = new List<JsonToken>();
236+
237+
/// <summary>
/// No-op for the text tokenizer: the recorded-token list is left untouched.
/// </summary>
/// <remarks>
/// NOTE(review): because nothing changes here, NextImpl keeps appending tokens to the list
/// until GetReplayTokenizer clears it or StartRecording replaces it. Confirm that this is
/// intended, including on paths where GetReplayTokenizer is never called.
/// </remarks>
internal override void StopRecording()
238+
{
239+
}
240+
241+
/// <summary>
/// Wraps the recorded tokens in a replay tokenizer that falls back to
/// <paramref name="continuation"/>, then ends the recording.
/// </summary>
internal override JsonTokenizer GetReplayTokenizer(JsonTokenizer continuation)
{
    JsonTokenizer replay = FromReplayedTokens(recordedTokens, continuation);

    // Drop our reference so NextImpl stops appending to the list handed to the replay tokenizer.
    recordedTokens = null;
    return replay;
}
190247

191248
internal JsonTextTokenizer(TextReader reader)
192249
{
@@ -203,6 +260,16 @@ internal JsonTextTokenizer(TextReader reader)
203260
/// of it is the large switch statement, which sometimes returns and sometimes doesn't.
204261
/// </remarks>
205262
protected override JsonToken NextImpl()
{
    JsonToken produced = NextImplInner();

    // A non-null list means a recording is in progress; capture the token before returning it.
    recordedTokens?.Add(produced);
    return produced;
}
271+
272+
private JsonToken NextImplInner()
206273
{
207274
if (state == State.ReaderExhausted)
208275
{

0 commit comments

Comments
 (0)