From 3d27b0fe68f6953643924bf38c870f4eccd5b39d Mon Sep 17 00:00:00 2001
From: Protobuf Team Bot <protobuf-team-bot@google.com>
Date: Tue, 21 Apr 2026 14:18:22 -0700
Subject: [PATCH] C# ProtoJSON: Avoid N^2 behavior in nested Any-of-Any where
 @type is not first.

PiperOrigin-RevId: 903421745
---
 .../Google.Protobuf.Test/JsonParserTest.cs  |  53 +++++++++
 csharp/src/Google.Protobuf/JsonParser.cs    |  12 ++-
 csharp/src/Google.Protobuf/JsonTokenizer.cs | 102 +++++++++++++++++-
 3 files changed, 158 insertions(+), 9 deletions(-)

diff --git a/csharp/src/Google.Protobuf.Test/JsonParserTest.cs b/csharp/src/Google.Protobuf.Test/JsonParserTest.cs
index 7e41a02aca051..dba837e9924dd 100644
--- a/csharp/src/Google.Protobuf.Test/JsonParserTest.cs
+++ b/csharp/src/Google.Protobuf.Test/JsonParserTest.cs
@@ -883,6 +883,59 @@ public void Any_Nested()
             Assert.AreEqual(message, parser.Parse(json, TestWellKnownTypes.Descriptor));
         }
 
+        // -----------------------------------------------------------------------------------------
+        // Regression tests for O(N²) memory growth when @type appears last in nested Any values
+        // (https://github.com/protocolbuffers/protobuf/pull/26851).
+        // -----------------------------------------------------------------------------------------
+
+        private static string BuildNestedAnyTypeLastJson(int depth)
+        {
+            var sb = new System.Text.StringBuilder("{}");
+            for (int i = 0; i < depth; i++)
+            {
+                string inner = sb.ToString();
+                sb.Clear();
+                sb.Append("{\"value\":");
+                sb.Append(inner);
+                sb.Append(",\"@type\":\"type.googleapis.com/google.protobuf.Any\"}");
+            }
+            return sb.ToString();
+        }
+
+        [Test]
+        public void Any_TypeUrlLast_DeepNesting()
+        {
+            var registry = TypeRegistry.FromMessages(Any.Descriptor);
+            var parser = new JsonParser(new JsonParser.Settings(1000, registry));
+
+            var result200 = parser.Parse<Any>(BuildNestedAnyTypeLastJson(200));
+            Assert.AreEqual("type.googleapis.com/google.protobuf.Any", result200.TypeUrl);
+
+            var result400 = parser.Parse<Any>(BuildNestedAnyTypeLastJson(400));
+            Assert.AreEqual("type.googleapis.com/google.protobuf.Any", result400.TypeUrl);
+        }
+
+        [Test]
+        public void Any_TypeUrlLast_ManyFields()
+        {
+            var registry = TypeRegistry.FromMessages(TestAllTypes.Descriptor);
+            var parser = new JsonParser(new JsonParser.Settings(10, registry));
+
+            const int fieldCount = 10_000;
+            var sb = new System.Text.StringBuilder();
+            sb.Append("{\"repeatedInt32\":[");
+            for (int i = 0; i < fieldCount; i++)
+            {
+                if (i > 0) sb.Append(',');
+                sb.Append(i);
+            }
+            sb.Append("],\"@type\":\"type.googleapis.com/protobuf_unittest3.TestAllTypes\"}");
+
+            var any = parser.Parse<Any>(sb.ToString());
+            var unpacked = any.Unpack<TestAllTypes>();
+            Assert.AreEqual(fieldCount, unpacked.RepeatedInt32.Count);
+        }
+
         [Test]
         public void DataAfterObject()
         {
diff --git a/csharp/src/Google.Protobuf/JsonParser.cs b/csharp/src/Google.Protobuf/JsonParser.cs
index eae0f927ffad9..113ea7b727c6a 100644
--- a/csharp/src/Google.Protobuf/JsonParser.cs
+++ b/csharp/src/Google.Protobuf/JsonParser.cs
@@ -514,7 +514,7 @@ private void MergeAny(IMessage message, JsonTokenizer tokenizer)
         {
             // Record the token stream until we see the @type property. At that point, we can take the value, consult
             // the type registry for the relevant message, and replay the stream, omitting the @type property.
-            var tokens = new List<JsonToken>();
+            tokenizer.StartRecording();
 
             var token = tokenizer.Next();
             if (token.Type != JsonToken.TokenType.StartObject)
@@ -529,7 +529,6 @@ private void MergeAny(IMessage message, JsonTokenizer tokenizer)
                 token.StringValue != JsonFormatter.AnyTypeUrlField ||
                 tokenizer.ObjectDepth != typeUrlObjectDepth)
             {
-                tokens.Add(token);
                 token = tokenizer.Next();
 
                 // If we get to the end of the object and haven't seen a type URL, just return.
@@ -538,11 +537,15 @@ private void MergeAny(IMessage message, JsonTokenizer tokenizer)
                 // other properties but no type URL.
                 if (tokenizer.ObjectDepth < typeUrlObjectDepth)
                 {
+                    tokenizer.StopRecording();
                     return;
                 }
             }
 
             // Don't add the @type property or its value to the recorded token list
+            tokenizer.StopRecording();
+            tokenizer.DiscardLastToken();
+
             token = tokenizer.Next();
             if (token.Type != JsonToken.TokenType.StringValue)
             {
@@ -567,8 +570,9 @@ private void MergeAny(IMessage message, JsonTokenizer tokenizer)
             }
 
             // Now replay the token stream we've already read and anything that remains of the object, just parsing it
-            // as normal. Our original tokenizer should end up at the end of the object.
-            var replay = JsonTokenizer.FromReplayedTokens(tokens, tokenizer);
+            // as normal.
+            JsonTokenizer replay = tokenizer.GetReplayTokenizer(tokenizer);
+
             var body = descriptor.Parser.CreateTemplate();
             if (descriptor.IsWellKnownType)
             {
diff --git a/csharp/src/Google.Protobuf/JsonTokenizer.cs b/csharp/src/Google.Protobuf/JsonTokenizer.cs
index 5611beaf412f2..9efcc7bfb8883 100644
--- a/csharp/src/Google.Protobuf/JsonTokenizer.cs
+++ b/csharp/src/Google.Protobuf/JsonTokenizer.cs
@@ -30,7 +30,7 @@ namespace Google.Protobuf
     /// </summary>
     internal abstract class JsonTokenizer
     {
-        private JsonToken bufferedToken;
+        protected JsonToken bufferedToken;
 
        /// <summary>
        /// Creates a tokenizer that reads from the given text reader.
@@ -58,6 +58,11 @@ internal static JsonTokenizer FromReplayedTokens(IList<JsonToken> tokens, JsonTo
        /// </summary>
        internal int RecursionDepth { get; set; }
 
+        internal abstract void StartRecording();
+        internal abstract void StopRecording();
+        internal abstract JsonTokenizer GetReplayTokenizer(JsonTokenizer continuation);
+        internal virtual void DiscardLastToken() { }
+
        /// <summary>
        /// Returns the depth of the stack, purely in objects (not collections).
        /// Informally, this is the number of remaining unclosed '{' characters we have.
@@ -91,7 +96,7 @@ internal void PushBack(JsonToken token)
        /// <returns>The next token in the stream. This is never null.</returns>
        /// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
        /// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
-        internal JsonToken Next()
+        internal virtual JsonToken Next()
        {
            JsonToken tokenToReturn;
            if (bufferedToken != null)
@@ -153,22 +158,71 @@ internal void SkipValue()
        /// <summary>
        /// Tokenizer which first exhausts a list of tokens, then consults another tokenizer.
        /// </summary>
-        private class JsonReplayTokenizer : JsonTokenizer
+        internal class JsonReplayTokenizer : JsonTokenizer
        {
            private readonly IList<JsonToken> tokens;
            private readonly JsonTokenizer nextTokenizer;
            private int nextTokenIndex;
+            private readonly int endIndex;
 
            internal JsonReplayTokenizer(IList<JsonToken> tokens, JsonTokenizer nextTokenizer)
            {
                this.tokens = tokens;
                this.nextTokenizer = nextTokenizer;
+                this.endIndex = -1;
+            }
+
+            internal JsonReplayTokenizer(IList<JsonToken> tokens, int startIndex, int endIndex, JsonTokenizer nextTokenizer)
+            {
+                this.tokens = tokens;
+                this.nextTokenIndex = startIndex;
+                this.endIndex = endIndex;
+                this.nextTokenizer = nextTokenizer;
+            }
+
+            private int recordStartIndex;
+            private int recordCount;
+            private bool recording;
+
+            internal override void StartRecording()
+            {
+                recordStartIndex = bufferedToken != null ? nextTokenIndex - 1 : nextTokenIndex;
+                recordCount = 0;
+                recording = true;
+            }
+
+            internal override JsonToken Next()
+            {
+                var token = base.Next();
+                if (recording)
+                {
+                    recordCount++;
+                }
+                return token;
+            }
+
+            internal override void StopRecording()
+            {
+                recording = false;
+            }
+
+            internal override JsonTokenizer GetReplayTokenizer(JsonTokenizer continuation)
+            {
+                return new JsonReplayTokenizer(tokens, recordStartIndex, recordStartIndex + recordCount, continuation);
+            }
+
+            internal override void DiscardLastToken()
+            {
+                if (recording && recordCount > 0)
+                {
+                    recordCount--;
+                }
            }
 
-            // FIXME: Object depth not maintained...
            protected override JsonToken NextImpl()
            {
-                if (nextTokenIndex >= tokens.Count)
+                int limit = endIndex < 0 ? tokens.Count : endIndex;
+                if (nextTokenIndex >= limit)
                {
                    return nextTokenizer.Next();
                }
@@ -187,6 +241,44 @@ private sealed class JsonTextTokenizer : JsonTokenizer
            private readonly Stack<ContainerType> containerStack = new Stack<ContainerType>();
            private readonly PushBackReader reader;
            private State state;
+            private List<JsonToken> recordedTokens;
+            private bool recording;
+
+            internal override void StartRecording()
+            {
+                recordedTokens = new List<JsonToken>();
+                recording = true;
+            }
+
+            internal override JsonToken Next()
+            {
+                var token = base.Next();
+                if (recording)
+                {
+                    recordedTokens.Add(token);
+                }
+                return token;
+            }
+
+            internal override void StopRecording()
+            {
+                recording = false;
+            }
+
+            internal override JsonTokenizer GetReplayTokenizer(JsonTokenizer continuation)
+            {
+                var result = FromReplayedTokens(recordedTokens, continuation);
+                recordedTokens = null;
+                return result;
+            }
+
+            internal override void DiscardLastToken()
+            {
+                if (recordedTokens != null && recordedTokens.Count > 0)
+                {
+                    recordedTokens.RemoveAt(recordedTokens.Count - 1);
+                }
+            }
 
            internal JsonTextTokenizer(TextReader reader)
            {