Skip to content

Commit a0f92c4

Browse files
Fasttable: apply UnknownField fast path to empty slots when permissible.
PiperOrigin-RevId: 908298171
1 parent 7bd0939 commit a0f92c4

8 files changed

Lines changed: 335 additions & 8 deletions

File tree

upb/wire/decode_fast/BUILD

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ load("@bazel_skylib//rules:common_settings.bzl", "bool_flag")
99
load("@rules_cc//cc:cc_binary.bzl", "cc_binary")
1010
load("@rules_cc//cc:cc_test.bzl", "cc_test")
1111
load("@rules_cc//cc:defs.bzl", "cc_library")
12-
load("//upb/bazel:copts.bzl", "UPB_DEFAULT_COPTS", "UPB_DEFAULT_FEATURES")
12+
load("//upb/bazel:copts.bzl", "UPB_DEFAULT_COPTS", "UPB_DEFAULT_CPPOPTS", "UPB_DEFAULT_FEATURES")
1313

1414
package(default_applicable_licenses = ["//:license"])
1515

@@ -39,6 +39,7 @@ cc_library(
3939
"field_generic.c",
4040
"field_message.c",
4141
"field_string.c",
42+
"field_unknown.c",
4243
"field_varint.c",
4344
],
4445
hdrs = ["field_parsers.h"],
@@ -66,6 +67,7 @@ cc_library(
6667
"//upb/message:types",
6768
"//upb/mini_table",
6869
"//upb/port",
70+
"//upb/wire",
6971
"//upb/wire:decoder",
7072
"//upb/wire:eps_copy_input_stream",
7173
"//upb/wire:reader",
@@ -119,6 +121,9 @@ cc_test(
119121
deps = [
120122
":combinations",
121123
":select",
124+
"//upb/mem",
125+
"//upb/wire/test_util:field_types",
126+
"//upb/wire/test_util:make_mini_table",
122127
"@abseil-cpp//absl/base:core_headers",
123128
"@googletest//:gtest",
124129
"@googletest//:gtest_main",
@@ -182,3 +187,23 @@ cc_binary(
182187
"@abseil-cpp//absl/flags:parse",
183188
],
184189
)
190+
191+
cc_test(
192+
name = "test_unknown",
193+
srcs = ["test_unknown.cc"],
194+
copts = UPB_DEFAULT_CPPOPTS,
195+
features = UPB_DEFAULT_FEATURES,
196+
deps = [
197+
":combinations",
198+
"//upb/base",
199+
"//upb/mem",
200+
"//upb/message",
201+
"//upb/port",
202+
"//upb/wire",
203+
"//upb/wire/test_util:field_types",
204+
"//upb/wire/test_util:make_mini_table",
205+
"//upb/wire/test_util:wire_message",
206+
"@googletest//:gtest",
207+
"@googletest//:gtest_main",
208+
],
209+
)

upb/wire/decode_fast/combinations.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,9 @@ UPB_INLINE upb_DecodeFast_Type upb_DecodeFast_GetType(uint32_t function_idx) {
216216
UPB_DECODEFAST_COMBINATION_IS_ENABLED(type, card, size)
217217
#endif
218218

219+
// A special value for function_idx to indicate that the field is unknown.
220+
#define kUpb_DecodeFast_Unknown (UINT32_MAX - 1)
221+
219222
#include "upb/port/undef.inc"
220223

221224
#endif // UPB_WIRE_INTERNAL_DECODE_FAST_COMBINATIONS_H_

upb/wire/decode_fast/field_parsers.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ const char* _upb_FastDecoder_FallbackToMiniTable(PARSE_PARAMS);
4343
UPB_PRESERVE_NONE
4444
const char* _upb_FastDecoder_DecodeGeneric(PARSE_PARAMS);
4545

46+
UPB_PRESERVE_NONE
47+
const char* _upb_FastDecoder_DecodeUnknown(PARSE_PARAMS);
48+
4649
#undef F
4750
#undef PARSE_PARAMS
4851

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
// Protocol Buffers - Google's data interchange format
2+
// Copyright 2025 Google LLC. All rights reserved.
3+
//
4+
// Use of this source code is governed by a BSD-style
5+
// license that can be found in the LICENSE file or at
6+
// https://developers.google.com/open-source/licenses/bsd
7+
8+
#include <stdint.h>
9+
10+
#include "upb/base/string_view.h"
11+
#include "upb/message/internal/message.h"
12+
#include "upb/message/message.h"
13+
#include "upb/wire/decode.h"
14+
#include "upb/wire/decode_fast/cardinality.h"
15+
#include "upb/wire/decode_fast/field_parsers.h"
16+
#include "upb/wire/eps_copy_input_stream.h"
17+
#include "upb/wire/internal/decoder.h"
18+
#include "upb/wire/reader.h"
19+
20+
// Must be last.
21+
#include "upb/port/def.inc"
22+
23+
// Fast-path handler for a field that has no entry in the fast table.
//
// Skips one complete field (tag plus payload) starting at `*ptr` and appends
// the raw wire bytes of that field to the unknown-field storage of `msg`.
//
// Params:
//   d       - decoder state; its input stream, arena and options are used.
//   ptr     - in/out cursor. On entry it points at the first tag byte; on
//             success it points just past the field's payload.
//   msg     - message that receives the captured bytes.
//   table   - not used by this handler.
//   hasbits - not used by this handler.
//   data    - the low 16 bits are interpreted as the first two bytes of the
//             tag (continuation bits at 0x80 / 0x8000, wire type in the low
//             three bits).
//   ret     - out: handed to UPB_DECODEFAST_ERROR / UPB_DECODEFAST_EXIT on
//             every non-success path.
//
// Returns true when the field was consumed and recorded. On every other path
// the value produced by UPB_DECODEFAST_ERROR / UPB_DECODEFAST_EXIT is
// returned (presumably false, with the next action stored in `*ret`).
UPB_FORCEINLINE bool _upb_FastDecoder_DoDecodeUnknown(
    struct upb_Decoder* d, const char** ptr, upb_Message* msg, intptr_t table,
    uint64_t hasbits, uint64_t data, upb_DecodeFastNext* ret) {
  const char* start = *ptr;

  // Important: if the branch is correctly predicted, the ptr increment is
  // treated as constant and subsequent loads will not have a data dependency on
  // the branch.
  if (UPB_LIKELY((data & 0x80) == 0)) {
    *ptr += 1;
    // Ensure the field number is not 0.
    // Use bitwise op to only examine first byte minus additional tag data.
    if (UPB_UNLIKELY((data & 0xF8) == 0)) {
      return UPB_DECODEFAST_ERROR(d, kUpb_DecodeStatus_Malformed, ret);
    }
  } else if ((data & 0x8000) == 0) {
    *ptr += 2;
    // Ensure the field number is not 0.
    // Use bitwise op to limit to first two bytes, and ignore continuation bit &
    // additional tag data.
    if (UPB_UNLIKELY((data & 0x7f78) == 0)) {
      return UPB_DECODEFAST_ERROR(d, kUpb_DecodeStatus_Malformed, ret);
    }
  } else {
    // Tag >=2048
    return UPB_DECODEFAST_EXIT(kUpb_DecodeFastNext_FallbackToMiniTable, ret);
  }

  uint32_t wire_type = data & 0x07;

  if (UPB_UNLIKELY(wire_type == kUpb_WireType_EndGroup ||
                   wire_type == kUpb_WireType_StartGroup)) {
    // FastDecoder doesn't handle group fields, but it can be used to decode a
    // message that is itself a group. When decoding a group, the end of the
    // message is marked by an EndGroup tag. Since EndGroup tags are not in
    // the MiniTable, they are routed to the unknown field handler. We must
    // intercept them here to properly terminate the message.
    return UPB_DECODEFAST_EXIT(kUpb_DecodeFastNext_FallbackToMiniTable, ret);
  }

  // Capture begins at the tag so that the recorded bytes include it.
  upb_EpsCopyInputStream_StartCapture(&d->input, start);

  switch (wire_type) {
    case kUpb_WireType_Varint:
      *ptr = upb_WireReader_SkipVarint(*ptr, &d->input);
      // Test the cursor value, not the out-parameter: `ptr` itself was already
      // dereferenced above and can never be null, whereas the reader's result
      // (presumably NULL on a malformed varint) is stored in `*ptr`.
      if (UPB_UNLIKELY(!*ptr)) {
        return UPB_DECODEFAST_ERROR(d, kUpb_DecodeStatus_Malformed, ret);
      }
      break;
    case kUpb_WireType_32Bit:
      UPB_PRIVATE(upb_EpsCopyInputStream_ConsumeBytes)(&d->input, 4);
      *ptr += 4;
      break;
    case kUpb_WireType_64Bit:
      UPB_PRIVATE(upb_EpsCopyInputStream_ConsumeBytes)(&d->input, 8);
      *ptr += 8;
      break;
    case kUpb_WireType_Delimited: {
      int size;
      *ptr = upb_WireReader_ReadSize(*ptr, &size, &d->input);
      // As above, the failure indicator lives in `*ptr`. Checking it first
      // also keeps a null cursor from being handed to CheckSize.
      if (UPB_UNLIKELY(!*ptr || !upb_EpsCopyInputStream_CheckSize(&d->input,
                                                                  *ptr, size))) {
        return UPB_DECODEFAST_ERROR(d, kUpb_DecodeStatus_Malformed, ret);
      }
      *ptr += size;
      break;
    }
    default:
      // Wire types other than the ones handled above are rejected.
      return UPB_DECODEFAST_ERROR(d, kUpb_DecodeStatus_Malformed, ret);
  }

  upb_StringView sv;
  if (UPB_UNLIKELY(!upb_EpsCopyInputStream_EndCapture(&d->input, *ptr, &sv))) {
    return UPB_DECODEFAST_ERROR(d, kUpb_DecodeStatus_Malformed, ret);
  }

  // Copy the bytes unless the caller opted into aliasing the input buffer.
  // When aliasing, a capture that begins exactly at the start of the input
  // buffer uses plain Alias; any other capture uses AliasAllowMerge.
  upb_AddUnknownMode mode = kUpb_AddUnknown_Copy;
  if (d->options & kUpb_DecodeOption_AliasString) {
    if (sv.data != d->input.buffer_start) {
      mode = kUpb_AddUnknown_AliasAllowMerge;
    } else {
      mode = kUpb_AddUnknown_Alias;
    }
  }

  if (!UPB_PRIVATE(_upb_Message_AddUnknown)(msg, sv.data, sv.size, &d->arena,
                                            mode)) {
    return UPB_DECODEFAST_ERROR(d, kUpb_DecodeStatus_OutOfMemory, ret);
  }

  return true;
}
116+
117+
// Field-parser entry point for the fast unknown-field path. It is the function
// returned for the special index kUpb_DecodeFast_Unknown.
//
// Delegates all work to _upb_FastDecoder_DoDecodeUnknown() and then hands the
// resulting next-action value to UPB_DECODEFAST_NEXT.
UPB_PRESERVE_NONE const char* _upb_FastDecoder_DecodeUnknown(
    struct upb_Decoder* d, const char* ptr, upb_Message* msg, intptr_t table,
    uint64_t hasbits, uint64_t data) {
  // Start from "keep dispatching"; the helper passes `&ret` to the
  // error/exit macros on every non-success path (presumably overwriting it).
  upb_DecodeFastNext ret = kUpb_DecodeFastNext_Dispatch;

  // The boolean result is deliberately discarded: the outcome that matters to
  // the dispatcher is carried in `ret`, and `ptr` is advanced in place.
  (void)_upb_FastDecoder_DoDecodeUnknown(d, &ptr, msg, table, hasbits, data,
                                         &ret);

  UPB_DECODEFAST_NEXT(ret);
}

upb/wire/decode_fast/function_array.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,15 @@ static _upb_FieldParser* funcs[] = {UPB_DECODEFAST_FUNCTIONS(ADDR_OF_FUNC)};
2727
#undef ADDR_OF_FUNC
2828

2929
_upb_FieldParser* upb_DecodeFast_GetFunctionPointer(uint32_t function_idx) {
30-
if (function_idx == UINT32_MAX) return &_upb_FastDecoder_DecodeGeneric;
31-
UPB_ASSERT(function_idx < UPB_ARRAY_SIZE(funcs));
32-
return funcs[function_idx];
30+
switch (function_idx) {
31+
case UINT32_MAX:
32+
return &_upb_FastDecoder_DecodeGeneric;
33+
case kUpb_DecodeFast_Unknown:
34+
return &_upb_FastDecoder_DecodeUnknown;
35+
default:
36+
UPB_ASSERT(function_idx < UPB_ARRAY_SIZE(funcs));
37+
return funcs[function_idx];
38+
}
3339
}
3440

3541
#include "upb/port/undef.inc"

upb/wire/decode_fast/select.c

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -182,11 +182,14 @@ static bool upb_DecodeFast_GetFunctionData(const upb_MiniTable* m,
182182

183183
static bool upb_DecodeFast_TryFillEntry(const upb_MiniTable* m,
184184
const upb_MiniTableField* field,
185+
bool* out_supported_tag_size,
185186
upb_DecodeFast_TableEntry* entry) {
186187
UPB_ASSERT(!upb_MiniTableField_IsExtension(field));
187188
uint16_t tag;
188189
upb_DecodeFast_TagSize tag_size;
189-
return upb_DecodeFast_GetEncodedTag(field, &tag, &tag_size) &&
190+
*out_supported_tag_size =
191+
upb_DecodeFast_GetEncodedTag(field, &tag, &tag_size);
192+
return *out_supported_tag_size &&
190193
upb_DecodeFast_GetFunctionIndex(m, field, tag_size,
191194
&entry->function_idx) &&
192195
UPB_DECODEFAST_ISENABLED(
@@ -205,19 +208,47 @@ int upb_DecodeFast_BuildTable(const upb_MiniTable* m,
205208
table[i].function_data = 0;
206209
}
207210

211+
// Fasttable only handles fields with tag size of 1 or 2 bytes. If all known
212+
// fields with such tag sizes are covered, and the message is non-extensible,
213+
// we can short-circuit misses to unknown field handling.
214+
bool all_supported_tag_size_fields_map_to_assigned_slots = true;
208215
int max = 0;
209216
for (size_t i = 0, n = upb_MiniTable_FieldCount(m); i < n; i++) {
210217
const upb_MiniTableField* field = upb_MiniTable_GetFieldByIndex(m, i);
218+
bool supported_tag_size;
211219
upb_DecodeFast_TableEntry entry;
212-
if (!upb_DecodeFast_TryFillEntry(m, field, &entry)) continue;
220+
if (!upb_DecodeFast_TryFillEntry(m, field, &supported_tag_size, &entry)) {
221+
if (supported_tag_size) {
222+
// A field with a supported tag size got no fast-path entry, so empty slots cannot be treated as unknown-only.
223+
all_supported_tag_size_fields_map_to_assigned_slots = false;
224+
}
225+
continue;
226+
}
213227
int slot = upb_DecodeFastData_GetTableSlot(entry.function_data);
214228
if (table[slot].function_idx == UINT32_MAX) {
215229
table[slot] = entry;
216230
max = UPB_MAX(max, slot);
217231
}
218232
}
219233

220-
return max == 0 ? 0 : upb_RoundUpToPowerOfTwo(max + 1);
234+
int table_size = max == 0 ? 0 : upb_RoundUpToPowerOfTwo(max + 1);
235+
236+
// If the message is not extendable, we can swap the generic handler for a
237+
// fast unknown field handler in remaining open slots.
238+
// The fast unknown handler only covers 1/2 byte tags and falls back for >2
239+
// bytes; thus, we do not need to check for total exhaustiveness in field
240+
// coverage, only for 1/2 byte tags.
241+
if (all_supported_tag_size_fields_map_to_assigned_slots &&
242+
m->UPB_PRIVATE(ext) == kUpb_ExtMode_NonExtendable) {
243+
for (int i = 0; i < table_size; i++) {
244+
if (table[i].function_idx == UINT32_MAX) {
245+
table[i].function_idx = kUpb_DecodeFast_Unknown;
246+
table[i].function_data = 0;
247+
}
248+
}
249+
}
250+
251+
return table_size;
221252
}
222253

223254
uint8_t upb_DecodeFast_GetTableMask(int table_size) {
@@ -237,6 +268,9 @@ const char* upb_DecodeFast_GetFunctionName(uint32_t function_idx) {
237268
#undef FUNCSTR
238269

239270
if (function_idx == UINT32_MAX) return "_upb_FastDecoder_DecodeGeneric";
271+
if (function_idx == kUpb_DecodeFast_Unknown) {
272+
return "_upb_FastDecoder_DecodeUnknown";
273+
}
240274
UPB_ASSERT(function_idx < UPB_ARRAY_SIZE(names));
241275
return names[function_idx];
242276
}

upb/wire/decode_fast/select_test.cc

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,26 @@
1+
// Protocol Buffers - Google's data interchange format
2+
// Copyright 2025 Google LLC. All rights reserved.
3+
//
4+
// Use of this source code is governed by a BSD-style
5+
// license that can be found in the LICENSE file or at
6+
// https://developers.google.com/open-source/licenses/bsd
7+
18
#include "upb/wire/decode_fast/select.h"
29

310
#include <cstddef>
411
#include <cstdint>
512
#include <cstdio>
13+
#include <cstring>
614

715
#include <gtest/gtest.h>
816
#include "absl/base/macros.h"
17+
#include "upb/mem/arena.hpp"
918
#include "upb/wire/decode_fast/combinations.h"
19+
#include "upb/wire/test_util/field_types.h"
20+
#include "upb/wire/test_util/make_mini_table.h"
1021

22+
namespace upb {
23+
namespace test {
1124
namespace {
1225

1326
TEST(SelectTest, FunctionIndicesMatch) {
@@ -22,6 +35,25 @@ TEST(SelectTest, FunctionIndicesMatch) {
2235
<< upb_DecodeFast_GetFunctionName(numbers[i]);
2336
}
2437
#undef IDX
25-
} // namespace
38+
}
39+
40+
// Builds the fast table for a message with a single scalar int32 field
// (field number 1) and checks that the one slot left without a field is given
// the fast unknown-field handler rather than the generic one.
TEST(SelectTest, UnknownSlotsAssignedForNonExtendable) {
  upb::Arena mt_arena;
  auto [mt, field] = MiniTable::MakeSingleFieldTable<field_types::Int32>(
      1, kUpb_DecodeFast_Scalar, mt_arena.ptr());

  upb_DecodeFast_TableEntry table[32];
  const int table_size = upb_DecodeFast_BuildTable(mt, table);

  // The tag for field 1 with the varint wire type is (1 << 3) | 0 = 8.
  // Bits 3-7 of that byte are 00001, which selects slot 1.
  // With slot 1 as the highest used slot, the table is rounded up to
  // 2 entries (the next power of two >= max + 1).
  const upb_DecodeFast_TableEntry& empty_slot = table[0];
  const upb_DecodeFast_TableEntry& field_slot = table[1];

  EXPECT_EQ(table_size, 2);
  // Slot 1 must hold a real field parser: neither generic nor unknown.
  EXPECT_NE(field_slot.function_idx, UINT32_MAX);
  EXPECT_NE(field_slot.function_idx, kUpb_DecodeFast_Unknown);
  // Slot 0 has no field, so it must be routed to the unknown handler.
  EXPECT_EQ(empty_slot.function_idx, kUpb_DecodeFast_Unknown);
}
2656

2757
} // namespace
58+
} // namespace test
59+
} // namespace upb

0 commit comments

Comments
 (0)