Skip to content

Commit 1ffcb4e

Browse files
apacheGH-49559: [C++][Parquet] Fix uncontrolled recursion in WKBGeometryBounder::MergeGeometryInternal (apache#49558)
### Rationale for this change Fix `MergeGeometryInternal` stack overflow from deeply nested WKB GeometryCollection inputs. ### What changes are included in this PR? Added a depth limit (128) to `MergeGeometryInternal` to prevent stack overflow when parsing deeply nested WKB GeometryCollection inputs. ### Are these changes tested? A unit test `TestWKBBounderErrorForDeepNesting` has been included to ensure proper exception throwing upon exceeding the limit. ### Are there any user-facing changes? No. * GitHub Issue: apache#49559 Lead-authored-by: Gang Wu <ustcwg@gmail.com> Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> Co-authored-by: wgtmac <4684607+wgtmac@users.noreply.github.com> Signed-off-by: Gang Wu <ustcwg@gmail.com>
1 parent c24bc29 commit 1ffcb4e

3 files changed

Lines changed: 51 additions & 3 deletions

File tree

cpp/src/parquet/geospatial/util_internal.cc

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,12 @@ void WKBGeometryBounder::MergeGeometry(::arrow::util::span<const uint8_t> bytes_
160160
}
161161
}
162162

163-
void WKBGeometryBounder::MergeGeometryInternal(WKBBuffer* src, bool record_wkb_type) {
163+
void WKBGeometryBounder::MergeGeometryInternal(WKBBuffer* src, bool record_wkb_type,
164+
int depth) {
165+
if (depth > 128) {
166+
throw ParquetException("WKB geometry has too many levels of nesting");
167+
}
168+
164169
uint8_t endian = src->ReadUInt8();
165170
#if ARROW_LITTLE_ENDIAN
166171
bool swap = endian != 0x01;
@@ -208,7 +213,7 @@ void WKBGeometryBounder::MergeGeometryInternal(WKBBuffer* src, bool record_wkb_t
208213
case GeometryType::kGeometryCollection: {
209214
uint32_t n_parts = src->ReadUInt32(swap);
210215
for (uint32_t i = 0; i < n_parts; i++) {
211-
MergeGeometryInternal(src, /*record_wkb_type*/ false);
216+
MergeGeometryInternal(src, /*record_wkb_type*/ false, depth + 1);
212217
}
213218
break;
214219
}

cpp/src/parquet/geospatial/util_internal.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ class PARQUET_EXPORT WKBGeometryBounder {
216216
BoundingBox box_;
217217
std::unordered_set<int32_t> geospatial_types_;
218218

219-
void MergeGeometryInternal(WKBBuffer* src, bool record_wkb_type);
219+
void MergeGeometryInternal(WKBBuffer* src, bool record_wkb_type, int depth = 0);
220220

221221
void MergeSequence(WKBBuffer* src, Dimensions dimensions, uint32_t n_coords, bool swap);
222222
};

cpp/src/parquet/geospatial/util_internal_test.cc

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,4 +545,47 @@ INSTANTIATE_TEST_SUITE_P(
545545
MakeWKBPointTestCase{{30, 10, 40, 300}, false, true},
546546
MakeWKBPointTestCase{{30, 10, 40, 300}, true, true}));
547547

548+
TEST(TestGeometryUtil, TestWKBBounderErrorForDeepNesting) {
549+
// Construct a nested GeometryCollection with 200 levels
550+
std::vector<uint8_t> nested_wkb;
551+
int num_levels = 200;
552+
553+
for (int i = 0; i < num_levels; i++) {
554+
nested_wkb.push_back(0x01); // little endian
555+
nested_wkb.push_back(0x07); // geometry collection
556+
nested_wkb.push_back(0x00);
557+
nested_wkb.push_back(0x00);
558+
nested_wkb.push_back(0x00);
559+
560+
nested_wkb.push_back(0x01); // 1 part
561+
nested_wkb.push_back(0x00);
562+
nested_wkb.push_back(0x00);
563+
nested_wkb.push_back(0x00);
564+
}
565+
566+
// Final part is an empty geometry collection
567+
nested_wkb.push_back(0x01); // little endian
568+
nested_wkb.push_back(0x07); // geometry collection
569+
nested_wkb.push_back(0x00);
570+
nested_wkb.push_back(0x00);
571+
nested_wkb.push_back(0x00);
572+
573+
nested_wkb.push_back(0x00); // 0 parts
574+
nested_wkb.push_back(0x00);
575+
nested_wkb.push_back(0x00);
576+
nested_wkb.push_back(0x00);
577+
578+
WKBGeometryBounder bounder;
579+
EXPECT_THROW(
580+
{
581+
try {
582+
bounder.MergeGeometry(nested_wkb);
583+
} catch (const ParquetException& e) {
584+
EXPECT_THAT(e.what(), ::testing::HasSubstr("too many levels of nesting"));
585+
throw;
586+
}
587+
},
588+
ParquetException);
589+
}
590+
548591
} // namespace parquet::geospatial

0 commit comments

Comments
 (0)