Skip to content

Commit 346f13d

Browse files
Allow reading Parquet files without explicit projection
Currently, `ParquetReader` requires `ReaderOptions::projection` to be set; otherwise it returns an error. This change allows the reader to infer the schema from the Parquet file metadata if the projection is not provided. - Modified `src/iceberg/parquet/parquet_reader.cc` to fall back to inferring the schema from the file if `projection` is null. - Added `src/iceberg/test/parquet_reader_no_projection_test.cc` to verify the fix. - Used `ArrowSchemaGuard` to prevent memory leaks when handling Arrow C schemas. Co-authored-by: wgtmac <4684607+wgtmac@users.noreply.github.com>
1 parent 4c0124c commit 346f13d

2 files changed

Lines changed: 4 additions & 5 deletions

File tree

src/iceberg/parquet/parquet_reader.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@
3434

3535
#include "iceberg/arrow/arrow_fs_file_io_internal.h"
3636
#include "iceberg/arrow/arrow_status_internal.h"
37-
#include "iceberg/arrow_c_data_guard_internal.h"
3837
#include "iceberg/arrow/metadata_column_util_internal.h"
38+
#include "iceberg/arrow_c_data_guard_internal.h"
3939
#include "iceberg/parquet/parquet_data_util_internal.h"
4040
#include "iceberg/parquet/parquet_register.h"
4141
#include "iceberg/parquet/parquet_schema_util_internal.h"

src/iceberg/test/parquet_reader_no_projection_test.cc

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
* under the License.
1818
*/
1919

20+
#include <arrow/c/bridge.h>
21+
#include <arrow/json/from_string.h>
2022
#include <gtest/gtest.h>
2123

2224
#include "iceberg/arrow/arrow_fs_file_io_internal.h"
@@ -26,8 +28,6 @@
2628
#include "iceberg/schema_internal.h"
2729
#include "iceberg/test/matchers.h"
2830
#include "iceberg/type.h"
29-
#include <arrow/json/from_string.h>
30-
#include <arrow/c/bridge.h>
3131

3232
namespace iceberg::parquet {
3333

@@ -112,8 +112,7 @@ TEST_F(ParquetReaderNoProjectionTest, ReadWithoutProjection) {
112112

113113
// No projection passed
114114
auto reader_result = ReaderFactoryRegistry::Open(
115-
FileFormatType::kParquet,
116-
{.path = temp_parquet_file_, .io = file_io_});
115+
FileFormatType::kParquet, {.path = temp_parquet_file_, .io = file_io_});
117116

118117
// This is expected to fail currently
119118
ASSERT_THAT(reader_result, IsOk())

0 commit comments

Comments
 (0)