Skip to content

Commit 0db527a

Browse files
authored
feat: add iceberg_data library alongside iceberg (apache#631)
Move data writers, deletes/, and puffin/ into a separate `iceberg_data` library that links the existing `iceberg` target. `delete_file_index` stays in `iceberg` because manifest_group embeds DeleteFileIndex::Builder with only core dependencies.

* `iceberg` — unchanged target name for metadata/planning, expressions, manifests, catalog (incl. in-memory), utilities, file I/O abstractions, and delete_file_index.
* `iceberg_data` — data/, deletes/, puffin/; links `iceberg`.

`iceberg_bundle` links `iceberg_data` when the bundle is built. `iceberg_rest` links `iceberg` and cpr only.
1 parent d6130f7 commit 0db527a

21 files changed

Lines changed: 226 additions & 144 deletions

example/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ project(example)
2222

2323
set(CMAKE_CXX_STANDARD 23)
2424

25-
find_package(iceberg CONFIG REQUIRED)
25+
find_package(iceberg CONFIG REQUIRED COMPONENTS bundle rest)
2626

2727
add_executable(demo_example demo_example.cc)
2828

src/iceberg/CMakeLists.txt

Lines changed: 52 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,7 @@ set(ICEBERG_INCLUDES "$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/src>"
2020
set(ICEBERG_SOURCES
2121
arrow_c_data_guard_internal.cc
2222
catalog/memory/in_memory_catalog.cc
23-
data/data_writer.cc
24-
data/delete_loader.cc
25-
data/equality_delete_writer.cc
26-
data/position_delete_writer.cc
27-
data/writer.cc
2823
delete_file_index.cc
29-
deletes/roaring_position_bitmap.cc
30-
deletes/position_delete_index.cc
3124
expression/aggregate.cc
3225
expression/binder.cc
3326
expression/evaluator.cc
@@ -66,9 +59,6 @@ set(ICEBERG_SOURCES
6659
partition_field.cc
6760
partition_spec.cc
6861
partition_summary.cc
69-
puffin/file_metadata.cc
70-
puffin/puffin_format.cc
71-
puffin/json_serde.cc
7262
row/arrow_array_wrapper.cc
7363
row/manifest_wrapper.cc
7464
row/partition_values.cc
@@ -134,24 +124,22 @@ list(APPEND
134124
ICEBERG_STATIC_BUILD_INTERFACE_LIBS
135125
"$<IF:$<BOOL:${NANOARROW_VENDORED}>,nanoarrow::nanoarrow_static,$<IF:$<TARGET_EXISTS:nanoarrow::nanoarrow_static>,nanoarrow::nanoarrow_static,nanoarrow::nanoarrow_shared>>"
136126
nlohmann_json::nlohmann_json
137-
roaring::roaring
138127
ZLIB::ZLIB)
139128
list(APPEND
140129
ICEBERG_SHARED_BUILD_INTERFACE_LIBS
141130
"$<IF:$<BOOL:${NANOARROW_VENDORED}>,nanoarrow::nanoarrow_static,$<IF:$<TARGET_EXISTS:nanoarrow::nanoarrow_shared>,nanoarrow::nanoarrow_shared,nanoarrow::nanoarrow_static>>"
142131
nlohmann_json::nlohmann_json
143-
roaring::roaring
144132
ZLIB::ZLIB)
145133
list(APPEND
146134
ICEBERG_STATIC_INSTALL_INTERFACE_LIBS
147135
"$<IF:$<BOOL:${NANOARROW_VENDORED}>,iceberg::nanoarrow_static,$<IF:$<TARGET_EXISTS:nanoarrow::nanoarrow_static>,nanoarrow::nanoarrow_static,nanoarrow::nanoarrow_shared>>"
148136
"$<IF:$<BOOL:${NLOHMANN_JSON_VENDORED}>,iceberg::nlohmann_json,$<IF:$<TARGET_EXISTS:nlohmann_json::nlohmann_json>,nlohmann_json::nlohmann_json,nlohmann_json::nlohmann_json>>"
149-
"$<IF:$<BOOL:${CROARING_VENDORED}>,iceberg::roaring,roaring::roaring>")
137+
)
150138
list(APPEND
151139
ICEBERG_SHARED_INSTALL_INTERFACE_LIBS
152140
"$<IF:$<BOOL:${NANOARROW_VENDORED}>,iceberg::nanoarrow_static,$<IF:$<TARGET_EXISTS:nanoarrow::nanoarrow_shared>,nanoarrow::nanoarrow_shared,nanoarrow::nanoarrow_static>>"
153141
"$<IF:$<BOOL:${NLOHMANN_JSON_VENDORED}>,iceberg::nlohmann_json,$<IF:$<TARGET_EXISTS:nlohmann_json::nlohmann_json>,nlohmann_json::nlohmann_json,nlohmann_json::nlohmann_json>>"
154-
"$<IF:$<BOOL:${CROARING_VENDORED}>,iceberg::roaring,roaring::roaring>")
142+
)
155143

156144
add_iceberg_lib(iceberg
157145
SOURCES
@@ -169,6 +157,52 @@ add_iceberg_lib(iceberg
169157
OUTPUTS
170158
ICEBERG_LIBRARIES)
171159

160+
set(ICEBERG_DATA_SOURCES
161+
data/data_writer.cc
162+
data/delete_loader.cc
163+
data/equality_delete_writer.cc
164+
data/position_delete_writer.cc
165+
data/writer.cc
166+
deletes/position_delete_index.cc
167+
deletes/roaring_position_bitmap.cc
168+
puffin/file_metadata.cc
169+
puffin/json_serde.cc
170+
puffin/puffin_format.cc)
171+
172+
set(ICEBERG_DATA_STATIC_BUILD_INTERFACE_LIBS)
173+
set(ICEBERG_DATA_SHARED_BUILD_INTERFACE_LIBS)
174+
set(ICEBERG_DATA_STATIC_INSTALL_INTERFACE_LIBS)
175+
set(ICEBERG_DATA_SHARED_INSTALL_INTERFACE_LIBS)
176+
177+
list(APPEND ICEBERG_DATA_STATIC_BUILD_INTERFACE_LIBS
178+
"$<IF:$<TARGET_EXISTS:iceberg_static>,iceberg_static,iceberg_shared>"
179+
roaring::roaring)
180+
list(APPEND ICEBERG_DATA_SHARED_BUILD_INTERFACE_LIBS
181+
"$<IF:$<TARGET_EXISTS:iceberg_shared>,iceberg_shared,iceberg_static>"
182+
roaring::roaring)
183+
list(APPEND
184+
ICEBERG_DATA_STATIC_INSTALL_INTERFACE_LIBS
185+
"$<IF:$<TARGET_EXISTS:iceberg::iceberg_static>,iceberg::iceberg_static,iceberg::iceberg_shared>"
186+
"$<IF:$<BOOL:${CROARING_VENDORED}>,iceberg::roaring,roaring::roaring>")
187+
list(APPEND
188+
ICEBERG_DATA_SHARED_INSTALL_INTERFACE_LIBS
189+
"$<IF:$<TARGET_EXISTS:iceberg::iceberg_shared>,iceberg::iceberg_shared,iceberg::iceberg_static>"
190+
"$<IF:$<BOOL:${CROARING_VENDORED}>,iceberg::roaring,roaring::roaring>")
191+
192+
add_iceberg_lib(iceberg_data
193+
SOURCES
194+
${ICEBERG_DATA_SOURCES}
195+
EXTRA_INCLUDES
196+
${ICEBERG_INCLUDES}
197+
SHARED_LINK_LIBS
198+
${ICEBERG_DATA_SHARED_BUILD_INTERFACE_LIBS}
199+
STATIC_LINK_LIBS
200+
${ICEBERG_DATA_STATIC_BUILD_INTERFACE_LIBS}
201+
STATIC_INSTALL_INTERFACE_LIBS
202+
${ICEBERG_DATA_STATIC_INSTALL_INTERFACE_LIBS}
203+
SHARED_INSTALL_INTERFACE_LIBS
204+
${ICEBERG_DATA_SHARED_INSTALL_INTERFACE_LIBS})
205+
172206
iceberg_install_all_headers(iceberg)
173207

174208
add_subdirectory(catalog)
@@ -210,29 +244,29 @@ if(ICEBERG_BUILD_BUNDLE)
210244

211245
list(APPEND
212246
ICEBERG_BUNDLE_STATIC_BUILD_INTERFACE_LIBS
213-
"$<IF:$<TARGET_EXISTS:iceberg_static>,iceberg_static,iceberg_shared>"
247+
"$<IF:$<TARGET_EXISTS:iceberg_data_static>,iceberg_data_static,iceberg_data_shared>"
214248
"$<IF:$<TARGET_EXISTS:Arrow::arrow_static>,Arrow::arrow_static,Arrow::arrow_shared>"
215249
"$<IF:$<TARGET_EXISTS:Parquet::parquet_static>,Parquet::parquet_static,Parquet::parquet_shared>"
216250
"$<IF:$<TARGET_EXISTS:avro-cpp::avrocpp_static>,avro-cpp::avrocpp_static,avro-cpp::avrocpp_shared>"
217251
)
218252
list(APPEND
219253
ICEBERG_BUNDLE_SHARED_BUILD_INTERFACE_LIBS
220-
"$<IF:$<TARGET_EXISTS:iceberg_shared>,iceberg_shared,iceberg_static>"
254+
"$<IF:$<TARGET_EXISTS:iceberg_data_shared>,iceberg_data_shared,iceberg_data_static>"
221255
"$<IF:$<TARGET_EXISTS:Arrow::arrow_shared>,Arrow::arrow_shared,Arrow::arrow_static>"
222256
"$<IF:$<TARGET_EXISTS:Parquet::parquet_shared>,Parquet::parquet_shared,Parquet::parquet_static>"
223257
"$<IF:$<TARGET_EXISTS:avro-cpp::avrocpp_shared>,avro-cpp::avrocpp_shared,avro-cpp::avrocpp_static>"
224258
)
225259

226260
list(APPEND
227261
ICEBERG_BUNDLE_STATIC_INSTALL_INTERFACE_LIBS
228-
"$<IF:$<TARGET_EXISTS:iceberg::iceberg_static>,iceberg::iceberg_static,iceberg::iceberg_shared>"
262+
"$<IF:$<TARGET_EXISTS:iceberg::iceberg_data_static>,iceberg::iceberg_data_static,iceberg::iceberg_data_shared>"
229263
"$<IF:$<BOOL:${ARROW_VENDORED}>,iceberg::arrow_static,$<IF:$<TARGET_EXISTS:Arrow::arrow_static>,Arrow::arrow_static,Arrow::arrow_shared>>"
230264
"$<IF:$<BOOL:${ARROW_VENDORED}>,iceberg::parquet_static,$<IF:$<TARGET_EXISTS:Parquet::parquet_static>,Parquet::parquet_static,Parquet::parquet_shared>>"
231265
"$<IF:$<BOOL:${AVRO_VENDORED}>,iceberg::avrocpp_s,$<IF:$<TARGET_EXISTS:avro-cpp::avrocpp_static>,avro-cpp::avrocpp_static,avro-cpp::avrocpp_shared>>"
232266
)
233267
list(APPEND
234268
ICEBERG_BUNDLE_SHARED_INSTALL_INTERFACE_LIBS
235-
"$<IF:$<TARGET_EXISTS:iceberg::iceberg_shared>,iceberg::iceberg_shared,iceberg::iceberg_static>"
269+
"$<IF:$<TARGET_EXISTS:iceberg::iceberg_data_shared>,iceberg::iceberg_data_shared,iceberg::iceberg_data_static>"
236270
"$<IF:$<BOOL:${ARROW_VENDORED}>,iceberg::arrow_static,$<IF:$<TARGET_EXISTS:Arrow::arrow_shared>,Arrow::arrow_shared,Arrow::arrow_static>>"
237271
"$<IF:$<BOOL:${ARROW_VENDORED}>,iceberg::parquet_static,$<IF:$<TARGET_EXISTS:Parquet::parquet_shared>,Parquet::parquet_shared,Parquet::parquet_static>>"
238272
"$<IF:$<BOOL:${AVRO_VENDORED}>,iceberg::avrocpp_s,$<IF:$<TARGET_EXISTS:avro-cpp::avrocpp_shared>,avro-cpp::avrocpp_shared,avro-cpp::avrocpp_static>>"

src/iceberg/arrow_c_data_guard_internal.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,11 @@
2222
#include <nanoarrow/nanoarrow.h>
2323

2424
#include "iceberg/arrow_c_data.h"
25+
#include "iceberg/iceberg_export.h"
2526

2627
namespace iceberg::internal {
2728

28-
class ArrowArrayGuard {
29+
class ICEBERG_EXPORT ArrowArrayGuard {
2930
public:
3031
explicit ArrowArrayGuard(ArrowArray* array) : array_(array) {}
3132
~ArrowArrayGuard();
@@ -34,7 +35,7 @@ class ArrowArrayGuard {
3435
ArrowArray* array_;
3536
};
3637

37-
class ArrowSchemaGuard {
38+
class ICEBERG_EXPORT ArrowSchemaGuard {
3839
public:
3940
explicit ArrowSchemaGuard(ArrowSchema* schema) : schema_(schema) {}
4041
~ArrowSchemaGuard();
@@ -43,7 +44,7 @@ class ArrowSchemaGuard {
4344
ArrowSchema* schema_;
4445
};
4546

46-
class ArrowArrayViewGuard {
47+
class ICEBERG_EXPORT ArrowArrayViewGuard {
4748
public:
4849
explicit ArrowArrayViewGuard(ArrowArrayView* view) : view_(view) {}
4950
~ArrowArrayViewGuard();
@@ -52,7 +53,7 @@ class ArrowArrayViewGuard {
5253
ArrowArrayView* view_;
5354
};
5455

55-
class ArrowArrayBufferGuard {
56+
class ICEBERG_EXPORT ArrowArrayBufferGuard {
5657
public:
5758
explicit ArrowArrayBufferGuard(ArrowBuffer* buffer) : buffer_(buffer) {}
5859
~ArrowArrayBufferGuard();

src/iceberg/data/data_writer.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,15 @@
3131
#include "iceberg/arrow_c_data.h"
3232
#include "iceberg/data/writer.h"
3333
#include "iceberg/file_format.h"
34-
#include "iceberg/iceberg_export.h"
34+
#include "iceberg/iceberg_data_export.h"
3535
#include "iceberg/result.h"
3636
#include "iceberg/row/partition_values.h"
3737
#include "iceberg/type_fwd.h"
3838

3939
namespace iceberg {
4040

4141
/// \brief Options for creating a DataWriter.
42-
struct ICEBERG_EXPORT DataWriterOptions {
42+
struct ICEBERG_DATA_EXPORT DataWriterOptions {
4343
std::string path;
4444
std::shared_ptr<Schema> schema;
4545
std::shared_ptr<PartitionSpec> spec;
@@ -51,7 +51,7 @@ struct ICEBERG_EXPORT DataWriterOptions {
5151
};
5252

5353
/// \brief Writer for Iceberg data files.
54-
class ICEBERG_EXPORT DataWriter : public FileWriter {
54+
class ICEBERG_DATA_EXPORT DataWriter : public FileWriter {
5555
public:
5656
~DataWriter() override;
5757

src/iceberg/data/delete_loader.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,14 @@
2626
#include <span>
2727
#include <string_view>
2828

29-
#include "iceberg/iceberg_export.h"
29+
#include "iceberg/iceberg_data_export.h"
3030
#include "iceberg/result.h"
3131
#include "iceberg/type_fwd.h"
3232

3333
namespace iceberg {
3434

3535
/// \brief Loads delete files and constructs in-memory delete indexes.
36-
class ICEBERG_EXPORT DeleteLoader {
36+
class ICEBERG_DATA_EXPORT DeleteLoader {
3737
public:
3838
/// \brief Create a DeleteLoader.
3939
/// \param io FileIO instance for reading delete files

src/iceberg/data/equality_delete_writer.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,15 @@
3232
#include "iceberg/arrow_c_data.h"
3333
#include "iceberg/data/writer.h"
3434
#include "iceberg/file_format.h"
35-
#include "iceberg/iceberg_export.h"
35+
#include "iceberg/iceberg_data_export.h"
3636
#include "iceberg/result.h"
3737
#include "iceberg/row/partition_values.h"
3838
#include "iceberg/type_fwd.h"
3939

4040
namespace iceberg {
4141

4242
/// \brief Options for creating an EqualityDeleteWriter.
43-
struct ICEBERG_EXPORT EqualityDeleteWriterOptions {
43+
struct ICEBERG_DATA_EXPORT EqualityDeleteWriterOptions {
4444
std::string path;
4545
std::shared_ptr<Schema> schema;
4646
std::shared_ptr<PartitionSpec> spec;
@@ -54,7 +54,7 @@ struct ICEBERG_EXPORT EqualityDeleteWriterOptions {
5454
};
5555

5656
/// \brief Writer for Iceberg equality delete files.
57-
class ICEBERG_EXPORT EqualityDeleteWriter : public FileWriter {
57+
class ICEBERG_DATA_EXPORT EqualityDeleteWriter : public FileWriter {
5858
public:
5959
~EqualityDeleteWriter() override;
6060

src/iceberg/data/meson.build

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
install_headers(
19+
[
20+
'data_writer.h',
21+
'delete_loader.h',
22+
'equality_delete_writer.h',
23+
'position_delete_writer.h',
24+
'writer.h',
25+
],
26+
subdir: 'iceberg/data',
27+
)

src/iceberg/data/position_delete_writer.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,15 @@
3131
#include "iceberg/arrow_c_data.h"
3232
#include "iceberg/data/writer.h"
3333
#include "iceberg/file_format.h"
34-
#include "iceberg/iceberg_export.h"
34+
#include "iceberg/iceberg_data_export.h"
3535
#include "iceberg/result.h"
3636
#include "iceberg/row/partition_values.h"
3737
#include "iceberg/type_fwd.h"
3838

3939
namespace iceberg {
4040

4141
/// \brief Options for creating a PositionDeleteWriter.
42-
struct ICEBERG_EXPORT PositionDeleteWriterOptions {
42+
struct ICEBERG_DATA_EXPORT PositionDeleteWriterOptions {
4343
std::string path;
4444
std::shared_ptr<Schema> schema;
4545
std::shared_ptr<PartitionSpec> spec;
@@ -51,7 +51,7 @@ struct ICEBERG_EXPORT PositionDeleteWriterOptions {
5151
};
5252

5353
/// \brief Writer for Iceberg position delete files.
54-
class ICEBERG_EXPORT PositionDeleteWriter : public FileWriter {
54+
class ICEBERG_DATA_EXPORT PositionDeleteWriter : public FileWriter {
5555
public:
5656
~PositionDeleteWriter() override;
5757

src/iceberg/data/writer.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,14 @@
2727
#include <vector>
2828

2929
#include "iceberg/arrow_c_data.h"
30-
#include "iceberg/iceberg_export.h"
30+
#include "iceberg/iceberg_data_export.h"
3131
#include "iceberg/result.h"
3232
#include "iceberg/type_fwd.h"
3333

3434
namespace iceberg {
3535

3636
/// \brief Base interface for data file writers.
37-
class ICEBERG_EXPORT FileWriter {
37+
class ICEBERG_DATA_EXPORT FileWriter {
3838
public:
3939
virtual ~FileWriter();
4040

@@ -49,7 +49,7 @@ class ICEBERG_EXPORT FileWriter {
4949
virtual Status Close() = 0;
5050

5151
/// \brief File metadata for all files produced by this writer.
52-
struct ICEBERG_EXPORT WriteResult {
52+
struct ICEBERG_DATA_EXPORT WriteResult {
5353
/// Usually a writer produces a single data or delete file.
5454
/// Position delete writer may produce multiple file-scoped delete files.
5555
/// In the future, multiple files can be produced if file rolling is supported.

src/iceberg/deletes/position_delete_index.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
#include <memory>
2727

2828
#include "iceberg/deletes/roaring_position_bitmap.h"
29-
#include "iceberg/iceberg_export.h"
29+
#include "iceberg/iceberg_data_export.h"
3030

3131
namespace iceberg {
3232

@@ -35,7 +35,7 @@ namespace iceberg {
3535
/// This class provides a domain-specific API for position deletes
3636
/// in Iceberg MOR (merge-on-read) tables. Positions are 0-based
3737
/// row indices within a data file.
38-
class ICEBERG_EXPORT PositionDeleteIndex {
38+
class ICEBERG_DATA_EXPORT PositionDeleteIndex {
3939
public:
4040
PositionDeleteIndex() = default;
4141
~PositionDeleteIndex() = default;

0 commit comments

Comments
 (0)