Skip to content

Commit 2c53592

Browse files
shangxinliwgtmac
andauthored
feat: add tentative data & delete writer api (#463)
Co-authored-by: Gang Wu <ustcwg@gmail.com>
1 parent deec370 commit 2c53592

File tree

9 files changed

+340
-218
lines changed

9 files changed

+340
-218
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ set(ICEBERG_INCLUDES "$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/src>"
2020
set(ICEBERG_SOURCES
2121
arrow_c_data_guard_internal.cc
2222
catalog/memory/in_memory_catalog.cc
23+
data/data_writer.cc
24+
data/equality_delete_writer.cc
25+
data/position_delete_writer.cc
2326
data/writer.cc
2427
delete_file_index.cc
2528
expression/aggregate.cc

src/iceberg/data/data_writer.cc

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/data/data_writer.h"
21+
22+
namespace iceberg {
23+
24+
class DataWriter::Impl {
25+
public:
26+
};
27+
28+
DataWriter::~DataWriter() = default;
29+
30+
Status DataWriter::Write(ArrowArray* data) { return NotImplemented(""); }
31+
32+
Result<int64_t> DataWriter::Length() const { return NotImplemented(""); }
33+
34+
Status DataWriter::Close() { return NotImplemented(""); }
35+
36+
Result<FileWriter::WriteResult> DataWriter::Metadata() { return NotImplemented(""); }
37+
38+
} // namespace iceberg

src/iceberg/data/data_writer.h

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
/// \file iceberg/data/data_writer.h
23+
/// Data writer for Iceberg tables.
24+
25+
#include <cstdint>
26+
#include <memory>
27+
#include <optional>
28+
#include <string>
29+
#include <unordered_map>
30+
31+
#include "iceberg/arrow_c_data.h"
32+
#include "iceberg/data/writer.h"
33+
#include "iceberg/file_format.h"
34+
#include "iceberg/iceberg_export.h"
35+
#include "iceberg/result.h"
36+
#include "iceberg/row/partition_values.h"
37+
#include "iceberg/type_fwd.h"
38+
39+
namespace iceberg {
40+
41+
/// \brief Options for creating a DataWriter.
42+
struct ICEBERG_EXPORT DataWriterOptions {
43+
std::string path;
44+
std::shared_ptr<Schema> schema;
45+
std::shared_ptr<PartitionSpec> spec;
46+
PartitionValues partition;
47+
FileFormatType format = FileFormatType::kParquet;
48+
std::shared_ptr<FileIO> io;
49+
std::optional<int32_t> sort_order_id;
50+
std::unordered_map<std::string, std::string> properties;
51+
};
52+
53+
/// \brief Writer for Iceberg data files.
54+
class ICEBERG_EXPORT DataWriter : public FileWriter {
55+
public:
56+
~DataWriter() override;
57+
58+
Status Write(ArrowArray* data) override;
59+
Result<int64_t> Length() const override;
60+
Status Close() override;
61+
Result<WriteResult> Metadata() override;
62+
63+
private:
64+
class Impl;
65+
std::unique_ptr<Impl> impl_;
66+
};
67+
68+
} // namespace iceberg
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/data/equality_delete_writer.h"
21+
22+
namespace iceberg {
23+
24+
class EqualityDeleteWriter::Impl {
25+
public:
26+
};
27+
28+
EqualityDeleteWriter::~EqualityDeleteWriter() = default;
29+
30+
Status EqualityDeleteWriter::Write(ArrowArray* data) { return NotImplemented(""); }
31+
32+
Result<int64_t> EqualityDeleteWriter::Length() const { return NotImplemented(""); }
33+
34+
Status EqualityDeleteWriter::Close() { return NotImplemented(""); }
35+
36+
Result<FileWriter::WriteResult> EqualityDeleteWriter::Metadata() {
37+
return NotImplemented("");
38+
}
39+
40+
std::span<const int32_t> EqualityDeleteWriter::equality_field_ids() const { return {}; }
41+
42+
} // namespace iceberg
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
/// \file iceberg/data/equality_delete_writer.h
23+
/// Equality delete writer for Iceberg tables.
24+
25+
#include <cstdint>
26+
#include <memory>
27+
#include <optional>
28+
#include <span>
29+
#include <string>
30+
#include <unordered_map>
31+
32+
#include "iceberg/arrow_c_data.h"
33+
#include "iceberg/data/writer.h"
34+
#include "iceberg/file_format.h"
35+
#include "iceberg/iceberg_export.h"
36+
#include "iceberg/result.h"
37+
#include "iceberg/row/partition_values.h"
38+
#include "iceberg/type_fwd.h"
39+
40+
namespace iceberg {
41+
42+
/// \brief Options for creating an EqualityDeleteWriter.
43+
struct ICEBERG_EXPORT EqualityDeleteWriterOptions {
44+
std::string path;
45+
std::shared_ptr<Schema> schema;
46+
std::shared_ptr<PartitionSpec> spec;
47+
PartitionValues partition;
48+
FileFormatType format = FileFormatType::kParquet;
49+
std::shared_ptr<FileIO> io;
50+
std::vector<int32_t> equality_field_ids;
51+
std::optional<int32_t> sort_order_id;
52+
std::unordered_map<std::string, std::string> properties;
53+
};
54+
55+
/// \brief Writer for Iceberg equality delete files.
56+
class ICEBERG_EXPORT EqualityDeleteWriter : public FileWriter {
57+
public:
58+
~EqualityDeleteWriter() override;
59+
60+
Status Write(ArrowArray* data) override;
61+
Result<int64_t> Length() const override;
62+
Status Close() override;
63+
Result<WriteResult> Metadata() override;
64+
65+
std::span<const int32_t> equality_field_ids() const;
66+
67+
private:
68+
class Impl;
69+
std::unique_ptr<Impl> impl_;
70+
};
71+
72+
} // namespace iceberg
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/data/position_delete_writer.h"
21+
22+
namespace iceberg {
23+
24+
class PositionDeleteWriter::Impl {
25+
public:
26+
};
27+
28+
PositionDeleteWriter::~PositionDeleteWriter() = default;
29+
30+
Status PositionDeleteWriter::Write(ArrowArray* data) { return NotImplemented(""); }
31+
32+
Status PositionDeleteWriter::WriteDelete(std::string_view file_path, int64_t pos) {
33+
return NotImplemented("");
34+
}
35+
36+
Result<int64_t> PositionDeleteWriter::Length() const { return NotImplemented(""); }
37+
38+
Status PositionDeleteWriter::Close() { return NotImplemented(""); }
39+
40+
Result<FileWriter::WriteResult> PositionDeleteWriter::Metadata() {
41+
return NotImplemented("");
42+
}
43+
44+
} // namespace iceberg
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
/// \file iceberg/data/position_delete_writer.h
23+
/// Position delete writer for Iceberg tables.
24+
25+
#include <cstdint>
26+
#include <memory>
27+
#include <string>
28+
#include <string_view>
29+
#include <unordered_map>
30+
31+
#include "iceberg/arrow_c_data.h"
32+
#include "iceberg/data/writer.h"
33+
#include "iceberg/file_format.h"
34+
#include "iceberg/iceberg_export.h"
35+
#include "iceberg/result.h"
36+
#include "iceberg/row/partition_values.h"
37+
#include "iceberg/type_fwd.h"
38+
39+
namespace iceberg {
40+
41+
/// \brief Options for creating a PositionDeleteWriter.
42+
struct ICEBERG_EXPORT PositionDeleteWriterOptions {
43+
std::string path;
44+
std::shared_ptr<Schema> schema;
45+
std::shared_ptr<PartitionSpec> spec;
46+
PartitionValues partition;
47+
FileFormatType format = FileFormatType::kParquet;
48+
std::shared_ptr<FileIO> io;
49+
std::shared_ptr<Schema> row_schema; // Optional row data schema
50+
std::unordered_map<std::string, std::string> properties;
51+
};
52+
53+
/// \brief Writer for Iceberg position delete files.
54+
class ICEBERG_EXPORT PositionDeleteWriter : public FileWriter {
55+
public:
56+
~PositionDeleteWriter() override;
57+
58+
Status Write(ArrowArray* data) override;
59+
Status WriteDelete(std::string_view file_path, int64_t pos);
60+
Result<int64_t> Length() const override;
61+
Status Close() override;
62+
Result<WriteResult> Metadata() override;
63+
64+
private:
65+
class Impl;
66+
std::unique_ptr<Impl> impl_;
67+
};
68+
69+
} // namespace iceberg

0 commit comments

Comments
 (0)