Skip to content

Commit 2a0e7d1

Browse files
andygrove and claude committed
feat: add @IcebergApi annotation to mark public API used by Iceberg
Add a custom Java annotation @IcebergApi to mark all classes, methods, constructors, and fields that form the public API used by Apache Iceberg. This makes it easy to identify which APIs need backward compatibility considerations when making changes.

The annotation is applied to:
- org.apache.comet.parquet: FileReader, RowGroupReader, ReadOptions, WrappedInputFile, ParquetColumnSpec, AbstractColumnReader, ColumnReader, BatchReader, MetadataColumnReader, ConstantColumnReader, Native, TypeUtil, Utils
- org.apache.comet: CometSchemaImporter
- org.apache.comet.vector: CometVector

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent cfef6c8 commit 2a0e7d1

17 files changed

Lines changed: 135 additions & 0 deletions

common/src/main/java/org/apache/comet/CometSchemaImporter.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@
2323
import org.apache.arrow.memory.BufferAllocator;
2424

2525
/** This is a simple wrapper around SchemaImporter to make it accessible from Java Arrow. */
26+
@IcebergApi
2627
public class CometSchemaImporter extends AbstractCometSchemaImporter {
28+
@IcebergApi
2729
public CometSchemaImporter(BufferAllocator allocator) {
2830
super(allocator);
2931
}

common/src/main/java/org/apache/comet/IcebergApi.java

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.comet;
21+
22+
import java.lang.annotation.Documented;
23+
import java.lang.annotation.ElementType;
24+
import java.lang.annotation.Retention;
25+
import java.lang.annotation.RetentionPolicy;
26+
import java.lang.annotation.Target;
27+
28+
/**
29+
* Indicates that the annotated element is part of the public API used by Apache Iceberg.
30+
*
31+
* <p>This annotation marks classes, methods, constructors, and fields that form the contract
32+
* between Comet and Iceberg. Changes to these APIs may break Iceberg's Comet integration,
33+
* so contributors should exercise caution and consider backward compatibility when modifying
34+
* annotated elements.
35+
*
36+
* <p>The Iceberg integration uses Comet's native Parquet reader for accelerated vectorized reads.
37+
* See the contributor guide documentation for details on how Iceberg uses these APIs.
38+
*
39+
* @see <a href="https://iceberg.apache.org/">Apache Iceberg</a>
40+
*/
41+
@Documented
42+
@Retention(RetentionPolicy.RUNTIME)
43+
@Target({ElementType.TYPE, ElementType.METHOD, ElementType.CONSTRUCTOR, ElementType.FIELD})
44+
public @interface IcebergApi {}

common/src/main/java/org/apache/comet/parquet/AbstractColumnReader.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
package org.apache.comet.parquet;
2121

22+
import org.apache.comet.IcebergApi;
23+
2224
import org.slf4j.Logger;
2325
import org.slf4j.LoggerFactory;
2426

@@ -31,6 +33,7 @@
3133
import org.apache.comet.vector.CometVector;
3234

3335
/** Base class for Comet Parquet column reader implementations. */
36+
@IcebergApi
3437
public abstract class AbstractColumnReader implements AutoCloseable {
3538
protected static final Logger LOG = LoggerFactory.getLogger(AbstractColumnReader.class);
3639

@@ -96,6 +99,7 @@ String getPath() {
9699
/**
97100
* Set the batch size of this reader to be 'batchSize'. Also initializes the native column reader.
98101
*/
102+
@IcebergApi
99103
public void setBatchSize(int batchSize) {
100104
assert nativeHandle == 0
101105
: "Native column reader shouldn't be initialized before " + "'setBatchSize' is called";
@@ -113,6 +117,7 @@ public void setBatchSize(int batchSize) {
113117
/** Returns the {@link CometVector} read by this reader. */
114118
public abstract CometVector currentBatch();
115119

120+
@IcebergApi
116121
@Override
117122
public void close() {
118123
if (nativeHandle != 0) {

common/src/main/java/org/apache/comet/parquet/BatchReader.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
package org.apache.comet.parquet;
2121

22+
import org.apache.comet.IcebergApi;
23+
2224
import java.io.Closeable;
2325
import java.io.IOException;
2426
import java.net.URI;
@@ -87,6 +89,7 @@
8789
* }
8890
* </pre>
8991
*/
92+
@IcebergApi
9093
public class BatchReader extends RecordReader<Void, ColumnarBatch> implements Closeable {
9194
private static final Logger LOG = LoggerFactory.getLogger(FileReader.class);
9295
protected static final BufferAllocator ALLOCATOR = new RootAllocator();
@@ -189,6 +192,7 @@ public BatchReader(
189192
* @deprecated since 0.10.0, will be removed in 0.11.0.
190193
* @see <a href="https://github.com/apache/datafusion-comet/issues/2079">Comet Issue #2079</a>
191194
*/
195+
@IcebergApi
192196
public BatchReader(AbstractColumnReader[] columnReaders) {
193197
// Todo: set useDecimal128 and useLazyMaterialization
194198
int numColumns = columnReaders.length;
@@ -387,6 +391,7 @@ public void init() throws URISyntaxException, IOException {
387391
* @deprecated since 0.10.0, will be removed in 0.11.0.
388392
* @see <a href="https://github.com/apache/datafusion-comet/issues/2079">Comet Issue #2079</a>
389393
*/
394+
@IcebergApi
390395
public void setSparkSchema(StructType schema) {
391396
this.sparkSchema = schema;
392397
}
@@ -395,6 +400,7 @@ public void setSparkSchema(StructType schema) {
395400
* @deprecated since 0.10.0, will be removed in 0.11.0.
396401
* @see <a href="https://github.com/apache/datafusion-comet/issues/2079">Comet Issue #2079</a>
397402
*/
403+
@IcebergApi
398404
public AbstractColumnReader[] getColumnReaders() {
399405
return columnReaders;
400406
}
@@ -498,6 +504,7 @@ public boolean nextBatch() throws IOException {
498504
return nextBatch(batchSize);
499505
}
500506

507+
@IcebergApi
501508
public boolean nextBatch(int batchSize) {
502509
long totalDecodeTime = 0, totalLoadTime = 0;
503510
for (int i = 0; i < columnReaders.length; i++) {

common/src/main/java/org/apache/comet/parquet/ColumnReader.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
package org.apache.comet.parquet;
2121

22+
import org.apache.comet.IcebergApi;
23+
2224
import java.io.IOException;
2325
import java.nio.ByteBuffer;
2426

@@ -50,6 +52,7 @@
5052
import org.apache.comet.vector.CometPlainVector;
5153
import org.apache.comet.vector.CometVector;
5254

55+
@IcebergApi
5356
public class ColumnReader extends AbstractColumnReader {
5457
protected static final Logger LOG = LoggerFactory.getLogger(ColumnReader.class);
5558
protected final BufferAllocator ALLOCATOR = new RootAllocator();
@@ -114,6 +117,7 @@ public class ColumnReader extends AbstractColumnReader {
114117
* @deprecated since 0.10.0, will be removed in 0.11.0.
115118
* @see <a href="https://github.com/apache/datafusion-comet/issues/2079">Comet Issue #2079</a>
116119
*/
120+
@IcebergApi
117121
public void setPageReader(PageReader pageReader) throws IOException {
118122
this.pageReader = pageReader;
119123

@@ -129,6 +133,7 @@ public void setPageReader(PageReader pageReader) throws IOException {
129133
}
130134

131135
/** This method is called from Apache Iceberg. */
136+
@IcebergApi
132137
public void setRowGroupReader(RowGroupReader rowGroupReader, ParquetColumnSpec columnSpec)
133138
throws IOException {
134139
ColumnDescriptor descriptor = Utils.buildColumnDescriptor(columnSpec);

common/src/main/java/org/apache/comet/parquet/ConstantColumnReader.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
package org.apache.comet.parquet;
2121

22+
import org.apache.comet.IcebergApi;
23+
2224
import java.math.BigInteger;
2325

2426
import org.apache.parquet.column.ColumnDescriptor;
@@ -31,6 +33,7 @@
3133
* A column reader that always returns constant vectors. Used for reading partition columns, for
3234
* instance.
3335
*/
36+
@IcebergApi
3437
public class ConstantColumnReader extends MetadataColumnReader {
3538
/** Whether all the values in this constant column are nulls */
3639
private boolean isNull;
@@ -56,13 +59,15 @@ public class ConstantColumnReader extends MetadataColumnReader {
5659
* @deprecated since 0.10.0, will be removed in 0.11.0.
5760
* @see <a href="https://github.com/apache/datafusion-comet/issues/2079">Comet Issue #2079</a>
5861
*/
62+
@IcebergApi
5963
public ConstantColumnReader(
6064
DataType type, ColumnDescriptor descriptor, Object value, boolean useDecimal128) {
6165
super(type, descriptor, useDecimal128, true);
6266
this.value = value;
6367
}
6468

6569
// Used by Iceberg
70+
@IcebergApi
6671
public ConstantColumnReader(
6772
DataType type, ParquetColumnSpec spec, Object value, boolean useDecimal128) {
6873
super(type, spec, useDecimal128, true);

common/src/main/java/org/apache/comet/parquet/FileReader.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
package org.apache.comet.parquet;
2121

22+
import org.apache.comet.IcebergApi;
23+
2224
import java.io.Closeable;
2325
import java.io.IOException;
2426
import java.io.InputStream;
@@ -101,6 +103,7 @@
101103
* A Parquet file reader. Mostly follows {@code ParquetFileReader} in {@code parquet-mr}, but with
102104
* customizations & optimizations for Comet.
103105
*/
106+
@IcebergApi
104107
public class FileReader implements Closeable {
105108
private static final Logger LOG = LoggerFactory.getLogger(FileReader.class);
106109

@@ -135,6 +138,7 @@ public class FileReader implements Closeable {
135138
}
136139

137140
/** This constructor is called from Apache Iceberg. */
141+
@IcebergApi
138142
public FileReader(
139143
WrappedInputFile file,
140144
ReadOptions cometOptions,
@@ -258,6 +262,7 @@ public void setRequestedSchema(List<ColumnDescriptor> projection) {
258262
}
259263

260264
/** This method is called from Apache Iceberg. */
265+
@IcebergApi
261266
public void setRequestedSchemaFromSpecs(List<ParquetColumnSpec> specList) {
262267
paths.clear();
263268
for (ParquetColumnSpec colSpec : specList) {
@@ -336,6 +341,7 @@ public long getFilteredRecordCount() {
336341
}
337342

338343
/** Skips the next row group. Returns false if there's no row group to skip. Otherwise, true. */
344+
@IcebergApi
339345
public boolean skipNextRowGroup() {
340346
return advanceToNextBlock();
341347
}
@@ -344,6 +350,7 @@ public boolean skipNextRowGroup() {
344350
* Returns the next row group to read (after applying row group filtering), or null if there's no
345351
* more row group.
346352
*/
353+
@IcebergApi
347354
public RowGroupReader readNextRowGroup() throws IOException {
348355
if (currentBlock == blocks.size()) {
349356
return null;
@@ -864,6 +871,7 @@ public void closeStream() throws IOException {
864871
}
865872
}
866873

874+
@IcebergApi
867875
@Override
868876
public void close() throws IOException {
869877
try {

common/src/main/java/org/apache/comet/parquet/MetadataColumnReader.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
package org.apache.comet.parquet;
2121

22+
import org.apache.comet.IcebergApi;
23+
2224
import org.apache.arrow.c.ArrowArray;
2325
import org.apache.arrow.c.ArrowSchema;
2426
import org.apache.arrow.c.Data;
@@ -32,6 +34,7 @@
3234
import org.apache.comet.vector.CometVector;
3335

3436
/** A metadata column reader that can be extended by {@link RowIndexColumnReader} etc. */
37+
@IcebergApi
3538
public class MetadataColumnReader extends AbstractColumnReader {
3639
private final BufferAllocator allocator = new RootAllocator();
3740

@@ -46,6 +49,7 @@ public class MetadataColumnReader extends AbstractColumnReader {
4649
* @deprecated since 0.10.0, will be made package private in 0.11.0.
4750
* @see <a href="https://github.com/apache/datafusion-comet/issues/2079">Comet Issue #2079</a>
4851
*/
52+
@IcebergApi
4953
public MetadataColumnReader(
5054
DataType type, ColumnDescriptor descriptor, boolean useDecimal128, boolean isConstant) {
5155
// TODO: should we handle legacy dates & timestamps for metadata columns?
@@ -55,6 +59,7 @@ public MetadataColumnReader(
5559
}
5660

5761
// Used by Iceberg
62+
@IcebergApi
5863
public MetadataColumnReader(
5964
DataType type, ParquetColumnSpec spec, boolean useDecimal128, boolean isConstant) {
6065
// TODO: should we handle legacy dates & timestamps for metadata columns?
@@ -69,6 +74,7 @@ public void setBatchSize(int batchSize) {
6974
super.setBatchSize(batchSize);
7075
}
7176

77+
@IcebergApi
7278
@Override
7379
public void readBatch(int total) {
7480
if (vector == null) {
@@ -90,6 +96,7 @@ void setNumNulls(int total) {
9096
vector.setNumNulls(total);
9197
}
9298

99+
@IcebergApi
93100
@Override
94101
public CometVector currentBatch() {
95102
return vector;

common/src/main/java/org/apache/comet/parquet/Native.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
package org.apache.comet.parquet;
2121

22+
import org.apache.comet.IcebergApi;
23+
2224
import java.nio.ByteBuffer;
2325
import java.util.Map;
2426

@@ -143,6 +145,7 @@ public static native void setPageV2(
143145
*
144146
* @param handle the handle to the native Parquet column reader
145147
*/
148+
@IcebergApi
146149
public static native void resetBatch(long handle);
147150

148151
/**
@@ -221,12 +224,14 @@ public static native void setPageV2(
221224
public static native void setDecimal(long handle, byte[] value);
222225

223226
/** Set position of row index vector for Iceberg Metadata Column */
227+
@IcebergApi
224228
public static native void setPosition(long handle, long value, int size);
225229

226230
/** Set row index vector for Spark row index metadata column and return vector size */
227231
public static native int setIndices(long handle, long offset, int size, long[] indices);
228232

229233
/** Set deleted info for Iceberg Metadata Column */
234+
@IcebergApi
230235
public static native void setIsDeleted(long handle, boolean[] isDeleted);
231236

232237
/**

0 commit comments

Comments
 (0)