Skip to content

Commit 5e37656

Browse files
committed
[core] Introduce BLOB_REF for shared blob data
1 parent 2b557d4 commit 5e37656

File tree

50 files changed

+1394
-39
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+1394
-39
lines changed

paimon-api/src/main/java/org/apache/paimon/CoreOptions.java

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2254,6 +2254,14 @@ public InlineElement getDescription() {
22542254
"Specifies column names that should be stored as blob type. "
22552255
+ "This is used when you want to treat a BYTES column as a BLOB.");
22562256

2257+
/**
 * String option {@code blob-ref-field}, declared without a default value.
 *
 * <p>Per its description, it lists the columns that should be stored as blob reference type.
 * The accessors in this class split the configured value on commas.
 */
public static final ConfigOption<String> BLOB_REF_FIELD =
2258+
key("blob-ref-field")
2259+
.stringType()
2260+
.noDefaultValue()
2261+
.withDescription(
2262+
"Specifies column names that should be stored as blob reference type. "
2263+
+ "This is used when you want to treat a BYTES column as a BLOB_REF.");
2264+
22572265
@Immutable
22582266
public static final ConfigOption<String> BLOB_DESCRIPTOR_FIELD =
22592267
key("blob-descriptor-field")
@@ -2935,7 +2943,13 @@ public Set<String> blobExternalStorageField() {
29352943
* subset of descriptor fields and therefore are also updatable.
29362944
*/
29372945
public Set<String> updatableBlobFields() {
2938-
// Removed by this commit: previously only the descriptor fields were returned.
return blobDescriptorField();
2946+
// Copy the descriptor fields into a fresh set so the set returned by
// blobDescriptorField() is not modified by the addAll below.
Set<String> fields = new HashSet<>(blobDescriptorField());
2947+
// Blob reference fields are now part of the updatable set as well.
fields.addAll(blobRefField());
2948+
return fields;
2949+
}
2950+
2951+
public Set<String> blobRefField() {
2952+
return parseCommaSeparatedSet(BLOB_REF_FIELD);
29392953
}
29402954

29412955
/**
@@ -3274,6 +3288,15 @@ public static List<String> blobField(Map<String, String> options) {
32743288
return Arrays.stream(string.split(",")).map(String::trim).collect(Collectors.toList());
32753289
}
32763290

3291+
public static List<String> blobRefField(Map<String, String> options) {
3292+
String string = options.get(BLOB_REF_FIELD.key());
3293+
if (string == null) {
3294+
return Collections.emptyList();
3295+
}
3296+
3297+
return Arrays.stream(string.split(",")).map(String::trim).collect(Collectors.toList());
3298+
}
3299+
32773300
public boolean sequenceFieldSortOrderIsAscending() {
32783301
return options.get(SEQUENCE_FIELD_SORT_ORDER) == SortOrder.ASCENDING;
32793302
}
paimon-api/src/main/java/org/apache/paimon/types/BlobRefType.java

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.paimon.types;
20+
21+
import org.apache.paimon.annotation.Public;
22+
23+
/**
24+
* Data type of blob reference.
25+
*
26+
* <p>{@link BlobRefType} stores reference bytes inline in data files instead of writing payloads to
27+
* Paimon-managed {@code .blob} files.
28+
*
29+
* @since 1.5.0
30+
*/
31+
@Public
32+
public final class BlobRefType extends DataType {
33+
34+
private static final long serialVersionUID = 1L;
35+
36+
private static final String FORMAT = "BLOB_REF";
37+
38+
public BlobRefType(boolean isNullable) {
39+
super(isNullable, DataTypeRoot.BLOB_REF);
40+
}
41+
42+
public BlobRefType() {
43+
this(true);
44+
}
45+
46+
@Override
47+
public int defaultSize() {
48+
return BlobType.DEFAULT_SIZE;
49+
}
50+
51+
@Override
52+
public DataType copy(boolean isNullable) {
53+
return new BlobRefType(isNullable);
54+
}
55+
56+
@Override
57+
public String asSQLString() {
58+
return withNullability(FORMAT);
59+
}
60+
61+
@Override
62+
public <R> R accept(DataTypeVisitor<R> visitor) {
63+
return visitor.visit(this);
64+
}
65+
}

paimon-api/src/main/java/org/apache/paimon/types/DataTypeDefaultVisitor.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,11 @@ public R visit(BlobType blobType) {
119119
return defaultMethod(blobType);
120120
}
121121

122+
@Override
123+
public R visit(BlobRefType blobRefType) {
124+
return defaultMethod(blobRefType);
125+
}
126+
122127
@Override
123128
public R visit(ArrayType arrayType) {
124129
return defaultMethod(arrayType);

paimon-api/src/main/java/org/apache/paimon/types/DataTypeJsonParser.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,7 @@ private enum Keyword {
331331
LEGACY,
332332
VARIANT,
333333
BLOB,
334+
BLOB_REF,
334335
NOT
335336
}
336337

@@ -549,6 +550,8 @@ private DataType parseTypeByKeyword() {
549550
return new VariantType();
550551
case BLOB:
551552
return new BlobType();
553+
case BLOB_REF:
554+
return new BlobRefType();
552555
case VECTOR:
553556
return parseVectorType();
554557
default:

paimon-api/src/main/java/org/apache/paimon/types/DataTypeRoot.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ public enum DataTypeRoot {
104104

105105
BLOB(DataTypeFamily.PREDEFINED),
106106

107+
BLOB_REF(DataTypeFamily.PREDEFINED),
108+
107109
ARRAY(DataTypeFamily.CONSTRUCTED, DataTypeFamily.COLLECTION),
108110

109111
VECTOR(DataTypeFamily.CONSTRUCTED, DataTypeFamily.COLLECTION),

paimon-api/src/main/java/org/apache/paimon/types/DataTypeVisitor.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ public interface DataTypeVisitor<R> {
6666

6767
R visit(BlobType blobType);
6868

69+
R visit(BlobRefType blobRefType);
70+
6971
R visit(ArrayType arrayType);
7072

7173
R visit(VectorType vectorType);

paimon-api/src/main/java/org/apache/paimon/types/DataTypes.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,10 @@ public static BlobType BLOB() {
163163
return new BlobType();
164164
}
165165

166+
public static BlobRefType BLOB_REF() {
167+
return new BlobRefType();
168+
}
169+
166170
public static OptionalInt getPrecision(DataType dataType) {
167171
return dataType.accept(PRECISION_EXTRACTOR);
168172
}

paimon-arrow/src/main/java/org/apache/paimon/arrow/ArrowFieldTypeConversion.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.apache.paimon.types.ArrayType;
2222
import org.apache.paimon.types.BigIntType;
2323
import org.apache.paimon.types.BinaryType;
24+
import org.apache.paimon.types.BlobRefType;
2425
import org.apache.paimon.types.BlobType;
2526
import org.apache.paimon.types.BooleanType;
2627
import org.apache.paimon.types.CharType;
@@ -163,6 +164,11 @@ public FieldType visit(BlobType blobType) {
163164
throw new UnsupportedOperationException();
164165
}
165166

167+
@Override
168+
public FieldType visit(BlobRefType blobRefType) {
169+
throw new UnsupportedOperationException();
170+
}
171+
166172
private TimeUnit getTimeUnit(int precision) {
167173
if (precision == 0) {
168174
return TimeUnit.SECOND;

paimon-arrow/src/main/java/org/apache/paimon/arrow/converter/Arrow2PaimonVectorConverter.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import org.apache.paimon.types.ArrayType;
4848
import org.apache.paimon.types.BigIntType;
4949
import org.apache.paimon.types.BinaryType;
50+
import org.apache.paimon.types.BlobRefType;
5051
import org.apache.paimon.types.BlobType;
5152
import org.apache.paimon.types.BooleanType;
5253
import org.apache.paimon.types.CharType;
@@ -447,6 +448,11 @@ public Arrow2PaimonVectorConverter visit(BlobType blobType) {
447448
throw new UnsupportedOperationException();
448449
}
449450

451+
@Override
452+
public Arrow2PaimonVectorConverter visit(BlobRefType blobRefType) {
453+
throw new UnsupportedOperationException();
454+
}
455+
450456
@Override
451457
public Arrow2PaimonVectorConverter visit(ArrayType arrayType) {
452458
final Arrow2PaimonVectorConverter arrowVectorConvertor =

paimon-arrow/src/main/java/org/apache/paimon/arrow/writer/ArrowFieldWriterFactoryVisitor.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.apache.paimon.types.ArrayType;
2222
import org.apache.paimon.types.BigIntType;
2323
import org.apache.paimon.types.BinaryType;
24+
import org.apache.paimon.types.BlobRefType;
2425
import org.apache.paimon.types.BlobType;
2526
import org.apache.paimon.types.BooleanType;
2627
import org.apache.paimon.types.CharType;
@@ -156,6 +157,11 @@ public ArrowFieldWriterFactory visit(BlobType blobType) {
156157
throw new UnsupportedOperationException("Doesn't support BlobType.");
157158
}
158159

160+
@Override
161+
public ArrowFieldWriterFactory visit(BlobRefType blobRefType) {
162+
throw new UnsupportedOperationException("Doesn't support BlobRefType.");
163+
}
164+
159165
@Override
160166
public ArrowFieldWriterFactory visit(ArrayType arrayType) {
161167
ArrowFieldWriterFactory elementWriterFactory = arrayType.getElementType().accept(this);

0 commit comments

Comments
 (0)