Skip to content

Commit ec9fda0

Browse files
authored
refactor: adds StringColumnFormatSpec for string dimension configs (#19258)
1 parent 5f37b7b commit ec9fda0

14 files changed

Lines changed: 781 additions & 47 deletions

File tree

extensions-contrib/rabbit-stream-indexing-service/src/test/java/org/apache/druid/indexing/rabbitstream/RabbitStreamIndexTaskTuningConfigTest.java

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -163,6 +163,7 @@ public void testtoString() throws Exception
163163
"longEncoding=null, " +
164164
"complexMetricCompression=null, " +
165165
"autoColumnFormatSpec=null, " +
166+
"stringColumnFormatSpec=null, " +
166167
"jsonCompression=null, " +
167168
"segmentLoader=null" +
168169
"}, " +

processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java

Lines changed: 61 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -23,37 +23,21 @@
2323
import com.fasterxml.jackson.annotation.JsonIgnore;
2424
import com.fasterxml.jackson.annotation.JsonInclude;
2525
import com.fasterxml.jackson.annotation.JsonProperty;
26-
import org.apache.druid.error.DruidException;
27-
import org.apache.druid.guice.BuiltInTypesModule;
2826
import org.apache.druid.segment.DimensionHandler;
27+
import org.apache.druid.segment.IndexSpec;
28+
import org.apache.druid.segment.StringColumnFormatSpec;
2929
import org.apache.druid.segment.StringDimensionHandler;
3030
import org.apache.druid.segment.column.ColumnType;
3131

3232
import javax.annotation.Nullable;
33+
import java.util.Objects;
3334

3435
public class StringDimensionSchema extends DimensionSchema
3536
{
3637
private static final boolean DEFAULT_CREATE_BITMAP_INDEX = true;
3738

3839
@Nullable
39-
public static Integer getDefaultMaxStringLength()
40-
{
41-
return BuiltInTypesModule.getMaxStringLength();
42-
}
43-
44-
@Nullable
45-
private static Integer validateMaxStringLength(String name, @Nullable Integer maxStringLength)
46-
{
47-
if (maxStringLength != null && maxStringLength < 0) {
48-
throw DruidException.forPersona(DruidException.Persona.USER)
49-
.ofCategory(DruidException.Category.INVALID_INPUT)
50-
.build("maxStringLength for column [%s] must be >= 0, got [%s]", name, maxStringLength);
51-
}
52-
return maxStringLength != null ? maxStringLength : getDefaultMaxStringLength();
53-
}
54-
55-
@Nullable
56-
private final Integer maxStringLength;
40+
private final StringColumnFormatSpec columnFormatSpec;
5741

5842
@JsonCreator
5943
public static StringDimensionSchema create(String name)
@@ -66,11 +50,11 @@ public StringDimensionSchema(
6650
@JsonProperty("name") String name,
6751
@JsonProperty("multiValueHandling") MultiValueHandling multiValueHandling,
6852
@JsonProperty("createBitmapIndex") Boolean createBitmapIndex,
69-
@JsonProperty("maxStringLength") @Nullable Integer maxStringLength
53+
@JsonProperty("columnFormatSpec") @Nullable StringColumnFormatSpec columnFormatSpec
7054
)
7155
{
7256
super(name, multiValueHandling, createBitmapIndex == null ? DEFAULT_CREATE_BITMAP_INDEX : createBitmapIndex);
73-
this.maxStringLength = validateMaxStringLength(name, maxStringLength);
57+
this.columnFormatSpec = columnFormatSpec;
7458
}
7559

7660
public StringDimensionSchema(
@@ -87,12 +71,29 @@ public StringDimensionSchema(String name)
8771
this(name, null, DEFAULT_CREATE_BITMAP_INDEX, null);
8872
}
8973

74+
@Nullable
9075
@JsonProperty
9176
@JsonInclude(JsonInclude.Include.NON_NULL)
92-
@Nullable
93-
public Integer getMaxStringLength()
77+
public StringColumnFormatSpec getColumnFormatSpec()
78+
{
79+
return columnFormatSpec;
80+
}
81+
82+
@Override
83+
public DimensionSchema getEffectiveSchema(IndexSpec indexSpec)
9484
{
95-
return maxStringLength;
85+
// If there's no per-column or job-level string format config, nothing to resolve
86+
if (columnFormatSpec == null && indexSpec.getStringColumnFormatSpec() == null) {
87+
return this;
88+
}
89+
StringColumnFormatSpec effective =
90+
StringColumnFormatSpec.getEffectiveFormatSpec(columnFormatSpec, indexSpec);
91+
return new StringDimensionSchema(
92+
getName(),
93+
getMultiValueHandling(),
94+
hasBitmapIndex(),
95+
effective
96+
);
9697
}
9798

9899
@Override
@@ -117,6 +118,40 @@ public boolean canBeMultiValued()
117118
@Override
118119
public DimensionHandler getDimensionHandler()
119120
{
120-
return new StringDimensionHandler(getName(), getMultiValueHandling(), hasBitmapIndex(), false, maxStringLength);
121+
MultiValueHandling mvh = getMultiValueHandling();
122+
boolean bitmap = hasBitmapIndex();
123+
Integer maxStringLength = null;
124+
if (columnFormatSpec != null) {
125+
if (columnFormatSpec.getMultiValueHandling() != null) {
126+
mvh = columnFormatSpec.getMultiValueHandling();
127+
}
128+
if (columnFormatSpec.getIndexType() != null) {
129+
bitmap = columnFormatSpec.getIndexType().hasBitmapIndex();
130+
}
131+
maxStringLength = columnFormatSpec.getMaxStringLength();
132+
}
133+
return new StringDimensionHandler(getName(), mvh, bitmap, false, maxStringLength);
134+
}
135+
136+
@Override
137+
public boolean equals(Object o)
138+
{
139+
if (this == o) {
140+
return true;
141+
}
142+
if (o == null || getClass() != o.getClass()) {
143+
return false;
144+
}
145+
if (!super.equals(o)) {
146+
return false;
147+
}
148+
StringDimensionSchema that = (StringDimensionSchema) o;
149+
return Objects.equals(columnFormatSpec, that.columnFormatSpec);
150+
}
151+
152+
@Override
153+
public int hashCode()
154+
{
155+
return Objects.hash(super.hashCode(), columnFormatSpec);
121156
}
122157
}

processing/src/main/java/org/apache/druid/segment/IndexSpec.java

Lines changed: 40 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,8 @@ public static Builder builder()
7575
@Nullable
7676
private final NestedCommonFormatColumnFormatSpec autoColumnFormatSpec;
7777
@Nullable
78+
private final StringColumnFormatSpec stringColumnFormatSpec;
79+
@Nullable
7880
private final CompressionStrategy metadataCompression;
7981

8082
/**
@@ -109,6 +111,8 @@ public static Builder builder()
109111
* used to load the written segment
110112
* @param autoColumnFormatSpec specify the default {@link NestedCommonFormatColumnFormatSpec} to use for json and
111113
* auto columns. Defaults to null upon calling {@link #getEffectiveSpec()}.
114+
* @param stringColumnFormatSpec specify the default {@link StringColumnFormatSpec} to use for string columns.
115+
* Defaults to null upon calling {@link #getEffectiveSpec()}.
112116
*/
113117
@JsonCreator
114118
public IndexSpec(
@@ -121,7 +125,8 @@ public IndexSpec(
121125
@JsonProperty("complexMetricCompression") @Nullable CompressionStrategy complexMetricCompression,
122126
@Deprecated @JsonProperty("jsonCompression") @Nullable CompressionStrategy jsonCompression,
123127
@JsonProperty("segmentLoader") @Nullable SegmentizerFactory segmentLoader,
124-
@JsonProperty("autoColumnFormatSpec") @Nullable NestedCommonFormatColumnFormatSpec autoColumnFormatSpec
128+
@JsonProperty("autoColumnFormatSpec") @Nullable NestedCommonFormatColumnFormatSpec autoColumnFormatSpec,
129+
@JsonProperty("stringColumnFormatSpec") @Nullable StringColumnFormatSpec stringColumnFormatSpec
125130
)
126131
{
127132
this.bitmapSerdeFactory = bitmapSerdeFactory;
@@ -134,6 +139,7 @@ public IndexSpec(
134139
this.jsonCompression = jsonCompression;
135140
this.segmentLoader = segmentLoader;
136141
this.autoColumnFormatSpec = autoColumnFormatSpec;
142+
this.stringColumnFormatSpec = stringColumnFormatSpec;
137143
}
138144

139145
@JsonProperty("bitmap")
@@ -212,6 +218,14 @@ public NestedCommonFormatColumnFormatSpec getAutoColumnFormatSpec()
212218
return autoColumnFormatSpec;
213219
}
214220

221+
@JsonProperty
222+
@JsonInclude(JsonInclude.Include.NON_NULL)
223+
@Nullable
224+
public StringColumnFormatSpec getStringColumnFormatSpec()
225+
{
226+
return stringColumnFormatSpec;
227+
}
228+
215229
/**
216230
* Populate all null fields of {@link IndexSpec}, first from {@link #getDefault()} and finally falling back to hard
217231
* coded defaults if no overrides are defined.
@@ -298,6 +312,16 @@ public IndexSpec getEffectiveSpec()
298312
);
299313
}
300314

315+
if (stringColumnFormatSpec != null) {
316+
bob.withStringColumnFormatSpec(
317+
StringColumnFormatSpec.getEffectiveFormatSpec(stringColumnFormatSpec, this)
318+
);
319+
} else if (defaultSpec.stringColumnFormatSpec != null) {
320+
bob.withStringColumnFormatSpec(
321+
StringColumnFormatSpec.getEffectiveFormatSpec(defaultSpec.stringColumnFormatSpec, this)
322+
);
323+
}
324+
301325
return bob.build();
302326
}
303327

@@ -320,7 +344,8 @@ public boolean equals(Object o)
320344
Objects.equals(complexMetricCompression, indexSpec.complexMetricCompression) &&
321345
Objects.equals(jsonCompression, indexSpec.jsonCompression) &&
322346
Objects.equals(segmentLoader, indexSpec.segmentLoader) &&
323-
Objects.equals(autoColumnFormatSpec, indexSpec.autoColumnFormatSpec);
347+
Objects.equals(autoColumnFormatSpec, indexSpec.autoColumnFormatSpec) &&
348+
Objects.equals(stringColumnFormatSpec, indexSpec.stringColumnFormatSpec);
324349
}
325350

326351
@Override
@@ -336,7 +361,8 @@ public int hashCode()
336361
complexMetricCompression,
337362
jsonCompression,
338363
segmentLoader,
339-
autoColumnFormatSpec
364+
autoColumnFormatSpec,
365+
stringColumnFormatSpec
340366
);
341367
}
342368

@@ -352,6 +378,7 @@ public String toString()
352378
", longEncoding=" + longEncoding +
353379
", complexMetricCompression=" + complexMetricCompression +
354380
", autoColumnFormatSpec=" + autoColumnFormatSpec +
381+
", stringColumnFormatSpec=" + stringColumnFormatSpec +
355382
", jsonCompression=" + jsonCompression +
356383
", segmentLoader=" + segmentLoader +
357384
'}';
@@ -379,6 +406,8 @@ public static class Builder
379406
private SegmentizerFactory segmentLoader;
380407
@Nullable
381408
private NestedCommonFormatColumnFormatSpec autoColumnFormatSpec;
409+
@Nullable
410+
private StringColumnFormatSpec stringColumnFormatSpec;
382411

383412
public Builder withBitmapSerdeFactory(@Nullable BitmapSerdeFactory bitmapSerdeFactory)
384413
{
@@ -441,6 +470,12 @@ public Builder withAutoColumnFormatSpec(@Nullable NestedCommonFormatColumnFormat
441470
return this;
442471
}
443472

473+
public Builder withStringColumnFormatSpec(@Nullable StringColumnFormatSpec stringColumnFormatSpec)
474+
{
475+
this.stringColumnFormatSpec = stringColumnFormatSpec;
476+
return this;
477+
}
478+
444479
public IndexSpec build()
445480
{
446481
return new IndexSpec(
@@ -453,7 +488,8 @@ public IndexSpec build()
453488
complexMetricCompression,
454489
jsonCompression,
455490
segmentLoader,
456-
autoColumnFormatSpec
491+
autoColumnFormatSpec,
492+
stringColumnFormatSpec
457493
);
458494
}
459495
}

0 commit comments

Comments
 (0)