Skip to content

Commit 3433bb5

Browse files
committed
Restore compatibility constructors and fix compression metadata rewrites
1 parent 3abe39d commit 3433bb5

14 files changed

Lines changed: 361 additions & 44 deletions

File tree

pinot-core/src/test/java/org/apache/pinot/core/data/manager/TableIndexingTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -472,8 +472,8 @@ public void testAddIndex(TestCase testCase) {
472472
throw new IllegalArgumentException("Unexpected index type " + indexType);
473473
}
474474

475-
config =
476-
new FieldConfig(field.getName(), encoding, null, indexTypes, null, tstmpConfig, indexes, properties, null);
475+
config = new FieldConfig.Builder(field.getName()).withEncodingType(encoding).withIndexTypes(indexTypes)
476+
.withTimestampConfig(tstmpConfig).withIndexes(indexes).withProperties(properties).build();
477477

478478
tableConfig.getFieldConfigList().add(config);
479479

pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunctionTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -341,8 +341,8 @@ protected TableConfig getTableConfig() {
341341
ObjectNode indexNode = JsonNodeFactory.instance.objectNode();
342342
indexNode.put("json", jsonIndexProps);
343343
FieldConfig jsonFieldConfig =
344-
new FieldConfig(JSON_STRING_SV_COLUMN, FieldConfig.EncodingType.DICTIONARY, null, null, null, null, indexNode,
345-
null, null);
344+
new FieldConfig.Builder(JSON_STRING_SV_COLUMN).withEncodingType(FieldConfig.EncodingType.DICTIONARY)
345+
.withIndexes(indexNode).build();
346346
fieldConfigList.add(jsonFieldConfig);
347347
TableConfig tableConfig =
348348
new TableConfigBuilder(TableType.OFFLINE).setTableName("test").setTimeColumnName(TIME_COLUMN)

pinot-core/src/test/java/org/apache/pinot/queries/BaseFSTBasedRegexpLikeQueriesTest.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,9 @@ private static List<FieldConfig> getFieldConfigs(String indexType) {
9191

9292
// Create FieldConfig with the index configuration
9393
return List.of(
94-
new FieldConfig(DOMAIN_NAMES_COL, EncodingType.DICTIONARY, null, null, null, null, indexes, null, null),
95-
new FieldConfig(URL_COL, EncodingType.DICTIONARY, null, null, null, null, indexes, null, null));
94+
new FieldConfig.Builder(DOMAIN_NAMES_COL).withEncodingType(EncodingType.DICTIONARY).withIndexes(indexes)
95+
.build(),
96+
new FieldConfig.Builder(URL_COL).withEncodingType(EncodingType.DICTIONARY).withIndexes(indexes).build());
9697
} catch (Exception e) {
9798
throw new RuntimeException("Failed to create field configs", e);
9899
}

pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/TextIndicesTest.java

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -93,13 +93,12 @@ protected List<FieldConfig> getFieldConfigs() {
9393
}
9494

9595
FieldConfig nullableTextConfig =
96-
new FieldConfig(TEXT_COLUMN_NULL_NAME, FieldConfig.EncodingType.RAW, null, null, null, null, textColumnIndexes,
97-
null,
98-
null);
96+
new FieldConfig.Builder(TEXT_COLUMN_NULL_NAME).withEncodingType(FieldConfig.EncodingType.RAW)
97+
.withIndexes(textColumnIndexes).build();
9998

10099
FieldConfig textColumnFieldConfig =
101-
new FieldConfig(TEXT_COLUMN_NAME, FieldConfig.EncodingType.RAW, null, null, null, null, textColumnIndexes, null,
102-
null);
100+
new FieldConfig.Builder(TEXT_COLUMN_NAME).withEncodingType(FieldConfig.EncodingType.RAW)
101+
.withIndexes(textColumnIndexes).build();
103102

104103
ObjectNode textColumnCaseSensitiveIndexes;
105104
try {
@@ -115,8 +114,8 @@ protected List<FieldConfig> getFieldConfigs() {
115114
throw new RuntimeException(e);
116115
}
117116
FieldConfig textColumnCaseSensitiveFieldConfig =
118-
new FieldConfig(TEXT_COLUMN_NAME_CASE_SENSITIVE, FieldConfig.EncodingType.RAW, null, null, null, null,
119-
textColumnCaseSensitiveIndexes, null, null);
117+
new FieldConfig.Builder(TEXT_COLUMN_NAME_CASE_SENSITIVE).withEncodingType(FieldConfig.EncodingType.RAW)
118+
.withIndexes(textColumnCaseSensitiveIndexes).build();
120119
return Arrays.asList(nullableTextConfig, textColumnFieldConfig, textColumnCaseSensitiveFieldConfig);
121120
}
122121

pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/ForwardIndexHandler.java

Lines changed: 47 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
import org.apache.commons.configuration2.PropertiesConfiguration;
3434
import org.apache.commons.io.FileUtils;
3535
import org.apache.pinot.segment.local.io.util.PinotDataBitSet;
36-
import org.apache.pinot.segment.local.segment.creator.impl.BaseSegmentCreator;
3736
import org.apache.pinot.segment.local.segment.creator.impl.SegmentDictionaryCreator;
3837
import org.apache.pinot.segment.local.segment.creator.impl.fwd.MultiValueVarByteRawIndexCreator;
3938
import org.apache.pinot.segment.local.segment.creator.impl.stats.AbstractColumnStatisticsCollector;
@@ -397,16 +396,23 @@ private boolean shouldChangeRawCompressionType(String column, SegmentDirectory.R
397396
// Get the new compression type.
398397
ForwardIndexConfig forwardIndexConfig = _fieldIndexConfigs.get(column).getConfig(StandardIndexes.forward());
399398
ChunkCompressionType newCompressionType = forwardIndexConfig.getChunkCompressionType();
400-
if (forwardIndexConfig.getCompressionCodecSpec() != null) {
401-
String newCompressionCodecSpec = forwardIndexConfig.getCompressionCodecSpec().toConfigString();
402-
String existingCompressionCodecSpec =
403-
metadataProperties.getString(getKeyFor(column, FORWARD_INDEX_COMPRESSION_CODEC), null);
404-
if (existingCompressionCodecSpec != null) {
405-
return !existingCompressionCodecSpec.equals(newCompressionCodecSpec);
399+
String existingCompressionCodecSpec =
400+
metadataProperties.getString(getKeyFor(column, FORWARD_INDEX_COMPRESSION_CODEC), null);
401+
if (forwardIndexConfig.getCompressionCodecSpec() != null
402+
&& forwardIndexConfig.getCompressionCodecSpec().hasLevel()) {
403+
// Segments written before explicit codec metadata was introduced have an unknown level. Rewrite them when the
404+
// table config now specifies an explicit level so the persisted forward index matches the configured codec spec.
405+
if (existingCompressionCodecSpec == null) {
406+
return true;
406407
}
407-
if (forwardIndexConfig.getCompressionCodecSpec().hasLevel()) {
408+
String newCompressionCodecSpec = forwardIndexConfig.getCompressionCodecSpec().toConfigString();
409+
if (!newCompressionCodecSpec.equals(existingCompressionCodecSpec)) {
408410
return true;
409411
}
412+
} else if (existingCompressionCodecSpec != null) {
413+
// Persisted codec specs are only used for explicit levels. If an existing segment has one and the new
414+
// configuration does not, rewrite the forward index to restore the plain codec behavior.
415+
return true;
410416
}
411417

412418
// Note that default compression type (PASS_THROUGH for metric and LZ4 for dimension) is not considered if the
@@ -476,12 +482,8 @@ private void rewriteForwardIndexForCompressionChange(String column, SegmentDirec
476482
// called during segmentWriter.close().
477483
segmentWriter.removeIndex(column, StandardIndexes.forward());
478484
LoaderUtils.writeIndexToV3Format(segmentWriter, column, fwdIndexFile, StandardIndexes.forward());
479-
Map<String, String> metadataProperties = new HashMap<>();
480-
BaseSegmentCreator.addForwardIndexCompressionCodecInfo(metadataProperties, column,
485+
updateForwardIndexCompressionCodecMetadata(column,
481486
_fieldIndexConfigs.get(column).getConfig(StandardIndexes.forward()), hasDictionary);
482-
if (!metadataProperties.isEmpty()) {
483-
SegmentMetadataUtils.updateMetadataProperties(_segmentDirectory, metadataProperties);
484-
}
485487

486488
// Delete the marker file.
487489
FileUtils.deleteQuietly(inProgress);
@@ -919,6 +921,7 @@ private void createDictBasedForwardIndex(String column, SegmentDirectory.Writer
919921
metadataProperties.put(getKeyFor(column, CARDINALITY), String.valueOf(cardinality));
920922
metadataProperties.put(getKeyFor(column, BITS_PER_ELEMENT),
921923
String.valueOf(PinotDataBitSet.getNumBitsPerValue(cardinality - 1)));
924+
metadataProperties.put(getKeyFor(column, FORWARD_INDEX_COMPRESSION_CODEC), null);
922925
SegmentMetadataUtils.updateMetadataProperties(_segmentDirectory, metadataProperties);
923926

924927
// We remove indexes that have to be rewritten when a dictEnabled is toggled. Note that the respective index
@@ -971,14 +974,17 @@ private void disableDictionaryAndCreateRawForwardIndex(String column, SegmentDir
971974

972975
LOGGER.info("Created raw forwardIndex. Updating metadata properties for segment={} and column={}", segmentName,
973976
column);
974-
Map<String, String> metadataProperties = new HashMap<>();
975-
metadataProperties.put(getKeyFor(column, HAS_DICTIONARY), String.valueOf(false));
976-
metadataProperties.put(getKeyFor(column, DICTIONARY_ELEMENT_SIZE), String.valueOf(0));
977-
BaseSegmentCreator.addForwardIndexCompressionCodecInfo(metadataProperties, column,
977+
PropertiesConfiguration metadataProperties =
978+
SegmentMetadataUtils.getPropertiesConfiguration(_segmentDirectory.getSegmentMetadata());
979+
metadataProperties.setProperty(getKeyFor(column, HAS_DICTIONARY), String.valueOf(false));
980+
metadataProperties.setProperty(getKeyFor(column, DICTIONARY_ELEMENT_SIZE), String.valueOf(0));
981+
updateForwardIndexCompressionCodecMetadata(metadataProperties, column,
978982
_fieldIndexConfigs.get(column).getConfig(StandardIndexes.forward()), false);
979983
// TODO: See https://github.com/apache/pinot/pull/16921 for details
980-
// metadataProperties.put(getKeyFor(column, BITS_PER_ELEMENT), String.valueOf(-1));
981-
SegmentMetadataUtils.updateMetadataProperties(_segmentDirectory, metadataProperties);
984+
// metadataProperties.setProperty(getKeyFor(column, BITS_PER_ELEMENT), String.valueOf(-1));
985+
SegmentMetadataUtils.savePropertiesConfiguration(metadataProperties, _segmentDirectory.getSegmentMetadata()
986+
.getIndexDir());
987+
_segmentDirectory.reloadMetadata();
982988

983989
// Remove range index, inverted index and FST index.
984990
removeDictRelatedIndexes(column, segmentWriter);
@@ -989,6 +995,28 @@ private void disableDictionaryAndCreateRawForwardIndex(String column, SegmentDir
989995
LOGGER.info("Created raw based forward index for segment: {}, column: {}", segmentName, column);
990996
}
991997

998+
private void updateForwardIndexCompressionCodecMetadata(String column,
999+
@Nullable ForwardIndexConfig forwardIndexConfig, boolean hasDictionary)
1000+
throws Exception {
1001+
PropertiesConfiguration metadataProperties =
1002+
SegmentMetadataUtils.getPropertiesConfiguration(_segmentDirectory.getSegmentMetadata());
1003+
updateForwardIndexCompressionCodecMetadata(metadataProperties, column, forwardIndexConfig, hasDictionary);
1004+
SegmentMetadataUtils.savePropertiesConfiguration(metadataProperties, _segmentDirectory.getSegmentMetadata()
1005+
.getIndexDir());
1006+
_segmentDirectory.reloadMetadata();
1007+
}
1008+
1009+
private static void updateForwardIndexCompressionCodecMetadata(PropertiesConfiguration metadataProperties,
1010+
String column, @Nullable ForwardIndexConfig forwardIndexConfig, boolean hasDictionary) {
1011+
String metadataKey = getKeyFor(column, FORWARD_INDEX_COMPRESSION_CODEC);
1012+
if (!hasDictionary && forwardIndexConfig != null && forwardIndexConfig.getCompressionCodecSpec() != null
1013+
&& forwardIndexConfig.getCompressionCodecSpec().hasLevel()) {
1014+
metadataProperties.setProperty(metadataKey, forwardIndexConfig.getCompressionCodecSpec().toConfigString());
1015+
} else {
1016+
metadataProperties.clearProperty(metadataKey);
1017+
}
1018+
}
1019+
9921020
private void rewriteDictToRawForwardIndex(ColumnMetadata columnMetadata, SegmentDirectory.Writer segmentWriter,
9931021
File indexDir)
9941022
throws Exception {

pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/InvertedIndexAndDictionaryBasedForwardIndexCreator.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -280,10 +280,11 @@ private Map<String, String> createForwardIndexForSVColumn()
280280
writeToForwardIndex(dictionary, context);
281281

282282
// Setup and return the metadata properties to update
283+
Map<String, String> metadataProperties = new HashMap<>();
283284
if (_dictionaryEnabled) {
284-
return Map.of();
285+
metadataProperties.put(getKeyFor(_columnName, FORWARD_INDEX_COMPRESSION_CODEC), null);
286+
return metadataProperties;
285287
} else {
286-
Map<String, String> metadataProperties = new HashMap<>();
287288
metadataProperties.put(getKeyFor(_columnName, HAS_DICTIONARY), String.valueOf(false));
288289
metadataProperties.put(getKeyFor(_columnName, DICTIONARY_ELEMENT_SIZE), String.valueOf(0));
289290
BaseSegmentCreator.addForwardIndexCompressionCodecInfo(metadataProperties, _columnName, _forwardIndexConfig,
@@ -378,7 +379,9 @@ private Map<String, String> createForwardIndexForMVColumn()
378379
metadataProperties.put(getKeyFor(_columnName, MAX_MULTI_VALUE_ELEMENTS),
379380
String.valueOf(maxNumberOfMultiValues[0]));
380381
metadataProperties.put(getKeyFor(_columnName, TOTAL_NUMBER_OF_ENTRIES), String.valueOf(_nextValueId));
381-
if (!_dictionaryEnabled) {
382+
if (_dictionaryEnabled) {
383+
metadataProperties.put(getKeyFor(_columnName, FORWARD_INDEX_COMPRESSION_CODEC), null);
384+
} else {
382385
metadataProperties.put(getKeyFor(_columnName, HAS_DICTIONARY), String.valueOf(false));
383386
metadataProperties.put(getKeyFor(_columnName, DICTIONARY_ELEMENT_SIZE), String.valueOf(0));
384387
BaseSegmentCreator.addForwardIndexCompressionCodecInfo(metadataProperties, _columnName, _forwardIndexConfig,
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.pinot.segment.local.segment.creator.impl;
20+
21+
import java.util.HashMap;
22+
import java.util.Map;
23+
import org.apache.commons.configuration2.PropertiesConfiguration;
24+
import org.apache.pinot.segment.spi.V1Constants;
25+
import org.apache.pinot.segment.spi.index.ForwardIndexConfig;
26+
import org.apache.pinot.spi.config.table.CompressionCodecSpec;
27+
import org.testng.annotations.Test;
28+
29+
import static org.testng.Assert.assertEquals;
30+
import static org.testng.Assert.assertFalse;
31+
32+
33+
/**
34+
* Unit tests for forward-index compression metadata helpers in {@link BaseSegmentCreator}.
35+
*/
36+
public class BaseSegmentCreatorTest {
37+
private static final String COLUMN = "rawCol";
38+
private static final String METADATA_KEY = V1Constants.MetadataKeys.Column.getKeyFor(COLUMN,
39+
V1Constants.MetadataKeys.Column.FORWARD_INDEX_COMPRESSION_CODEC);
40+
41+
@Test
42+
public void testAddsExplicitLeveledCompressionCodecMetadata() {
43+
ForwardIndexConfig forwardIndexConfig = new ForwardIndexConfig.Builder()
44+
.withCompressionCodecSpec(CompressionCodecSpec.fromString("zstd(3)"))
45+
.build();
46+
47+
PropertiesConfiguration properties = new PropertiesConfiguration();
48+
BaseSegmentCreator.addForwardIndexCompressionCodecInfo(properties, COLUMN, forwardIndexConfig, false);
49+
assertEquals(properties.getString(METADATA_KEY), "ZSTANDARD(3)");
50+
51+
Map<String, String> metadataProperties = new HashMap<>();
52+
BaseSegmentCreator.addForwardIndexCompressionCodecInfo(metadataProperties, COLUMN, forwardIndexConfig, false);
53+
assertEquals(metadataProperties.get(METADATA_KEY), "ZSTANDARD(3)");
54+
}
55+
56+
@Test
57+
public void testSkipsImplicitOrDictionaryCompressionCodecMetadata() {
58+
ForwardIndexConfig plainCodecConfig = new ForwardIndexConfig.Builder()
59+
.withCompressionCodecSpec(CompressionCodecSpec.fromString("LZ4"))
60+
.build();
61+
PropertiesConfiguration properties = new PropertiesConfiguration();
62+
BaseSegmentCreator.addForwardIndexCompressionCodecInfo(properties, COLUMN, plainCodecConfig, false);
63+
assertFalse(properties.containsKey(METADATA_KEY));
64+
65+
Map<String, String> metadataProperties = new HashMap<>();
66+
BaseSegmentCreator.addForwardIndexCompressionCodecInfo(metadataProperties, COLUMN, plainCodecConfig, false);
67+
assertFalse(metadataProperties.containsKey(METADATA_KEY));
68+
69+
ForwardIndexConfig leveledCodecConfig = new ForwardIndexConfig.Builder()
70+
.withCompressionCodecSpec(CompressionCodecSpec.fromString("GZIP(6)"))
71+
.build();
72+
PropertiesConfiguration dictionaryProperties = new PropertiesConfiguration();
73+
BaseSegmentCreator.addForwardIndexCompressionCodecInfo(dictionaryProperties, COLUMN, leveledCodecConfig, true);
74+
assertFalse(dictionaryProperties.containsKey(METADATA_KEY));
75+
76+
Map<String, String> nullConfigProperties = new HashMap<>();
77+
BaseSegmentCreator.addForwardIndexCompressionCodecInfo(nullConfigProperties, COLUMN, null, false);
78+
assertFalse(nullConfigProperties.containsKey(METADATA_KEY));
79+
}
80+
}

0 commit comments

Comments
 (0)