Skip to content

Commit 46f58fb

Browse files
authored
[Issue #521] support chunk aligned compact file (#532)
Also fix an argument checking bug in PixelsCompactor.
1 parent 9b92914 commit 46f58fb

5 files changed

Lines changed: 63 additions & 28 deletions

File tree

pixels-common/src/main/java/io/pixelsdb/pixels/common/layout/InvertedSplitsIndex.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ public InvertedSplitsIndex(long version, List<String> columnOrder, List<SplitPat
5555
BitSet bitMap = new BitSet(this.querySplitPatterns.size());
5656
for (int i = 0; i < this.querySplitPatterns.size(); ++i)
5757
{
58-
if (this.querySplitPatterns.get(i).contaiansColumn(column))
58+
if (this.querySplitPatterns.get(i).containsColumn(column))
5959
{
6060
bitMap.set(i, true);
6161
}

pixels-common/src/main/java/io/pixelsdb/pixels/common/layout/SplitPattern.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ public int getSplitSize()
6666
return splitSize;
6767
}
6868

69-
public boolean contaiansColumn(String column)
69+
public boolean containsColumn(String column)
7070
{
7171
return this.columnSet.contains(column);
7272
}

pixels-common/src/main/resources/pixels.properties

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,10 @@ pixel.stride=10000
6060
row.group.size=268435456
6161
# The alignment is for SIMD and its unit is byte
6262
column.chunk.alignment=32
63+
column.chunk.encoding=true
6364
block.size=2147483648
6465
block.replication=1
6566
block.padding=true
66-
encoding=true
6767
compression.block.size=1
6868
# row batch size for pixels record reader, default value is 10000
6969
row.batch.size=10000

pixels-core/src/main/java/io/pixelsdb/pixels/core/PixelsWriterImpl.java

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,14 @@ public class PixelsWriterImpl implements PixelsWriter
7777
private final boolean encoding;
7878
private final boolean partitioned;
7979
private final Optional<List<Integer>> partKeyColumnIds;
80+
/**
81+
* The number of bytes that each column chunk is aligned to.
82+
*/
83+
private final int chunkAlignment;
84+
/**
85+
* The byte buffer padded to each column chunk for alignment.
86+
*/
87+
private final byte[] chunkPaddingBuffer;
8088

8189
private final ColumnWriter[] columnWriters;
8290
private final StatsRecorder[] fileColStatRecorders;
@@ -101,14 +109,6 @@ public class PixelsWriterImpl implements PixelsWriter
101109
private final List<TypeDescription> children;
102110

103111
private final ExecutorService columnWriterService = Executors.newCachedThreadPool();
104-
/**
105-
* The number of bytes that each column chunk is aligned to.
106-
*/
107-
private final int chunkAlignment;
108-
/**
109-
* The byte buffer padded to each column chunk for alignment.
110-
*/
111-
private final byte[] chunkPaddingBuffer;
112112

113113
private PixelsWriterImpl(
114114
TypeDescription schema,
@@ -282,8 +282,7 @@ public Builder setPartKeyColumnIds(List<Integer> partitionColumnIds)
282282
return this;
283283
}
284284

285-
public PixelsWriter build()
286-
throws PixelsWriterException
285+
public PixelsWriter build() throws PixelsWriterException
287286
{
288287
requireNonNull(this.builderStorage, "storage is not set");
289288
requireNonNull(this.builderFilePath, "file path is not set");
@@ -399,8 +398,7 @@ public boolean isPartitioned()
399398
}
400399

401400
@Override
402-
public boolean addRowBatch(VectorizedRowBatch rowBatch)
403-
throws IOException
401+
public boolean addRowBatch(VectorizedRowBatch rowBatch) throws IOException
404402
{
405403
checkArgument(!partitioned, "this file is hash partitioned, " +
406404
"use addRowBatch(rowBatch, hashValue) instead");
@@ -498,8 +496,7 @@ public void close()
498496
}
499497
}
500498

501-
private void writeRowGroup()
502-
throws IOException
499+
private void writeRowGroup() throws IOException
503500
{
504501
int rowGroupDataLength = 0;
505502

@@ -660,8 +657,7 @@ private void writeRowGroup()
660657
this.fileContentLength += rowGroupDataLength;
661658
}
662659

663-
private void writeFileTail()
664-
throws IOException
660+
private void writeFileTail() throws IOException
665661
{
666662
PixelsProto.Footer footer;
667663
PixelsProto.PostScript postScript;
@@ -694,6 +690,7 @@ private void writeFileTail()
694690
.setPixelStride(pixelStride)
695691
.setWriterTimezone(timeZone.getDisplayName())
696692
.setPartitioned(partitioned)
693+
.setColumnChunkAlignment(chunkAlignment)
697694
.setMagic(Constants.MAGIC)
698695
.build();
699696

pixels-core/src/main/java/io/pixelsdb/pixels/core/compactor/PixelsCompactor.java

Lines changed: 47 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
import com.google.common.collect.ImmutableList;
2323
import io.pixelsdb.pixels.common.physical.*;
24+
import io.pixelsdb.pixels.common.utils.ConfigFactory;
2425
import io.pixelsdb.pixels.common.utils.Constants;
2526
import io.pixelsdb.pixels.core.PixelsProto;
2627
import io.pixelsdb.pixels.core.PixelsVersion;
@@ -58,6 +59,14 @@ public class PixelsCompactor
5859
private final TimeZone timeZone;
5960
private final long fileContentLength;
6061
private final int fileRowNum;
62+
/**
63+
* The number of bytes that each column chunk is aligned to.
64+
*/
65+
private final int chunkAlignment;
66+
/**
67+
* The byte buffer padded to each column chunk for alignment.
68+
*/
69+
private final byte[] chunkPaddingBuffer;
6170

6271
private final Storage inputStorage;
6372
private final PhysicalWriter fsWriter;
@@ -93,6 +102,9 @@ private PixelsCompactor(
93102
checkArgument(compressionBlockSize > 0, "compression block size is not positive");
94103
this.compressionBlockSize = compressionBlockSize;
95104
this.timeZone = requireNonNull(timeZone);
105+
this.chunkAlignment = Integer.parseInt(ConfigFactory.Instance().getProperty("column.chunk.alignment"));
106+
checkArgument(this.chunkAlignment >= 0, "column.chunk.alignment must >= 0");
107+
this.chunkPaddingBuffer = new byte[this.chunkAlignment];
96108
checkArgument(fileContentLength > 0, "file content length is not positive");
97109
this.fileContentLength = fileContentLength;
98110
checkArgument(fileRowNum > 0, "file row number is not positive");
@@ -103,7 +115,7 @@ private PixelsCompactor(
103115

104116
this.fileColStatRecorders = requireNonNull(fileColStatRecorders, "file column stat reader is null");
105117

106-
checkArgument(!requireNonNull(rowGroupFooterBuilderList).isEmpty());
118+
checkArgument(!requireNonNull(rowGroupInfoBuilderList).isEmpty());
107119
checkArgument(!requireNonNull(rowGroupStatBuilderList).isEmpty());
108120
checkArgument(!requireNonNull(rowGroupFooterBuilderList).isEmpty());
109121
checkArgument(!requireNonNull(rowGroupPaths).isEmpty());
@@ -369,10 +381,34 @@ private void writeColumnChunks()
369381
fsReader.seek(columnChunkOffset);
370382
byte[] chunkBuffer = new byte[(int) columnChunkLength];
371383
fsReader.readFully(chunkBuffer);
372-
fsWriter.prepare((int) columnChunkLength);
373-
long offset = this.fsWriter.append(chunkBuffer, 0, (int) columnChunkLength);
374-
columnChunkIndexBuilder.setChunkOffset(offset);
375-
// this.fsWriter.flush(); // Issue #192: no need to flush as writing has not finished.
384+
385+
// Issue #521: prepare for writing the column chunk, and make sure the start offset is aligned.
386+
long chunkStartOffset = fsWriter.prepare((int) columnChunkLength);
387+
int tryAlign = 0;
388+
while (chunkAlignment != 0 && chunkStartOffset % chunkAlignment != 0 && tryAlign++ < 2)
389+
{
390+
int alignBytes = (int) (chunkAlignment - chunkStartOffset % chunkAlignment);
391+
this.fsWriter.append(chunkPaddingBuffer, 0, alignBytes);
392+
chunkStartOffset = this.fsWriter.prepare((int) columnChunkLength);
393+
}
394+
if (tryAlign > 2)
395+
{
396+
LOGGER.warn("failed to align the start offset of the column chunk");
397+
throw new IOException("failed to align the start offset of the column chunk");
398+
}
399+
400+
this.fsWriter.append(chunkBuffer, 0, (int) columnChunkLength);
401+
/*
402+
* Issue #521:
403+
* It is not necessary to pad the column chunk here, as additional bytes are already padded before
404+
* writing this column chunk to ensure chunkStartOffset is aligned. For the last column chunk,
405+
* there is no need to ensure its length is aligned. We only need aligned start offsets.
406+
*
407+
* Also, there is no need to update the column chunk length, because the pixels reader needs the real length
408+
* of the column chunk.
409+
*/
410+
columnChunkIndexBuilder.setChunkOffset(chunkStartOffset);
411+
// Issue #192: no need to flush fsWriter as writing has not finished.
376412
}
377413
catch (IOException e)
378414
{
@@ -417,8 +453,8 @@ private void writeRowGroupFooters()
417453

418454
private void writeFileTail()
419455
{
420-
PixelsProto.Footer footer = writeFooter();
421-
PixelsProto.PostScript postScript = writePostScript();
456+
PixelsProto.Footer footer = buildFileFooter();
457+
PixelsProto.PostScript postScript = buildPostScript();
422458

423459
PixelsProto.FileTail fileTail =
424460
PixelsProto.FileTail.newBuilder()
@@ -447,7 +483,7 @@ private void writeFileTail()
447483
}
448484
}
449485

450-
private PixelsProto.Footer writeFooter()
486+
private PixelsProto.Footer buildFileFooter()
451487
{
452488
PixelsProto.Footer.Builder footerBuilder =
453489
PixelsProto.Footer.newBuilder();
@@ -468,7 +504,7 @@ private PixelsProto.Footer writeFooter()
468504
return footerBuilder.build();
469505
}
470506

471-
private PixelsProto.PostScript writePostScript()
507+
private PixelsProto.PostScript buildPostScript()
472508
{
473509
return PixelsProto.PostScript.newBuilder()
474510
.setVersion(Constants.VERSION)
@@ -478,6 +514,8 @@ private PixelsProto.PostScript writePostScript()
478514
.setCompressionBlockSize(compressionBlockSize)
479515
.setPixelStride(pixelStride)
480516
.setWriterTimezone(timeZone.getDisplayName())
517+
.setPartitioned(false) // Issue #521: we do not compact partitioned files.
518+
.setColumnChunkAlignment(chunkAlignment)
481519
.setMagic(Constants.MAGIC)
482520
.build();
483521
}

0 commit comments

Comments
 (0)