Skip to content

Commit c588b67

Browse files
committed
Merge branch 'cassandra-6.0' into trunk
* cassandra-6.0: Avoid type lookup in SerializationHeader#getType if schema and SSTable are aligned
2 parents 0a6774d + 9198058 commit c588b67

3 files changed

Lines changed: 180 additions & 5 deletions

File tree

CHANGES.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
* Allow nodetool garbagecollect to take a user defined list of SSTables (CASSANDRA-16767)
44
* Add a guardrail for misprepared statements (CASSANDRA-21139)
55
Merged from 6.0:
6+
* Avoid type lookup in SerializationHeader#getType if schema and SSTable are aligned (CASSANDRA-21402)
67
* Replace LongAdder in metric-like logic with ThreadLocalCounter (CASSANDRA-21400)
78
* BTreeRow#hasLiveData: Avoid Cell iteration if there are no tombstones in a row (CASSANDRA-21363)
89
* Reduce memory allocations in row merge logic (CASSANDRA-21359)

src/java/org/apache/cassandra/db/SerializationHeader.java

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,7 @@ public MetadataType getType()
349349
public SerializationHeader toHeader(TableMetadata metadata) throws UnknownColumnException
350350
{
351351
Map<ByteBuffer, AbstractType<?>> typeMap = new HashMap<>(staticColumns.size() + regularColumns.size());
352+
boolean hasTypeMismatch = false;
352353

353354
RegularAndStaticColumns.Builder builder = RegularAndStaticColumns.builder();
354355
for (Map<ByteBuffer, AbstractType<?>> map : ImmutableList.of(staticColumns, regularColumns))
@@ -357,9 +358,10 @@ public SerializationHeader toHeader(TableMetadata metadata) throws UnknownColumn
357358
for (Map.Entry<ByteBuffer, AbstractType<?>> e : map.entrySet())
358359
{
359360
ByteBuffer name = e.getKey();
360-
AbstractType<?> other = typeMap.put(name, e.getValue());
361-
if (other != null && !other.equals(e.getValue()))
362-
throw new IllegalStateException("Column " + name + " occurs as both regular and static with types " + other + "and " + e.getValue());
361+
AbstractType<?> diskType = e.getValue();
362+
AbstractType<?> other = typeMap.put(name, diskType);
363+
if (other != null && !other.equals(diskType))
364+
throw new IllegalStateException("Column " + name + " occurs as both regular and static with types " + other + "and " + diskType);
363365

364366
ColumnMetadata column = metadata.getColumn(name);
365367
if (column == null || column.isStatic() != isStatic)
@@ -376,11 +378,15 @@ public SerializationHeader toHeader(TableMetadata metadata) throws UnknownColumn
376378
if (column == null)
377379
throw new UnknownColumnException("Unknown column " + UTF8Type.instance.getString(name) + " during deserialization");
378380
}
381+
if (!diskType.equals(column.type))
382+
hasTypeMismatch = true;
379383
builder.add(column);
380384
}
381385
}
382386

383-
return new SerializationHeader(true, keyType, clusteringTypes, builder.build(), stats, typeMap);
387+
// if all on-disk types match the current schema then we can use
388+
// fast column.type path on every cell read instead of doing a map lookup.
389+
return new SerializationHeader(true, keyType, clusteringTypes, builder.build(), stats, hasTypeMismatch ? typeMap : null);
384390
}
385391

386392
@Override

test/unit/org/apache/cassandra/db/SerializationHeaderTest.java

Lines changed: 169 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,15 @@
3636
import org.apache.cassandra.db.compaction.OperationType;
3737
import org.apache.cassandra.db.lifecycle.LifecycleTransaction;
3838
import org.apache.cassandra.db.marshal.Int32Type;
39+
import org.apache.cassandra.db.marshal.ListType;
40+
import org.apache.cassandra.db.marshal.LongType;
3941
import org.apache.cassandra.db.partitions.PartitionUpdate;
4042
import org.apache.cassandra.db.rows.BTreeRow;
4143
import org.apache.cassandra.db.rows.BufferCell;
4244
import org.apache.cassandra.db.rows.Cell;
4345
import org.apache.cassandra.db.rows.Row;
4446
import org.apache.cassandra.db.rows.UnfilteredRowIterator;
47+
import org.apache.cassandra.exceptions.UnknownColumnException;
4548
import org.apache.cassandra.io.sstable.Descriptor;
4649
import org.apache.cassandra.io.sstable.ISSTableScanner;
4750
import org.apache.cassandra.io.sstable.SequenceBasedSSTableId;
@@ -63,7 +66,172 @@ public class SerializationHeaderTest
6366
{
6467
DatabaseDescriptor.daemonInitialization();
6568
}
66-
69+
70+
/**
71+
* Common case: schema unchanged since the SSTable was written.
72+
*/
73+
@Test
74+
public void testTypeMapNullWhenNoSchemaChanges() throws UnknownColumnException
75+
{
76+
TableMetadata schema = TableMetadata.builder(KEYSPACE, "testTypeMapNull")
77+
.addPartitionKeyColumn("k", Int32Type.instance)
78+
.addClusteringColumn("c", Int32Type.instance)
79+
.addRegularColumn("v", Int32Type.instance)
80+
.build();
81+
82+
SerializationHeader.Component component = SerializationHeader.makeWithoutStats(schema).toComponent();
83+
SerializationHeader header = component.toHeader(schema);
84+
85+
ColumnMetadata v = schema.getColumn(ColumnIdentifier.getInterned("v", false));
86+
// typeMap == null => getType() returns exactly column.type (same object reference)
87+
Assert.assertSame(v.type, header.getType(v));
88+
}
89+
90+
/**
91+
* ALTER TABLE scenario: column was written as Int32Type, schema now has LongType.
92+
* toHeader() must retain typeMap so getType() returns the on-disk Int32Type and
93+
* not the current schema LongType, which would corrupt deserialization.
94+
*/
95+
@Test
96+
public void testTypeMapNonNullWhenColumnTypeChanged() throws UnknownColumnException
97+
{
98+
ColumnIdentifier v = ColumnIdentifier.getInterned("v", false);
99+
TableMetadata schemaAtWrite = TableMetadata.builder(KEYSPACE, "testTypeMapNonNull")
100+
.addPartitionKeyColumn("k", Int32Type.instance)
101+
.addClusteringColumn("c", Int32Type.instance)
102+
.addRegularColumn("v", Int32Type.instance)
103+
.build();
104+
TableMetadata schemaAfterAlter = TableMetadata.builder(KEYSPACE, "testTypeMapNonNull")
105+
.addPartitionKeyColumn("k", Int32Type.instance)
106+
.addClusteringColumn("c", Int32Type.instance)
107+
.addRegularColumn("v", LongType.instance) // type is changed
108+
.build();
109+
110+
SerializationHeader.Component component = SerializationHeader.makeWithoutStats(schemaAtWrite).toComponent();
111+
SerializationHeader header = component.toHeader(schemaAfterAlter);
112+
113+
ColumnMetadata vNew = schemaAfterAlter.getColumn(v);
114+
// getType() must return the on-disk Int32Type, not the current LongType
115+
Assert.assertEquals(Int32Type.instance, header.getType(vNew));
116+
}
117+
118+
/**
119+
* Column was dropped at the same type it was written with — no actual type mismatch.
120+
*/
121+
@Test
122+
public void testTypeMapNullForDroppedColumnWithSameType() throws UnknownColumnException
123+
{
124+
ColumnIdentifier v = ColumnIdentifier.getInterned("v", false);
125+
TableMetadata schemaAtWrite = TableMetadata.builder(KEYSPACE, "testTypeMapDroppedSame")
126+
.addPartitionKeyColumn("k", Int32Type.instance)
127+
.addClusteringColumn("c", Int32Type.instance)
128+
.addRegularColumn("v", Int32Type.instance)
129+
.build();
130+
ColumnMetadata vColumn = schemaAtWrite.getColumn(v);
131+
TableMetadata schemaAfterDrop = schemaAtWrite.unbuild().recordColumnDrop(vColumn, 0L).build();
132+
133+
SerializationHeader.Component component = SerializationHeader.makeWithoutStats(schemaAtWrite).toComponent();
134+
SerializationHeader header = component.toHeader(schemaAfterDrop);
135+
136+
ColumnMetadata vDropped = schemaAfterDrop.getDroppedColumn(vColumn.name.bytes);
137+
// disk type == dropped type => typeMap null => same reference
138+
Assert.assertSame(vDropped.type, header.getType(vDropped));
139+
Assert.assertEquals(Int32Type.instance, header.getType(vDropped));
140+
}
141+
142+
/**
143+
* Column was altered from Int32Type to LongType and then dropped.
144+
* An SSTable written before the ALTER stores Int32Type on disk, but the drop record
145+
* has LongType. The type mismatch must be detected and typeMap retained so
146+
* the Int32Type bytes can still be skipped correctly.
147+
*/
148+
@Test
149+
public void testTypeMapNonNullForDroppedColumnWithChangedType() throws UnknownColumnException
150+
{
151+
ColumnIdentifier v = ColumnIdentifier.getInterned("v", false);
152+
TableMetadata schemaAtWrite = TableMetadata.builder(KEYSPACE, "testTypeMapDroppedChanged")
153+
.addPartitionKeyColumn("k", Int32Type.instance)
154+
.addClusteringColumn("c", Int32Type.instance)
155+
.addRegularColumn("v", Int32Type.instance)
156+
.build();
157+
TableMetadata schemaWithAlteredType = TableMetadata.builder(KEYSPACE, "testTypeMapDroppedChanged")
158+
.addPartitionKeyColumn("k", Int32Type.instance)
159+
.addClusteringColumn("c", Int32Type.instance)
160+
.addRegularColumn("v", LongType.instance) // type is changed
161+
.build();
162+
ColumnMetadata vLong = schemaWithAlteredType.getColumn(v);
163+
TableMetadata schemaAfterDrop = schemaWithAlteredType.unbuild()
164+
.recordColumnDrop(vLong, 0L) // drop the value column
165+
.build();
166+
167+
SerializationHeader.Component component = SerializationHeader.makeWithoutStats(schemaAtWrite).toComponent();
168+
SerializationHeader header = component.toHeader(schemaAfterDrop);
169+
170+
ColumnMetadata vDropped = schemaAfterDrop.getDroppedColumn(vLong.name.bytes);
171+
// disk type (Int32) != drop-record type (Long) => typeMap retained, returns Int32
172+
Assert.assertEquals(Int32Type.instance, header.getType(vDropped));
173+
Assert.assertNotSame(vDropped.type, header.getType(vDropped));
174+
}
175+
176+
/**
177+
* Static-column variant of {@link #testTypeMapNonNullWhenColumnTypeChanged}.
178+
* The mismatch detection in toHeader() iterates staticColumns and regularColumns through
179+
* the same loop; this covers the static branch.
180+
*/
181+
@Test
182+
public void testTypeMapNonNullWhenStaticColumnTypeChanged() throws UnknownColumnException
183+
{
184+
ColumnIdentifier s = ColumnIdentifier.getInterned("s", false);
185+
TableMetadata schemaAtWrite = TableMetadata.builder(KEYSPACE, "testTypeMapNonNullStatic")
186+
.addPartitionKeyColumn("k", Int32Type.instance)
187+
.addClusteringColumn("c", Int32Type.instance)
188+
.addStaticColumn("s", Int32Type.instance)
189+
.build();
190+
TableMetadata schemaAfterAlter = TableMetadata.builder(KEYSPACE, "testTypeMapNonNullStatic")
191+
.addPartitionKeyColumn("k", Int32Type.instance)
192+
.addClusteringColumn("c", Int32Type.instance)
193+
.addStaticColumn("s", LongType.instance) // static column type is changed
194+
.build();
195+
196+
SerializationHeader.Component component = SerializationHeader.makeWithoutStats(schemaAtWrite).toComponent();
197+
SerializationHeader header = component.toHeader(schemaAfterAlter);
198+
199+
ColumnMetadata sNew = schemaAfterAlter.getColumn(s);
200+
// getType() must return the on-disk Int32Type, not the current LongType
201+
Assert.assertEquals(Int32Type.instance, header.getType(sNew));
202+
}
203+
204+
/**
205+
* Multi-cell vs frozen collections serialize differently and CollectionType.equals
206+
* checks isMultiCell. A schema flipping a list from multi-cell to frozen (or vice versa)
207+
* must be detected as a type mismatch so typeMap is retained.
208+
*/
209+
@Test
210+
public void testTypeMapNonNullWhenCollectionMultiCellChanged() throws UnknownColumnException
211+
{
212+
ColumnIdentifier v = ColumnIdentifier.getInterned("v", false);
213+
ListType<Integer> multiCellList = ListType.getInstance(Int32Type.instance, true);
214+
ListType<Integer> frozenList = ListType.getInstance(Int32Type.instance, false);
215+
216+
TableMetadata schemaAtWrite = TableMetadata.builder(KEYSPACE, "testTypeMapNonNullCollection")
217+
.addPartitionKeyColumn("k", Int32Type.instance)
218+
.addClusteringColumn("c", Int32Type.instance)
219+
.addRegularColumn("v", multiCellList)
220+
.build();
221+
TableMetadata schemaAfterFreeze = TableMetadata.builder(KEYSPACE, "testTypeMapNonNullCollection")
222+
.addPartitionKeyColumn("k", Int32Type.instance)
223+
.addClusteringColumn("c", Int32Type.instance)
224+
.addRegularColumn("v", frozenList) // multi-cell -> frozen
225+
.build();
226+
227+
SerializationHeader.Component component = SerializationHeader.makeWithoutStats(schemaAtWrite).toComponent();
228+
SerializationHeader header = component.toHeader(schemaAfterFreeze);
229+
230+
ColumnMetadata vNew = schemaAfterFreeze.getColumn(v);
231+
// getType() must return the on-disk multi-cell list, not the frozen schema type
232+
Assert.assertEquals(multiCellList, header.getType(vNew));
233+
}
234+
67235
@Test
68236
public void testWrittenAsDifferentKind() throws Exception
69237
{

0 commit comments

Comments
 (0)