Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.catalog;

import org.apache.doris.thrift.TStorageMedium;

import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

/**
* Memory-compact storage for TabletMeta using Structure-of-Arrays layout.
*
* Instead of storing one TabletMeta object per tablet (each with a 16-byte Java object header),
* this class stores each field in a parallel primitive array indexed by an internal slot.
* A Long2IntOpenHashMap maps tabletId to the slot index.
*
* Deleted slots are reused via a free list embedded in the dbIds array.
*
* Thread safety: callers must hold appropriate locks (provided by TabletInvertedIndex).
*/
public class CompactTabletMetaStore {

// Capacity used by the no-argument constructor.
private static final int INITIAL_CAPACITY = 1024;
// Sentinel slot value: installed as the map's default return value (meaning
// "tabletId not present") and also used to mark the end of the free list.
private static final int ABSENT = -1;

// tabletId -> slot index
private Long2IntOpenHashMap tabletIdToSlot;

// parallel arrays indexed by slot
// NOTE: for a freed slot, dbIds[slot] does not hold a database id; it holds the
// index of the next free slot (see allocateSlot / freeSlot).
private long[] dbIds;
private long[] tableIds;
private long[] partitionIds;
private long[] indexIds;
private int[] oldSchemaHashes;
// Byte-encoded storage medium; readers use it as an index into MEDIUM_VALUES.
private byte[] storageMediumOrdinals;

Comment on lines +51 to +54
Copy link

Copilot AI Apr 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

newSchemaHashes is allocated and grown alongside the other arrays, but it is never read or exposed, and TabletMeta currently has no getter for newSchemaHash. This adds per-tablet memory overhead and undermines the compaction goal.

Either remove newSchemaHashes entirely for now, or fully wire it through (capture the value on add and expose a getter / include it when constructing TabletMeta).

Copilot uses AI. Check for mistakes.
// Head of the free list of released slots; ABSENT means the list is empty.
// The "next" link of each free slot is stored in dbIds[slot].
private int freeHead = ABSENT;
// Index of the next slot that has never been handed out; slots below it are
// either live or on the free list.
private int highWaterMark = 0;
// number of live entries
private int size = 0;
// Allocated length of each parallel array.
private int capacity;

// Cached TStorageMedium.values(); indexed by the byte kept in storageMediumOrdinals.
private static final TStorageMedium[] MEDIUM_VALUES = TStorageMedium.values();

public CompactTabletMetaStore() {
this(INITIAL_CAPACITY);
}

public CompactTabletMetaStore(int initialCapacity) {
this.capacity = Math.max(initialCapacity, 4);
this.tabletIdToSlot = new Long2IntOpenHashMap(this.capacity);
this.tabletIdToSlot.defaultReturnValue(ABSENT);
this.dbIds = new long[this.capacity];
this.tableIds = new long[this.capacity];
this.partitionIds = new long[this.capacity];
this.indexIds = new long[this.capacity];
this.oldSchemaHashes = new int[this.capacity];
this.storageMediumOrdinals = new byte[this.capacity];
}

/**
 * Registers the metadata of a tablet under {@code tabletId}.
 *
 * <p>Every value is read from {@code meta} before any internal state is modified, so a
 * failure while reading (a {@code NullPointerException} for a null {@code meta} or a
 * null storage medium) leaves the store unchanged rather than leaving behind a mapped
 * but uninitialised slot.
 *
 * @param tabletId id of the tablet to register
 * @param meta     metadata whose fields are copied into the parallel arrays
 * @return {@code true} if the tablet was added; {@code false} if {@code tabletId} was
 *         already present, in which case the existing entry is left untouched
 */
public boolean add(long tabletId, TabletMeta meta) {
    if (tabletIdToSlot.containsKey(tabletId)) {
        return false;
    }

    final long dbId = meta.getDbId();
    final long tableId = meta.getTableId();
    final long partitionId = meta.getPartitionId();
    final long indexId = meta.getIndexId();
    final int oldSchemaHash = meta.getOldSchemaHash();
    // Store the enum ordinal, not the thrift wire value from getValue(): readers decode
    // through MEDIUM_VALUES (TStorageMedium.values()), which is indexed by ordinal.
    final byte mediumOrdinal = (byte) meta.getStorageMedium().ordinal();

    final int slot = allocateSlot();
    tabletIdToSlot.put(tabletId, slot);
    // For a recycled slot this overwrites the free-list link that was kept in dbIds.
    dbIds[slot] = dbId;
    tableIds[slot] = tableId;
    partitionIds[slot] = partitionId;
    indexIds[slot] = indexId;
    oldSchemaHashes[slot] = oldSchemaHash;
    storageMediumOrdinals[slot] = mediumOrdinal;
    size++;
    return true;
}

/**
 * Removes the entry for {@code tabletId} and makes its slot available for reuse.
 * Does nothing when the tablet is not present.
 *
 * @param tabletId id of the tablet to remove
 */
public void remove(long tabletId) {
    // remove() yields the map's default return value (ABSENT) for unknown keys.
    final int released = tabletIdToSlot.remove(tabletId);
    if (released != ABSENT) {
        freeSlot(released);
        size--;
    }
}

/**
 * Tells whether metadata is stored for {@code tabletId}.
 *
 * @param tabletId id of the tablet to look up
 * @return {@code true} if the tablet is present
 */
public boolean containsKey(long tabletId) {
    // Stored slots are always >= 0, so the map's default return value (ABSENT)
    // can only mean "no mapping".
    return tabletIdToSlot.get(tabletId) != ABSENT;
}

/**
 * Looks up the database id recorded for a tablet.
 *
 * @param tabletId id of the tablet to look up
 * @return the stored database id, or {@code TabletInvertedIndex.NOT_EXIST_VALUE} when
 *         the tablet is not present
 */
public long getDbId(long tabletId) {
    final int slot = tabletIdToSlot.get(tabletId);
    if (slot == ABSENT) {
        return TabletInvertedIndex.NOT_EXIST_VALUE;
    }
    return dbIds[slot];
}

/**
 * Looks up the table id recorded for a tablet.
 *
 * @param tabletId id of the tablet to look up
 * @return the stored table id, or {@code TabletInvertedIndex.NOT_EXIST_VALUE} when
 *         the tablet is not present
 */
public long getTableId(long tabletId) {
    final int slot = tabletIdToSlot.get(tabletId);
    if (slot == ABSENT) {
        return TabletInvertedIndex.NOT_EXIST_VALUE;
    }
    return tableIds[slot];
}

/**
 * Looks up the partition id recorded for a tablet.
 *
 * @param tabletId id of the tablet to look up
 * @return the stored partition id, or {@code TabletInvertedIndex.NOT_EXIST_VALUE} when
 *         the tablet is not present
 */
public long getPartitionId(long tabletId) {
    final int slot = tabletIdToSlot.get(tabletId);
    if (slot == ABSENT) {
        return TabletInvertedIndex.NOT_EXIST_VALUE;
    }
    return partitionIds[slot];
}

/**
 * Looks up the index id recorded for a tablet.
 *
 * @param tabletId id of the tablet to look up
 * @return the stored index id, or {@code TabletInvertedIndex.NOT_EXIST_VALUE} when
 *         the tablet is not present
 */
public long getIndexId(long tabletId) {
    final int slot = tabletIdToSlot.get(tabletId);
    if (slot == ABSENT) {
        return TabletInvertedIndex.NOT_EXIST_VALUE;
    }
    return indexIds[slot];
}

/**
 * Looks up the old schema hash recorded for a tablet.
 *
 * @param tabletId id of the tablet to look up
 * @return the stored old schema hash, or {@code TabletInvertedIndex.NOT_EXIST_VALUE}
 *         when the tablet is not present
 */
public int getOldSchemaHash(long tabletId) {
    final int slot = tabletIdToSlot.get(tabletId);
    if (slot == ABSENT) {
        return TabletInvertedIndex.NOT_EXIST_VALUE;
    }
    return oldSchemaHashes[slot];
}

/**
 * Looks up the storage medium recorded for a tablet.
 *
 * @param tabletId id of the tablet to look up
 * @return the stored storage medium, or {@code null} when the tablet is not present
 */
public TStorageMedium getStorageMedium(long tabletId) {
    final int slot = tabletIdToSlot.get(tabletId);
    // The stored byte is decoded by indexing the cached TStorageMedium.values() array.
    return slot == ABSENT ? null : MEDIUM_VALUES[storageMediumOrdinals[slot]];
}

/**
 * Updates the storage medium recorded for a tablet. Does nothing when the tablet is
 * not present.
 *
 * @param tabletId id of the tablet to update
 * @param medium   new storage medium; must not be {@code null} when the tablet exists
 */
public void setStorageMedium(long tabletId, TStorageMedium medium) {
    final int slot = tabletIdToSlot.get(tabletId);
    if (slot != ABSENT) {
        // Store the enum ordinal, not the thrift wire value from getValue(): readers
        // decode through MEDIUM_VALUES (TStorageMedium.values()), which is indexed by
        // ordinal.
        storageMediumOrdinals[slot] = (byte) medium.ordinal();
    }
}

/**
 * Materialises a {@code TabletMeta} object for callers that still work with the
 * object form. A new instance is built from the parallel arrays on every call.
 *
 * @param tabletId id of the tablet to look up
 * @return a freshly constructed {@code TabletMeta}, or {@code null} if the tabletId
 *         is not present
 */
public TabletMeta getTabletMeta(long tabletId) {
    final int slot = tabletIdToSlot.get(tabletId);
    if (slot != ABSENT) {
        final TStorageMedium medium = MEDIUM_VALUES[storageMediumOrdinals[slot]];
        return new TabletMeta(dbIds[slot], tableIds[slot], partitionIds[slot], indexIds[slot],
                oldSchemaHashes[slot], medium);
    }
    return null;
}

/**
 * Copies every live entry into a new {@code tabletId -> TabletMeta} map, constructing
 * one {@code TabletMeta} per tablet. Kept for backward compatibility (test-only usage).
 *
 * @return a new map holding one entry per stored tablet
 */
public Map<Long, TabletMeta> toMap() {
    final Map<Long, TabletMeta> snapshot = new HashMap<>(size * 4 / 3 + 1);
    for (Long2IntOpenHashMap.Entry mapping : tabletIdToSlot.long2IntEntrySet()) {
        final int slot = mapping.getIntValue();
        final TabletMeta rebuilt = new TabletMeta(
                dbIds[slot],
                tableIds[slot],
                partitionIds[slot],
                indexIds[slot],
                oldSchemaHashes[slot],
                MEDIUM_VALUES[storageMediumOrdinals[slot]]);
        snapshot.put(mapping.getLongKey(), rebuilt);
    }
    return snapshot;
}

/**
 * Returns the number of live entries, i.e. tablets added and not yet removed.
 */
public int size() {
return size;
}

/**
 * Drops all entries. The parallel arrays are kept at their current length and are not
 * wiped; their old contents become unreachable because the id-to-slot map is emptied
 * and slot allocation restarts from index 0 with an empty free list.
 */
public void clear() {
    size = 0;
    highWaterMark = 0;
    freeHead = ABSENT;
    tabletIdToSlot.clear();
}

/**
 * Hands out a slot index for a new entry, preferring previously released slots over
 * never-used ones and growing the arrays when none is left.
 *
 * @return index of a slot the caller may fill
 */
private int allocateSlot() {
    if (freeHead == ABSENT) {
        // Free list is empty: take the next never-used slot, growing first if needed.
        if (highWaterMark == capacity) {
            grow();
        }
        final int fresh = highWaterMark;
        highWaterMark = fresh + 1;
        return fresh;
    }
    // Pop the head of the free list; its successor is stored in dbIds.
    final int reused = freeHead;
    freeHead = (int) dbIds[reused];
    return reused;
}

/**
 * Pushes {@code slot} onto the front of the free list so a later allocateSlot() call
 * can hand it out again.
 */
private void freeSlot(int slot) {
// Reuse the slot's dbIds cell as the "next free slot" link; the old database id is
// overwritten.
dbIds[slot] = freeHead;
freeHead = slot;
}

/**
 * Doubles the length of every parallel array, preserving existing contents, and
 * records the new capacity.
 */
private void grow() {
    final int doubled = capacity << 1;
    // Arrays.copyOf zero-fills the newly added tail of each array.
    storageMediumOrdinals = Arrays.copyOf(storageMediumOrdinals, doubled);
    oldSchemaHashes = Arrays.copyOf(oldSchemaHashes, doubled);
    indexIds = Arrays.copyOf(indexIds, doubled);
    partitionIds = Arrays.copyOf(partitionIds, doubled);
    tableIds = Arrays.copyOf(tableIds, doubled);
    dbIds = Arrays.copyOf(dbIds, doubled);
    capacity = doubled;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -224,9 +224,9 @@ private void processTabletEntry(long backendId, Map<Long, TTablet> backendTablet
long tabletId = entry.getKey();
Replica replica = entry.getValue();

Preconditions.checkState(tabletMetaMap.containsKey(tabletId),
Preconditions.checkState(tabletMetaStore.containsKey(tabletId),
"tablet " + tabletId + " not exists, backend " + backendId);
TabletMeta tabletMeta = tabletMetaMap.get(tabletId);
TabletMeta tabletMeta = tabletMetaStore.getTabletMeta(tabletId);

if (backendTablets.containsKey(tabletId)) {
// Tablet exists in both FE and BE
Expand Down Expand Up @@ -408,7 +408,7 @@ && isLocal(tabletMeta.getStorageMedium())) {
}

if (storageMedium != tabletMeta.getStorageMedium()) {
tabletMeta.setStorageMedium(storageMedium);
tabletMetaStore.setStorageMedium(tabletId, storageMedium);
}
}
}
Expand Down Expand Up @@ -740,7 +740,7 @@ public void deleteTablet(long tabletId) {
}
}
}
tabletMetaMap.remove(tabletId);
tabletMetaStore.remove(tabletId);
if (LOG.isDebugEnabled()) {
LOG.debug("delete tablet: {}", tabletId);
}
Expand All @@ -754,7 +754,7 @@ public void addReplica(long tabletId, Replica replica) {
long stamp = writeLock();
try {
long backendId = replica.getBackendIdWithoutException();
Preconditions.checkState(tabletMetaMap.containsKey(tabletId),
Preconditions.checkState(tabletMetaStore.containsKey(tabletId),
"tablet " + tabletId + " not exists, replica " + replica.getId()
+ ", backend " + backendId);
replicaMetaTable.put(tabletId, backendId, replica);
Expand All @@ -773,7 +773,7 @@ public void addReplica(long tabletId, Replica replica) {
public void deleteReplica(long tabletId, long backendId) {
long stamp = writeLock();
try {
Preconditions.checkState(tabletMetaMap.containsKey(tabletId),
Preconditions.checkState(tabletMetaStore.containsKey(tabletId),
"tablet " + tabletId + " not exists, backend " + backendId);
if (replicaMetaTable.containsRow(tabletId)) {
Replica replica = replicaMetaTable.remove(tabletId, backendId);
Expand Down Expand Up @@ -804,7 +804,7 @@ public void deleteReplica(long tabletId, long backendId) {
public Replica getReplica(long tabletId, long backendId) {
long stamp = readLock();
try {
Preconditions.checkState(tabletMetaMap.containsKey(tabletId),
Preconditions.checkState(tabletMetaStore.containsKey(tabletId),
"tablet " + tabletId + " not exists, backend " + backendId);
return replicaMetaTable.get(tabletId, backendId);
} finally {
Expand Down Expand Up @@ -862,7 +862,7 @@ public List<Pair<Long, Long>> getTabletSizeByBackendIdAndStorageMedium(long back
Map<Long, Replica> replicaMetaWithBackend = backingReplicaMetaTable.get(backendId);
if (replicaMetaWithBackend != null) {
return replicaMetaWithBackend.entrySet().stream()
.filter(entry -> tabletMetaMap.get(entry.getKey()).getStorageMedium() == storageMedium)
.filter(entry -> tabletMetaStore.getStorageMedium(entry.getKey()) == storageMedium)
.map(entry -> Pair.of(entry.getKey(), entry.getValue().getDataSize()))
.collect(Collectors.toList());
}
Expand Down Expand Up @@ -896,7 +896,7 @@ public Map<TStorageMedium, Long> getReplicaNumByBeIdAndStorageMedium(long backen
Map<Long, Replica> replicaMetaWithBackend = backingReplicaMetaTable.get(backendId);
if (replicaMetaWithBackend != null) {
for (long tabletId : replicaMetaWithBackend.keySet()) {
if (tabletMetaMap.get(tabletId).getStorageMedium() == TStorageMedium.HDD) {
if (tabletMetaStore.getStorageMedium(tabletId) == TStorageMedium.HDD) {
hddNum++;
} else {
ssdNum++;
Expand Down Expand Up @@ -971,28 +971,34 @@ public Map<TStorageMedium, TreeMultimap<Long, PartitionBalanceInfo>> buildPartit

try {
Preconditions.checkState(availableBeIds.contains(beId), "dead be " + beId);
TabletMeta tabletMeta = tabletMetaMap.get(tabletId);
if (dbIds.contains(tabletMeta.getDbId()) || tableIds.contains(tabletMeta.getTableId())
|| partitionIds.contains(tabletMeta.getPartitionId())) {
// Use individual field accessors to avoid constructing a TabletMeta object
// per iteration, reducing GC pressure when iterating over millions of tablets.
long dbId = tabletMetaStore.getDbId(tabletId);
if (dbId == TabletInvertedIndex.NOT_EXIST_VALUE) {
continue;
}
long tableId = tabletMetaStore.getTableId(tabletId);
long partitionId = tabletMetaStore.getPartitionId(tabletId);
if (dbIds.contains(dbId) || tableIds.contains(tableId)
|| partitionIds.contains(partitionId)) {
continue;
}
Preconditions.checkNotNull(tabletMeta, "invalid tablet " + tabletId);
Preconditions.checkState(
!Env.getCurrentColocateIndex().isColocateTable(tabletMeta.getTableId()),
"table " + tabletMeta.getTableId() + " should not be the colocate table");
!Env.getCurrentColocateIndex().isColocateTable(tableId),
"table " + tableId + " should not be the colocate table");

TStorageMedium medium = tabletMeta.getStorageMedium();
long indexId = tabletMetaStore.getIndexId(tabletId);
TStorageMedium medium = tabletMetaStore.getStorageMedium(tabletId);
Table<Long, Long, Map<Long, Long>> partitionReplicasInfo = partitionReplicasInfoMaps.get(medium);
Map<Long, Long> countMap = partitionReplicasInfo.get(
tabletMeta.getPartitionId(), tabletMeta.getIndexId());
Map<Long, Long> countMap = partitionReplicasInfo.get(partitionId, indexId);
if (countMap == null) {
// If one be doesn't have any replica of one partition, it should be counted too.
countMap = availableBeIds.stream().collect(Collectors.toMap(i -> i, i -> 0L));
}

Long count = countMap.get(beId);
countMap.put(beId, count + 1L);
partitionReplicasInfo.put(tabletMeta.getPartitionId(), tabletMeta.getIndexId(), countMap);
partitionReplicasInfo.put(partitionId, indexId, countMap);
partitionReplicasInfoMaps.put(medium, partitionReplicasInfo);
} catch (IllegalStateException | NullPointerException e) {
// If the tablet or be has some problem, don't count in
Expand Down
Loading
Loading