Skip to content

Commit bbf1687

Browse files
committed
Introduce MVCC key reclaimability checks with seqNumMin & seqNumMax
Signed-off-by: peterxcli <peterxcli@gmail.com>
1 parent e0e821f commit bbf1687

13 files changed

Lines changed: 459 additions & 2 deletions

File tree

hadoop-hdds/common/src/main/resources/ozone-default.xml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2429,6 +2429,19 @@
24292429
</description>
24302430
</property>
24312431

2432+
<property>
2433+
<name>ozone.om.snapshot.key.reclaim.interval.enabled</name>
2434+
<value>true</value>
2435+
<tag>OZONE, OM, MANAGEMENT, PERFORMANCE</tag>
2436+
<description>
2437+
Enables MVCC-style deleted key reclaimability checks using optional
2438+
seqNumMin and seqNumMax fields on key versions. Entries without complete
2439+
sequence interval metadata, or buckets whose active snapshot create
2440+
sequence numbers cannot be built exactly, fall back to the previous
2441+
snapshot-lookup-based reclaimability logic.
2442+
</description>
2443+
</property>
2444+
24322445
<property>
24332446
<name>ozone.acl.authorizer.class</name>
24342447
<value>org.apache.hadoop.ozone.security.acl.OzoneAccessAuthorizer</value>

hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/OmUtils.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -640,6 +640,9 @@ public static File createOMDir(String dirPath) {
640640
* create a new instance to include this key, else we update the existing
641641
* repeatedOmKeyInfo instance.
642642
* 3. Set the updateID to the transactionLogIndex.
643+
* 4. Set seqNumMax to the transactionLogIndex when seqNumMin is present,
644+
* so that the interval's upper bound is exclusive at the transaction that deleted or
645+
* overwrote this version.
643646
* @param keyInfo args supplied by client
644647
* @param bucketId bucket id
645648
* @param trxnLogIndex For Multipart keys, this is the transactionLogIndex
@@ -667,6 +670,9 @@ public static RepeatedOmKeyInfo prepareKeyForDelete(long bucketId, OmKeyInfo key
667670

668671
// Set the updateID
669672
builder.setUpdateID(trxnLogIndex);
673+
if (keyInfo.hasSeqNumMin()) {
674+
builder.setSeqNumMax(trxnLogIndex);
675+
}
670676

671677
//The key doesn't exist in deletedTable, so create a new instance.
672678
return new RepeatedOmKeyInfo(builder.build(), bucketId);

hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OMConfigKeys.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,11 @@ public final class OMConfigKeys {
292292
"ozone.om.fs.snapshot.max.limit";
293293
public static final int OZONE_OM_FS_SNAPSHOT_MAX_LIMIT_DEFAULT = 10000;
294294

295+
public static final String OZONE_OM_SNAPSHOT_KEY_RECLAIM_INTERVAL_ENABLED =
296+
"ozone.om.snapshot.key.reclaim.interval.enabled";
297+
public static final boolean
298+
OZONE_OM_SNAPSHOT_KEY_RECLAIM_INTERVAL_ENABLED_DEFAULT = true;
299+
295300
public static final String OZONE_OM_KERBEROS_KEYTAB_FILE_KEY = "ozone.om."
296301
+ "kerberos.keytab.file";
297302
public static final String OZONE_OM_KERBEROS_PRINCIPAL_KEY = "ozone.om"

hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmKeyInfo.java

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@ public final class OmKeyInfo extends WithParentObjectId
111111
// been modified.
112112
private Long expectedDataGeneration = null;
113113
private String expectedETag;
114+
private Long seqNumMin;
115+
private Long seqNumMax;
114116

115117
private OmKeyInfo(Builder b) {
116118
super(b);
@@ -131,6 +133,8 @@ private OmKeyInfo(Builder b) {
131133
this.tags = b.tags.build();
132134
this.expectedDataGeneration = b.expectedDataGeneration;
133135
this.expectedETag = b.expectedETag;
136+
this.seqNumMin = b.seqNumMin;
137+
this.seqNumMax = b.seqNumMax;
134138
}
135139

136140
private static Codec<OmKeyInfo> newCodec(boolean ignorePipeline) {
@@ -199,6 +203,30 @@ public String getExpectedETag() {
199203
return expectedETag;
200204
}
201205

206+
public void setSeqNumMin(Long seqNumMin) {
207+
this.seqNumMin = seqNumMin;
208+
}
209+
210+
public Long getSeqNumMin() {
211+
return seqNumMin;
212+
}
213+
214+
public boolean hasSeqNumMin() {
215+
return seqNumMin != null;
216+
}
217+
218+
public void setSeqNumMax(Long seqNumMax) {
219+
this.seqNumMax = seqNumMax;
220+
}
221+
222+
public Long getSeqNumMax() {
223+
return seqNumMax;
224+
}
225+
226+
public boolean hasSeqNumMax() {
227+
return seqNumMax != null;
228+
}
229+
202230
public String getOwnerName() {
203231
return ownerName;
204232
}
@@ -475,6 +503,8 @@ public String toString() {
475503
", fileChecksum=" + fileChecksum +
476504
", isFile=" + isFile +
477505
", fileName='" + fileName + '\'' +
506+
", seqNumMin=" + seqNumMin +
507+
", seqNumMax=" + seqNumMax +
478508
", acls=" + acls +
479509
'}';
480510
}
@@ -503,6 +533,8 @@ public static class Builder extends WithParentObjectId.Builder<OmKeyInfo> {
503533
private final MapBuilder<String, String> tags;
504534
private Long expectedDataGeneration = null;
505535
private String expectedETag;
536+
private Long seqNumMin;
537+
private Long seqNumMax;
506538

507539
public Builder() {
508540
this.acls = AclListBuilder.empty();
@@ -526,6 +558,8 @@ public Builder(OmKeyInfo obj) {
526558
this.isFile = obj.isFile;
527559
this.expectedDataGeneration = obj.expectedDataGeneration;
528560
this.expectedETag = obj.expectedETag;
561+
this.seqNumMin = obj.seqNumMin;
562+
this.seqNumMax = obj.seqNumMax;
529563
this.tags = MapBuilder.of(obj.tags);
530564
obj.keyLocationVersions.forEach(keyLocationVersion ->
531565
this.omKeyLocationInfoGroups.add(
@@ -702,6 +736,16 @@ public Builder setExpectedETag(String eTag) {
702736
return this;
703737
}
704738

739+
public Builder setSeqNumMin(Long seqNum) {
740+
this.seqNumMin = seqNum;
741+
return this;
742+
}
743+
744+
public Builder setSeqNumMax(Long seqNum) {
745+
this.seqNumMax = seqNum;
746+
return this;
747+
}
748+
705749
@Override
706750
protected void validate() {
707751
super.validate();
@@ -824,6 +868,12 @@ private KeyInfo getProtobuf(boolean ignorePipeline, String fullKeyName,
824868
if (expectedETag != null) {
825869
kb.setExpectedETag(expectedETag);
826870
}
871+
if (seqNumMin != null) {
872+
kb.setSeqNumMin(seqNumMin);
873+
}
874+
if (seqNumMax != null) {
875+
kb.setSeqNumMax(seqNumMax);
876+
}
827877
if (ownerName != null) {
828878
kb.setOwnerName(ownerName);
829879
}
@@ -880,6 +930,12 @@ public static Builder builderFromProtobuf(KeyInfo keyInfo) {
880930
if (keyInfo.hasExpectedETag()) {
881931
builder.setExpectedETag(keyInfo.getExpectedETag());
882932
}
933+
if (keyInfo.hasSeqNumMin()) {
934+
builder.setSeqNumMin(keyInfo.getSeqNumMin());
935+
}
936+
if (keyInfo.hasSeqNumMax()) {
937+
builder.setSeqNumMax(keyInfo.getSeqNumMax());
938+
}
883939

884940
if (keyInfo.hasOwnerName()) {
885941
builder.setOwnerName(keyInfo.getOwnerName());
@@ -903,6 +959,8 @@ public String getObjectInfo() {
903959
", creationTime='" + creationTime + '\'' +
904960
", objectID='" + getObjectID() + '\'' +
905961
", parentID='" + getParentObjectID() + '\'' +
962+
", seqNumMin='" + seqNumMin + '\'' +
963+
", seqNumMax='" + seqNumMax + '\'' +
906964
", replication='" + replicationConfig + '\'' +
907965
", fileChecksum='" + fileChecksum +
908966
'}';
@@ -921,6 +979,8 @@ public boolean isKeyInfoSame(OmKeyInfo omKeyInfo, boolean checkPath,
921979
Objects.equals(getMetadata(), omKeyInfo.getMetadata()) &&
922980
Objects.equals(acls, omKeyInfo.acls) &&
923981
Objects.equals(getTags(), omKeyInfo.getTags()) &&
982+
Objects.equals(seqNumMin, omKeyInfo.seqNumMin) &&
983+
Objects.equals(seqNumMax, omKeyInfo.seqNumMax) &&
924984
getObjectID() == omKeyInfo.getObjectID();
925985

926986
if (isEqual && checkUpdateID) {

hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmKeyInfo.java

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import static org.junit.jupiter.api.Assertions.assertFalse;
2626
import static org.junit.jupiter.api.Assertions.assertNotEquals;
2727
import static org.junit.jupiter.api.Assertions.assertNotNull;
28+
import static org.junit.jupiter.api.Assertions.assertNull;
2829
import static org.junit.jupiter.api.Assertions.assertTrue;
2930

3031
import java.io.IOException;
@@ -41,6 +42,7 @@
4142
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
4243
import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
4344
import org.apache.hadoop.ozone.ClientVersion;
45+
import org.apache.hadoop.ozone.OmUtils;
4446
import org.apache.hadoop.ozone.OzoneAcl;
4547
import org.apache.hadoop.ozone.OzoneConsts;
4648
import org.apache.hadoop.ozone.om.helpers.OmKeyInfo.Builder;
@@ -73,6 +75,62 @@ public void protobufConversion() throws IOException {
7375
assertEquals(5678L, key.getExpectedDataGeneration());
7476
}
7577

78+
@Test
79+
public void protobufConversionPreservesSeqNumIntervalPresence()
80+
throws IOException {
81+
OmKeyInfo key = createOmKeyInfo(
82+
RatisReplicationConfig.getInstance(ReplicationFactor.THREE));
83+
OzoneManagerProtocolProtos.KeyInfo proto =
84+
key.getProtobuf(ClientVersion.CURRENT_VERSION);
85+
assertFalse(proto.hasSeqNumMin());
86+
assertFalse(proto.hasSeqNumMax());
87+
88+
OmKeyInfo recovered = OmKeyInfo.getFromProtobuf(proto);
89+
assertNull(recovered.getSeqNumMin());
90+
assertNull(recovered.getSeqNumMax());
91+
assertFalse(recovered.hasSeqNumMin());
92+
assertFalse(recovered.hasSeqNumMax());
93+
94+
key = key.toBuilder()
95+
.setSeqNumMin(1234L)
96+
.setSeqNumMax(5678L)
97+
.build();
98+
proto = key.getProtobuf(ClientVersion.CURRENT_VERSION);
99+
assertTrue(proto.hasSeqNumMin());
100+
assertTrue(proto.hasSeqNumMax());
101+
assertEquals(1234L, proto.getSeqNumMin());
102+
assertEquals(5678L, proto.getSeqNumMax());
103+
104+
recovered = OmKeyInfo.getFromProtobuf(proto);
105+
assertEquals(1234L, recovered.getSeqNumMin());
106+
assertEquals(5678L, recovered.getSeqNumMax());
107+
assertTrue(recovered.hasSeqNumMin());
108+
assertTrue(recovered.hasSeqNumMax());
109+
assertEquals(key, recovered);
110+
}
111+
112+
@Test
113+
public void prepareKeyForDeletePreservesSeqNumMinAndSetsSeqNumMax() {
114+
OmKeyInfo key = createOmKeyInfo(
115+
RatisReplicationConfig.getInstance(ReplicationFactor.THREE))
116+
.toBuilder()
117+
.setSeqNumMin(11L)
118+
.build();
119+
120+
RepeatedOmKeyInfo deletedKeyInfo =
121+
OmUtils.prepareKeyForDelete(1L, key, 22L);
122+
OmKeyInfo deletedKey = deletedKeyInfo.getOmKeyInfoList().get(0);
123+
assertEquals(11L, deletedKey.getSeqNumMin());
124+
assertEquals(22L, deletedKey.getSeqNumMax());
125+
126+
key = createOmKeyInfo(
127+
RatisReplicationConfig.getInstance(ReplicationFactor.THREE));
128+
deletedKeyInfo = OmUtils.prepareKeyForDelete(1L, key, 22L);
129+
deletedKey = deletedKeyInfo.getOmKeyInfoList().get(0);
130+
assertNull(deletedKey.getSeqNumMin());
131+
assertNull(deletedKey.getSeqNumMax());
132+
}
133+
76134
@Test
77135
public void getProtobufMessageEC() throws IOException {
78136
OmKeyInfo key = createOmKeyInfo(

hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1197,6 +1197,12 @@ message KeyInfo {
11971197
// the given ETag for the operation to succeed. This is used for
11981198
// S3 conditional writes with the If-Match header.
11991199
optional string expectedETag = 23;
1200+
1201+
// MVCC-style visibility interval for snapshot key reclamation.
1202+
// A key version is visible to a snapshot when:
1203+
// seqNumMin <= snapshotCreateSeqNum < seqNumMax.
1204+
optional uint64 seqNumMin = 24;
1205+
optional uint64 seqNumMax = 25;
12001206
}
12011207

12021208
// KeyInfoProtoLight is a lightweight subset of KeyInfo message containing

hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/DeletingServiceMetrics.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,10 @@ public final class DeletingServiceMetrics {
109109
private MutableGaugeLong snapKeysIteratedLast;
110110
@Metric("Snapshot: No. of not reclaimable keys the last run")
111111
private MutableGaugeLong snapKeysNotReclaimableLast;
112+
@Metric("Snapshot: No. of key reclaimability decisions optimized by seqNum intervals")
113+
private MutableGaugeLong snapKeyReclaimIntervalOptimized;
114+
@Metric("Snapshot: No. of key reclaimability decisions falling back from seqNum intervals")
115+
private MutableGaugeLong snapKeyReclaimIntervalFallback;
112116

113117
/**
114118
* Metric to track the term ID of the last key that was purged from the
@@ -299,6 +303,22 @@ public long getSnapKeysNotReclaimableLast() {
299303
return snapKeysNotReclaimableLast.value();
300304
}
301305

306+
public void incrSnapKeyReclaimIntervalOptimized() {
307+
this.snapKeyReclaimIntervalOptimized.incr(1L);
308+
}
309+
310+
public long getSnapKeyReclaimIntervalOptimized() {
311+
return snapKeyReclaimIntervalOptimized.value();
312+
}
313+
314+
public void incrSnapKeyReclaimIntervalFallback() {
315+
this.snapKeyReclaimIntervalFallback.incr(1L);
316+
}
317+
318+
public long getSnapKeyReclaimIntervalFallback() {
319+
return snapKeyReclaimIntervalFallback.value();
320+
}
321+
302322
public synchronized TransactionInfo getLastAOSTransactionInfo() {
303323
return TransactionInfo.valueOf(lastAOSPurgeTermId.value(), lastAOSPurgeTransactionId.value());
304324
}

hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyCommitRequest.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, Execut
313313
.addAllMetadata(KeyValueUtil.getFromProtobuf(
314314
commitKeyArgs.getMetadataList()))
315315
.setUpdateID(trxnLogIndex)
316+
.setSeqNumMin(trxnLogIndex)
316317
.setDataSize(commitKeyArgs.getDataSize())
317318
.build();
318319

hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyCommitRequestWithFSO.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, Execut
231231
commitKeyArgs.getMetadataList()))
232232
.setDataSize(commitKeyArgs.getDataSize())
233233
.setUpdateID(trxnLogIndex)
234+
.setSeqNumMin(trxnLogIndex)
234235
.build();
235236

236237
List<OmKeyLocationInfo> uncommitted =
@@ -318,8 +319,9 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, Execut
318319
if (null == oldKeyVersionsToDeleteMap) {
319320
oldKeyVersionsToDeleteMap = new HashMap<>();
320321
}
322+
OmKeyInfo keyInfoToDelete = prepareKeyInfoForDeleteMap(trxnLogIndex, pseudoKeyInfo);
321323
oldKeyVersionsToDeleteMap.computeIfAbsent(delKeyName,
322-
key -> new RepeatedOmKeyInfo(omBucketInfo.getObjectID())).addOmKeyInfo(pseudoKeyInfo);
324+
key -> new RepeatedOmKeyInfo(omBucketInfo.getObjectID())).addOmKeyInfo(keyInfoToDelete);
323325
}
324326

325327
// Add to cache of open key table and key table.

hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/key/OMKeyRequest.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1185,6 +1185,7 @@ protected static Map<String, RepeatedOmKeyInfo> addKeyInfoToDeleteMap(OzoneManag
11851185
if (keyInfo == null) {
11861186
return deleteMap;
11871187
}
1188+
keyInfo = prepareKeyInfoForDeleteMap(trxnLogIndex, keyInfo);
11881189
final long pseudoObjId = om.getObjectIdFromTxId(trxnLogIndex);
11891190
final String delKeyName = om.getMetadataManager().getOzoneDeletePathKey(pseudoObjId, ozoneKey);
11901191
if (deleteMap == null) {
@@ -1195,6 +1196,17 @@ protected static Map<String, RepeatedOmKeyInfo> addKeyInfoToDeleteMap(OzoneManag
11951196
return deleteMap;
11961197
}
11971198

1199+
protected static OmKeyInfo prepareKeyInfoForDeleteMap(long trxnLogIndex, OmKeyInfo keyInfo) {
1200+
if (keyInfo.getObjectID() == OBJECT_ID_RECLAIM_BLOCKS) {
1201+
return keyInfo.toBuilder()
1202+
.setUpdateID(trxnLogIndex)
1203+
.setSeqNumMin(trxnLogIndex)
1204+
.setSeqNumMax(trxnLogIndex)
1205+
.build();
1206+
}
1207+
return keyInfo;
1208+
}
1209+
11981210
/**
11991211
* Remove blocks in-place from keysToBeFiltered that exist in referenceKey.
12001212
* <p>

0 commit comments

Comments
 (0)