Skip to content

Commit b52a8da

Browse files
author
Kerkesni
committed
1 parent 682643d commit b52a8da

6 files changed

Lines changed: 142 additions & 73 deletions

File tree

lib/storage/metadata/MetadataWrapper.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ class MetadataWrapper {
117117
replicaSet: params.mongodb.replicaSet,
118118
readPreference: params.mongodb.readPreference,
119119
database: params.mongodb.database,
120+
instanceId: params.instanceId,
120121
replicationGroupId: params.replicationGroupId,
121122
path: params.mongodb.path,
122123
authCredentials: params.mongodb.authCredentials,

lib/storage/metadata/mongoclient/MongoClientInterface.ts

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ import {
3737
} from 'mongodb';
3838
import { v4 as uuidv4 } from 'uuid';
3939

40-
import { generateUniqueVersionId } from '../../../versioning/VersionID';
40+
import { generateVersionId } from '../../../versioning/VersionID';
4141
import * as listAlgos from '../../../algos/list/exportAlgos';
4242
import LRUCache from '../../../algos/cache/LRUCache';
4343

@@ -98,6 +98,7 @@ export type MongoDBClientInterfaceParameters = {
9898
path: string,
9999
database: string,
100100
logger: werelogs.Logger,
101+
instanceId: string,
101102
replicationGroupId: string,
102103
authCredentials: MongoUtils.AuthCredentials,
103104
isLocationTransient: Function,
@@ -245,6 +246,7 @@ class MongoClientInterface {
245246
private client: MongoClient | null;
246247
private db: Db | null;
247248
private path: string;
249+
private instanceId: string;
248250
private replicationGroupId: string;
249251
private database: string;
250252
private isLocationTransient: Function;
@@ -261,7 +263,7 @@ class MongoClientInterface {
261263

262264
constructor(params: MongoDBClientInterfaceParameters) {
263265
const { replicaSetHosts, writeConcern, replicaSet, readPreference, path,
264-
database, logger, replicationGroupId, authCredentials,
266+
database, logger, instanceId, replicationGroupId, authCredentials,
265267
isLocationTransient, shardCollections } = params;
266268
const cred = MongoUtils.credPrefix(authCredentials);
267269
this.mongoUrl = `mongodb://${cred}${replicaSetHosts}/` +
@@ -276,6 +278,7 @@ class MongoClientInterface {
276278
this.adminDb = null;
277279
this.logger = logger;
278280
this.path = path;
281+
this.instanceId = instanceId;
279282
this.replicationGroupId = replicationGroupId;
280283
this.database = database;
281284
this.isLocationTransient = isLocationTransient;
@@ -837,7 +840,7 @@ class MongoClientInterface {
837840
cb: ArsenalCallback<string>,
838841
isRetry?: boolean,
839842
) {
840-
const versionId = generateUniqueVersionId(this.replicationGroupId);
843+
const versionId = generateVersionId(this.instanceId, this.replicationGroupId);
841844
objVal.versionId = versionId;
842845
const versionKey = formatVersionKey(objName, versionId, params.vFormat);
843846
const masterKey = formatMasterKey(objName, params.vFormat);
@@ -964,7 +967,7 @@ class MongoClientInterface {
964967
log: werelogs.Logger,
965968
cb: ArsenalCallback<string>,
966969
) {
967-
const versionId = generateUniqueVersionId(this.replicationGroupId);
970+
const versionId = generateVersionId(this.instanceId, this.replicationGroupId);
968971
objVal.versionId = versionId;
969972
const masterKey = formatMasterKey(objName, params.vFormat);
970973
c.updateOne({ _id: masterKey },
@@ -1795,7 +1798,7 @@ class MongoClientInterface {
17951798
) {
17961799
const masterKey = formatMasterKey(objName, params.vFormat);
17971800
const versionKey = formatVersionKey(objName, params.versionId, params.vFormat);
1798-
const _vid = generateUniqueVersionId(this.replicationGroupId);
1801+
const _vid = generateVersionId(this.instanceId, this.replicationGroupId);
17991802
async.series([
18001803
next => c.updateOne(
18011804
{

lib/versioning/VersionID.ts

Lines changed: 80 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,53 @@
1-
// VersionID format:
1+
// Hex VersionID format:
22
// timestamp sequential_position rep_group_id other_information
33
// where:
44
// - timestamp 14 bytes epoch in ms (good until 5138)
55
// - sequential_position 06 bytes position in the ms slot (1B ops)
66
// - rep_group_id 07 bytes replication group identifier
77
// - other_information arbitrary user input, such as a unique string
8+
//
9+
// Base62 VersionID format:
10+
// timestamp sequential_position rep_group_id instance_id version_id_format
11+
// where:
12+
// - timestamp 14 bytes epoch in ms
13+
// - sequential_position 06 bytes position in the ms slot
14+
// - rep_group_id 07 bytes replication group identifier
15+
// - instance_id 06 bytes unique instance identifier (optional)
16+
// - version_id_format 02 bytes version ID format marker + version
817

918
import base62Integer from 'base62';
1019
import baseX from 'base-x';
20+
import assert from 'assert';
21+
import { VersioningConstants } from './constants';
1122
const BASE62 = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
1223
const base62String = baseX(BASE62);
1324

1425
// the lengths of the components in bytes
1526
const LENGTH_TS = 14; // timestamp: epoch in ms
1627
const LENGTH_SEQ = 6; // position in ms slot
1728
const LENGTH_RG = 7; // replication group id
29+
const LENGTH_ID = 6; // instance id, can be empty
30+
const LENGTH_FT = 2; // version ID format, 1 byte + separator
1831

1932
// empty string template for the variables in a versionId
2033
const TEMPLATE_TS = new Array(LENGTH_TS + 1).join('0');
2134
const TEMPLATE_SEQ = new Array(LENGTH_SEQ + 1).join('0');
22-
const TEMPLATE_RG = new Array(LENGTH_RG + 1).join(' ');
35+
const TEMPLATE_RG = new Array(LENGTH_RG + 1).join('0');
36+
const TEMPLATE_ID = new Array(LENGTH_ID + 1).join('0');
2337

2438
export const S3_VERSION_ID_ENCODING_TYPE = process.env.S3_VERSION_ID_ENCODING_TYPE;
2539

26-
// Counter that is increased after each call to generateUniqueVersionId
27-
export let uidCounter = 0;
28-
export const versionIdSeed = getVersionIdSeed();
40+
const versionIDFormat = '1';
41+
42+
/**
43+
* Check if the S3_VERSION_ID_ENCODING_TYPE is set to 'hex' or not set at all.
44+
* If it is, we use the legacy hex encoding for version IDs.
45+
*
46+
* @return - true if hex encoding type is used, false otherwise
47+
*/
48+
export function isHexEncodingType() {
49+
return S3_VERSION_ID_ENCODING_TYPE === 'hex' || !S3_VERSION_ID_ENCODING_TYPE;
50+
}
2951

3052
/**
3153
* Left-pad a string representation of a value with a given template.
@@ -91,23 +113,6 @@ function wait(span: number) {
91113
}
92114
}
93115

94-
export function getVersionIdSeed(): string {
95-
// The HOSTNAME environment variable is set by default by Kubernetes
96-
// and populated with the pod name, containing a suffix with a unique id
97-
// as a string.
98-
// By default, we rely on the pid, to account for multiple workers in
99-
// cluster mode. As a result, the unique id is either <pod-suffix>.<pid>
100-
// or <pid>.
101-
// If unique vID are needed in a multi cluster mode architecture (i.e.,
102-
// multiple server instances, each with multiple workers), the
103-
// HOSTNAME environment variable can be set.
104-
return `${process.env.HOSTNAME?.split('-').pop() || ''}${process.pid}`;
105-
}
106-
107-
export function generateUniqueVersionId(replicationGroupId: string): string {
108-
return generateVersionId(`${versionIdSeed}.${uidCounter++}`, replicationGroupId);
109-
}
110-
111116
/**
112117
* This function returns a "versionId" string indicating the current time as a
113118
* combination of the current time in millisecond, the position of the request
@@ -124,6 +129,21 @@ export function generateVersionId(info: string, replicationGroupId: string): str
124129
// replication group ID, like PARIS; will be trimmed if it exceeds LENGTH_RG
125130
const repGroupId = padRight(replicationGroupId, TEMPLATE_RG);
126131

132+
let otherInfo = '';
133+
let instanceIdPadded = '';
134+
let vidFormat = '';
135+
136+
if (isHexEncodingType()) {
137+
// In HEX encoding, the full info data is used.
138+
otherInfo = info;
139+
} else {
140+
// In base62, info is for the instance ID and is trimmed/padded.
141+
instanceIdPadded = padRight(info, TEMPLATE_ID);
142+
// versionID format, 2 bytes
143+
vidFormat = VersioningConstants.VersionId.FormatMarker + versionIDFormat;
144+
assert(vidFormat.length === LENGTH_FT, `versionID format must be ${LENGTH_FT} bytes`);
145+
}
146+
127147
// Need to wait for the millisecond slot to get "flushed". We wait for
128148
// only a single millisecond when the module is restarted, which is
129149
// necessary for the correctness of the system. This is therefore cheap.
@@ -143,13 +163,6 @@ export function generateVersionId(info: string, replicationGroupId: string): str
143163
lastSeq = lastTimestamp === ts ? lastSeq + 1 : 0;
144164
lastTimestamp = ts;
145165

146-
// if S3_VERSION_ID_ENCODING_TYPE is "hex", info is used.
147-
if (S3_VERSION_ID_ENCODING_TYPE === 'hex' || !S3_VERSION_ID_ENCODING_TYPE) {
148-
// info field stays as is
149-
} else {
150-
info = '';
151-
}
152-
153166
// In the default cases, we reverse the chronological order of the
154167
// timestamps so that all versions of an object can be retrieved in the
155168
// reversed chronological order---newest versions first. This is because of
@@ -158,7 +171,9 @@ export function generateVersionId(info: string, replicationGroupId: string): str
158171
padLeft(MAX_TS - lastTimestamp, TEMPLATE_TS) +
159172
padLeft(MAX_SEQ - lastSeq, TEMPLATE_SEQ) +
160173
repGroupId +
161-
info
174+
otherInfo +
175+
instanceIdPadded +
176+
vidFormat
162177
);
163178
}
164179

@@ -271,6 +286,30 @@ export function base62Decode(str: string): string | Error {
271286
export const ENC_TYPE_HEX = 0; // legacy (large) encoding
272287
export const ENC_TYPE_BASE62 = 1; // new (tiny) encoding
273288

289+
/**
290+
* Checks if the given versionId string contains the specified format version.
291+
* For performance, this function assumes the format marker and version are always
292+
* located at the end of the versionId (at position versionId.length - LENGTH_FT).
293+
* This allows for O(1) access without scanning the entire string.
294+
*
295+
* @param versionId - The versionId string to check.
296+
* @param version - The expected format version.
297+
* @returns true if the versionId contains the format marker and version, false otherwise.
298+
*/
299+
function hasVersionIDFormat(versionId: string, version: string): boolean {
300+
const formatMarkerIdx = versionId.length - LENGTH_FT;
301+
const formatMarker = versionId.charAt(formatMarkerIdx);
302+
if (formatMarker !== VersioningConstants.VersionId.FormatMarker) {
303+
return false; // no format marker
304+
}
305+
const formatVersion = versionId.substring(formatMarkerIdx + 1);
306+
return formatVersion === version;
307+
}
308+
309+
const LEGACY_BASE62_DECODED_LENGTH = 27;
310+
const BASE62_DECODED_LENGTH = 35;
311+
const BASE62_ENCODED_LENGTH = 32;
312+
274313
/**
275314
* Encode a versionId to obscure internal information contained
276315
* in a version ID.
@@ -279,8 +318,9 @@ export const ENC_TYPE_BASE62 = 1; // new (tiny) encoding
279318
* @return - the encoded versionId
280319
*/
281320
export function encode(str: string): string {
282-
// default format without 'info' field will always be 27 characters
283-
if (str.length === 27) {
321+
// Legacy base62 version IDs (without 'info' field) are always 27 characters long.
322+
// The new base62 format is 35 characters and includes the format marker at the end.
323+
if (str.length === LEGACY_BASE62_DECODED_LENGTH || hasVersionIDFormat(str, versionIDFormat)) {
284324
return base62Encode(str);
285325
} // legacy format
286326
return hexEncode(str);
@@ -296,15 +336,19 @@ export function encode(str: string): string {
296336
*/
297337
export function decode(str: string): string | Error {
298338
// default format is exactly 32 characters when encoded
299-
if (str.length === 32) {
339+
if (str.length === BASE62_ENCODED_LENGTH) {
300340
const decoded: string | Error = base62Decode(str);
301-
if (typeof decoded === 'string' && decoded.length !== 27) {
302-
return new Error(`decoded ${str} is not length 27`);
341+
// Legacy base62 version IDs (without 'info' field) are always 27 characters long.
342+
// The new base62 format is always 35 characters long.
343+
if (typeof decoded === 'string' &&
344+
![LEGACY_BASE62_DECODED_LENGTH, BASE62_DECODED_LENGTH].includes(decoded.length)) {
345+
return new Error(`decoded ${str} is not length ` +
346+
`${LEGACY_BASE62_DECODED_LENGTH} or ${BASE62_DECODED_LENGTH}`);
303347
}
304348
return decoded;
305349
}
306350
// legacy format
307-
if (str.length > 32) {
351+
if (str.length > BASE62_ENCODED_LENGTH) {
308352
return hexDecode(str);
309353
}
310354
return new Error(`cannot decode str ${str.length}`);

lib/versioning/constants.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ export enum BucketVersioningFormat {
99
export const VersioningConstants = {
1010
VersionId: {
1111
Separator: '\0',
12+
FormatMarker: '\x1E',
1213
},
1314
DbPrefixes: {
1415
Master: '\x7fM',

tests/unit/versioning/VersionID.spec.js

Lines changed: 47 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -22,29 +22,6 @@ function generateRandomVIDs(count) {
2222
const count = 1000000;
2323

2424
describe('test generating versionIds', () => {
25-
describe('getVersionIdSeed', () => {
26-
it('should return the correct versionIdSeed', () => {
27-
const versionIdSeed = VID.getVersionIdSeed();
28-
assert.strictEqual(versionIdSeed, process.pid.toString());
29-
});
30-
31-
it('should return the correct versionIdSeed when HOSTNAME is set', () => {
32-
process.env.HOSTNAME = 'test-pod-123';
33-
const versionIdSeed = VID.getVersionIdSeed();
34-
assert.strictEqual(versionIdSeed.startsWith('123'), true);
35-
});
36-
});
37-
38-
describe('generateUniqueVersionId', () => {
39-
it('should increase the uidCounter', () => {
40-
const versionId1 = VID.generateUniqueVersionId('somestring');
41-
const versionId2 = VID.generateUniqueVersionId('somestring');
42-
assert.notStrictEqual(versionId1, versionId2);
43-
assert(VID.uidCounter > 0);
44-
assert(VID.versionIdSeed);
45-
});
46-
});
47-
4825
describe('invalid IDs', () => {
4926
// A client can use the CLI to send requests with arbitrary version IDs.
5027
// These IDs may contain invalid characters and should be handled gracefully.
@@ -55,7 +32,7 @@ describe('test generating versionIds', () => {
5532
assert.strictEqual(decoded.message, 'Non-base62 character');
5633
});
5734
});
58-
describe('legaxy hex encoding', () => {
35+
describe('legacy hex encoding', () => {
5936
VID.S3_VERSION_ID_ENCODING_TYPE = 'hex';
6037
const vids = generateRandomVIDs(count);
6138

@@ -83,12 +60,39 @@ describe('test generating versionIds', () => {
8360
const encoded = vids.map(VID.encode);
8461
const decoded = encoded.map(VID.decode);
8562

86-
assert.strictEqual(vids.every(x => x.length > 27), true);
63+
assert.strictEqual(vids.every(x => x.length > 35), true);
8764
assert.strictEqual(encoded.every(x => x.length > 32), true);
8865
assert.deepStrictEqual(vids, decoded);
8966
});
90-
});
9167

68+
it('should not include format marker in legacy hex encoding', () => {
69+
assert.strictEqual(vids.some(vid => vid.includes('\x1E')), false);
70+
});
71+
72+
it('should encode and decode hex versionID with exactly Short ID length', () => {
73+
const versionID = '98248620612400999999RG00001145.20.5'; // 35 characters long
74+
const encoded = VID.encode(versionID);
75+
assert.strictEqual(encoded.length > 32, true);
76+
const decoded = VID.decode(encoded);
77+
assert.strictEqual(decoded, versionID);
78+
});
79+
80+
it('should encode and decode versionID with legacy Short ID length', () => {
81+
const versionID = '98248620612400999999RG00001'; // 27 characters long
82+
const encoded = VID.encode(versionID);
83+
assert.strictEqual(encoded.length === 32, true);
84+
const decoded = VID.decode(encoded);
85+
assert.strictEqual(decoded, versionID);
86+
});
87+
88+
it('should encode and decode Short ID', () => {
89+
const versionID = '98248700112011999999RG00001enr984\x1E1'; // 35 characters long
90+
const encoded = VID.encode(versionID);
91+
assert.strictEqual(encoded.length === 32, true);
92+
const decoded = VID.decode(encoded);
93+
assert.strictEqual(decoded, versionID);
94+
});
95+
});
9296

9397
describe('Short IDs', () => {
9498
VID.S3_VERSION_ID_ENCODING_TYPE = 'base62';
@@ -154,9 +158,25 @@ describe('test generating versionIds', () => {
154158
it('should encode and decode correctly with new 32 byte format', () => {
155159
const encoded = vids.map(vid => VID.encode(vid));
156160
const decoded = encoded.map(vid => VID.decode(vid));
157-
assert(vids.every(x => x.length === 27));
161+
assert(vids.every(x => x.length === 35));
158162
assert(encoded.every(x => x.length === 32));
159163
assert.deepStrictEqual(vids, decoded);
160164
});
165+
166+
it('should encode and decode legacy short versionID', () => {
167+
const legacyVID = '98248620612400999999RG00001'; // 27 characters long
168+
const encoded = VID.encode(legacyVID);
169+
assert.strictEqual(encoded.length === 32, true);
170+
const decoded = VID.decode(encoded);
171+
assert.strictEqual(decoded, legacyVID);
172+
});
173+
174+
it('should encode and decode legacy hex versionID', () => {
175+
const legacyVID = '98248620612400999999RG00001someinformation';
176+
const encoded = VID.encode(legacyVID);
177+
assert.strictEqual(encoded.length > 32, true);
178+
const decoded = VID.decode(encoded);
179+
assert.strictEqual(decoded, legacyVID);
180+
});
161181
});
162182
});

0 commit comments

Comments
 (0)