Skip to content

Commit d462812

Browse files
author
Kerkesni
committed
1 parent 8e90d70 commit d462812

6 files changed

Lines changed: 146 additions & 75 deletions

File tree

lib/storage/metadata/MetadataWrapper.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ class MetadataWrapper {
117117
replicaSet: params.mongodb.replicaSet,
118118
readPreference: params.mongodb.readPreference,
119119
database: params.mongodb.database,
120+
instanceId: params.instanceId,
120121
replicationGroupId: params.replicationGroupId,
121122
path: params.mongodb.path,
122123
authCredentials: params.mongodb.authCredentials,

lib/storage/metadata/mongoclient/MongoClientInterface.ts

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ import {
3737
import Uuid from 'uuid';
3838
import diskusage from 'diskusage';
3939

40-
import { generateUniqueVersionId, getVersionIdSeed } from '../../../versioning/VersionID';
40+
import { generateVersionId } from '../../../versioning/VersionID';
4141
import * as listAlgos from '../../../algos/list/exportAlgos';
4242
import LRUCache from '../../../algos/cache/LRUCache';
4343

@@ -84,6 +84,7 @@ export type MongoDBClientInterfaceParameters = {
8484
path: string,
8585
database: string,
8686
logger: werelogs.Logger,
87+
instanceId: string,
8788
replicationGroupId: string,
8889
authCredentials: MongoUtils.AuthCredentials,
8990
isLocationTransient: Function,
@@ -237,6 +238,7 @@ class MongoClientInterface {
237238
private client: MongoClient | null;
238239
private db: Db | null;
239240
private path: string;
241+
private instanceId: string;
240242
private replicationGroupId: string;
241243
private database: string;
242244
private isLocationTransient: Function;
@@ -253,7 +255,7 @@ class MongoClientInterface {
253255

254256
constructor(params: MongoDBClientInterfaceParameters) {
255257
const { replicaSetHosts, writeConcern, replicaSet, readPreference, path,
256-
database, logger, replicationGroupId, authCredentials,
258+
database, logger, instanceId, replicationGroupId, authCredentials,
257259
isLocationTransient, shardCollections } = params;
258260
const cred = MongoUtils.credPrefix(authCredentials);
259261
this.mongoUrl = `mongodb://${cred}${replicaSetHosts}/` +
@@ -268,6 +270,7 @@ class MongoClientInterface {
268270
this.adminDb = null;
269271
this.logger = logger;
270272
this.path = path;
273+
this.instanceId = instanceId;
271274
this.replicationGroupId = replicationGroupId;
272275
this.database = database;
273276
this.isLocationTransient = isLocationTransient;
@@ -819,7 +822,7 @@ class MongoClientInterface {
819822
cb: ArsenalCallback<string>,
820823
isRetry?: boolean,
821824
) {
822-
const versionId = generateUniqueVersionId(this.replicationGroupId);
825+
const versionId = generateVersionId(this.instanceId, this.replicationGroupId);
823826
// eslint-disable-next-line
824827
objVal.versionId = versionId;
825828
const versionKey = formatVersionKey(objName, versionId, params.vFormat);
@@ -947,7 +950,7 @@ class MongoClientInterface {
947950
log: werelogs.Logger,
948951
cb: ArsenalCallback<string>,
949952
) {
950-
const versionId = generateUniqueVersionId(this.replicationGroupId);
953+
const versionId = generateVersionId(this.instanceId, this.replicationGroupId);
951954
// eslint-disable-next-line
952955
objVal.versionId = versionId;
953956
const masterKey = formatMasterKey(objName, params.vFormat);
@@ -1781,7 +1784,7 @@ class MongoClientInterface {
17811784
) {
17821785
const masterKey = formatMasterKey(objName, params.vFormat);
17831786
const versionKey = formatVersionKey(objName, params.versionId, params.vFormat);
1784-
const _vid = generateUniqueVersionId(this.replicationGroupId);
1787+
const _vid = generateVersionId(this.instanceId, this.replicationGroupId);
17851788
async.series([
17861789
next => c.updateOne(
17871790
{

lib/versioning/VersionID.ts

Lines changed: 82 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,53 @@
1-
// VersionID format:
1+
// Hex VersionID format:
22
// timestamp sequential_position rep_group_id other_information
33
// where:
44
// - timestamp 14 bytes epoch in ms (good until 5138)
55
// - sequential_position 06 bytes position in the ms slot (1B ops)
66
// - rep_group_id 07 bytes replication group identifier
77
// - other_information arbitrary user input, such as a unique string
8+
//
9+
// Base62 VersionID format:
10+
// timestamp sequential_position rep_group_id instance_id version_id_format
11+
// where:
12+
// - timestamp 14 bytes epoch in ms
13+
// - sequential_position 06 bytes position in the ms slot
14+
// - rep_group_id 07 bytes replication group identifier
15+
// - instance_id 06 bytes unique instance identifier (optional)
16+
// - version_id_format 02 bytes version ID format marker + version
817

918
import base62Integer from 'base62';
1019
import baseX from 'base-x';
20+
import assert from 'assert';
21+
import { VersioningConstants } from './constants';
1122
const BASE62 = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
1223
const base62String = baseX(BASE62);
1324

1425
// the lengths of the components in bytes
1526
const LENGTH_TS = 14; // timestamp: epoch in ms
1627
const LENGTH_SEQ = 6; // position in ms slot
1728
const LENGTH_RG = 7; // replication group id
29+
const LENGTH_ID = 6; // instance id, can be empty
30+
const LENGTH_FT = 2; // version ID format, 1 byte + separator
1831

1932
// empty string template for the variables in a versionId
2033
const TEMPLATE_TS = new Array(LENGTH_TS + 1).join('0');
2134
const TEMPLATE_SEQ = new Array(LENGTH_SEQ + 1).join('0');
22-
const TEMPLATE_RG = new Array(LENGTH_RG + 1).join(' ');
35+
const TEMPLATE_RG = new Array(LENGTH_RG + 1).join('0');
36+
const TEMPLATE_ID = new Array(LENGTH_ID + 1).join('0');
2337

24-
// Counter that is increased after each call to generateUniqueVersionId
25-
export let uidCounter = 0;
26-
export const versionIdSeed = getVersionIdSeed();
38+
export const S3_VERSION_ID_ENCODING_TYPE = process.env.S3_VERSION_ID_ENCODING_TYPE;
39+
40+
const versionIDFormat = '1';
41+
42+
/**
43+
* Check if the S3_VERSION_ID_ENCODING_TYPE is set to 'hex' or not set at all.
44+
* If it is, we use the legacy hex encoding for version IDs.
45+
*
46+
* @return - true if hex encoding type is used, false otherwise
47+
*/
48+
export function isHexEncodingType() {
49+
return S3_VERSION_ID_ENCODING_TYPE === 'hex' || !S3_VERSION_ID_ENCODING_TYPE;
50+
}
2751

2852
/**
2953
* Left-pad a string representation of a value with a given template.
@@ -89,23 +113,6 @@ function wait(span: number) {
89113
}
90114
}
91115

92-
export function getVersionIdSeed(): string {
93-
// The HOSTNAME environment variable is set by default by Kubernetes
94-
// and populated with the pod name, containing a suffix with a unique id
95-
// as a string.
96-
// By default, we rely on the pid, to account for multiple workers in
97-
// cluster mode. As a result, the unique id is either <pod-suffix>.<pid>
98-
// or <pid>.
99-
// If unique vID are needed in a multi cluster mode architecture (i.e.,
100-
// multiple server instances, each with multiple workers), the
101-
// HOSTNAME environment variable can be set.
102-
return `${process.env.HOSTNAME?.split('-').pop() || ''}${process.pid}`;
103-
}
104-
105-
export function generateUniqueVersionId(replicationGroupId: string): string {
106-
return generateVersionId(`${versionIdSeed}.${uidCounter++}`, replicationGroupId);
107-
}
108-
109116
/**
110117
* This function returns a "versionId" string indicating the current time as a
111118
* combination of the current time in millisecond, the position of the request
@@ -122,6 +129,21 @@ export function generateVersionId(info: string, replicationGroupId: string): str
122129
// replication group ID, like PARIS; will be trimmed if it exceeds LENGTH_RG
123130
const repGroupId = padRight(replicationGroupId, TEMPLATE_RG);
124131

132+
let otherInfo = '';
133+
let instanceIdPadded = '';
134+
let vidFormat = '';
135+
136+
if (isHexEncodingType()) {
137+
// In HEX encoding, the full info data is used.
138+
otherInfo = info;
139+
} else {
140+
// In base62, info is for the instance ID and is trimmed/padded.
141+
instanceIdPadded = padRight(info, TEMPLATE_ID);
142+
// versionID format, 2 bytes
143+
vidFormat = VersioningConstants.VersionId.FormatMarker + versionIDFormat;
144+
assert(vidFormat.length === LENGTH_FT, `versionID format must be ${LENGTH_FT} bytes`);
145+
}
146+
125147
// Need to wait for the millisecond slot to get "flushed". We wait for
126148
// only a single millisecond when the module is restarted, which is
127149
// necessary for the correctness of the system. This is therefore cheap.
@@ -141,13 +163,6 @@ export function generateVersionId(info: string, replicationGroupId: string): str
141163
lastSeq = lastTimestamp === ts ? lastSeq + 1 : 0;
142164
lastTimestamp = ts;
143165

144-
// if S3_VERSION_ID_ENCODING_TYPE is "hex", info is used.
145-
if (process.env.S3_VERSION_ID_ENCODING_TYPE === 'hex' || !process.env.S3_VERSION_ID_ENCODING_TYPE) {
146-
// info field stays as is
147-
} else {
148-
info = ''; // eslint-disable-line
149-
}
150-
151166
// In the default cases, we reverse the chronological order of the
152167
// timestamps so that all versions of an object can be retrieved in the
153168
// reversed chronological order---newest versions first. This is because of
@@ -156,7 +171,9 @@ export function generateVersionId(info: string, replicationGroupId: string): str
156171
padLeft(MAX_TS - lastTimestamp, TEMPLATE_TS) +
157172
padLeft(MAX_SEQ - lastSeq, TEMPLATE_SEQ) +
158173
repGroupId +
159-
info
174+
otherInfo +
175+
instanceIdPadded +
176+
vidFormat
160177
);
161178
}
162179

@@ -269,6 +286,30 @@ export function base62Decode(str: string): string | Error {
269286
export const ENC_TYPE_HEX = 0; // legacy (large) encoding
270287
export const ENC_TYPE_BASE62 = 1; // new (tiny) encoding
271288

289+
/**
290+
* Checks if the given versionId string contains the specified format version.
291+
* For performance, this function assumes the format marker and version are always
292+
* located at the end of the versionId (at position versionId.length - LENGTH_FT).
293+
* This allows for O(1) access without scanning the entire string.
294+
*
295+
* @param versionId - The versionId string to check.
296+
* @param version - The expected format version.
297+
* @returns true if the versionId contains the format marker and version, false otherwise.
298+
*/
299+
function hasVersionIDFormat(versionId: string, version: string): boolean {
300+
const formatMarkerIdx = versionId.length - LENGTH_FT;
301+
const formatMarker = versionId.charAt(formatMarkerIdx);
302+
if (formatMarker !== VersioningConstants.VersionId.FormatMarker) {
303+
return false; // no format marker
304+
}
305+
const formatVersion = versionId.substring(formatMarkerIdx + 1);
306+
return formatVersion === version;
307+
}
308+
309+
const LEGACY_BASE62_DECODED_LENGTH = 27;
310+
const BASE62_DECODED_LENGTH = 35;
311+
const BASE62_ENCODED_LENGTH = 32;
312+
272313
/**
273314
* Encode a versionId to obscure internal information contained
274315
* in a version ID.
@@ -277,8 +318,9 @@ export const ENC_TYPE_BASE62 = 1; // new (tiny) encoding
277318
* @return - the encoded versionId
278319
*/
279320
export function encode(str: string): string {
280-
// default format without 'info' field will always be 27 characters
281-
if (str.length === 27) {
321+
// Legacy base62 version IDs (without 'info' field) are always 27 characters long.
322+
// The new base62 format is 35 characters and includes the format marker at the end.
323+
if (str.length === LEGACY_BASE62_DECODED_LENGTH || hasVersionIDFormat(str, versionIDFormat)) {
282324
return base62Encode(str);
283325
} // legacy format
284326
return hexEncode(str);
@@ -294,15 +336,19 @@ export function encode(str: string): string {
294336
*/
295337
export function decode(str: string): string | Error {
296338
// default format is exactly 32 characters when encoded
297-
if (str.length === 32) {
339+
if (str.length === BASE62_ENCODED_LENGTH) {
298340
const decoded: string | Error = base62Decode(str);
299-
if (typeof decoded === 'string' && decoded.length !== 27) {
300-
return new Error(`decoded ${str} is not length 27`);
341+
// Legacy base62 version IDs (without 'info' field) are always 27 characters long.
342+
// The new base62 format is always 35 characters long.
343+
if (typeof decoded === 'string' &&
344+
![LEGACY_BASE62_DECODED_LENGTH, BASE62_DECODED_LENGTH].includes(decoded.length)) {
345+
return new Error(`decoded ${str} is not length ` +
346+
`${LEGACY_BASE62_DECODED_LENGTH} or ${BASE62_DECODED_LENGTH}`);
301347
}
302348
return decoded;
303349
}
304350
// legacy format
305-
if (str.length > 32) {
351+
if (str.length > BASE62_ENCODED_LENGTH) {
306352
return hexDecode(str);
307353
}
308354
return new Error(`cannot decode str ${str.length}`);

lib/versioning/constants.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ export enum BucketVersioningFormat {
1010
export const VersioningConstants = {
1111
VersionId: {
1212
Separator: '\0',
13+
FormatMarker: '\x1E',
1314
},
1415
DbPrefixes: {
1516
Master: '\x7fM',

tests/unit/versioning/VersionID.spec.js

Lines changed: 49 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -23,29 +23,6 @@ function generateRandomVIDs(count) {
2323
const count = 1000000;
2424

2525
describe('test generating versionIds', () => {
26-
describe('getVersionIdSeed', () => {
27-
it('should return the correct versionIdSeed', () => {
28-
const versionIdSeed = VID.getVersionIdSeed();
29-
assert.strictEqual(versionIdSeed, process.pid.toString());
30-
});
31-
32-
it('should return the correct versionIdSeed when HOSTNAME is set', () => {
33-
process.env.HOSTNAME = 'test-pod-123';
34-
const versionIdSeed = VID.getVersionIdSeed();
35-
assert.strictEqual(versionIdSeed.startsWith('123'), true);
36-
});
37-
});
38-
39-
describe('generateUniqueVersionId', () => {
40-
it('should increase the uidCounter', () => {
41-
const versionId1 = VID.generateUniqueVersionId('somestring');
42-
const versionId2 = VID.generateUniqueVersionId('somestring');
43-
assert.notStrictEqual(versionId1, versionId2);
44-
assert(VID.uidCounter > 0);
45-
assert(VID.versionIdSeed);
46-
});
47-
});
48-
4926
describe('invalid IDs', () => {
5027
// A client can use the CLI to send requests with arbitrary version IDs.
5128
// These IDs may contain invalid characters and should be handled gracefully.
@@ -56,8 +33,8 @@ describe('test generating versionIds', () => {
5633
assert.strictEqual(decoded.message, 'Non-base62 character');
5734
});
5835
});
59-
describe('legaxy hex encoding', () => {
60-
env.S3_VERSION_ID_ENCODING_TYPE = 'hex';
36+
describe('legacy hex encoding', () => {
37+
VID.S3_VERSION_ID_ENCODING_TYPE = 'hex';
6138
const vids = generateRandomVIDs(count);
6239

6340
it('sorted in reversed chronological and alphabetical order', () => {
@@ -84,15 +61,42 @@ describe('test generating versionIds', () => {
8461
const encoded = vids.map(VID.encode);
8562
const decoded = encoded.map(VID.decode);
8663

87-
assert.strictEqual(vids.every(x => x.length > 27), true);
64+
assert.strictEqual(vids.every(x => x.length > 35), true);
8865
assert.strictEqual(encoded.every(x => x.length > 32), true);
8966
assert.deepStrictEqual(vids, decoded);
9067
});
91-
});
9268

69+
it('should not include format marker in legacy hex encoding', () => {
70+
assert.strictEqual(vids.some(vid => vid.includes('\x1E')), false);
71+
});
72+
73+
it('should encode and decode hex versionID with exactly Short ID length', () => {
74+
const versionID = '98248620612400999999RG00001145.20.5'; // 35 characters long
75+
const encoded = VID.encode(versionID);
76+
assert.strictEqual(encoded.length > 32, true);
77+
const decoded = VID.decode(encoded);
78+
assert.strictEqual(decoded, versionID);
79+
});
80+
81+
it('should encode and decode versionID with legacy Short ID length', () => {
82+
const versionID = '98248620612400999999RG00001'; // 27 characters long
83+
const encoded = VID.encode(versionID);
84+
assert.strictEqual(encoded.length === 32, true);
85+
const decoded = VID.decode(encoded);
86+
assert.strictEqual(decoded, versionID);
87+
});
88+
89+
it('should encode and decode Short ID', () => {
90+
const versionID = '98248700112011999999RG00001enr984\x1E1'; // 35 characters long
91+
const encoded = VID.encode(versionID);
92+
assert.strictEqual(encoded.length === 32, true);
93+
const decoded = VID.decode(encoded);
94+
assert.strictEqual(decoded, versionID);
95+
});
96+
});
9397

9498
describe('Short IDs', () => {
95-
env.S3_VERSION_ID_ENCODING_TYPE = 'base62';
99+
VID.S3_VERSION_ID_ENCODING_TYPE = 'base62';
96100
const vids = generateRandomVIDs(count);
97101

98102
it('sorted in reversed chronological and alphabetical order', () => {
@@ -155,9 +159,25 @@ describe('test generating versionIds', () => {
155159
it('should encode and decode correctly with new 32 byte format', () => {
156160
const encoded = vids.map(vid => VID.encode(vid));
157161
const decoded = encoded.map(vid => VID.decode(vid));
158-
assert(vids.every(x => x.length === 27));
162+
assert(vids.every(x => x.length === 35));
159163
assert(encoded.every(x => x.length === 32));
160164
assert.deepStrictEqual(vids, decoded);
161165
});
166+
167+
it('should encode and decode legacy short versionID', () => {
168+
const legacyVID = '98248620612400999999RG00001'; // 27 characters long
169+
const encoded = VID.encode(legacyVID);
170+
assert.strictEqual(encoded.length === 32, true);
171+
const decoded = VID.decode(encoded);
172+
assert.strictEqual(decoded, legacyVID);
173+
});
174+
175+
it('should encode and decode legacy hex versionID', () => {
176+
const legacyVID = '98248620612400999999RG00001someinformation';
177+
const encoded = VID.encode(legacyVID);
178+
assert.strictEqual(encoded.length > 32, true);
179+
const decoded = VID.decode(encoded);
180+
assert.strictEqual(decoded, legacyVID);
181+
});
162182
});
163183
});

0 commit comments

Comments
 (0)