Skip to content

Commit bb9b416

Browse files
author
Kerkesni
committed
1 parent b0cd656 commit bb9b416

6 files changed

Lines changed: 227 additions & 133 deletions

File tree

lib/storage/metadata/MetadataWrapper.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ class MetadataWrapper {
117117
replicaSet: params.mongodb.replicaSet,
118118
readPreference: params.mongodb.readPreference,
119119
database: params.mongodb.database,
120+
instanceId: params.instanceId,
120121
replicationGroupId: params.replicationGroupId,
121122
path: params.mongodb.path,
122123
authCredentials: params.mongodb.authCredentials,

lib/storage/metadata/mongoclient/MongoClientInterface.ts

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ import {
3737
import Uuid from 'uuid';
3838
import diskusage from 'diskusage';
3939

40-
import { generateUniqueVersionId, getVersionIdSeed } from '../../../versioning/VersionID';
40+
import { generateVersionId } from '../../../versioning/VersionID';
4141
import * as listAlgos from '../../../algos/list/exportAlgos';
4242
import LRUCache from '../../../algos/cache/LRUCache';
4343

@@ -84,6 +84,7 @@ export type MongoDBClientInterfaceParameters = {
8484
path: string,
8585
database: string,
8686
logger: werelogs.Logger,
87+
instanceId: string,
8788
replicationGroupId: string,
8889
authCredentials: MongoUtils.AuthCredentials,
8990
isLocationTransient: Function,
@@ -237,6 +238,7 @@ class MongoClientInterface {
237238
private client: MongoClient | null;
238239
private db: Db | null;
239240
private path: string;
241+
private instanceId: string;
240242
private replicationGroupId: string;
241243
private database: string;
242244
private isLocationTransient: Function;
@@ -253,7 +255,7 @@ class MongoClientInterface {
253255

254256
constructor(params: MongoDBClientInterfaceParameters) {
255257
const { replicaSetHosts, writeConcern, replicaSet, readPreference, path,
256-
database, logger, replicationGroupId, authCredentials,
258+
database, logger, instanceId, replicationGroupId, authCredentials,
257259
isLocationTransient, shardCollections } = params;
258260
const cred = MongoUtils.credPrefix(authCredentials);
259261
this.mongoUrl = `mongodb://${cred}${replicaSetHosts}/` +
@@ -268,6 +270,7 @@ class MongoClientInterface {
268270
this.adminDb = null;
269271
this.logger = logger;
270272
this.path = path;
273+
this.instanceId = instanceId;
271274
this.replicationGroupId = replicationGroupId;
272275
this.database = database;
273276
this.isLocationTransient = isLocationTransient;
@@ -819,7 +822,7 @@ class MongoClientInterface {
819822
cb: ArsenalCallback<string>,
820823
isRetry?: boolean,
821824
) {
822-
const versionId = generateUniqueVersionId(this.replicationGroupId);
825+
const versionId = generateVersionId(this.instanceId, this.replicationGroupId);
823826
// eslint-disable-next-line
824827
objVal.versionId = versionId;
825828
const versionKey = formatVersionKey(objName, versionId, params.vFormat);
@@ -944,7 +947,7 @@ class MongoClientInterface {
944947
log: werelogs.Logger,
945948
cb: ArsenalCallback<string>,
946949
) {
947-
const versionId = generateUniqueVersionId(this.replicationGroupId);
950+
const versionId = generateVersionId(this.instanceId, this.replicationGroupId);
948951
// eslint-disable-next-line
949952
objVal.versionId = versionId;
950953
const masterKey = formatMasterKey(objName, params.vFormat);
@@ -1778,7 +1781,7 @@ class MongoClientInterface {
17781781
) {
17791782
const masterKey = formatMasterKey(objName, params.vFormat);
17801783
const versionKey = formatVersionKey(objName, params.versionId, params.vFormat);
1781-
const _vid = generateUniqueVersionId(this.replicationGroupId);
1784+
const _vid = generateVersionId(this.instanceId, this.replicationGroupId);
17821785
async.series([
17831786
next => c.updateOne(
17841787
{

lib/versioning/VersionID.ts

Lines changed: 97 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,69 @@
1-
// VersionID format:
1+
// Hex VersionID format:
22
// timestamp sequential_position rep_group_id other_information
33
// where:
44
// - timestamp 14 bytes epoch in ms (good untill 5138)
55
// - sequential_position 06 bytes position in the ms slot (1B ops)
66
// - rep_group_id 07 bytes replication group identifier
77
// - other_information arbitrary user input, such as a unique string
8+
//
9+
// Legacy Base62 VersionID:
10+
// timestamp sequential_position rep_group_id
11+
// where:
12+
// - timestamp 14 bytes epoch in ms
13+
// - sequential_position 06 bytes position in the ms slot
14+
// - rep_group_id 07 bytes replication group identifie
15+
//
16+
// Base62 VersionID:
17+
// timestamp sequential_position rep_group_id instance_id version_id_format
18+
// where:
19+
// - timestamp 14 bytes epoch in ms
20+
// - sequential_position 06 bytes position in the ms slot
21+
// - rep_group_id 07 bytes replication group identifier
22+
// - instance_id 06 bytes unique instance identifier
23+
// - version_id_format 02 bytes version ID format marker + version
824

925
import base62Integer from 'base62';
1026
import baseX from 'base-x';
27+
import assert from 'assert';
28+
import { VersioningConstants } from './constants';
1129
const BASE62 = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
1230
const base62String = baseX(BASE62);
1331

1432
// the lengths of the components in bytes
1533
const LENGTH_TS = 14; // timestamp: epoch in ms
1634
const LENGTH_SEQ = 6; // position in ms slot
1735
const LENGTH_RG = 7; // replication group id
36+
const LENGTH_ID = 6; // instance id
37+
const LENGTH_FT = 2; // version ID format, 1 byte + separator
1838

1939
// empty string template for the variables in a versionId
2040
const TEMPLATE_TS = new Array(LENGTH_TS + 1).join('0');
2141
const TEMPLATE_SEQ = new Array(LENGTH_SEQ + 1).join('0');
2242
const TEMPLATE_RG = new Array(LENGTH_RG + 1).join(' ');
43+
const TEMPLATE_ID = new Array(LENGTH_ID + 1).join('0');
2344

24-
// Counter that is increased after each call to generateUniqueVersionId
25-
export let uidCounter = 0;
26-
export const versionIdSeed = getVersionIdSeed();
45+
export const S3_VERSION_ID_ENCODING_TYPE = process.env.S3_VERSION_ID_ENCODING_TYPE;
46+
47+
// Flag to enable the new version ID (35 characters) over legacy shortID format (27 characters).
48+
// When enabled and S3_VERSION_ID_ENCODING_TYPE is 'base62':
49+
// - Uses new format: timestamp + sequential_position + rep_group_id + instance_id + version_id_format
50+
// - Includes instance_id field to differentiate version IDs across multiple instances in the same k8s cluster
51+
// - Appends format marker and version identifier for format detection
52+
// When disabled and S3_VERSION_ID_ENCODING_TYPE is 'base62':
53+
// - Uses old format: timestamp + sequential_position + rep_group_id (legacy 27-char format)
54+
// Falls back to hex encoding if S3_VERSION_ID_ENCODING_TYPE is 'hex' or unset
55+
export const ENABLE_FORMATTED_VERSION_ID =
56+
process.env.ENABLE_FORMATTED_VERSION_ID === 'true' ||
57+
process.env.ENABLE_FORMATTED_VERSION_ID === '1';
58+
console.log(ENABLE_FORMATTED_VERSION_ID)
59+
// version ID format added to the end of the version ID
60+
const VERSION_ID_FORMAT_VERSION = '1';
61+
const VERSION_ID_FORMAT_SUFFIX = `${VersioningConstants.VersionId.FormatMarker}${VERSION_ID_FORMAT_VERSION}`;
62+
assert(VERSION_ID_FORMAT_SUFFIX.length === LENGTH_FT, `versionID format must be ${LENGTH_FT} bytes`);
63+
64+
const LEGACY_BASE62_DECODED_LENGTH = 27;
65+
const BASE62_DECODED_LENGTH = 35;
66+
const BASE62_ENCODED_LENGTH = 32;
2767

2868
/**
2969
* Left-pad a string representation of a value with a given template.
@@ -89,23 +129,6 @@ function wait(span: number) {
89129
}
90130
}
91131

92-
export function getVersionIdSeed(): string {
93-
// The HOSTNAME environment variable is set by default by Kubernetes
94-
// and populated with the pod name, containing a suffix with a unique id
95-
// as a string.
96-
// By default, we rely on the pid, to account for multiple workers in
97-
// cluster mode. As a result, the unique id is either <pod-suffix>.<pid>
98-
// or <pid>.
99-
// If unique vID are needed in a multi cluster mode architecture (i.e.,
100-
// multiple server instances, each with multiple workers), the
101-
// HOSTNAME environment variable can be set.
102-
return `${process.env.HOSTNAME?.split('-').pop() || ''}${process.pid}`;
103-
}
104-
105-
export function generateUniqueVersionId(replicationGroupId: string): string {
106-
return generateVersionId(`${versionIdSeed}.${uidCounter++}`, replicationGroupId);
107-
}
108-
109132
/**
110133
* This function returns a "versionId" string indicating the current time as a
111134
* combination of the current time in millisecond, the position of the request
@@ -122,6 +145,20 @@ export function generateVersionId(info: string, replicationGroupId: string): str
122145
// replication group ID, like PARIS; will be trimmed if exceed LENGTH_RG
123146
const repGroupId = padRight(replicationGroupId, TEMPLATE_RG);
124147

148+
let otherInfo = '';
149+
let instanceIdPadded = '';
150+
let formatSuffix = '';
151+
152+
if (!S3_VERSION_ID_ENCODING_TYPE || S3_VERSION_ID_ENCODING_TYPE === 'hex') {
153+
// In HEX encoding, the full info data is used.
154+
otherInfo = info;
155+
} else if (ENABLE_FORMATTED_VERSION_ID) {
156+
// In base62, info is for the instance ID and is trimmed/padded.
157+
instanceIdPadded = padRight(info, TEMPLATE_ID);
158+
// Add the version ID format marker and version.
159+
formatSuffix = VERSION_ID_FORMAT_SUFFIX;
160+
}
161+
125162
// Need to wait for the millisecond slot got "flushed". We wait for
126163
// only a single millisecond when the module is restarted, which is
127164
// necessary for the correctness of the system. This is therefore cheap.
@@ -141,13 +178,6 @@ export function generateVersionId(info: string, replicationGroupId: string): str
141178
lastSeq = lastTimestamp === ts ? lastSeq + 1 : 0;
142179
lastTimestamp = ts;
143180

144-
// if S3_VERSION_ID_ENCODING_TYPE is "hex", info is used.
145-
if (process.env.S3_VERSION_ID_ENCODING_TYPE === 'hex' || !process.env.S3_VERSION_ID_ENCODING_TYPE) {
146-
// info field stays as is
147-
} else {
148-
info = ''; // eslint-disable-line
149-
}
150-
151181
// In the default cases, we reverse the chronological order of the
152182
// timestamps so that all versions of an object can be retrieved in the
153183
// reversed chronological order---newest versions first. This is because of
@@ -156,7 +186,9 @@ export function generateVersionId(info: string, replicationGroupId: string): str
156186
padLeft(MAX_TS - lastTimestamp, TEMPLATE_TS) +
157187
padLeft(MAX_SEQ - lastSeq, TEMPLATE_SEQ) +
158188
repGroupId +
159-
info
189+
otherInfo +
190+
instanceIdPadded +
191+
formatSuffix
160192
);
161193
}
162194

@@ -269,6 +301,30 @@ export function base62Decode(str: string): string | Error {
269301
export const ENC_TYPE_HEX = 0; // legacy (large) encoding
270302
export const ENC_TYPE_BASE62 = 1; // new (tiny) encoding
271303

304+
/**
305+
* Checks if the given versionId string contains the specified format version.
306+
*
307+
* @param versionId - The versionId string to check.
308+
* @param version - The expected format version.
309+
* @returns true if the versionId contains the format marker and version, false otherwise.
310+
*/
311+
function hasVersionIDFormat(versionId: string, version: string): boolean {
312+
// Format marker can only exist after the required versionId sections.
313+
// This check removes the risk of looking for the format marker in the
314+
// replication group ID, which can technically contain any character as
315+
// it's set by the end user.
316+
if (versionId.length < LENGTH_TS + LENGTH_SEQ + LENGTH_RG + LENGTH_FT) {
317+
return false; // Not enough characters for format marker
318+
}
319+
// For constant time lookup, we always assume that the format marker is
320+
// at the end of the versionId.
321+
const formatMarkerIdx = versionId.length - LENGTH_FT;
322+
if (versionId.charAt(formatMarkerIdx) !== VersioningConstants.VersionId.FormatMarker) {
323+
return false; // no format marker
324+
}
325+
return versionId.substring(formatMarkerIdx + 1) === version; // check if the version matches
326+
}
327+
272328
/**
273329
* Encode a versionId to obscure internal information contained
274330
* in a version ID.
@@ -277,8 +333,9 @@ export const ENC_TYPE_BASE62 = 1; // new (tiny) encoding
277333
* @return - the encoded versionId
278334
*/
279335
export function encode(str: string): string {
280-
// default format without 'info' field will always be 27 characters
281-
if (str.length === 27) {
336+
// Legacy base62 version IDs (without 'info' field) are always 27 characters long.
337+
// The new base62 format is 35 characters and includes the format marker at the end.
338+
if (str.length === LEGACY_BASE62_DECODED_LENGTH || hasVersionIDFormat(str, VERSION_ID_FORMAT_VERSION)) {
282339
return base62Encode(str);
283340
} // legacy format
284341
return hexEncode(str);
@@ -294,15 +351,20 @@ export function encode(str: string): string {
294351
*/
295352
export function decode(str: string): string | Error {
296353
// default format is exactly 32 characters when encoded
297-
if (str.length === 32) {
354+
if (str.length === BASE62_ENCODED_LENGTH) {
298355
const decoded: string | Error = base62Decode(str);
299-
if (typeof decoded === 'string' && decoded.length !== 27) {
300-
return new Error(`decoded ${str} is not length 27`);
356+
// Legacy base62 version IDs (without 'info' field) are always 27 characters long.
357+
// The new base62 format is always 35 characters long.
358+
if (typeof decoded === 'string' &&
359+
decoded.length !== LEGACY_BASE62_DECODED_LENGTH &&
360+
decoded.length !== BASE62_DECODED_LENGTH) {
361+
return new Error(`decoded ${str} is not length ` +
362+
`${LEGACY_BASE62_DECODED_LENGTH} or ${BASE62_DECODED_LENGTH}`);
301363
}
302364
return decoded;
303365
}
304366
// legacy format
305-
if (str.length > 32) {
367+
if (str.length > BASE62_ENCODED_LENGTH) {
306368
return hexDecode(str);
307369
}
308370
return new Error(`cannot decode str ${str.length}`);

lib/versioning/constants.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ export enum BucketVersioningFormat {
1010
export const VersioningConstants = {
1111
VersionId: {
1212
Separator: '\0',
13+
FormatMarker: '?',
1314
},
1415
DbPrefixes: {
1516
Master: '\x7fM',

tests/functional/metadata/mongodb/putObject.spec.js

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,7 @@ describe('MongoClientInterface:metadata.putObjectMD', () => {
368368
};
369369

370370
// simulate a versionId collision by always generating the same versionId
371-
const genVID = sinon.stub(VersionID, 'generateUniqueVersionId')
371+
const genVID = sinon.stub(VersionID, 'generateVersionId')
372372
.returns('test-version-id');
373373

374374
async.series([
@@ -382,7 +382,7 @@ describe('MongoClientInterface:metadata.putObjectMD', () => {
382382
);
383383
// make sure the retry triggered on the first collision detection
384384
assert(genVID.calledThrice,
385-
`expected generateUniqueVersionId to be called thrice, got ${genVID.callCount} times`);
385+
`expected generateVersionId to be called thrice, got ${genVID.callCount} times`);
386386
done();
387387
});
388388
});
@@ -398,7 +398,7 @@ describe('MongoClientInterface:metadata.putObjectMD', () => {
398398
};
399399

400400
// simulate a versionId collision by always generating the same versionId
401-
const genVID = sinon.stub(VersionID, 'generateUniqueVersionId')
401+
const genVID = sinon.stub(VersionID, 'generateVersionId')
402402
.onFirstCall().returns('test-version-id')
403403
.onSecondCall().returns('test-version-id') // trigger collision
404404
.onThirdCall().returns('test-version-id-retry'); // change versionId on retry
@@ -412,7 +412,7 @@ describe('MongoClientInterface:metadata.putObjectMD', () => {
412412
assert.ifError(err, `expected no error, got ${err}`);
413413
// make sure the retry triggered on the first collision detection
414414
assert(genVID.calledThrice,
415-
`expected generateUniqueVersionId to be called thrice, got ${genVID.callCount} times`);
415+
`expected generateVersionId to be called thrice, got ${genVID.callCount} times`);
416416
// make sure the last call returned a different versionId
417417
const vid1 = JSON.parse(res[0]).versionId;
418418
const vid2 = JSON.parse(res[1]).versionId;

0 commit comments

Comments
 (0)