Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lib/storage/metadata/MetadataWrapper.js
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ class MetadataWrapper {
replicaSet: params.mongodb.replicaSet,
readPreference: params.mongodb.readPreference,
database: params.mongodb.database,
instanceId: params.instanceId,
replicationGroupId: params.replicationGroupId,
path: params.mongodb.path,
authCredentials: params.mongodb.authCredentials,
Expand Down
13 changes: 8 additions & 5 deletions lib/storage/metadata/mongoclient/MongoClientInterface.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ import {
import Uuid from 'uuid';
import diskusage from 'diskusage';

import { generateUniqueVersionId, getVersionIdSeed } from '../../../versioning/VersionID';
import { generateVersionId } from '../../../versioning/VersionID';
import * as listAlgos from '../../../algos/list/exportAlgos';
import LRUCache from '../../../algos/cache/LRUCache';

Expand Down Expand Up @@ -84,6 +84,7 @@ export type MongoDBClientInterfaceParameters = {
path: string,
database: string,
logger: werelogs.Logger,
instanceId: string,
replicationGroupId: string,
authCredentials: MongoUtils.AuthCredentials,
isLocationTransient: Function,
Expand Down Expand Up @@ -237,6 +238,7 @@ class MongoClientInterface {
private client: MongoClient | null;
private db: Db | null;
private path: string;
private instanceId: string;
private replicationGroupId: string;
private database: string;
private isLocationTransient: Function;
Expand All @@ -253,7 +255,7 @@ class MongoClientInterface {

constructor(params: MongoDBClientInterfaceParameters) {
const { replicaSetHosts, writeConcern, replicaSet, readPreference, path,
database, logger, replicationGroupId, authCredentials,
database, logger, instanceId, replicationGroupId, authCredentials,
isLocationTransient, shardCollections } = params;
const cred = MongoUtils.credPrefix(authCredentials);
this.mongoUrl = `mongodb://${cred}${replicaSetHosts}/` +
Expand All @@ -268,6 +270,7 @@ class MongoClientInterface {
this.adminDb = null;
this.logger = logger;
this.path = path;
this.instanceId = instanceId;
this.replicationGroupId = replicationGroupId;
this.database = database;
this.isLocationTransient = isLocationTransient;
Expand Down Expand Up @@ -819,7 +822,7 @@ class MongoClientInterface {
cb: ArsenalCallback<string>,
isRetry?: boolean,
) {
const versionId = generateUniqueVersionId(this.replicationGroupId);
const versionId = generateVersionId(this.instanceId, this.replicationGroupId);
// eslint-disable-next-line
objVal.versionId = versionId;
const versionKey = formatVersionKey(objName, versionId, params.vFormat);
Expand Down Expand Up @@ -944,7 +947,7 @@ class MongoClientInterface {
log: werelogs.Logger,
cb: ArsenalCallback<string>,
) {
const versionId = generateUniqueVersionId(this.replicationGroupId);
const versionId = generateVersionId(this.instanceId, this.replicationGroupId);
// eslint-disable-next-line
objVal.versionId = versionId;
const masterKey = formatMasterKey(objName, params.vFormat);
Expand Down Expand Up @@ -1778,7 +1781,7 @@ class MongoClientInterface {
) {
const masterKey = formatMasterKey(objName, params.vFormat);
const versionKey = formatVersionKey(objName, params.versionId, params.vFormat);
const _vid = generateUniqueVersionId(this.replicationGroupId);
const _vid = generateVersionId(this.instanceId, this.replicationGroupId);
async.series([
next => c.updateOne(
{
Expand Down
132 changes: 97 additions & 35 deletions lib/versioning/VersionID.ts
Original file line number Diff line number Diff line change
@@ -1,29 +1,69 @@
// VersionID format:
// Hex VersionID format:
// timestamp sequential_position rep_group_id other_information
// where:
// - timestamp 14 bytes epoch in ms (good untill 5138)
// - sequential_position 06 bytes position in the ms slot (1B ops)
// - rep_group_id 07 bytes replication group identifier
// - other_information arbitrary user input, such as a unique string
//
// Legacy Base62 VersionID:
// timestamp sequential_position rep_group_id
// where:
// - timestamp 14 bytes epoch in ms
// - sequential_position 06 bytes position in the ms slot
// - rep_group_id 07 bytes replication group identifie
//
// Base62 VersionID:
// timestamp sequential_position rep_group_id instance_id version_id_format
// where:
// - timestamp 14 bytes epoch in ms
// - sequential_position 06 bytes position in the ms slot
// - rep_group_id 07 bytes replication group identifier
// - instance_id 06 bytes unique instance identifier
// - version_id_format 02 bytes version ID format marker + version

import base62Integer from 'base62';
import baseX from 'base-x';
import assert from 'assert';
import { VersioningConstants } from './constants';
const BASE62 = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
const base62String = baseX(BASE62);

// the lengths of the components in bytes
const LENGTH_TS = 14; // timestamp: epoch in ms
const LENGTH_SEQ = 6; // position in ms slot
const LENGTH_RG = 7; // replication group id
const LENGTH_ID = 6; // instance id
const LENGTH_FT = 2; // version ID format, 1 byte + separator

// empty string template for the variables in a versionId
const TEMPLATE_TS = new Array(LENGTH_TS + 1).join('0');
const TEMPLATE_SEQ = new Array(LENGTH_SEQ + 1).join('0');
const TEMPLATE_RG = new Array(LENGTH_RG + 1).join(' ');
const TEMPLATE_ID = new Array(LENGTH_ID + 1).join('0');

export const S3_VERSION_ID_ENCODING_TYPE = process.env.S3_VERSION_ID_ENCODING_TYPE;

// Flag to enable the new version ID (35 characters) over legacy shortID format (27 characters).
// When enabled and S3_VERSION_ID_ENCODING_TYPE is 'base62':
// - Uses new format: timestamp + sequential_position + rep_group_id + instance_id + version_id_format
// - Includes instance_id field to differentiate version IDs across multiple instances in the same k8s cluster
// - Appends format marker and version identifier for format detection
// When disabled and S3_VERSION_ID_ENCODING_TYPE is 'base62':
// - Uses old format: timestamp + sequential_position + rep_group_id (legacy 27-char format)
// Falls back to hex encoding if S3_VERSION_ID_ENCODING_TYPE is 'hex' or unset
export const ENABLE_FORMATTED_VERSION_ID =
process.env.ENABLE_FORMATTED_VERSION_ID === 'true' ||
process.env.ENABLE_FORMATTED_VERSION_ID === '1';

// version ID format added to the end of the version ID
const VERSION_ID_FORMAT_VERSION = '1';
const VERSION_ID_FORMAT_SUFFIX = `${VersioningConstants.VersionId.FormatMarker}${VERSION_ID_FORMAT_VERSION}`;
assert(VERSION_ID_FORMAT_SUFFIX.length === LENGTH_FT, `versionID format must be ${LENGTH_FT} bytes`);

// Counter that is increased after each call to generateUniqueVersionId
export let uidCounter = 0;
export const versionIdSeed = getVersionIdSeed();
const LEGACY_BASE62_DECODED_LENGTH = 27;
const BASE62_DECODED_LENGTH = 35;
const BASE62_ENCODED_LENGTH = 32;

/**
* Left-pad a string representation of a value with a given template.
Expand Down Expand Up @@ -89,23 +129,6 @@ function wait(span: number) {
}
}

export function getVersionIdSeed(): string {
// The HOSTNAME environment variable is set by default by Kubernetes
// and populated with the pod name, containing a suffix with a unique id
// as a string.
// By default, we rely on the pid, to account for multiple workers in
// cluster mode. As a result, the unique id is either <pod-suffix>.<pid>
// or <pid>.
// If unique vID are needed in a multi cluster mode architecture (i.e.,
// multiple server instances, each with multiple workers), the
// HOSTNAME environment variable can be set.
return `${process.env.HOSTNAME?.split('-').pop() || ''}${process.pid}`;
}

export function generateUniqueVersionId(replicationGroupId: string): string {
return generateVersionId(`${versionIdSeed}.${uidCounter++}`, replicationGroupId);
}

/**
* This function returns a "versionId" string indicating the current time as a
* combination of the current time in millisecond, the position of the request
Expand All @@ -122,6 +145,20 @@ export function generateVersionId(info: string, replicationGroupId: string): str
// replication group ID, like PARIS; will be trimmed if exceed LENGTH_RG
const repGroupId = padRight(replicationGroupId, TEMPLATE_RG);

let otherInfo = '';
let instanceIdPadded = '';
let formatSuffix = '';

if (!S3_VERSION_ID_ENCODING_TYPE || S3_VERSION_ID_ENCODING_TYPE === 'hex') {
// In HEX encoding, the full info data is used.
otherInfo = info;
} else if (ENABLE_FORMATTED_VERSION_ID) {
// In base62, info is for the instance ID and is trimmed/padded.
instanceIdPadded = padRight(info, TEMPLATE_ID);
// Add the version ID format marker and version.
formatSuffix = VERSION_ID_FORMAT_SUFFIX;
}

// Need to wait for the millisecond slot got "flushed". We wait for
// only a single millisecond when the module is restarted, which is
// necessary for the correctness of the system. This is therefore cheap.
Expand All @@ -141,13 +178,6 @@ export function generateVersionId(info: string, replicationGroupId: string): str
lastSeq = lastTimestamp === ts ? lastSeq + 1 : 0;
lastTimestamp = ts;

// if S3_VERSION_ID_ENCODING_TYPE is "hex", info is used.
if (process.env.S3_VERSION_ID_ENCODING_TYPE === 'hex' || !process.env.S3_VERSION_ID_ENCODING_TYPE) {
// info field stays as is
} else {
info = ''; // eslint-disable-line
}

// In the default cases, we reverse the chronological order of the
// timestamps so that all versions of an object can be retrieved in the
// reversed chronological order---newest versions first. This is because of
Expand All @@ -156,7 +186,9 @@ export function generateVersionId(info: string, replicationGroupId: string): str
padLeft(MAX_TS - lastTimestamp, TEMPLATE_TS) +
padLeft(MAX_SEQ - lastSeq, TEMPLATE_SEQ) +
repGroupId +
info
otherInfo +
instanceIdPadded +
formatSuffix
);
}

Expand Down Expand Up @@ -269,6 +301,30 @@ export function base62Decode(str: string): string | Error {
export const ENC_TYPE_HEX = 0; // legacy (large) encoding
export const ENC_TYPE_BASE62 = 1; // new (tiny) encoding

/**
* Checks if the given versionId string contains the specified format version.
*
* @param versionId - The versionId string to check.
* @param version - The expected format version.
* @returns true if the versionId contains the format marker and version, false otherwise.
*/
function hasVersionIDFormat(versionId: string, version: string): boolean {
// Format marker can only exist after the required versionId sections.
// This check removes the risk of looking for the format marker in the
// replication group ID, which can technically contain any character as
// it's set by the end user.
if (versionId.length < LENGTH_TS + LENGTH_SEQ + LENGTH_RG + LENGTH_FT) {
return false; // Not enough characters for format marker
}
// For constant time lookup, we always assume that the format marker is
// at the end of the versionId.
const formatMarkerIdx = versionId.length - LENGTH_FT;
if (versionId.charAt(formatMarkerIdx) !== VersioningConstants.VersionId.FormatMarker) {
return false; // no format marker
}
return versionId.substring(formatMarkerIdx + 1) === version; // check if the version matches
}

/**
* Encode a versionId to obscure internal information contained
* in a version ID.
Expand All @@ -277,8 +333,9 @@ export const ENC_TYPE_BASE62 = 1; // new (tiny) encoding
* @return - the encoded versionId
*/
export function encode(str: string): string {
// default format without 'info' field will always be 27 characters
if (str.length === 27) {
// Legacy base62 version IDs (without 'info' field) are always 27 characters long.
// The new base62 format is 35 characters and includes the format marker at the end.
if (str.length === LEGACY_BASE62_DECODED_LENGTH || hasVersionIDFormat(str, VERSION_ID_FORMAT_VERSION)) {
return base62Encode(str);
} // legacy format
return hexEncode(str);
Expand All @@ -294,15 +351,20 @@ export function encode(str: string): string {
*/
export function decode(str: string): string | Error {
// default format is exactly 32 characters when encoded
if (str.length === 32) {
if (str.length === BASE62_ENCODED_LENGTH) {
const decoded: string | Error = base62Decode(str);
if (typeof decoded === 'string' && decoded.length !== 27) {
return new Error(`decoded ${str} is not length 27`);
// Legacy base62 version IDs (without 'info' field) are always 27 characters long.
// The new base62 format is always 35 characters long.
if (typeof decoded === 'string' &&
decoded.length !== LEGACY_BASE62_DECODED_LENGTH &&
decoded.length !== BASE62_DECODED_LENGTH) {
return new Error(`decoded ${str} is not length ` +
`${LEGACY_BASE62_DECODED_LENGTH} or ${BASE62_DECODED_LENGTH}`);
}
return decoded;
}
// legacy format
if (str.length > 32) {
if (str.length > BASE62_ENCODED_LENGTH) {
return hexDecode(str);
}
return new Error(`cannot decode str ${str.length}`);
Expand Down
1 change: 1 addition & 0 deletions lib/versioning/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ export enum BucketVersioningFormat {
export const VersioningConstants = {
VersionId: {
Separator: '\0',
FormatMarker: '?',
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note for reviewers:

I updated the versionID format marker from the record separator control character (\x1E) to ? for better compatibility and performance. This way we avoid having to parse the versionID every time we read it from the DB.

The ? character is safe to use because it never appears in the info section of existing hex-encoded versionIDs. In both our metadata implementations, the info field contains only numeric values and basic punctuation:

Metadata: ${info.term}.${info.aseq}.${this.uidCounter++}
MongoDB: ${process.pid}.${uidCounter++}

Also, the new versionID format is disabled by default, this will allow keeping the same branches and decide when to enabled it through feature flags or other means.

},
DbPrefixes: {
Master: '\x7fM',
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"engines": {
"node": ">=16"
},
"version": "8.1.162",
"version": "8.1.163",
"description": "Common utilities for the S3 project components",
"main": "build/index.js",
"repository": {
Expand Down
8 changes: 4 additions & 4 deletions tests/functional/metadata/mongodb/putObject.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ describe('MongoClientInterface:metadata.putObjectMD', () => {
};

// simulate a versionId collision by always generating the same versionId
const genVID = sinon.stub(VersionID, 'generateUniqueVersionId')
const genVID = sinon.stub(VersionID, 'generateVersionId')
.returns('test-version-id');

async.series([
Expand All @@ -382,7 +382,7 @@ describe('MongoClientInterface:metadata.putObjectMD', () => {
);
// make sure the retry triggered on the first collision detection
assert(genVID.calledThrice,
`expected generateUniqueVersionId to be called thrice, got ${genVID.callCount} times`);
`expected generateVersionId to be called thrice, got ${genVID.callCount} times`);
done();
});
});
Expand All @@ -398,7 +398,7 @@ describe('MongoClientInterface:metadata.putObjectMD', () => {
};

// simulate a versionId collision by always generating the same versionId
const genVID = sinon.stub(VersionID, 'generateUniqueVersionId')
const genVID = sinon.stub(VersionID, 'generateVersionId')
.onFirstCall().returns('test-version-id')
.onSecondCall().returns('test-version-id') // trigger collision
.onThirdCall().returns('test-version-id-retry'); // change versionId on retry
Expand All @@ -412,7 +412,7 @@ describe('MongoClientInterface:metadata.putObjectMD', () => {
assert.ifError(err, `expected no error, got ${err}`);
// make sure the retry triggered on the first collision detection
assert(genVID.calledThrice,
`expected generateUniqueVersionId to be called thrice, got ${genVID.callCount} times`);
`expected generateVersionId to be called thrice, got ${genVID.callCount} times`);
// make sure the last call returned a different versionId
const vid1 = JSON.parse(res[0]).versionId;
const vid2 = JSON.parse(res[1]).versionId;
Expand Down
Loading
Loading