scality
diff --git a/lib/api/apiUtils/integrity/crcCombine.js b/lib/api/apiUtils/integrity/crcCombine.js
Lines changed: 173 additions & 0 deletions
diff --git a/lib/api/apiUtils/integrity/validateChecksums.js b/lib/api/apiUtils/integrity/validateChecksums.js
Lines changed: 82 additions & 0 deletions
@@ -0,0 +1,173 @@
+// Combine two right-shift CRCs (zlib's gf2_matrix_* trick) without using BigInt
+// inside the hot loops. Each GF(2) operator matrix is stored as a Uint32Array
+// of `2 * dim` words, where row n is packed as [lo32, hi32]. For 32-bit CRCs
+// the high halves stay zero and the per-row loop exits early; for the 64-bit
+// CRC (crc64nvme) the pair-of-u32s representation lets every XOR/shift stay on
+// 32-bit ints.
+//
+// References:
+//   zlib crc32_combine (canonical C implementation):
+//     https://github.com/madler/zlib/blob/master/crc32.c
+//   Mark Adler, "How does CRC32 work?" — derivation of the matrix trick:
+//     https://stackoverflow.com/a/23126768
+//   AWS S3 multipart upload full-object checksums:
+//     https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html
+
+/**
+ * Multiply a GF(2) operator matrix by a 64-bit vector given as two 32-bit
+ * halves.
+ *
+ * Row n of `mat` lives in the word pair [mat[2n], mat[2n + 1]] (low half
+ * first). The product is the XOR of every row whose index corresponds to a
+ * set bit of the vector, scanning upwards from bit 0 and stopping as soon as
+ * no set bits remain.
+ *
+ * @param {Uint32Array} mat - packed matrix, two 32-bit words per row
+ * @param {number} vecLo - low 32 bits of the vector
+ * @param {number} vecHi - high 32 bits of the vector
+ * @returns {number[]} [lo32, hi32] of the product, both as unsigned values
+ */
+function gf2MatrixTimes(mat, vecLo, vecHi) {
+    let accLo = 0;
+    let accHi = 0;
+    let restLo = vecLo;
+    let restHi = vecHi;
+    for (let row = 0; (restLo | restHi) !== 0; row += 1) {
+        if ((restLo & 1) !== 0) {
+            accLo ^= mat[2 * row];
+            accHi ^= mat[2 * row + 1];
+        }
+        // 64-bit logical right shift by one: the lowest bit of the high half
+        // becomes the top bit of the low half.
+        restLo = (restLo >>> 1) | ((restHi & 1) << 31);
+        restHi >>>= 1;
+    }
+    return [accLo >>> 0, accHi >>> 0];
+}
+
+/**
+ * Write the square of `mat` into `square`: each row of the result is `mat`
+ * applied to the matching row of `mat`.
+ *
+ * @param {Uint32Array} square - destination buffer, overwritten in place
+ * @param {Uint32Array} mat - packed matrix to square (two words per row)
+ * @param {number} dim - number of rows to compute
+ * @returns {undefined}
+ */
+function gf2MatrixSquare(square, mat, dim) {
+    for (let row = 0; row < dim; row += 1) {
+        const loIdx = 2 * row;
+        const hiIdx = loIdx + 1;
+        const [prodLo, prodHi] = gf2MatrixTimes(mat, mat[loIdx], mat[hiIdx]);
+        // Writing into the caller's buffer is deliberate: the callers own
+        // `square` and pass it in precisely so that it gets filled here.
+        /* eslint-disable no-param-reassign */
+        square[loIdx] = prodLo;
+        square[hiIdx] = prodHi;
+        /* eslint-enable no-param-reassign */
+    }
+}
+
+// Cache of lazily-grown zero-byte operator chains, filled in and looked up by
+// getOrInitChain. For a cached `state`, state.byteOps[j] is the GF(2) operator
+// for prepending 2^j zero bytes (i.e. M^(8 * 2^j)). Building this chain is the
+// dominant cost of combineCrcPair and depends only on the CRC parameters, so
+// we cache it across calls. Nothing in this file ever evicts an entry.
+const chainCache = new Map();
+
+/**
+ * Return the cached operator-chain state for a CRC variant, building the
+ * one-zero-byte operator on first use.
+ *
+ * The cache key combines `dim` and `polyReversed`, so a polynomial value
+ * requested with a different width never reuses matrices that were sized for
+ * another width.
+ *
+ * @param {bigint} polyReversed - bit-reversed polynomial
+ * @param {number} dim - CRC width in bits (32 or 64)
+ * @returns {{dim: number, byteOps: Uint32Array[]}} shared, mutable chain
+ *   state; byteOps[0] is the operator for one zero byte
+ */
+function getOrInitChain(polyReversed, dim) {
+    const cacheKey = `${dim}:${polyReversed}`;
+    let state = chainCache.get(cacheKey);
+    if (state !== undefined) {
+        return state;
+    }
+
+    // M^1: one-zero-bit operator. Column 0 is the polynomial; column k>0 is
+    // 1 << (k - 1) — what right-shifting a state with bit k set produces.
+    const m1 = new Uint32Array(2 * dim);
+    m1[0] = Number(polyReversed & 0xffffffffn);
+    m1[1] = Number((polyReversed >> 32n) & 0xffffffffn);
+    for (let k = 1; k < dim; k += 1) {
+        const bit = k - 1;
+        if (bit < 32) {
+            m1[2 * k] = (1 << bit) >>> 0;
+        } else {
+            m1[2 * k + 1] = (1 << (bit - 32)) >>> 0;
+        }
+    }
+
+    // Three squarings take the one-bit operator to the eight-bit one:
+    // M^1 -> M^2 -> M^4 -> M^8.
+    const m2 = new Uint32Array(2 * dim);
+    gf2MatrixSquare(m2, m1, dim);
+    const m4 = new Uint32Array(2 * dim);
+    gf2MatrixSquare(m4, m2, dim);
+    const m8 = new Uint32Array(2 * dim); // operator for 1 zero byte
+    gf2MatrixSquare(m8, m4, dim);
+
+    state = { dim, byteOps: [m8] };
+    chainCache.set(cacheKey, state);
+    return state;
+}
+
+/**
+ * Grow `state.byteOps` until index `j` exists. Each new entry is the square
+ * of the previous one, so entry j is the operator for 2^j zero bytes.
+ *
+ * @param {{dim: number, byteOps: Uint32Array[]}} state - chain state,
+ *   extended in place
+ * @param {number} j - highest index that must be available afterwards
+ * @returns {undefined}
+ */
+function ensureChainLen(state, j) {
+    const { byteOps, dim } = state;
+    for (let have = byteOps.length; have <= j; have += 1) {
+        const last = byteOps[have - 1];
+        const squared = new Uint32Array(last.length);
+        gf2MatrixSquare(squared, last, dim);
+        byteOps.push(squared);
+    }
+}
+
+/**
+ * Combine two CRCs of adjacent byte chunks.
+ *
+ *   combineCrcPair(crc(a), crc(b), len(b), polyReversed, dim) === crc(a ‖ b)
+ *
+ * Works for any right-shift CRC of width `dim` (32 or 64) given its
+ * bit-reversed polynomial. The squaring chain for the CRC variant is cached
+ * across calls, so the per-call cost is just popcount(len2) cheap operator
+ * applications plus the BigInt boundary conversions.
+ *
+ * @param {bigint} crc1 - CRC of the first chunk
+ * @param {bigint} crc2 - CRC of the second chunk
+ * @param {bigint} len2 - byte length of the second chunk, must be >= 0
+ * @param {bigint} polyReversed - bit-reversed polynomial
+ * @param {number} dim - CRC width in bits (32 or 64)
+ * @returns {bigint} CRC of the concatenated chunk, masked to `dim` bits
+ * @throws {RangeError} if `len2` is negative
+ */
+function combineCrcPair(crc1, crc2, len2, polyReversed, dim) {
+    const mask = (1n << BigInt(dim)) - 1n;
+    // A negative BigInt never reaches 0n under `>>=` (-1n >> 1n === -1n), so
+    // without this guard the bit-walk below would loop forever while growing
+    // the cached operator chain without bound.
+    if (len2 < 0n) {
+        throw new RangeError(`combineCrcPair: len2 must be non-negative, got ${len2}`);
+    }
+    if (len2 === 0n) {
+        return crc1 & mask;
+    }
+
+    const state = getOrInitChain(polyReversed, dim);
+
+    // Split the running CRC into two unsigned 32-bit halves so that the
+    // matrix code can stay on plain numbers.
+    let cLo = Number(crc1 & 0xffffffffn);
+    let cHi = Number((crc1 >> 32n) & 0xffffffffn);
+
+    // Walk the bits of len2 (each bit represents a power-of-two number of
+    // zero bytes to prepend); apply the cached operator for every set bit.
+    let n = len2;
+    let j = 0;
+    while (n !== 0n) {
+        if ((n & 1n) === 1n) {
+            ensureChainLen(state, j);
+            const r = gf2MatrixTimes(state.byteOps[j], cLo, cHi);
+            cLo = r[0];
+            cHi = r[1];
+        }
+        n >>= 1n;
+        j += 1;
+    }
+
+    // Fold in the CRC of the second chunk.
+    const c2Lo = Number(crc2 & 0xffffffffn);
+    const c2Hi = Number((crc2 >> 32n) & 0xffffffffn);
+    cLo = (cLo ^ c2Lo) >>> 0;
+    cHi = (cHi ^ c2Hi) >>> 0;
+
+    return ((BigInt(cHi) << 32n) | BigInt(cLo)) & mask;
+}
+
+/**
+ * Decode a base64 string and read the decoded bytes as one big-endian
+ * unsigned integer.
+ *
+ * @param {string} b64 - base64-encoded bytes
+ * @returns {bigint} integer value of the decoded bytes
+ */
+function base64ToBigInt(b64) {
+    const hexDigits = Buffer.from(b64, 'base64').toString('hex');
+    return BigInt(`0x${hexDigits}`);
+}
+
+/**
+ * Encode an integer as base64 of its big-endian bytes, left-padded with
+ * zeros to at least `dim` bits.
+ *
+ * @param {bigint} value - integer to encode
+ * @param {number} dim - minimum width in bits (4 bits per hex digit)
+ * @returns {string} base64 encoding of the zero-padded value
+ */
+function bigIntToBase64(value, dim) {
+    const minHexDigits = dim / 4;
+    const hex = value.toString(16).padStart(minHexDigits, '0');
+    const bytes = Buffer.from(hex, 'hex');
+    return bytes.toString('base64');
+}
+
+/**
+ * Combine N per-part CRCs into the full-object CRC, base64-encoded.
+ *
+ * An empty `parts` array yields the encoded zero CRC. Zero is the identity
+ * of the combine step: combining 0 with any (crc, length) gives back that
+ * crc, so it is the natural value for "no parts".
+ *
+ * @param {Array<{value: string, length: number}>} parts - per-part data in
+ *   part order; `value` is the base64-encoded per-part CRC, `length` is the
+ *   byte length of that part
+ * @param {bigint} polyReversed - bit-reversed polynomial
+ * @param {number} dim - CRC width in bits (32 or 64)
+ * @returns {string} base64-encoded combined CRC
+ */
+function combinePartCrcs(parts, polyReversed, dim) {
+    if (parts.length === 0) {
+        return bigIntToBase64(0n, dim);
+    }
+
+    // The first part seeds the accumulator; its length is not needed because
+    // nothing precedes it.
+    let combined = base64ToBigInt(parts[0].value);
+    for (let i = 1; i < parts.length; i += 1) {
+        const { value, length } = parts[i];
+        combined = combineCrcPair(
+            combined,
+            base64ToBigInt(value),
+            BigInt(length),
+            polyReversed,
+            dim,
+        );
+    }
+    return bigIntToBase64(combined, dim);
+}
+
+module.exports = { combinePartCrcs, combineCrcPair };
@@ -4,6 +4,7 @@ const { Crc32c } = require('@aws-crypto/crc32c');
 const { CrtCrc64Nvme } = require('@aws-sdk/crc64-nvme-crt');
 const { errors: ArsenalErrors, errorInstances } = require('arsenal');
 const { config } = require('../../../Config');
+const { combinePartCrcs } = require('./crcCombine');
 
 const defaultChecksumData = Object.freeze({ algorithm: 'crc64nvme', isTrailer: false, expected: undefined });
 
@@ -489,6 +490,85 @@ function getChecksumDataFromMPUHeaders(headers) {
     return { algorithm: algo, type: defaultChecksumType[algo], isDefault: false };
 }
 
+// =============================================================================
+// MPU final-object checksum computation
+// =============================================================================
+//
+// CompleteMultipartUpload composes a final-object checksum from the per-part
+// checksums recorded at UploadPart time. AWS defines two modes:
+//
+//   COMPOSITE   : finalChecksum = base64(algo(decode(c1) || ... || decode(cN)))
+//                 + "-N" suffix, where N is the number of parts.
+//                 Supported on CRC32, CRC32C, SHA1, SHA256.
+//
+//   FULL_OBJECT : finalChecksum is the CRC of the entire object's bytes,
+//                 reconstructed by combining the per-part CRCs via CRC
+//                 linearization. CRC-only: CRC32, CRC32C,
+//                 CRC64NVME.
+
+// CRC parameters for FULL_OBJECT combination, keyed by lowercase algorithm
+// name: `polyReversed` is the bit-reversed polynomial and `dim` the CRC width
+// in bits, in the form combinePartCrcs expects. These are the polynomials of
+// the right-shift CRC implementations provided by the @aws-crypto/* and
+// @aws-sdk/crc64-nvme-crt packages.
+const FULL_OBJECT_POLYS = Object.freeze({
+    crc32: { polyReversed: 0xedb88320n, dim: 32 },
+    crc32c: { polyReversed: 0x82f63b78n, dim: 32 },
+    crc64nvme: { polyReversed: 0x9a6c9329ac4bc9b5n, dim: 64 },
+});
+
+// Lowercase names of the algorithms accepted for COMPOSITE MPU checksums.
+// Their digest is synchronous, and they are the full set AWS allows for
+// COMPOSITE MPUs. crc64nvme is excluded because (a) AWS does not allow
+// COMPOSITE for CRC64NVME and (b) its CRT-backed digest is async.
+const COMPOSITE_ALGOS = new Set(['crc32', 'crc32c', 'sha1', 'sha256']);
+
+/**
+ * Build the COMPOSITE final-object checksum for a CompleteMultipartUpload.
+ *
+ * The per-part checksums are base64-decoded, concatenated in part order and
+ * digested with `algorithm`; the result is suffixed with "-N", N being the
+ * number of parts:
+ *
+ *   final = base64(algo(decode(c1) || decode(c2) || ... || decode(cN))) + "-N"
+ *
+ * Accepted algorithms: crc32, crc32c, sha1, sha256. crc64nvme is rejected —
+ * AWS does not allow COMPOSITE for CRC64NVME.
+ *
+ * @param {string} algorithm - lowercase algorithm name
+ * @param {string[]} partChecksumsBase64 - per-part checksums in part order,
+ *   each base64-encoded (the format stored on MPU part metadata)
+ * @returns {{ checksum: string, error: null }
+ *   | { checksum: null, error: { code: string, details: object } }}
+ */
+function computeCompositeMPUChecksum(algorithm, partChecksumsBase64) {
+    if (COMPOSITE_ALGOS.has(algorithm)) {
+        const decodedParts = partChecksumsBase64.map(part => Buffer.from(part, 'base64'));
+        const digest = algorithms[algorithm].digest(Buffer.concat(decodedParts));
+        const checksum = `${digest}-${partChecksumsBase64.length}`;
+        return { checksum, error: null };
+    }
+
+    const error = { code: ChecksumError.MPUAlgoNotSupported, details: { algorithm } };
+    return { checksum: null, error };
+}
+
+/**
+ * Compute the FULL_OBJECT final-object checksum for a CompleteMultipartUpload.
+ *
+ * Returns the CRC of the assembled object's bytes, derived purely from the
+ * per-part CRCs and part lengths via CRC linearization.
+ *
+ * Supported algorithms: crc32, crc32c, crc64nvme. Any other name, including
+ * names that only exist on Object.prototype such as 'constructor', yields an
+ * MPUAlgoNotSupported error result.
+ *
+ * @param {string} algorithm - lowercase algorithm name
+ * @param {Array<{value: string, length: number}>} parts - per-part data in
+ *   part order; `value` is the base64-encoded per-part CRC, `length` is the
+ *   byte length of that part
+ * @returns {{ checksum: string, error: null }
+ *   | { checksum: null, error: { code: string, details: object } }}
+ */
+function computeFullObjectMPUChecksum(algorithm, parts) {
+    // Own-property check: a bare FULL_OBJECT_POLYS[algorithm] lookup also
+    // resolves inherited keys ('constructor', 'toString', ...), which are
+    // truthy and would reach combinePartCrcs with undefined parameters.
+    if (!Object.prototype.hasOwnProperty.call(FULL_OBJECT_POLYS, algorithm)) {
+        return { checksum: null, error: { code: ChecksumError.MPUAlgoNotSupported, details: { algorithm } } };
+    }
+    const { polyReversed, dim } = FULL_OBJECT_POLYS[algorithm];
+    return { checksum: combinePartCrcs(parts, polyReversed, dim), error: null };
+}
+
 module.exports = {
     ChecksumError,
     defaultChecksumData,
@@ -499,4 +579,6 @@ module.exports = {
     algorithms,
     checksumedMethods,
     getChecksumDataFromMPUHeaders,
+    computeCompositeMPUChecksum,
+    computeFullObjectMPUChecksum,
 };