From aa383638bac40d0380d4c9b3ee40f931c3192e1e Mon Sep 17 00:00:00 2001 From: Leif Henriksen Date: Fri, 6 Mar 2026 18:51:31 +0100 Subject: [PATCH] tmp2 --- .../apiUtils/integrity/validateChecksums.js | 123 ++++++++++++++++-- .../apiUtils/object/createAndStoreObject.js | 6 + lib/api/apiUtils/object/prepareStream.js | 84 ++++++++++++ lib/api/apiUtils/object/storeObject.js | 29 ++++- .../object/validateChecksumHeaders.js | 3 +- lib/auth/streamingV4/ChecksumTransform.js | 75 +++++++++++ lib/auth/streamingV4/V4Transform.js | 2 + .../streamingV4/trailingChecksumTransform.js | 44 ++++++- lib/routes/veeam/utils.js | 1 + lib/services.js | 10 ++ 10 files changed, 357 insertions(+), 20 deletions(-) create mode 100644 lib/auth/streamingV4/ChecksumTransform.js diff --git a/lib/api/apiUtils/integrity/validateChecksums.js b/lib/api/apiUtils/integrity/validateChecksums.js index 9643a71258..c2147b1e95 100644 --- a/lib/api/apiUtils/integrity/validateChecksums.js +++ b/lib/api/apiUtils/integrity/validateChecksums.js @@ -56,35 +56,51 @@ const algorithms = Object.freeze({ const result = await crc.digest(); return Buffer.from(result).toString('base64'); }, + digestFromHash: async hash => { + const result = await hash.digest(); + return Buffer.from(result).toString('base64'); + }, isValidDigest: expected => typeof expected === 'string' && expected.length === 12 && base64Regex.test(expected), + createHash: () => new CrtCrc64Nvme() }, crc32: { digest: data => { const input = Buffer.isBuffer(data) ? data : Buffer.from(data); return uint32ToBase64(new Crc32().update(input).digest() >>> 0); // >>> 0 coerce number to uint32 }, + digestFromHash: hash => { + const result = hash.digest(); + return uint32ToBase64(result >>> 0); + }, isValidDigest: expected => typeof expected === 'string' && expected.length === 8 && base64Regex.test(expected), + createHash: () => new Crc32() }, crc32c: { digest: data => { const input = Buffer.isBuffer(data) ? 
data : Buffer.from(data); return uint32ToBase64(new Crc32c().update(input).digest() >>> 0); // >>> 0 coerce number to uint32 }, + digestFromHash: hash => uint32ToBase64(hash.digest() >>> 0), isValidDigest: expected => typeof expected === 'string' && expected.length === 8 && base64Regex.test(expected), + createHash: () => new Crc32c() }, sha1: { digest: data => { const input = Buffer.isBuffer(data) ? data : Buffer.from(data); return crypto.createHash('sha1').update(input).digest('base64'); }, + digestFromHash: hash => hash.digest('base64'), isValidDigest: expected => typeof expected === 'string' && expected.length === 28 && base64Regex.test(expected), + createHash: () => crypto.createHash('sha1') }, sha256: { digest: data => { const input = Buffer.isBuffer(data) ? data : Buffer.from(data); return crypto.createHash('sha256').update(input).digest('base64'); }, + digestFromHash: hash => hash.digest('base64'), isValidDigest: expected => typeof expected === 'string' && expected.length === 44 && base64Regex.test(expected), + createHash: () => crypto.createHash('sha256') } }); @@ -141,6 +157,86 @@ async function validateXAmzChecksums(headers, body) { return null; } +function getChecksumDataFromHeaders(headers) { + const checkSdk = algo => { + if (!('x-amz-sdk-checksum-algorithm' in headers)) { + return null; + } + + const sdkAlgo = headers['x-amz-sdk-checksum-algorithm']; + if (typeof sdkAlgo !== 'string') { + return { error: ChecksumError.AlgoNotSupportedSDK, details: { algorithm: sdkAlgo } }; + } + + const sdkLowerAlgo = sdkAlgo.toLowerCase(); + if (!(sdkLowerAlgo in algorithms)) { + return { error: ChecksumError.AlgoNotSupportedSDK, details: { algorithm: sdkAlgo } }; + } + + // If AWS there is a mismatch, AWS returns the same error as if the algo was invalid. 
+ if (sdkLowerAlgo !== algo) { + return { error: ChecksumError.AlgoNotSupportedSDK, details: { algorithm: sdkAlgo } }; + } + + return null; + }; + + const checksumHeaders = Object.keys(headers).filter(header => header.startsWith('x-amz-checksum-')); + const xAmzChecksumCnt = checksumHeaders.length; + if (xAmzChecksumCnt > 1) { + return { error: ChecksumError.MultipleChecksumTypes, details: { algorithms: checksumHeaders } }; + } + + if (xAmzChecksumCnt === 0 && !('x-amz-trailer' in headers) && 'x-amz-sdk-checksum-algorithm' in headers) { + return { + error: ChecksumError.MissingCorresponding, + details: { expected: headers['x-amz-sdk-checksum-algorithm'] } + }; + } + + if ('x-amz-trailer' in headers) { + const trailer = headers['x-amz-trailer']; + if (!trailer.startsWith('x-amz-checksum-')) { + return { error: 'invalid x-amz-trailer' }; + } + + const trailerAlgo = trailer.slice('x-amz-checksum-'.length); + if (!(trailerAlgo in algorithms)) { + return { error: ChecksumError.AlgoNotSupported, details: { algorithm: trailerAlgo } }; + } + + const err = checkSdk(trailerAlgo); + if (err) { + return err; + } + + return { algorithm: trailerAlgo, isTrailer: true, expected: undefined }; + } + + if (xAmzChecksumCnt === 0) { + // There was no x-amz-checksum- or x-amz-trailer + return { algorithm: 'crc64nvme', isTrailer: false, expected: undefined }; + } + + // With no x-amz-sdk-checksum-algorithm, we expect exactly one x-amz-checksum-[crc64nvme, crc32, crc32c, sha1, sha256] header. 
+ const algo = checksumHeaders[0].slice('x-amz-checksum-'.length); + if (!(algo in algorithms)) { + return { error: ChecksumError.AlgoNotSupported, details: { algorithm: algo } }; + } + + const expected = headers[`x-amz-checksum-${algo}`]; + if (!algorithms[algo].isValidDigest(expected)) { + return { error: ChecksumError.MalformedChecksum, details: { algorithm: algo, expected } }; + } + + const err = checkSdk(algo); + if (err) { + return err; + } + + return { algorithm: algo, isTrailer: false, expected }; +} + /** * validateChecksumsNoChunking - Validate the checksums of a request. * @param {object} headers - http headers @@ -183,16 +279,7 @@ async function validateChecksumsNoChunking(headers, body) { return err; } -async function defaultValidationFunc(request, body, log) { - const err = await validateChecksumsNoChunking(request.headers, body); - if (!err) { - return null; - } - - if (err.error !== ChecksumError.MissingChecksum) { - log.debug('failed checksum validation', { method: request.apiMethod }, err); - } - +function arsenalErrorFromChecksumError(err) { switch (err.error) { case ChecksumError.MissingChecksum: return null; @@ -230,6 +317,19 @@ async function defaultValidationFunc(request, body, log) { } } +async function defaultValidationFunc(request, body, log) { + const err = await validateChecksumsNoChunking(request.headers, body); + if (!err) { + return null; + } + + if (err.error !== ChecksumError.MissingChecksum) { + log.debug('failed checksum validation', { method: request.apiMethod }, err); + } + + return arsenalErrorFromChecksumError(err); +} + /** * validateMethodChecksumsNoChunking - Validate the checksums of a request. 
* @param {object} request - http request @@ -253,5 +353,8 @@ module.exports = { ChecksumError, validateChecksumsNoChunking, validateMethodChecksumNoChunking, + getChecksumDataFromHeaders, + arsenalErrorFromChecksumError, + algorithms, checksumedMethods, }; diff --git a/lib/api/apiUtils/object/createAndStoreObject.js b/lib/api/apiUtils/object/createAndStoreObject.js index 542b9f5296..f5a6d0b8a6 100644 --- a/lib/api/apiUtils/object/createAndStoreObject.js +++ b/lib/api/apiUtils/object/createAndStoreObject.js @@ -212,10 +212,14 @@ function createAndStoreObject(bucketName, bucketMD, objectKey, objMD, authInfo, const mdOnlyHeader = request.headers['x-amz-meta-mdonly']; const mdOnlySize = request.headers['x-amz-meta-size']; + // console.log('============== createAndStoreObject'); + return async.waterfall([ function storeData(next) { if (size === 0) { + // console.log('============== size 0'); if (!dontSkipBackend[locationType]) { + // console.log('============== skip'); metadataStoreParams.contentMD5 = constants.emptyFileMd5; return next(null, null, null); } @@ -247,6 +251,8 @@ function createAndStoreObject(bucketName, bucketMD, objectKey, objMD, authInfo, } } + // console.log('============== before dataStore'); + return dataStore(objectKeyContext, cipherBundle, request, size, streamingV4Params, backendInfo, log, next); }, diff --git a/lib/api/apiUtils/object/prepareStream.js b/lib/api/apiUtils/object/prepareStream.js index be3f6c69ca..afec077ea4 100644 --- a/lib/api/apiUtils/object/prepareStream.js +++ b/lib/api/apiUtils/object/prepareStream.js @@ -1,5 +1,10 @@ const V4Transform = require('../../../auth/streamingV4/V4Transform'); const TrailingChecksumTransform = require('../../../auth/streamingV4/trailingChecksumTransform'); +const ChecksumTransform = require('../../../auth/streamingV4/ChecksumTransform'); +const { + getChecksumDataFromHeaders, + arsenalErrorFromChecksumError, +} = require('../../apiUtils/integrity/validateChecksums'); /** * Prepares the stream if the 
chunks are sent in a v4 Auth request @@ -44,7 +49,86 @@ function stripTrailingChecksumStream(stream, log, errCb) { return trailingChecksumTransform; } +function prepareStream2(request, streamingV4Params, log, errCb) { + const xAmzContentSHA256 = request.headers['x-amz-content-sha256']; + + const checksumAlgo = getChecksumDataFromHeaders(request.headers); + if (checksumAlgo.error) { + log.debug('invalid checksum headers', checksumAlgo); + return errCb(arsenalErrorFromChecksumError(checksumAlgo)); // FIXME sometimes we use CB sometimes we use null + } + + let stream = request; + switch (xAmzContentSHA256) { + case 'STREAMING-AWS4-HMAC-SHA256-PAYLOAD': { + if (typeof streamingV4Params !== 'object') { + // this might happen if the user provided a valid V2 + // Authentication header, while the chunked upload method + // requires V4: in such case we don't get any V4 params + // and we should return an error to the client. + log.error('missing v4 streaming params for chunked upload', { + method: 'prepareStream2', + streamingV4ParamsType: typeof streamingV4Params, + streamingV4Params, + }); + return null; // FIXME: use CB + } + const v4Transform = new V4Transform(streamingV4Params, log, errCb); + request.pipe(v4Transform); + v4Transform.headers = request.headers; + stream = v4Transform; + + const checksumedStream = new ChecksumTransform( + checksumAlgo.algorithm, + checksumAlgo.expected, + checksumAlgo.isTrailer, + ); + stream.pipe(checksumedStream); + return checksumedStream; + } + case 'STREAMING-UNSIGNED-PAYLOAD-TRAILER': { + // console.log(checksumAlgo); + const trailingChecksumTransform = new TrailingChecksumTransform(log); + trailingChecksumTransform.on('error', errCb); + request.pipe(trailingChecksumTransform); + trailingChecksumTransform.headers = request.headers; + stream = trailingChecksumTransform; + + const checksumedStream = new ChecksumTransform( + checksumAlgo.algorithm, + checksumAlgo.expected, + checksumAlgo.isTrailer, + ); + stream.on('trailer', (name, 
value) => { + checksumedStream.setExpectedChecksum(name, value); + }); + stream.pipe(checksumedStream); + return checksumedStream; + } + case 'UNSIGNED-PAYLOAD': { + const checksumedStream = new ChecksumTransform( + checksumAlgo.algorithm, + checksumAlgo.expected, + checksumAlgo.isTrailer, + ); + stream.pipe(checksumedStream); + return checksumedStream; + } + default: { + // console.log(checksumAlgo); + const checksumedStream = new ChecksumTransform( + checksumAlgo.algorithm, + checksumAlgo.expected, + checksumAlgo.isTrailer, + ); + stream.pipe(checksumedStream); + return checksumedStream; + } + } +} + module.exports = { prepareStream, + prepareStream2, stripTrailingChecksumStream, }; diff --git a/lib/api/apiUtils/object/storeObject.js b/lib/api/apiUtils/object/storeObject.js index 8beea03ecb..7efd2644bf 100644 --- a/lib/api/apiUtils/object/storeObject.js +++ b/lib/api/apiUtils/object/storeObject.js @@ -1,7 +1,9 @@ const { errors, jsutil } = require('arsenal'); const { data } = require('../../../data/wrapper'); -const { prepareStream, stripTrailingChecksumStream } = require('./prepareStream'); +// const { prepareStream, prepareStream2, stripTrailingChecksumStream } = require('./prepareStream'); +const { prepareStream2 } = require('./prepareStream'); +// const ChecksumTransform = require('../../../auth/streamingV4/ChecksumTransform'); /** * Check that `hashedStream.completedHash` matches header `stream.contentMD5` @@ -58,13 +60,19 @@ function checkHashMatchMD5(stream, hashedStream, dataRetrievalInfo, log, cb) { function dataStore(objectContext, cipherBundle, stream, size, streamingV4Params, backendInfo, log, cb) { const cbOnce = jsutil.once(cb); - const dataStreamTmp = prepareStream(stream, streamingV4Params, log, cbOnce); - if (!dataStreamTmp) { - return process.nextTick(() => cb(errors.InvalidArgument)); + // const dataStreamTmp = prepareStream(stream, streamingV4Params, log, cbOnce); + // if (!dataStreamTmp) { + // return process.nextTick(() => 
cb(errors.InvalidArgument)); + // } + // const dataStream = stripTrailingChecksumStream(dataStreamTmp, log, cbOnce); + // console.log('...................................'); + + const checksumedStream = prepareStream2(stream, streamingV4Params, log, cbOnce); + if (!checksumedStream) { + return process.nextTick(() => cbOnce(errors.InvalidArgument)); } - const dataStream = stripTrailingChecksumStream(dataStreamTmp, log, cbOnce); return data.put( - cipherBundle, dataStream, size, objectContext, backendInfo, log, + cipherBundle, checksumedStream, size, objectContext, backendInfo, log, (err, dataRetrievalInfo, hashedStream) => { if (err) { log.error('error in datastore', { @@ -81,6 +89,15 @@ function dataStore(objectContext, cipherBundle, stream, size, log.trace('dataStore: backend stored key', { dataRetrievalInfo, }); + + // console.log('================', + // checksumedStream.algoName, checksumedStream.digest, checksumedStream.expectedDigest); + const valid = checksumedStream.validateChecksum(); + if (valid !== null) { + // console.log(valid); + return cbOnce(errors.BadDigest); + } + return checkHashMatchMD5(stream, hashedStream, dataRetrievalInfo, log, cbOnce); }); diff --git a/lib/api/apiUtils/object/validateChecksumHeaders.js b/lib/api/apiUtils/object/validateChecksumHeaders.js index d2a50395a3..0d5fbd050c 100644 --- a/lib/api/apiUtils/object/validateChecksumHeaders.js +++ b/lib/api/apiUtils/object/validateChecksumHeaders.js @@ -2,10 +2,11 @@ const { errorInstances } = require('arsenal'); const { unsupportedSignatureChecksums, supportedSignatureChecksums } = require('../../../../constants'); +// FIXME: merge this function validateChecksumHeaders(headers) { // If the x-amz-trailer header is present the request is using one of the // trailing checksum algorithms, which are not supported. - if (headers['x-amz-trailer'] !== undefined && + if (headers['x-amz-trailer'] !== undefined && // Why do we need this check if we have unsupportedSignatureChecksums? 
headers['x-amz-content-sha256'] !== 'STREAMING-UNSIGNED-PAYLOAD-TRAILER') { return errorInstances.BadRequest.customizeDescription('signed trailing checksum is not supported'); } diff --git a/lib/auth/streamingV4/ChecksumTransform.js b/lib/auth/streamingV4/ChecksumTransform.js new file mode 100644 index 0000000000..0f20c8f424 --- /dev/null +++ b/lib/auth/streamingV4/ChecksumTransform.js @@ -0,0 +1,75 @@ +const { algorithms, ChecksumError } = require('../../api/apiUtils/integrity/validateChecksums'); +const { Transform } = require('stream'); + +class ChecksumTransform extends Transform { + constructor(algoName, expectedDigest, isTrailer) { + super({}); + this.algoName = algoName; + this.algo = algorithms[algoName]; + this.hash = this.algo.createHash(); + this.digest = undefined; + this.expectedDigest = expectedDigest; + this.isTrailer = isTrailer; + this.trailerChecksumName = undefined; + this.trailerChecksumValue = undefined; + } + + setExpectedChecksum(name, value) { + this.trailerChecksumName = name; + this.trailerChecksumValue = value; + } + + validateChecksum() { + if (this.isTrailer) { + // FIXME: Handle trailer is missing + + const expected = this.trailerChecksumValue; + if (!this.algo.isValidDigest(expected)) { + return { error: ChecksumError.MalformedChecksum, details: { algorithm: this.algoName, expected } }; + } + + // Trailer mismatch + if (this.trailerChecksumName !== `x-amz-checksum-${this.algoName}`) { + return { error: ChecksumError.AlgoNotSupportedSDK, details: { algorithm: this.algoName } }; + } + + if (this.digest !== this.trailerChecksumValue) { + return { + error: ChecksumError.XAmzMismatch, + details: { algorithm: this.algoName, calculated: this.digest, expected }, + }; + } + + return null; + } + + if (this.expectedDigest) { + if (this.digest !== this.expectedDigest) { + return { + error: ChecksumError.XAmzMismatch, + details: { algorithm: this.algoName, calculated: this.digest, expected: this.expectedDigest }, + }; + } + } + + return null; + } 
+ + _flush(callback) { + Promise.resolve(this.algo.digestFromHash(this.hash)) + .then(digest => { + this.digest = digest; + return callback(); + }) + .catch(callback); + } + + _transform(chunk, encoding, callback) { + const input = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk); + // console.log("chunk: '%s'", input); + this.hash.update(input, encoding); + callback(null, input, encoding); + } +} + +module.exports = ChecksumTransform; diff --git a/lib/auth/streamingV4/V4Transform.js b/lib/auth/streamingV4/V4Transform.js index e18d20e14f..6e4495b4be 100644 --- a/lib/auth/streamingV4/V4Transform.js +++ b/lib/auth/streamingV4/V4Transform.js @@ -6,6 +6,8 @@ const { errors } = require('arsenal'); const vault = require('../vault'); const constructChunkStringToSign = require('./constructChunkStringToSign'); +// Do we use this one or vaults? + /** * This class is designed to handle the chunks sent in a streaming * v4 Auth request diff --git a/lib/auth/streamingV4/trailingChecksumTransform.js b/lib/auth/streamingV4/trailingChecksumTransform.js index 48870c80e7..f8dadd7a30 100644 --- a/lib/auth/streamingV4/trailingChecksumTransform.js +++ b/lib/auth/streamingV4/trailingChecksumTransform.js @@ -20,6 +20,10 @@ class TrailingChecksumTransform extends Transform { this.bytesToDiscard = 0; // when trailing \r\n are present, we discard them but they can be in different chunks this.bytesToRead = 0; // when a chunk is advertised, the size is put here and we forward all bytes this.streamClosed = false; + this.readingTrailer = false; + this.trailerBuffer = Buffer.alloc(0); + this.trailerName = null; + this.trailerValue = null; } /** @@ -66,6 +70,40 @@ class TrailingChecksumTransform extends Transform { continue; } + // after the 0-size chunk, read the trailer line (e.g. 
"x-amz-checksum-crc32:YABb/g==") + if (this.readingTrailer) { + const combined = Buffer.concat([this.trailerBuffer, chunk]); + const lineBreakIndex = combined.indexOf('\r\n'); + if (lineBreakIndex === -1) { + if (combined.byteLength > 1024) { + this.log.error('trailer line too long'); + return callback(errors.InvalidArgument); + } + this.trailerBuffer = combined; + return callback(); + } + const fullTrailer = combined.subarray(0, lineBreakIndex); + this.trailerBuffer = Buffer.alloc(0); + let trailerLine = fullTrailer.toString(); + // strip optional trailing \n before \r\n + if (trailerLine.endsWith('\n')) { + trailerLine = trailerLine.slice(0, -1); + } + const colonIndex = trailerLine.indexOf(':'); + if (colonIndex > 0) { + this.trailerName = trailerLine.slice(0, colonIndex); + this.trailerValue = trailerLine.slice(colonIndex + 1); + this.emit('trailer', this.trailerName, this.trailerValue); + } else { + this.log.error('invalid trailer line format', { trailerLine }); + // TODO: test AWS + return callback(errors.InvalidArgument); + } + this.readingTrailer = false; + this.streamClosed = true; + break; + } + // we are now looking for the chunk size field // no need to look further than 10 bytes since the field cannot be bigger: the max // chunk size is 5GB (see constants.maximumAllowedPartSize) @@ -100,9 +138,9 @@ class TrailingChecksumTransform extends Transform { } this.chunkSizeBuffer = Buffer.alloc(0); if (dataSize === 0) { - // TODO: check if the checksum is correct (S3C-9732) - // last chunk, no more data to read, the stream is closed - this.streamClosed = true; + // last chunk, no more data to read; enter trailer-reading mode + // bytesToDiscard = 2 below will consume the \r\n after "0" + this.readingTrailer = true; } if (dataSize > maximumAllowedPartSize) { this.log.error('chunk size too big', { dataSize }); diff --git a/lib/routes/veeam/utils.js b/lib/routes/veeam/utils.js index 5ab082c6d5..8ace03ba9c 100644 --- a/lib/routes/veeam/utils.js +++ 
b/lib/routes/veeam/utils.js @@ -35,6 +35,7 @@ function receiveData(request, log, callback) { } const value = Buffer.alloc(parsedContentLength); const cbOnce = jsutil.once(callback); + // TODO const dataStream = prepareStream(request, request.streamingV4Params, log, cbOnce); let cursor = 0; let exceeded = false; diff --git a/lib/services.js b/lib/services.js index cbf8740170..8bca472993 100644 --- a/lib/services.js +++ b/lib/services.js @@ -682,6 +682,16 @@ const services = { return metadata.getObjectMD(mpuBucketName, mpuOverviewKey, {}, log, (err, res) => { if (err) { + if (err.is.NoSuchKey) { + // The overview key no longer exists, meaning completeMultipartUpload + // already ran to completion and cleaned up the MPU bucket. + // This is a race condition: objectPutPart checked for old + // part locations after completeMultipartUpload deleted the overview. + // Returning true (complete in progress) prevents objectPutPart + // from deleting part data that may have already been committed + // as the final object. + return cb(null, true); + } log.error('error getting the overview object from mpu bucket', { error: err, method: 'services.isCompleteMPUInProgress',