-
Notifications
You must be signed in to change notification settings - Fork 254
CLDSRV-863: add checksums to PutObject and UploadPart #6094
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: development/9.4
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -56,35 +56,51 @@ const algorithms = Object.freeze({ | |
| const result = await crc.digest(); | ||
| return Buffer.from(result).toString('base64'); | ||
| }, | ||
| digestFromHash: async hash => { | ||
| const result = await hash.digest(); | ||
| return Buffer.from(result).toString('base64'); | ||
| }, | ||
| isValidDigest: expected => typeof expected === 'string' && expected.length === 12 && base64Regex.test(expected), | ||
| createHash: () => new CrtCrc64Nvme() | ||
| }, | ||
| crc32: { | ||
| digest: data => { | ||
| const input = Buffer.isBuffer(data) ? data : Buffer.from(data); | ||
| return uint32ToBase64(new Crc32().update(input).digest() >>> 0); // >>> 0 coerce number to uint32 | ||
| }, | ||
| digestFromHash: hash => { | ||
| const result = hash.digest(); | ||
| return uint32ToBase64(result >>> 0); | ||
| }, | ||
| isValidDigest: expected => typeof expected === 'string' && expected.length === 8 && base64Regex.test(expected), | ||
| createHash: () => new Crc32() | ||
| }, | ||
| crc32c: { | ||
| digest: data => { | ||
| const input = Buffer.isBuffer(data) ? data : Buffer.from(data); | ||
| return uint32ToBase64(new Crc32c().update(input).digest() >>> 0); // >>> 0 coerce number to uint32 | ||
| }, | ||
| digestFromHash: hash => uint32ToBase64(hash.digest() >>> 0), | ||
| isValidDigest: expected => typeof expected === 'string' && expected.length === 8 && base64Regex.test(expected), | ||
| createHash: () => new Crc32c() | ||
| }, | ||
| sha1: { | ||
| digest: data => { | ||
| const input = Buffer.isBuffer(data) ? data : Buffer.from(data); | ||
| return crypto.createHash('sha1').update(input).digest('base64'); | ||
| }, | ||
| digestFromHash: hash => hash.digest('base64'), | ||
| isValidDigest: expected => typeof expected === 'string' && expected.length === 28 && base64Regex.test(expected), | ||
| createHash: () => crypto.createHash('sha1') | ||
| }, | ||
| sha256: { | ||
| digest: data => { | ||
| const input = Buffer.isBuffer(data) ? data : Buffer.from(data); | ||
| return crypto.createHash('sha256').update(input).digest('base64'); | ||
| }, | ||
| digestFromHash: hash => hash.digest('base64'), | ||
| isValidDigest: expected => typeof expected === 'string' && expected.length === 44 && base64Regex.test(expected), | ||
| createHash: () => crypto.createHash('sha256') | ||
| } | ||
| }); | ||
|
|
||
|
|
@@ -141,6 +157,86 @@ async function validateXAmzChecksums(headers, body) { | |
| return null; | ||
| } | ||
|
|
||
| function getChecksumDataFromHeaders(headers) { | ||
| const checkSdk = algo => { | ||
| if (!('x-amz-sdk-checksum-algorithm' in headers)) { | ||
| return null; | ||
| } | ||
|
|
||
| const sdkAlgo = headers['x-amz-sdk-checksum-algorithm']; | ||
| if (typeof sdkAlgo !== 'string') { | ||
| return { error: ChecksumError.AlgoNotSupportedSDK, details: { algorithm: sdkAlgo } }; | ||
| } | ||
|
|
||
| const sdkLowerAlgo = sdkAlgo.toLowerCase(); | ||
| if (!(sdkLowerAlgo in algorithms)) { | ||
| return { error: ChecksumError.AlgoNotSupportedSDK, details: { algorithm: sdkAlgo } }; | ||
| } | ||
|
|
||
| // If AWS there is a mismatch, AWS returns the same error as if the algo was invalid. | ||
| if (sdkLowerAlgo !== algo) { | ||
| return { error: ChecksumError.AlgoNotSupportedSDK, details: { algorithm: sdkAlgo } }; | ||
| } | ||
|
|
||
| return null; | ||
| }; | ||
|
|
||
| const checksumHeaders = Object.keys(headers).filter(header => header.startsWith('x-amz-checksum-')); | ||
| const xAmzChecksumCnt = checksumHeaders.length; | ||
| if (xAmzChecksumCnt > 1) { | ||
| return { error: ChecksumError.MultipleChecksumTypes, details: { algorithms: checksumHeaders } }; | ||
| } | ||
|
|
||
| if (xAmzChecksumCnt === 0 && !('x-amz-trailer' in headers) && 'x-amz-sdk-checksum-algorithm' in headers) { | ||
| return { | ||
| error: ChecksumError.MissingCorresponding, | ||
| details: { expected: headers['x-amz-sdk-checksum-algorithm'] } | ||
| }; | ||
| } | ||
|
|
||
| if ('x-amz-trailer' in headers) { | ||
| const trailer = headers['x-amz-trailer']; | ||
| if (!trailer.startsWith('x-amz-checksum-')) { | ||
| return { error: 'invalid x-amz-trailer' }; | ||
| } | ||
|
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Double semicolon. |
||
| const trailerAlgo = trailer.slice('x-amz-checksum-'.length); | ||
| if (!(trailerAlgo in algorithms)) { | ||
| return { error: ChecksumError.AlgoNotSupported, details: { algorithm: trailerAlgo } };; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Double semicolons. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Double semicolon. |
||
| } | ||
|
|
||
| const err = checkSdk(trailerAlgo); | ||
| if (err) { | ||
| return err; | ||
| } | ||
|
|
||
| return { algorithm: trailerAlgo, isTrailer: true, expected: undefined }; | ||
| } | ||
|
|
||
| if (xAmzChecksumCnt === 0) { | ||
| // There was no x-amz-checksum- or x-amz-trailer | ||
| return { algorithm: 'crc64nvme', isTrailer: false, expected: undefined }; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When no checksum headers are present, this defaults to |
||
| } | ||
|
|
||
| // No x-amz-sdk-checksum-algorithm we expect one x-amz-checksum-[crc64nvme, crc32, crc32C, sha1, sha256]. | ||
| const algo = checksumHeaders[0].slice('x-amz-checksum-'.length); | ||
| if (!(algo in algorithms)) { | ||
| return { error: ChecksumError.AlgoNotSupported, details: { algorithm: algo } }; | ||
| } | ||
|
|
||
| const expected = headers[`x-amz-checksum-${algo}`]; | ||
| if (!algorithms[algo].isValidDigest(expected)) { | ||
| return { error: ChecksumError.MalformedChecksum, details: { algorithm: algo, expected } }; | ||
| } | ||
|
|
||
| const err = checkSdk(algo); | ||
| if (err) { | ||
| return err; | ||
| } | ||
|
|
||
| return { algorithm: algo, isTrailer: false, expected }; | ||
| } | ||
|
|
||
| /** | ||
| * validateChecksumsNoChunking - Validate the checksums of a request. | ||
| * @param {object} headers - http headers | ||
|
|
@@ -183,16 +279,7 @@ async function validateChecksumsNoChunking(headers, body) { | |
| return err; | ||
| } | ||
|
|
||
| async function defaultValidationFunc(request, body, log) { | ||
| const err = await validateChecksumsNoChunking(request.headers, body); | ||
| if (!err) { | ||
| return null; | ||
| } | ||
|
|
||
| if (err.error !== ChecksumError.MissingChecksum) { | ||
| log.debug('failed checksum validation', { method: request.apiMethod }, err); | ||
| } | ||
|
|
||
| function arsenalErrorFromChecksumError(err) { | ||
| switch (err.error) { | ||
| case ChecksumError.MissingChecksum: | ||
| return null; | ||
|
|
@@ -230,6 +317,19 @@ async function defaultValidationFunc(request, body, log) { | |
| } | ||
| } | ||
|
|
||
| async function defaultValidationFunc(request, body, log) { | ||
| const err = await validateChecksumsNoChunking(request.headers, body); | ||
| if (!err) { | ||
| return null; | ||
| } | ||
|
|
||
| if (err.error !== ChecksumError.MissingChecksum) { | ||
| log.debug('failed checksum validation', { method: request.apiMethod }, err); | ||
| } | ||
|
|
||
| return arsenalErrorFromChecksumError(err); | ||
| } | ||
|
|
||
| /** | ||
| * validateMethodChecksumsNoChunking - Validate the checksums of a request. | ||
| * @param {object} request - http request | ||
|
|
@@ -253,5 +353,8 @@ module.exports = { | |
| ChecksumError, | ||
| validateChecksumsNoChunking, | ||
| validateMethodChecksumNoChunking, | ||
| getChecksumDataFromHeaders, | ||
| arsenalErrorFromChecksumError, | ||
| algorithms, | ||
| checksumedMethods, | ||
| }; | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -212,10 +212,14 @@ function createAndStoreObject(bucketName, bucketMD, objectKey, objMD, authInfo, | |
| const mdOnlyHeader = request.headers['x-amz-meta-mdonly']; | ||
| const mdOnlySize = request.headers['x-amz-meta-size']; | ||
|
|
||
| // console.log('============== createAndStoreObject'); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Commented-out console.log statements left in production code. Remove before merging. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Commented-out debug console.log statements should be removed before merging. There are multiple instances throughout this file. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Commented-out |
||
|
|
||
| return async.waterfall([ | ||
| function storeData(next) { | ||
| if (size === 0) { | ||
| // console.log('============== size 0'); | ||
| if (!dontSkipBackend[locationType]) { | ||
| // console.log('============== skip'); | ||
| metadataStoreParams.contentMD5 = constants.emptyFileMd5; | ||
| return next(null, null, null); | ||
| } | ||
|
|
@@ -247,6 +251,8 @@ function createAndStoreObject(bucketName, bucketMD, objectKey, objMD, authInfo, | |
| } | ||
| } | ||
|
|
||
| // console.log('============== before dataStore'); | ||
|
|
||
| return dataStore(objectKeyContext, cipherBundle, request, size, | ||
| streamingV4Params, backendInfo, log, next); | ||
| }, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,10 @@ | ||
| const V4Transform = require('../../../auth/streamingV4/V4Transform'); | ||
| const TrailingChecksumTransform = require('../../../auth/streamingV4/trailingChecksumTransform'); | ||
| const ChecksumTransform = require('../../../auth/streamingV4/ChecksumTransform'); | ||
| const { | ||
| getChecksumDataFromHeaders, | ||
| arsenalErrorFromChecksumError, | ||
| } = require('../../apiUtils/integrity/validateChecksums'); | ||
|
|
||
| /** | ||
| * Prepares the stream if the chunks are sent in a v4 Auth request | ||
|
|
@@ -44,7 +49,86 @@ function stripTrailingChecksumStream(stream, log, errCb) { | |
| return trailingChecksumTransform; | ||
| } | ||
|
|
||
| function prepareStream2(request, streamingV4Params, log, errCb) { | ||
| const xAmzContentSHA256 = request.headers['x-amz-content-sha256']; | ||
|
|
||
| const checksumAlgo = getChecksumDataFromHeaders(request.headers); | ||
| if (checksumAlgo.error) { | ||
| log.debug('invalid checksum headers', checksumAlgo); | ||
| return errCb(arsenalErrorFromChecksumError(checksumAlgo)); // FIXME sometimes we use CB sometimes we use null | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Double-callback bug: |
||
| } | ||
|
|
||
| let stream = request; | ||
| switch (xAmzContentSHA256) { | ||
| case 'STREAMING-AWS4-HMAC-SHA256-PAYLOAD': { | ||
| if (typeof streamingV4Params !== 'object') { | ||
| // this might happen if the user provided a valid V2 | ||
| // Authentication header, while the chunked upload method | ||
| // requires V4: in such case we don't get any V4 params | ||
| // and we should return an error to the client. | ||
| log.error('missing v4 streaming params for chunked upload', { | ||
| method: 'prepareStream2', | ||
| streamingV4ParamsType: typeof streamingV4Params, | ||
| streamingV4Params, | ||
| }); | ||
| return null; // FIXME: use CB | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. return null with FIXME comment — the caller (dataStore in storeObject.js) passes the result directly to data.put without a null check. The old prepareStream had a null guard (if (!dataStreamTmp)) but it was commented out. This will crash with a TypeError when piping from null. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This returns null with a FIXME comment. The caller in storeObject.js will pass null to data.put(), causing a crash. Should call errCb(errors.InvalidArgument) instead. |
||
| } | ||
| const v4Transform = new V4Transform(streamingV4Params, log, errCb); | ||
| request.pipe(v4Transform); | ||
| v4Transform.headers = request.headers; | ||
| stream = v4Transform; | ||
|
|
||
| const checksumedStream = new ChecksumTransform( | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No |
||
| checksumAlgo.algorithm, | ||
| checksumAlgo.expected, | ||
| checksumAlgo.isTrailer, | ||
| ); | ||
| stream.pipe(checksumedStream); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No error listener on ChecksumTransform. The STREAMING-AWS4-HMAC-SHA256-PAYLOAD, UNSIGNED-PAYLOAD, and default cases pipe into a ChecksumTransform but never attach an error event handler. Unhandled stream errors will crash the process. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No error handler on ChecksumTransform streams. If ChecksumTransform emits an error, it will crash the process. The TrailingChecksumTransform path correctly adds .on('error', errCb) at line 87, but ChecksumTransform never gets one in any branch. Add checksumedStream.on('error', errCb) in each case. |
||
| return checksumedStream; | ||
| } | ||
| case 'STREAMING-UNSIGNED-PAYLOAD-TRAILER': { | ||
| // console.log(checksumAlgo); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Commented-out console.log left in production code. Remove before merging. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Commented-out console.log should be removed. |
||
| const trailingChecksumTransform = new TrailingChecksumTransform(log); | ||
| trailingChecksumTransform.on('error', errCb); | ||
| request.pipe(trailingChecksumTransform); | ||
| trailingChecksumTransform.headers = request.headers; | ||
| stream = trailingChecksumTransform; | ||
|
|
||
| const checksumedStream = new ChecksumTransform( | ||
| checksumAlgo.algorithm, | ||
| checksumAlgo.expected, | ||
| checksumAlgo.isTrailer, | ||
| ); | ||
| stream.on('trailer', (name, value) => { | ||
| checksumedStream.setExpectedChecksum(name, value); | ||
| }); | ||
| stream.pipe(checksumedStream); | ||
| return checksumedStream; | ||
| } | ||
| case 'UNSIGNED-PAYLOAD': { | ||
| const checksumedStream = new ChecksumTransform( | ||
| checksumAlgo.algorithm, | ||
| checksumAlgo.expected, | ||
| checksumAlgo.isTrailer, | ||
| ); | ||
| stream.pipe(checksumedStream); | ||
| return checksumedStream; | ||
| } | ||
| default: { | ||
| // console.log(checksumAlgo); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Commented-out console.log should be removed. |
||
| const checksumedStream = new ChecksumTransform( | ||
| checksumAlgo.algorithm, | ||
| checksumAlgo.expected, | ||
| checksumAlgo.isTrailer, | ||
| ); | ||
| stream.pipe(checksumedStream); | ||
| return checksumedStream; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| module.exports = { | ||
| prepareStream, | ||
| prepareStream2, | ||
| stripTrailingChecksumStream, | ||
| }; | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,7 +1,9 @@ | ||
| const { errors, jsutil } = require('arsenal'); | ||
|
|
||
| const { data } = require('../../../data/wrapper'); | ||
| const { prepareStream, stripTrailingChecksumStream } = require('./prepareStream'); | ||
| // const { prepareStream, prepareStream2, stripTrailingChecksumStream } = require('./prepareStream'); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Commented-out imports should be removed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Commented-out code and debug |
||
| const { prepareStream2 } = require('./prepareStream'); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Commented-out imports and console.log left in production code. The old prepareStream and stripTrailingChecksumStream imports are commented out instead of removed. |
||
| // const ChecksumTransform = require('../../../auth/streamingV4/ChecksumTransform'); | ||
|
|
||
| /** | ||
| * Check that `hashedStream.completedHash` matches header `stream.contentMD5` | ||
|
|
@@ -58,13 +60,19 @@ function checkHashMatchMD5(stream, hashedStream, dataRetrievalInfo, log, cb) { | |
| function dataStore(objectContext, cipherBundle, stream, size, | ||
| streamingV4Params, backendInfo, log, cb) { | ||
| const cbOnce = jsutil.once(cb); | ||
| const dataStreamTmp = prepareStream(stream, streamingV4Params, log, cbOnce); | ||
| if (!dataStreamTmp) { | ||
| return process.nextTick(() => cb(errors.InvalidArgument)); | ||
| // const dataStreamTmp = prepareStream(stream, streamingV4Params, log, cbOnce); | ||
| // if (!dataStreamTmp) { | ||
| // return process.nextTick(() => cb(errors.InvalidArgument)); | ||
| // } | ||
| // const dataStream = stripTrailingChecksumStream(dataStreamTmp, log, cbOnce); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Commented-out old code block (lines 63-67) should be removed, not left as dead code. |
||
| // console.log('...................................'); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Commented-out code and debug console.log should be removed. |
||
|
|
||
| const checksumedStream = prepareStream2(stream, streamingV4Params, log, cbOnce); | ||
| if (!checksumedStream) { | ||
| return process.nextTick(() => cbOnce(errors.InvalidArgument)); | ||
| } | ||
| const dataStream = stripTrailingChecksumStream(dataStreamTmp, log, cbOnce); | ||
| return data.put( | ||
| cipherBundle, dataStream, size, objectContext, backendInfo, log, | ||
| cipherBundle, checksumedStream, size, objectContext, backendInfo, log, | ||
| (err, dataRetrievalInfo, hashedStream) => { | ||
| if (err) { | ||
| log.error('error in datastore', { | ||
|
|
@@ -81,6 +89,15 @@ function dataStore(objectContext, cipherBundle, stream, size, | |
| log.trace('dataStore: backend stored key', { | ||
| dataRetrievalInfo, | ||
| }); | ||
|
|
||
| // console.log('================', | ||
| // checksumedStream.algoName, checksumedStream.digest, checksumedStream.expectedDigest); | ||
| const valid = checksumedStream.validateChecksum(); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. validateChecksum() reads this.digest set in _flush(). The data.put callback fires when the backend finishes consuming data, but _flush() runs when the stream ends. If _flush() has not completed, this.digest is undefined and validation silently passes (non-trailer) or produces wrong results (trailer). Consider validating inside _flush() or waiting for the finish event. |
||
| if (valid !== null) { | ||
| // console.log(valid); | ||
| return cbOnce(errors.BadDigest); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When checksum validation fails, the stored data is not cleaned up. checkHashMatchMD5 deletes stored data on MD5 mismatch, but here the data is left orphaned. Should call data.batchDelete on dataRetrievalInfo before returning the error. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When checksum validation fails, stored data is not cleaned up. The existing checkHashMatchMD5 deletes data on MD5 mismatch via data.batchDelete. This path should do the same, otherwise failed-checksum objects leak storage. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When checksum validation fails, the data that was already stored is not deleted. Compare with |
||
| } | ||
|
|
||
| return checkHashMatchMD5(stream, hashedStream, | ||
| dataRetrievalInfo, log, cbOnce); | ||
| }); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,10 +2,11 @@ const { errorInstances } = require('arsenal'); | |
|
|
||
| const { unsupportedSignatureChecksums, supportedSignatureChecksums } = require('../../../../constants'); | ||
|
|
||
| // FIXME: merge this | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FIXME and question comments should be resolved or removed before merging. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FIXME and inline question on line 36 are dev notes that should be resolved or removed before merging. |
||
| function validateChecksumHeaders(headers) { | ||
| // If the x-amz-trailer header is present the request is using one of the | ||
| // trailing checksum algorithms, which are not supported. | ||
| if (headers['x-amz-trailer'] !== undefined && | ||
| if (headers['x-amz-trailer'] !== undefined && // Why do we need this check if we have unsupportedSignatureChecksums? | ||
| headers['x-amz-content-sha256'] !== 'STREAMING-UNSIGNED-PAYLOAD-TRAILER') { | ||
| return errorInstances.BadRequest.customizeDescription('signed trailing checksum is not supported'); | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This returns a raw error string
'invalid x-amz-trailer'instead of aChecksumErrorenum value.arsenalErrorFromChecksumErrorwon't match any case and will fall through to thedefaultreturningBadDigest, which is incorrect for an invalid trailer header. Use a properChecksumErrorvalue.— Claude Code