From 6c38b5d654c2b2d9755b0284282179782ea6dd6b Mon Sep 17 00:00:00 2001 From: Bryan Talbot Date: Mon, 8 May 2017 19:31:43 -0700 Subject: [PATCH 1/2] extend tests to use item with a range key. This exposes a bug (previously filed as #90) which occurs when items with a range key are read from the DDB event stream, an md5 hash of the key is computed and the item written to S3. The issue is that the DDB event stream handler does not (and should not) do a 'describe_table' to know which key is the HASH and which is the RANGE and therefor simply generates the md5 hash of the item keys in whatever order they happen to appear in the stream event. The s3-backfill util does do a 'describe_table' and does order the keys by declaration order which DDB requires to be HASH first, RANGE second. The different ordering of the item keys will produce a distinct md5 hash value and different S3 path/keys will result in some items appearing twice in S3 effectively corrupting the incremental backups since two valid versions will be present at the same time. --- test/fixtures/events/adjust-many.json | 79 +++++++++++++++---- test/fixtures/events/insert-buffer.json | 5 +- .../fixtures/events/insert-modify-delete.json | 33 ++++++-- test/fixtures/events/insert-modify.json | 23 +++++- test/fixtures/events/insert.json | 5 +- test/fixtures/records.js | 1 + test/fixtures/table.js | 6 +- test/incremental.test.js | 3 +- test/index.test.js | 32 ++++---- 9 files changed, 144 insertions(+), 43 deletions(-) diff --git a/test/fixtures/events/adjust-many.json b/test/fixtures/events/adjust-many.json index 0ee01c0..898cb40 100644 --- a/test/fixtures/events/adjust-many.json +++ b/test/fixtures/events/adjust-many.json @@ -6,7 +6,10 @@ "eventSource":"aws:dynamodb", "dynamodb": { "NewImage":{ - "range": { + "data": { + "S": "data-1" + }, + "arange": { "N": "1" }, "id": { @@ -19,6 +22,9 @@ "Keys":{ "id": { "S": "record-1" + }, + "arange": { + "N": "1" } } }, @@ -32,8 +38,11 @@ "eventSource":"aws:dynamodb", "dynamodb": { "NewImage":{ - "range": { - "N": "11" + "data": { + "S": "data-2" + }, + "arange": { + "N": "1" }, "id": { "S": "record-1" @@ -43,7 +52,10 @@ "StreamViewType":"NEW_AND_OLD_IMAGES", "SequenceNumber":"222", "OldImage":{ - "range": { + "data": { + "S": "data-1" + }, + "arange": { "N": "1" }, "id": { @@ -53,6 +65,9 @@ "Keys":{ "id": { "S": "record-1" + }, + "arange": { + "N": "1" } } }, @@ -66,7 +81,10 @@ "eventSource":"aws:dynamodb", "dynamodb": { "NewImage":{ - "range": { + "data": { + "S": "data-1" + }, + "arange": { "N": "1" }, "id": { @@ -79,6 +97,9 @@ "Keys":{ "id": { "S": "record-3" + }, + "arange": { + "N": "1" } } }, @@ -92,7 +113,10 @@ "eventSource":"aws:dynamodb", "dynamodb": { "NewImage":{ - "range": { + "data": { + "S": "data-1" + }, + "arange": { "N": "1" }, "id": { @@ -105,6 +129,9 @@ "Keys":{ "id": { "S": "record-2" + }, + "arange": { + "N": "1" } } }, @@ -118,8 +145,11 @@ "eventSource":"aws:dynamodb", "dynamodb": { "NewImage":{ - "range": { - "N": "22" + "data": { + "S": "data-2" + }, + "arange": { + "N": "1" }, "id": { "S": "record-2" @@ -129,7 +159,10 @@ "StreamViewType":"NEW_AND_OLD_IMAGES", "SequenceNumber":"222", "OldImage":{ - "range": { + "data": { + "S": "data-1" + }, + "arange": { "N": "1" }, "id": { @@ -139,6 +172,9 @@ "Keys":{ "id": { "S": "record-2" + }, + "arange": { + "N": "1" } } }, @@ -155,8 +191,11 @@ "StreamViewType":"NEW_AND_OLD_IMAGES", "SequenceNumber":"333", "OldImage":{ - "range": { - "N": "11" + "data": { + "S": "data-2" + }, + "arange": { + "N": "1" }, "id": { "S": "record-1" @@ -165,6 +204,9 @@ "Keys":{ "id": { "S": "record-1" + }, + "arange": { + "N": "1" } } }, @@ -178,8 +220,11 @@ "eventSource":"aws:dynamodb", "dynamodb": { "NewImage":{ - "range": { - "N": "33" + "data": { + "S": "data-2" + }, + "arange": { + "N": "1" }, "id": { "S": "record-3" @@ -189,7 +234,10 @@ "StreamViewType":"NEW_AND_OLD_IMAGES", "SequenceNumber":"222", "OldImage":{ - "range": { + "data": { + "S": "data-1" + }, + "arange": { "N": "1" }, "id": { @@ -199,6 +247,9 @@ "Keys":{ "id": { "S": "record-3" + }, + "arange": { + "N": "1" } } }, diff --git a/test/fixtures/events/insert-buffer.json b/test/fixtures/events/insert-buffer.json index 4573d90..98edc27 100644 --- a/test/fixtures/events/insert-buffer.json +++ b/test/fixtures/events/insert-buffer.json @@ -6,7 +6,7 @@ "eventSource":"aws:dynamodb", "dynamodb": { "NewImage":{ - "range": { + "arange": { "N": "1" }, "id": { @@ -44,6 +44,9 @@ "Keys":{ "id": { "S": "record-1" + }, + "arange": { + "N": "1" } } }, diff --git a/test/fixtures/events/insert-modify-delete.json b/test/fixtures/events/insert-modify-delete.json index ca7b387..3bbe9b6 100644 --- a/test/fixtures/events/insert-modify-delete.json +++ b/test/fixtures/events/insert-modify-delete.json @@ -6,7 +6,10 @@ "eventSource":"aws:dynamodb", "dynamodb": { "NewImage":{ - "range": { + "data": { + "S": "data-1" + }, + "arange": { "N": "1" }, "id": { @@ -19,6 +22,9 @@ "Keys":{ "id": { "S": "record-1" + }, + "arange": { + "N": "1" } } }, @@ -32,8 +38,11 @@ "eventSource":"aws:dynamodb", "dynamodb": { "NewImage":{ - "range": { - "N": "2" + "data": { + "S": "data-2" + }, + "arange": { + "N": "1" }, "id": { "S": "record-1" @@ -43,7 +52,10 @@ "StreamViewType":"NEW_AND_OLD_IMAGES", "SequenceNumber":"222", "OldImage":{ - "range": { + "data": { + "S": "data-1" + }, + "arange": { "N": "1" }, "id": { @@ -53,6 +65,9 @@ "Keys":{ "id": { "S": "record-1" + }, + "arange": { + "N": "1" } } }, @@ -69,8 +84,11 @@ "StreamViewType":"NEW_AND_OLD_IMAGES", "SequenceNumber":"333", "OldImage":{ - "range": { - "N": "2" + "data": { + "S": "data-2" + }, + "arange": { + "N": "1" }, "id": { "S": "record-1" @@ -79,6 +97,9 @@ "Keys":{ "id": { "S": "record-1" + }, + "arange": { + "N": "1" } } }, diff --git a/test/fixtures/events/insert-modify.json b/test/fixtures/events/insert-modify.json index 4076c40..4cd3b69 100644 --- a/test/fixtures/events/insert-modify.json +++ b/test/fixtures/events/insert-modify.json @@ -6,7 +6,10 @@ "eventSource":"aws:dynamodb", "dynamodb": { "NewImage":{ - "range": { + "data": { + "S": "data-1" + }, + "arange": { "N": "1" }, "id": { @@ -19,6 +22,9 @@ "Keys":{ "id": { "S": "record-1" + }, + "arange": { + "N": "1" } } }, @@ -32,8 +38,11 @@ "eventSource":"aws:dynamodb", "dynamodb": { "NewImage":{ - "range": { - "N": "2" + "data": { + "S": "data-2" + }, + "arange": { + "N": "1" }, "id": { "S": "record-1" @@ -43,7 +52,10 @@ "StreamViewType":"NEW_AND_OLD_IMAGES", "SequenceNumber":"222", "OldImage":{ - "range": { + "data": { + "S": "data-1" + }, + "arange": { "N": "1" }, "id": { @@ -53,6 +65,9 @@ "Keys":{ "id": { "S": "record-1" + }, + "arange": { + "N": "1" } } }, diff --git a/test/fixtures/events/insert.json b/test/fixtures/events/insert.json index 1e21b97..4afff20 100644 --- a/test/fixtures/events/insert.json +++ b/test/fixtures/events/insert.json @@ -6,7 +6,7 @@ "eventSource":"aws:dynamodb", "dynamodb": { "NewImage":{ - "range": { + "arange": { "N": "1" }, "id": { @@ -19,6 +19,9 @@ "Keys":{ "id": { "S": "record-1" + }, + "arange": { + "N": "1" } } }, diff --git a/test/fixtures/records.js b/test/fixtures/records.js index 9888b17..bb98206 100644 --- a/test/fixtures/records.js +++ b/test/fixtures/records.js @@ -5,6 +5,7 @@ module.exports = function(num) { return _.range(0, num).map(function() { return { id: crypto.randomBytes(16).toString('hex'), + arange: Math.random(), data: crypto.randomBytes(256).toString('base64') }; }); diff --git a/test/fixtures/table.js b/test/fixtures/table.js index 2fa6851..b13e0c8 100644 --- a/test/fixtures/table.js +++ b/test/fixtures/table.js @@ -1,9 +1,11 @@ module.exports = { AttributeDefinitions: [ - {AttributeName: 'id', AttributeType: 'S'} + {AttributeName: 'id', AttributeType: 'S'}, + {AttributeName: 'arange', AttributeType: 'N'} ], KeySchema: [ - {AttributeName: 'id', KeyType: 'HASH'} + {AttributeName: 'id', KeyType: 'HASH'}, + {AttributeName: 'arange', KeyType: 'RANGE'} ], ProvisionedThroughput: { ReadCapacityUnits: 1, diff --git a/test/incremental.test.js b/test/incremental.test.js index 894ba09..c20e592 100644 --- a/test/incremental.test.js +++ b/test/incremental.test.js @@ -53,7 +53,8 @@ dynamodb.test('[s3-backfill]', records, function(assert) { records.forEach(function(expected) { var key = crypto.createHash('md5') - .update(Dyno.serialize({ id: expected.id })) + // key names here must be in alphabetical order to match impl which is expected to sort them + .update(Dyno.serialize({ arange: expected.arange, id: expected.id })) .digest('hex'); key = [prefix, dynamodb.tableName, key].join('/'); diff --git a/test/index.test.js b/test/index.test.js index 3399bf8..dd10479 100644 --- a/test/index.test.js +++ b/test/index.test.js @@ -44,7 +44,7 @@ replica.test('[replicate] insert', function(assert) { assert.ifError(err, 'success'); dyno.scan(function(err, data) { if (err) throw err; - assert.deepEqual(data, { Count: 1, Items: [{ id: 'record-1', range: 1 }], ScannedCount: 1 }, 'inserted desired record'); + assert.deepEqual(data, { Count: 1, Items: [{ id: 'record-1', arange: 1 }], ScannedCount: 1 }, 'inserted desired record'); assert.end(); }); }); @@ -56,7 +56,7 @@ replica.test('[replicate] insert & modify', function(assert) { assert.ifError(err, 'success'); dyno.scan(function(err, data) { if (err) throw err; - assert.deepEqual(data, { Count: 1, Items: [{ id: 'record-1', range: 2 }], ScannedCount: 1 }, 'inserted & modified desired record'); + assert.deepEqual(data, { Count: 1, Items: [{ id: 'record-1', arange: 1, data: 'data-2' }], ScannedCount: 1 }, 'inserted & modified desired record'); assert.end(); }); }); @@ -82,8 +82,8 @@ replica.test('[replicate] adjust many', function(assert) { if (err) throw err; var expected = [ - { range: 22, id: 'record-2' }, - { range: 33, id: 'record-3' } + { data: 'data-2', arange: 1, id: 'record-2' }, + { data: 'data-2', arange: 1, id: 'record-3' } ]; data = data.Items.map(Dyno.serialize); @@ -108,7 +108,7 @@ replica.test('[lambda] insert with buffers', function(assert) { if (err) throw err; var expected = { - range: 1, + arange: 1, id: 'record-1', val: new Buffer('hello'), map: { prop: new Buffer('hello') }, @@ -118,7 +118,7 @@ replica.test('[lambda] insert with buffers', function(assert) { data = data.Items[0]; - assert.equal(data.range, expected.range, 'expected range'); + assert.equal(data.arange, expected.arange, 'expected range'); assert.equal(data.id, expected.id, 'expected id'); assert.deepEqual(data.val, expected.val, 'expected val'); assert.deepEqual(data.map, expected.map, 'expected map'); @@ -156,7 +156,8 @@ test('[incremental backup] insert', function(assert) { var event = require(path.join(events, 'insert.json')); var table = event.Records[0].eventSourceARN.split('/')[1]; var id = crypto.createHash('md5') - .update(JSON.stringify(event.Records[0].dynamodb.Keys)) + // key names here must be in alphabetical order to match impl which is expected to sort them + .update(JSON.stringify({ arange: { N: '1'}, id: { S: 'record-1' } })) .digest('hex'); backup(event, {}, function(err) { @@ -170,7 +171,7 @@ test('[incremental backup] insert', function(assert) { assert.ok(data.Body, 'got S3 object'); var found = JSON.parse(data.Body.toString()); - var expected = { range: { N:'1' }, id: { S: 'record-1' } }; + var expected = { arange: { N:'1' }, id: { S: 'record-1' } }; assert.deepEqual(found, expected, 'expected item put to S3'); assert.end(); }); @@ -183,7 +184,8 @@ test('[incremental backup] insert & modify', function(assert) { var event = require(path.join(events, 'insert-modify.json')); var table = event.Records[0].eventSourceARN.split('/')[1]; var id = crypto.createHash('md5') - .update(JSON.stringify(event.Records[0].dynamodb.Keys)) + // key names here must be in alphabetical order to match impl which is expected to sort them + .update(JSON.stringify({ arange: { N: '1'}, id: { S: 'record-1' } })) .digest('hex'); backup(event, {}, function(err) { @@ -197,7 +199,7 @@ test('[incremental backup] insert & modify', function(assert) { assert.ok(data.Body, 'got S3 object'); var found = JSON.parse(data.Body.toString()); - var expected = { range: { N:'2' }, id: { S: 'record-1' } }; + var expected = { data: { S: 'data-2' }, arange: { N:'1' }, id: { S: 'record-1' } }; assert.deepEqual(found, expected, 'expected item modified on S3'); assert.end(); }); @@ -210,7 +212,8 @@ test('[incremental backup] insert, modify & delete', function(assert) { var event = require(path.join(events, 'insert-modify-delete.json')); var table = event.Records[0].eventSourceARN.split('/')[1]; var id = crypto.createHash('md5') - .update(JSON.stringify(event.Records[0].dynamodb.Keys)) + // key names here must be in alphabetical order to match impl which is expected to sort them + .update(JSON.stringify({ arange: { N: '1'}, id: { S: 'record-1' } })) .digest('hex'); backup(event, {}, function(err) { @@ -233,8 +236,8 @@ test('[incremental backup] adjust many', function(assert) { var table = event.Records[0].eventSourceARN.split('/')[1]; var expected = [ - { range: { N: '22' }, id: { S: 'record-2' } }, - { range: { N: '33' }, id: { S: 'record-3' } } + { data: { S: 'data-2' }, arange: { N: '1' }, id: { S: 'record-2' } }, + { data: { S: 'data-2' }, arange: { N: '1' }, id: { S: 'record-3' } } ]; backup(event, {}, function(err) { @@ -243,7 +246,8 @@ test('[incremental backup] adjust many', function(assert) { expected.forEach(function(record) { q.defer(function(next) { - var key = { id: record.id }; + // key names here must be in alphabetical order to match impl which is expected to sort them + var key = { arange: record.arange, id: record.id }; var id = crypto.createHash('md5') .update(JSON.stringify(key)) .digest('hex'); From a597ddcbfd0d5a574d244ca0452729e83b01ce3e Mon Sep 17 00:00:00 2001 From: Bryan Talbot Date: Mon, 8 May 2017 19:33:54 -0700 Subject: [PATCH 2/2] always order DDB keys by name when hashing for S3 storage. fixes #90. see previous commit 0c065a5bb4fe1beccc446f0549606871a2ea5a51 for tests which this commit allows to pass --- index.js | 7 ++++++- s3-backfill.js | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/index.js b/index.js index 4c3b298..33f13b4 100644 --- a/index.js +++ b/index.js @@ -181,8 +181,13 @@ function incrementalBackup(event, context, callback) { changes.forEach(function(change) { q.defer(function(next) { + var unordered = change.dynamodb.Keys; + var ordered = Object.keys(unordered).sort().reduce(function (ordered, key) { + ordered[key] = unordered[key]; + return ordered + }, {}); var id = crypto.createHash('md5') - .update(JSON.stringify(change.dynamodb.Keys)) + .update(JSON.stringify(ordered)) .digest('hex'); var table = change.eventSourceARN.split('/')[1]; diff --git a/s3-backfill.js b/s3-backfill.js index b70e0af..46b5c9b 100644 --- a/s3-backfill.js +++ b/s3-backfill.js @@ -33,7 +33,7 @@ function backfill(config, done) { var keys = data.Table.KeySchema.map(function(schema) { return schema.AttributeName; - }); + }).sort(); // ensure keys are sorted by name and not declaration order which DDB requires to be HASH first var count = 0; var starttime = Date.now();