From 449f4125bd8095e8dabed979ec10324d30fc05fb Mon Sep 17 00:00:00 2001 From: Eric Norris Date: Mon, 20 Apr 2015 23:40:51 -0400 Subject: [PATCH] Fixes #3. Check the actual byte length of strings. A string's 'length' property counts UTF-16 code units, which might not equal the number of bytes the string occupies once it is encoded as UTF-8. This fix uses Buffer.byteLength to check for the true number of bytes that keys (or data) will take up on disk. Additionally, this adds a test suite to ensure that UTF-8 is supported. --- src/readable-cdb.js | 6 ++- src/writable-cdb.js | 10 ++-- test/cdb-utf8-test.js | 112 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 6 deletions(-) create mode 100644 test/cdb-utf8-test.js diff --git a/src/readable-cdb.js b/src/readable-cdb.js index e017015..fb9d998 100644 --- a/src/readable-cdb.js +++ b/src/readable-cdb.js @@ -58,6 +58,7 @@ readable.prototype.get = function(key, offset, callback) { position = hashtable.position, slotCount = hashtable.slotCount, slot = (hash >>> 8) % slotCount, + trueKeyLength = Buffer.byteLength(key), self = this, hashPosition, recordHash, recordPosition, keyLength, dataLength; @@ -103,8 +104,9 @@ readable.prototype.get = function(key, offset, callback) { keyLength = buffer.readUInt32LE(0); dataLength = buffer.readUInt32LE(4); - if (keyLength != key.length) { - // speedup in the rare case of a hash collision + // In the rare case that there is a hash collision, check the key size + // to prevent reading in a key that will definitely not match. 
+ if (keyLength != trueKeyLength) { return readSlot(++slot); } diff --git a/src/writable-cdb.js b/src/writable-cdb.js index f083d67..707fddd 100644 --- a/src/writable-cdb.js +++ b/src/writable-cdb.js @@ -53,16 +53,18 @@ writable.prototype.open = function(cb) { }; writable.prototype.put = function(key, data, callback) { - var record = new Buffer(8 + key.length + data.length), + var keyLength = Buffer.byteLength(key), + dataLength = Buffer.byteLength(data), + record = new Buffer(8 + keyLength + dataLength), hash = _.cdbHash(key), hashtableIndex = hash & 255, hashtable = this.hashtables[hashtableIndex], okayToWrite; - record.writeUInt32LE(key.length, 0); - record.writeUInt32LE(data.length, 4); + record.writeUInt32LE(keyLength, 0); + record.writeUInt32LE(dataLength, 4); record.write(key, 8); - record.write(data, 8 + key.length); + record.write(data, 8 + keyLength); okayToWrite = this.recordStream.write(record, callback); diff --git a/test/cdb-utf8-test.js b/test/cdb-utf8-test.js new file mode 100644 index 0000000..87cd1f7 --- /dev/null +++ b/test/cdb-utf8-test.js @@ -0,0 +1,112 @@ +'use strict'; + +var vows = require('vows'), + assert = require('assert'), + fs = require('fs'), + writable = require('../src/writable-cdb'), + readable = require('../src/readable-cdb'), + tempFile = 'test/tmp'; + +try { + fs.unlinkSync(tempFile); +} catch (err) {} + +vows.describe('cdb-utf8-test').addBatch({ + 'A writable cdb': { + topic: function() { + return new writable(tempFile); + }, + + 'when opened': { + topic: function(cdb) { + cdb.open(this.callback); + }, + + 'should write UTF8 characters': { + topic: function(cdb) { + cdb.put('é', 'unicode test'); + cdb.put('€', 'unicode test'); + cdb.put('key', 'ᚠᛇᚻ'); + cdb.put('대한민국', '안성기'); + + cdb.close(this.callback); + }, + + 'and close successfully': function(err) { + assert.equal(err, null); + }, + } + } + } +}).addBatch({ + 'A readable cdb should find that': { + topic: function() { + (new readable(tempFile)).open(this.callback); + }, + 
+ 'é': { + topic: function(cdb) { + cdb.get('é', this.callback); + }, + + 'exists': function(err, data) { + assert.isNull(err); + assert.isNotNull(data); + }, + + 'has the right value': function(err, data) { + assert.equal(data, 'unicode test'); + } + }, + + '€': { + topic: function(cdb) { + cdb.get('€', this.callback); + }, + + 'exists': function(err, data) { + assert.isNull(err); + assert.isNotNull(data); + }, + + 'has the right value': function(err, data) { + assert.equal(data, 'unicode test'); + } + }, + + 'key': { + topic: function(cdb) { + cdb.get('key', this.callback); + }, + + 'exists': function(err, data) { + assert.isNull(err); + assert.isNotNull(data); + }, + + 'has the right value': function(err, data) { + assert.equal(data, 'ᚠᛇᚻ'); + } + }, + + '대한민국': { + topic: function(cdb) { + cdb.get('대한민국', this.callback); + }, + + 'exists': function(err, data) { + assert.isNull(err); + assert.isNotNull(data); + }, + + 'has the right value': function(err, data) { + assert.equal(data, '안성기'); + } + }, + + teardown: function() { + fs.unlinkSync(tempFile); + } + } + +}).export(module);