Skip to content

Commit

Permalink
Fixes #3. Check the actual byte length of strings.
Browse files Browse the repository at this point in the history
UTF-8 means that a string's 'length' property might not equal the byte
length. This fix uses Buffer.byteLength to check for the true number of
bytes that keys (or data) will take up on disk. Additionally this adds a
test suite to ensure that UTF-8 is supported.
  • Loading branch information
ericnorris committed Apr 21, 2015
1 parent 57ea854 commit 449f412
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 6 deletions.
6 changes: 4 additions & 2 deletions src/readable-cdb.js
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ readable.prototype.get = function(key, offset, callback) {
position = hashtable.position,
slotCount = hashtable.slotCount,
slot = (hash >>> 8) % slotCount,
trueKeyLength = Buffer.byteLength(key),
self = this,
hashPosition, recordHash, recordPosition, keyLength, dataLength;

Expand Down Expand Up @@ -103,8 +104,9 @@ readable.prototype.get = function(key, offset, callback) {
keyLength = buffer.readUInt32LE(0);
dataLength = buffer.readUInt32LE(4);

if (keyLength != key.length) {
// speedup in the rare case of a hash collision
// In the rare case that there is a hash collision, check the key size
// to prevent reading in a key that will definitely not match.
if (keyLength != trueKeyLength) {
return readSlot(++slot);
}

Expand Down
10 changes: 6 additions & 4 deletions src/writable-cdb.js
Original file line number Diff line number Diff line change
Expand Up @@ -53,16 +53,18 @@ writable.prototype.open = function(cb) {
};

writable.prototype.put = function(key, data, callback) {
var record = new Buffer(8 + key.length + data.length),
var keyLength = Buffer.byteLength(key),
dataLength = Buffer.byteLength(data),
record = new Buffer(8 + keyLength + dataLength),
hash = _.cdbHash(key),
hashtableIndex = hash & 255,
hashtable = this.hashtables[hashtableIndex],
okayToWrite;

record.writeUInt32LE(key.length, 0);
record.writeUInt32LE(data.length, 4);
record.writeUInt32LE(keyLength, 0);
record.writeUInt32LE(dataLength, 4);
record.write(key, 8);
record.write(data, 8 + key.length);
record.write(data, 8 + keyLength);

okayToWrite = this.recordStream.write(record, callback);

Expand Down
112 changes: 112 additions & 0 deletions test/cdb-utf8-test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
'use strict';

var vows = require('vows'),
assert = require('assert'),
fs = require('fs'),
writable = require('../src/writable-cdb'),
readable = require('../src/readable-cdb'),
tempFile = 'test/tmp';

try {
fs.unlinkSync(tempFile);
} catch (err) {}

vows.describe('cdb-utf8-test').addBatch({
'A writable cdb': {
topic: function() {
return new writable(tempFile);
},

'when opened': {
topic: function(cdb) {
cdb.open(this.callback);
},

'should write UTF8 characters': {
topic: function(cdb) {
cdb.put('é', 'unicode test');
cdb.put('€', 'unicode test');
cdb.put('key', 'ᚠᛇᚻ');
cdb.put('대한민국', '안성기');

cdb.close(this.callback);
},

'and close successfully': function(err) {
assert.equal(err, null);
},
}
}
}
}).addBatch({
'A readable cdb should find that': {
topic: function() {
(new readable(tempFile)).open(this.callback);
},

'é': {
topic: function(cdb) {
cdb.get('é', this.callback);
},

'exists': function(err, data) {
assert.isNull(err);
assert.isNotNull(data);
},

'has the right value': function(err, data) {
assert.equal(data, 'unicode test');
}
},

'€': {
topic: function(cdb) {
cdb.get('€', this.callback);
},

'exists': function(err, data) {
assert.isNull(err);
assert.isNotNull(data);
},

'has the right value': function(err, data) {
assert.equal(data, 'unicode test');
}
},

'key': {
topic: function(cdb) {
cdb.get('key', this.callback);
},

'exists': function(err, data) {
assert.isNull(err);
assert.isNotNull(data);
},

'has the right value': function(err, data) {
assert.equal(data, 'ᚠᛇᚻ');
}
},

'대한민국': {
topic: function(cdb) {
cdb.get('대한민국', this.callback);
},

'exists': function(err, data) {
assert.isNull(err);
assert.isNotNull(data);
},

'has the right value': function(err, data) {
assert.equal(data, '안성기');
}
},

teardown: function() {
fs.unlinkSync(tempFile);
}
}

}).export(module);

0 comments on commit 449f412

Please sign in to comment.