Skip to content

Commit

Permalink
Performance of object (Array) encoding :
Browse files Browse the repository at this point in the history
Instead of a recursive approach to encoding/decoding, which implies
multiple calls to escape/unescape for nested values and ever-growing
escaped strings, use another scheme:

Encoding:
1. Walk the nested array as if flattened, encode each primitive value
(= non-array value) and escape it.
2. Prefix the item with a `K` for each array opened directly before it,
and suffix it with a `!` for each array closed directly after it
3. Join all items with a `"`
```javascript
encode(['a', [['b']]]) // KJa"KKJb!!!
```
Encoding is done in a single pass on all items of the nested array.

Decoding:
1. Split on `"`.
2. Walk the flat array, keeping track of opened/closed arrays (counting
prefix `K`s and suffix `!`s), and push the unescaped and decoded value
at the appropriate depth of the nested array.
```javascript
decode('KJb"KJa!"KJb!!') // ['b',['a'],['b']]
```
  • Loading branch information
PaulBlanche committed Jan 23, 2018
1 parent 3f45e54 commit d8498fb
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 89 deletions.
131 changes: 79 additions & 52 deletions codec/object.js
Original file line number Diff line number Diff line change
@@ -1,63 +1,90 @@
// Escape table: `!` and `"` are the structural characters of the array
// encoding, and `?` is the escape character itself, so all three are rewritten.
var dictEscape = { '?': '?@', '!': '??', '"': '?%' };

/**
 * Escape an encoded primitive so it can be embedded in an array encoding.
 *
 * Every `?`, `!` and `"` is replaced by its two-character sequence from
 * `dictEscape`; all other characters are kept as they are.
 *
 * @param {string} str - encoded value to escape.
 * @returns {string} the escaped string, or `str` itself when it contains
 *   none of the three special characters.
 */
function escape(str) {
  // Fast path. The guard must look for `?` as well: a value such as `?@`
  // contains neither `!` nor `"`, yet left untouched it would be read back
  // as the escape sequence for `?` and decode to a different string.
  if (!/[\?!"]/.test(str)) { return str; }
  return str.replace(/[\?!"]/g, function (match) {
    return dictEscape[match];
  });
}

// Reverse lookup: two-character escape sequence -> original character.
var dictUnescape = { '?@': '?', '??': '!', '?%': '"' };

/**
 * Undo the escaping: replace every `?@`, `??` and `?%` sequence with the
 * character it stands for.
 *
 * @param {string} str - escaped string.
 * @returns {string} the unescaped string, or `str` itself when it holds
 *   no escape sequence.
 */
function unescape(str) {
  var hasSequence = /\?[%\?@]/.test(str);
  if (hasSequence) {
    return str.replace(/\?[%\?@]/g, function (sequence) {
      return dictUnescape[sequence];
    });
  }
  return str;
}

exports.factory = function (codec) {

return {
encode: function (array) {
if (array === null) { return 'A'; }
if (!Array.isArray(array)) { throw new Error('can only encode arrays'); }
if(array.length == 0) return 'K'
var s = 'K'+escape(codec.encode(array[0]))
var l = array.length
for(var i = 1; i < l; i++)
s += '!' + escape(codec.encode(array[i]))
return s
},
decode: function (encoded) {
if (encoded === 'A') { return null; }
if (encoded === 'K') { return []; }
var buffer = "";
var array = [];
for (var i = 1; i < encoded.length; i++) {
var char = encoded[i];
if (char === '!') {
array.push(codec.decode(unescape(buffer)))
buffer = '';
} else {
buffer += char;
}
}
array.push(codec.decode(unescape(buffer)))
return array;
encode: encode,
decode: decode
};

/**
 * Encode a (possibly nested) array as a single string.
 *
 * `null` becomes 'A' and an empty array becomes 'K!'. Otherwise the result
 * is 'K', the encoded items joined by '"', then a closing '!'.
 *
 * @param {Array|null} array - value to encode.
 * @returns {string} the encoded form.
 * @throws {Error} when `array` is neither `null` nor an array.
 */
function encode(array) {
  if (array === null) { return 'A'; }
  if (!Array.isArray(array)) { throw new Error('can only encode arrays'); }

  var count = array.length;
  if (count === 0) { return 'K!'; }

  // Build the output left to right, putting the separator before every
  // item except the first.
  var out = 'K';
  for (var index = 0; index < count; index++) {
    if (index > 0) { out += '"'; }
    out += encodeItem(array[index]);
  }
  return out + '!';
}
}

function escape (string) {
var l = string.length;
var buffer = '';
for (var i = 0; i < l; i++) {
if (string[i] === '!') {
buffer += '??';
} else if (string[i] === '?') {
buffer += '?@';
} else {
buffer += string[i];
/**
 * Encode one array element.
 *
 * Values whose `typeof` is 'object' (which includes `null` and nested
 * arrays) are handed to `encode`; everything else is encoded with the
 * codec and then escaped.
 *
 * @param {*} item - element to encode.
 * @returns {string} the encoded element.
 */
function encodeItem(item) {
  var isPrimitive = typeof item !== 'object';
  if (isPrimitive) {
    return escape(codec.encode(item));
  }
  return encode(item);
}
return buffer;
}

function unescape (string) {
var l = string.length;
var buffer = '';
for (var i = 0; i < l; i++) {
if (string[i] === '?' && string[i+1] === '?') {
buffer += '!';
i++;
} else if (string[i] === '?' && string[i+1] === '@') {
buffer += '?';
i++;
} else {
buffer += string[i];
/**
 * Decode a string produced by `encode` back into a (possibly nested) array.
 *
 * 'A' decodes to `null` and 'K!' to an empty array. Any other input is split
 * on '"'; each piece is a primitive surrounded by zero or more leading 'K'
 * (arrays opened before it) and trailing '!' (arrays closed after it).
 *
 * NOTE(review): this assumes codec-encoded primitives never begin with 'K'
 * and that '!' and '"' inside values were escaped by the encoder; confirm
 * against the codec.
 *
 * @param {string} encoded - encoded array.
 * @returns {Array|null} the decoded value.
 */
function decode(encoded) {
if (encoded === 'A') { return null; }
if (encoded === 'K!') { return []; }
var items = encoded.split('"');

// pointers[d] is the array currently being filled at depth d. pointers[0]
// is a wrapper whose single element will be the decoded top-level array.
var pointers = [[]];
var array;
var depth = 0;

var l = items.length;
for (var i = 0; i < l; i++) {
var item = items[i];
var itemLength = item.length;

// Number of arrays opened directly before this item.
var open = 0;
while (item[open] == 'K') { open++; }

// Number of arrays closed directly after this item.
var close = 0;
while (item[itemLength-close - 1] == '!') { close++; }

// What remains between the markers is the escaped primitive (may be empty).
var content = item.slice(open, itemLength-close);

// Open one new array per leading 'K' and attach it to its parent.
// `depth` and `array` are assigned on every pass; the loop bound `newdepth`
// is fixed, so after the last pass `array` is the deepest new array.
var newdepth = depth + open;
for (var j = depth; j < newdepth; j++) {
pointers[j + 1] = [];
pointers[j].push(pointers[j + 1]);
depth = newdepth;
array = pointers[depth];
}

// An empty content carries no value (e.g. a nested empty array 'K!').
if (content.length !== 0) {
array.push(codec.decode(unescape(content)));
}

// Step back out of the closed arrays. `depth` is set to `newdepth` in the
// first pass, which makes the loop condition false afterwards, so this body
// runs at most once per item.
var newdepth = depth - close;
for (var j = newdepth; j < depth; j++) {
pointers[j + 1] = [];
depth = newdepth;
array = pointers[depth];
}

}
return pointers[0][0];
}
return buffer;
}
97 changes: 62 additions & 35 deletions test/bench.js
Original file line number Diff line number Diff line change
@@ -1,44 +1,71 @@
var bytewise = require('bytewise')
var charwise = require('../')

var words = ['foo', 'bar','hello world', 'aosenthuaosnetuhaosnetu']

function bench (name, codec) {
var start = Date.now(), c = 0
while(Date.now() < start+1000) {
c++
codec.encode([
words[~~(Math.random()*words.length)],
~~(Math.random()*1000),
Math.random(),
~(Math.random()*10000),
Date.now()
])
}
var time = (Date.now()-start)/1000
console.log(name+'.encode', c) //, c/time, time)

var start = Date.now(), c = 0
var a = []
for(var i = 0; i < 100; i++)
a.push(codec.encode([
words[~~(Math.random()*words.length)],
~~(Math.random()*1000),
Math.random(),
~(Math.random()*10000),
Date.now()
]))
while(Date.now() < start+1000) {
c++
codec.decode(a[~~(Math.random()*a.length)])
}
var time = (Date.now()-start)/1000
console.log(name+'.decode', c) //, c/time, time)
/**
 * Build a random string of lowercase letters and spaces for the benchmark.
 *
 * The loop bound `4 + Math.random() * 10` is re-drawn before every
 * character, so the length is not fixed up front.
 *
 * @returns {string} the random string.
 */
var randString = function () {
  var alphabet = ' abcdefghijklmnopqrstuvwxyz';
  var result = '';
  var count = 0;
  while (count < 4 + Math.random() * 10) {
    result += alphabet[Math.floor(Math.random() * alphabet.length)];
    count += 1;
  }
  return result;
};
/**
 * Build a random, possibly nested, array of primitives for the benchmark.
 *
 * @param {{depth: number, length: number}} opt - `length` bounds the number
 *   of elements per array; `depth` controls how likely an element is to be
 *   a nested array (the chance shrinks as the current depth approaches it).
 * @param {number} [depth] - current nesting level, defaults to 0.
 * @returns {Array} the generated array.
 */
var randArray = function (opt, depth) {
  depth = depth || 0;

  // Nested arrays are empty half of the time; the top-level one never is
  // picked as empty by this draw.
  var emptyChance = depth === 0 ? 0 : 0.5;
  var length;
  if (Math.random() < emptyChance) {
    length = 0;
  } else {
    length = Math.ceil(Math.random() * Math.max(0, opt.length));
  }

  var array = [];
  for (var i = 0; i < length; i++) {
    var nest = opt.depth != 0 && Math.random() < Math.max(0, opt.depth - depth) / opt.depth;
    if (nest) {
      array.push(randArray(opt, depth + 1));
      continue;
    }

    // Pick one of five kinds of primitive.
    var dice = Math.floor(Math.random() * 5);
    switch (dice) {
      case 0:
        array.push(randString());
        break;
      case 1:
        array.push(Math.random() > 0.5 ? true : false);
        break;
      case 2:
        array.push(Math.random() > 0.5 ? null : undefined);
        break;
      case 3:
        array.push((Math.random() * 2 - 1));
        break;
      case 4:
        array.push((Math.random() * 2 - 1) * 1e10);
        break;
    }
  }
  return array;
};

// Benchmark fixture: 100 random flat arrays (depth 0) of up to 8 elements.
var items = [];
var i = 0;
while (i < 100) {
  items.push(randArray({ depth: 0, length: 8 }));
  i++;
}

/**
 * Call `fn` on the items, cycling through them, until the seconds part of
 * `process.hrtime` since the start reaches 1.
 *
 * @param {Function} fn - function under test, called with one item.
 * @param {Array} items - inputs, visited round-robin.
 * @param {*} [samples] - accepted but not used by this function.
 * @returns {number} how many calls were made.
 */
function bench (fn, items, samples) {
  var startedAt = process.hrtime();
  var total = items.length;
  var calls = 0;
  for (; process.hrtime(startedAt)[0] < 1; calls++) {
    fn(items[calls % total]);
  }
  return calls;
}

var bencode = bench(bytewise.encode, items);
console.log('bytewise encode', Math.floor(bencode));

bench('bytewise', bytewise)
bench('charwise', charwise)
var cencode = bench(charwise.encode, items);
console.log('charwise encode', Math.floor(cencode), 'x' + Math.floor(cencode / bencode * 10)/10);

var bdecode = bench(bytewise.decode, items.map(function(item) { return bytewise.encode(item); }));
console.log('bytewise decode', Math.floor(bdecode));

var cdecode = bench(charwise.decode, items.map(function(item) { return charwise.encode(item); }));
console.log('charwise decode', Math.floor(cdecode), 'x' + Math.floor(cdecode / bdecode * 10)/10);
4 changes: 2 additions & 2 deletions test/object.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,14 @@ tape("Object: 1000 random array", function (t) {
}
if (bytewise(array1) < bytewise(array2) && encode(array1) >= encode(array2)) {
t.fail('encode(' + array1 + ') >= encode(' + array2 + ')');
break;
break;
}
if (bytewise(array1) > bytewise(array2) && encode(array1) <= encode(array2)) {
t.fail('encode(' + array1 + ') >= encode(' + array2 + ')');
break;
}
if (bytewise(array1) === bytewise(array2) && encode(array1) === encode(array2)) {
t.fail('encode(' + array1 + ') >= encode(' + array2 + ')');
t.fail('encode(' + array1 + ') !== encode(' + array2 + ')');
break;
}
}
Expand Down

0 comments on commit d8498fb

Please sign in to comment.