From 1c7c0eff5c098b4879afcc7270e2f6abe65829dd Mon Sep 17 00:00:00 2001 From: Damien Arrachequesne Date: Sun, 16 Oct 2016 04:50:51 +0200 Subject: [PATCH 1/2] Revert "[chore] Use wtf-8 instead of utf8 to prevent lone surrogates from generating parsing error (#68)" This reverts commit f5d966bd667fec4276f304723b034a94960ada99. --- lib/browser.js | 2 +- lib/index.js | 2 +- package.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/browser.js b/lib/browser.js index 83c586b..faff1a0 100644 --- a/lib/browser.js +++ b/lib/browser.js @@ -6,7 +6,7 @@ var keys = require('./keys'); var hasBinary = require('has-binary'); var sliceBuffer = require('arraybuffer.slice'); var after = require('after'); -var utf8 = require('wtf-8'); +var utf8 = require('utf8'); var base64encoder; if (global.ArrayBuffer) { diff --git a/lib/index.js b/lib/index.js index 3c60d66..469366d 100644 --- a/lib/index.js +++ b/lib/index.js @@ -2,7 +2,7 @@ * Module dependencies. */ -var utf8 = require('wtf-8'); +var utf8 = require('utf8'); var after = require('after'); var keys = require('./keys'); diff --git a/package.json b/package.json index 7f7f4b5..cdd2250 100644 --- a/package.json +++ b/package.json @@ -16,7 +16,7 @@ "base64-arraybuffer": "0.1.5", "blob": "0.0.4", "has-binary": "0.1.6", - "wtf-8": "1.0.0" + "utf8": "2.1.0" }, "scripts": { "test": "make test" From 09f360625bc4f59945638c3714f34c53151e08b4 Mon Sep 17 00:00:00 2001 From: Damien Arrachequesne Date: Sun, 16 Oct 2016 04:54:16 +0200 Subject: [PATCH 2/2] [fix] Sanitize strings by removing lone surrogates --- lib/browser.js | 6 +++--- lib/index.js | 6 +++--- lib/keys.js | 19 ------------------- lib/utils.js | 38 ++++++++++++++++++++++++++++++++++++++ test/parser.js | 9 +++++++++ 5 files changed, 53 insertions(+), 25 deletions(-) delete mode 100644 lib/keys.js create mode 100644 lib/utils.js diff --git a/lib/browser.js b/lib/browser.js index faff1a0..687dc9b 100644 --- a/lib/browser.js +++ b/lib/browser.js @@ -2,7 +2,7 @@ * 
Module dependencies. */ -var keys = require('./keys'); +var utils = require('./utils'); var hasBinary = require('has-binary'); var sliceBuffer = require('arraybuffer.slice'); var after = require('after'); @@ -56,7 +56,7 @@ var packets = exports.packets = { , noop: 6 }; -var packetslist = keys(packets); +var packetslist = utils.keys(packets); /** * Premade error packet. @@ -117,7 +117,7 @@ exports.encodePacket = function (packet, supportsBinary, utf8encode, callback) { // data fragment is optional if (undefined !== packet.data) { - encoded += utf8encode ? utf8.encode(String(packet.data)) : String(packet.data); + encoded += utf8encode ? utf8.encode(utils.sanitizeString(String(packet.data))) : String(packet.data); } return callback('' + encoded); diff --git a/lib/index.js b/lib/index.js index 469366d..7029c4e 100644 --- a/lib/index.js +++ b/lib/index.js @@ -4,7 +4,7 @@ var utf8 = require('utf8'); var after = require('after'); -var keys = require('./keys'); +var utils = require('./utils'); /** * Current protocol version. @@ -25,7 +25,7 @@ var packets = exports.packets = { , noop: 6 }; -var packetslist = keys(packets); +var packetslist = utils.keys(packets); /** * Premade error packet. @@ -72,7 +72,7 @@ exports.encodePacket = function (packet, supportsBinary, utf8encode, callback) { // data fragment is optional if (undefined !== packet.data) { - encoded += utf8encode ? utf8.encode(String(packet.data)) : String(packet.data); + encoded += utf8encode ? utf8.encode(utils.sanitizeString(String(packet.data))) : String(packet.data); } return callback('' + encoded); diff --git a/lib/keys.js b/lib/keys.js deleted file mode 100644 index 947dafd..0000000 --- a/lib/keys.js +++ /dev/null @@ -1,19 +0,0 @@ - -/** - * Gets the keys for an object. 
- * - * @return {Array} keys - * @api private - */ - -module.exports = Object.keys || function keys (obj){ - var arr = []; - var has = Object.prototype.hasOwnProperty; - - for (var i in obj) { - if (has.call(obj, i)) { - arr.push(i); - } - } - return arr; -}; diff --git a/lib/utils.js b/lib/utils.js new file mode 100644 index 0000000..dd28f44 --- /dev/null +++ b/lib/utils.js @@ -0,0 +1,38 @@ + +/** + * Gets the keys for an object. + * + * @return {Array} keys + * @api private + */ + +var keys = Object.keys || function keys (obj) { + var arr = []; + var has = Object.prototype.hasOwnProperty; + + for (var i in obj) { + if (has.call(obj, i)) { + arr.push(i); + } + } + return arr; +}; + +// matches a well-formed surrogate pair first, otherwise any single (lone) surrogate +var loneSurrogatesRegex = /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDFFF]/g; + +/** + * Sanitize a string, replacing each lone surrogate with U+FFFD + * 'REPLACEMENT CHARACTER'; surrogate pairs and neighbouring characters are kept + * + * @return {String} str + * @api private + */ +var sanitizeString = function (str) { + return str.replace(loneSurrogatesRegex, function (m) { return m.length === 2 ? m : '\uFFFD'; }); +}; + +module.exports = { + keys: keys, + sanitizeString: sanitizeString +}; diff --git a/test/parser.js b/test/parser.js index da852d8..807ae28 100644 --- a/test/parser.js +++ b/test/parser.js @@ -124,6 +124,15 @@ module.exports = function(parser) { expect(data).to.match(/^[0-9]$/); }); }); + + it('should encode a string message with lone surrogates replaced', function(done) { + var data = '\uDC00 a\uDC00 \uDBFF\uDC00 \uDBFFb \uDBFF'; + encode({ type: 'message', data: data }, null, true, function(encoded) { + expect(decode(encoded, null, true)).to.eql({ type: 'message', data: '\uFFFD a\uFFFD \uDBFF\uDC00 \uFFFDb \uFFFD' }); + done(); + }); + }); + }); describe('decoding error handing', function () {