diff --git a/lib/readability.js b/lib/readability.js
index c8b5a52..cd8e0c3 100644
--- a/lib/readability.js
+++ b/lib/readability.js
@@ -1,6 +1,7 @@
 /*jslint undef: true, nomen: true, eqeqeq: true, plusplus: true, newcap: true, immed: true, browser: true, devel: true, passfail: false */
 /*global window: false, readConvertLinksToFootnotes: false, readStyle: false, readSize: false, readMargin: false, Typekit: false, ActiveXObject: false */
-
+var Buffer = require('buffer').Buffer;
+var Iconv = require('iconv').Iconv;
 var dbg = (typeof console !== 'undefined') ? function(s) {
     if (readability.debugging) {
         console.log("Readability: " + s);
@@ -2225,7 +2226,12 @@ exports.parse = function parse(theHtml, url, options, callback) {
         removeClassNames: true
     };
     options = Utils.extend({}, defaultOptions, options);
-
+    // Transcode non-UTF-8 input to UTF-8; theHtml is read as a 'binary' string (one char per byte).
+    if (options.encoding && options.encoding !== 'utf8') {
+        var rawHtml = new Buffer(theHtml, 'binary');
+        var converter = new Iconv(options.encoding, 'utf8');
+        theHtml = converter.convert(rawHtml).toString('utf8');
+    }
     var startTime = new Date().getTime();
     //dbg(html);
     var html = theHtml.replace(/<script[^>]*>([\s\S]*?)<\/script>/gi, '');
diff --git a/package.json b/package.json
index 417beaa..cad8180 100644
--- a/package.json
+++ b/package.json
@@ -42,7 +42,9 @@
   "dependencies": {
     "mjsunit.runner": ">=0.1.0",
     "jsdom": ">=0.1.21",
-    "htmlparser": ">=1.7.3"
+    "htmlparser": ">=1.7.3",
+    "html5": ">0.1",
+    "iconv": ">=1.1.3"
   },
   "engines" : { "node" : ">=0.2.5" },
   "directories": {