diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..967c29f --- /dev/null +++ b/.editorconfig @@ -0,0 +1,15 @@ +[*] +charset=utf-8 +end_of_line=lf +insert_final_newline=true +indent_style=tab +tab_width=4 + +[{.babelrc,.stylelintrc,.eslintrc,*.bowerrc,*.jsb3,*.jsb2,*.json}] +indent_style=space +indent_size=2 + +[{*.yml,*.yaml}] +indent_style=space +indent_size=2 + diff --git a/.gitignore b/.gitignore index f59aca0..4043a53 100644 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,6 @@ temp/ tmp/ +/test/temp/* +!/test/temp/.gitkeep +.idea diff --git a/README.md b/README.md index 7cd1f7b..e290a7d 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,21 @@ -[![NPM version](https://badge.fury.io/js/json-parser.svg)](http://badge.fury.io/js/json-parser) -[![Build Status](https://travis-ci.org/kaelzhang/node-json-parser.svg?branch=master)](https://travis-ci.org/kaelzhang/node-json-parser) - - # json-parser JSON parser to parse JSON object and MAINTAIN comments. This is a very low level module. For most situations, recommend to use [`comment-json`](https://www.npmjs.org/package/comment-json) instead. +## this version + +this version base on kaelzhang/node-json-parser + +but add support load unsafe/un-strict json + +see test file + ## Install ```sh -$ npm install json-parser --save +$ npm install json-parser2 --save ``` ## Usage diff --git a/index.js b/index.js index 14f71e4..4f94775 100644 --- a/index.js +++ b/index.js @@ -1,286 +1,6 @@ 'use strict'; -var esprima = require('esprima'); - -exports.tokenize = tokenize; -exports.parse = parse; - - -function tokenize (code) { - var list = esprima.tokenize(code, { - comment: true, - loc: true - }); - if (list.comment) { - return list; - } - - var result = []; - var comments = []; - list.map(function (t) { - if (t.type === 'LineComment') { - t.type = 'Line'; - comments.push(t); - } else if (t.type === 'BlockComment') { - t.type = 'Block'; - comments.push(t); - } else { - result.push(t); - } - }); - result.comments = comments; - return result; -} - -var tokens; -var current; -var index; -var reviver; -var remove_comments; - -function parse (code, rev, no_comments) { - tokens = tokenize(code); - reviver = rev; - remove_comments = no_comments; - - if (!tokens.length) { - unexpected_end(); - } - - sort_comment_tokens(); - - index = -1; - next(); - - var result = walk(); - - if (Object(result) === result && !remove_comments) { - if (tokens.head_comments.length) { - result['//^'] = tokens.head_comments; - } - - if (tokens.foot_comments.length) { - result['//$'] = tokens.foot_comments; - } - } - - result = transform('', result); - reviver = null; - return result; -} - - -function transform (k, v) { - return reviver - ? reviver(k, v) - : v; -} - - -function walk () { - var tt = type(); - var negative = ''; - if (tt === '-') { - next(); - tt = type(); - negative = '-'; - } - switch (tt) { - case '{': - next(); - return parse_object(); - case '[': - next(); - return parse_array(); - case 'String': - case 'Boolean': - case 'Null': - case 'Numeric': - var value = current.value; - next(); - return JSON.parse(negative + value); - } - - unexpected(); -} - - -function next () { - return current = tokens[++ index]; -} - - -function expect (a) { - if (!is(a)) { - unexpected(); - } -} - - -function unexpected () { - throw new SyntaxError('Unexpected token ' + current.value.slice(0, 1)); -} - -function unexpected_end () { - throw new SyntaxError('Unexpected end of input'); -} - - -function parse_object () { - var obj = {}; - var comment; - var started; - var name; - while (!is('}')){ - if (started) { - expect(','); - next(); - } - started = true; - expect('String'); - name = JSON.parse(current.value); - if (current.comments && !remove_comments) { - obj['// ' + name] = current.comments; - } - next(); - expect(':'); - next(); - obj[name] = transform(name, walk()); - } - next(); - return obj; -} - - -function parse_array () { - var array = []; - var started; - var i = 0; - while(!is(']')){ - if (started) { - expect(','); - next(); - } - started = true; - array[i] = transform(i, walk()); - i ++; - } - next(); - return array; -} - - -function type () { - if (!current) { - unexpected_end(); - } - - return current.type === 'Punctuator' - ? current.value - : current.type; -} - - -function is (t) { - return type() === t; -} - - -function sort_comment_tokens () { - var ts = tokens; - var comments = ts.comments; - if (!comments) { - return; - } - - function compare_to_then_push (condition, to, setup) { - var comment; - var first = true; - var host; - while((comment = comments[ci ++]) && condition(comment, to)){ - if (first) { - host = setup(); - } - first = false; - host.push(comment_content(comment)); - } - - ci --; - // Whether there are comments left. - return !!comment; - } - - var head_comments = []; - var foot_comments = []; - - var first = ts[0]; - var ci = 0; - var comment = compare_to_then_push(left, first, function () { - return head_comments; - }); - - var i = 0; - var token; - var next; - for (; i < ts.length; i ++) { - if (!comment) { - break; - } - - token = ts[i]; - next = ts[i + 1]; - - if (token.type === 'String' && next && next.value === ':') { - comment = compare_to_then_push(left, token, function () { - token.comments || (token.comments = []); - return token.comments[0] || (token.comments[0] = []); - }); - - if (!comment) { - break; - } - - comment = compare_to_then_push(right, token, function () { - token.comments || (token.comments = []); - return token.comments[1] || (token.comments[1] = []); - }); - } - } - - compare_to_then_push(function () { - return true - }, null, function () { - return foot_comments; - }); - - comments.length = 0; - delete ts.comments; - - tokens.head_comments = head_comments; - tokens.foot_comments = foot_comments; -} - - -function left (a, b) { - return a - && ( - a.loc.start.line < b.loc.start.line - || - a.loc.start.line === b.loc.start.line - && a.loc.start.column < b.loc.start.column - ); -} - - -function right (a, b) { - return a - && a.loc.start.line === b.loc.start.line - && a.loc.start.column > b.loc.start.column; -} - - -function comment_content (comment) { - return comment.type === 'Block' - ? '/*' + comment.value + '*/' - : '//' + comment.value; -} +Object.assign(module.exports, { + tokenize: require('./src/tokenize').tokenize, + parse: require('./src/index').parse, +}); diff --git a/package.json b/package.json index d6a0256..f9afbe5 100644 --- a/package.json +++ b/package.json @@ -1,14 +1,14 @@ { - "name": "json-parser", - "version": "1.1.5", + "name": "json-parser2", + "version": "2.1.5", "description": "JSON parser to parse JSON object and MAINTAIN comments.", "main": "index.js", "scripts": { - "test": "./node_modules/.bin/mocha --reporter spec ./test/*.js" + "test": "mocha" }, "repository": { "type": "git", - "url": "git://github.com/kaelzhang/node-json-parser.git" + "url": "git://github.com/bluelovers/node-json-parser.git" }, "keywords": [ "json-parser", @@ -26,7 +26,7 @@ "author": "kaelzhang", "license": "MIT", "bugs": { - "url": "https://github.com/kaelzhang/node-json-parser/issues" + "url": "https://github.com/bluelovers/node-json-parser/issues" }, "devDependencies": { "mocha": "*", diff --git a/src/index.js b/src/index.js new file mode 100644 index 0000000..95c2339 --- /dev/null +++ b/src/index.js @@ -0,0 +1,397 @@ +/** + * Created by user on 2017/1/18. + */ + +const tokenize = exports.tokenize = require('./tokenize').tokenize; + +const parse = exports.parse = function parse (code, ...args) +{ + let tokens; + let current; + let index; + + let options = {}; + + if (args.length && typeof args[args.length-1] == 'object') + { + options = args.pop(); + } + + if (args.length) + { + let [reviver, remove_comments] = args; + + if (typeof options.reviver == 'undefined') + { + options.reviver = reviver; + } + + if (typeof options.remove_comments == 'undefined') + { + options.remove_comments = remove_comments; + } + } + + options = Object.assign({}, parse.defaultOptions, options); + + let transform = options.reviver ? transform1 : transform2; + + return _parse(code, options) + + function _parse(code, options) + { + tokens = tokenize(code); + + if (!tokens.length) + { + unexpected_end(); + } + + if (sort_comment_tokens() === false && !options.reviver && options.unsafe !== true) + { + return JSON.parse(code) + } + + index = -1; + next(); + + var result = walk(); + + if (Object(result) === result && !options.remove_comments) + { + if (tokens.head_comments.length) + { + result['//^'] = tokens.head_comments; + } + + if (tokens.foot_comments.length) + { + result['//$'] = tokens.foot_comments; + } + } + + result = transform('', result); +// options.reviver = null; + return result; + } + + function transform1(k, v) + { + return options.reviver + ? options.reviver(k, v) + : v; + } + + function transform2(k, v) + { + return v; + } + + function walk() + { + var tt = type(); + var negative = ''; + if (tt === '-') + { + next(); + tt = type(); + negative = '-'; + } + switch (tt) + { + case '{': + next(); + return parse_object(); + case '[': + next(); + return parse_array(); + case 'String': + case 'Boolean': + case 'Null': + case 'Numeric': + var value = current.value; + next(); + return JSON.parse((options.unsafe && tt == 'String') + ? unsafe_quoted(negative + value) + : negative + value + ); + } + unexpected(); + } + + function next() + { + return current = tokens[++index]; + } + + function expect(a) + { + if (!is(a)) + { + unexpected(); + } + } + + function unexpected_debug() + { + if (options.debug) + { + this.message += "\n\n" + JSON.stringify({ + index: index, + current: current, + prev: tokens[index - 1], + next: tokens[index + 1] + }, null, "\t" + ) + } + } + + function unexpected() + { + throw new JsonSyntaxError('Unexpected token ' + current.value.slice(0, 1) + ' in JSON at position ' + current.loc.start.column, unexpected_debug); + } + + function unexpected_end() + { + throw new JsonRangeError('Unexpected end of JSON input', unexpected_debug); + } + + function unsafe_quoted(str) + { + if (str.indexOf('\'') !== 0) return str; + + return str.replace(/^'(.*)'$/, function ($0, $1) + { + let s = '"' + $1.replace(/\\?(")/g, "\\$1").replace(/\\(')/g, "$1") + '"'; + //console.log(str, '=', s); + return s; + } + ) + } + + function parse_object() + { + var obj = {}; + var comment; + var started; + var name; + while (!is('}')) + { + if (started) + { + expect(','); + next(); + + if (options.unsafe && is('}')) + { + break; + } + } + + started = true; + expect('String'); + name = JSON.parse(options.unsafe ? unsafe_quoted(current.value) : current.value); + if (current.comments && !options.remove_comments) + { + obj['// ' + name] = current.comments; + } + next(); + expect(':'); + next(); + obj[name] = transform(name, walk()); + } + next(); + + return obj; + } + + function parse_array() + { + var array = []; + var started; + var i = 0; + while (!is(']')) + { + if (started) + { + expect(','); + next(); + + if (options.unsafe && is(']')) + { + break; + } + } + + started = true; + array[i] = transform(i, walk()); + i++; + } + next(); + return array; + } + + function type() + { + if (!current) + { + unexpected_end(); + } + + return current.type === 'Punctuator' + ? current.value + : current.type; + } + + function is(t) + { + return type() === t; + } + + function sort_comment_tokens() + { + var ts = tokens; + var comments = ts.comments; + if (!comments) + { + return false; + } + + function compare_to_then_push(condition, to, setup) + { + var comment; + var first = true; + var host; + while ((comment = comments[ci++]) && condition(comment, to)) + { + if (first) + { + host = setup(); + } + first = false; + host.push(comment_content(comment)); + } + + ci--; + // Whether there are comments left. + return !!comment; + } + + var head_comments = []; + var foot_comments = []; + + var first = ts[0]; + var ci = 0; + var comment = compare_to_then_push(left, first, function () + { + return head_comments; + } + ); + + var i = 0; + var token; + var next; + for (; i < ts.length; i++) + { + if (!comment) + { + break; + } + + token = ts[i]; + next = ts[i + 1]; + + if (token.type === 'String' && next && next.value === ':') + { + comment = compare_to_then_push(left, token, function () + { + token.comments || (token.comments = []); + return token.comments[0] || (token.comments[0] = []); + } + ); + + if (!comment) + { + break; + } + + comment = compare_to_then_push(right, token, function () + { + token.comments || (token.comments = []); + return token.comments[1] || (token.comments[1] = []); + } + ); + } + } + + compare_to_then_push(function () + { + return true + }, null, function () + { + return foot_comments; + } + ); + + comments.length = 0; + delete ts.comments; + + tokens.head_comments = head_comments; + tokens.foot_comments = foot_comments; + } + + function left(a, b) + { + return a + && ( + a.loc.start.line < b.loc.start.line + || + a.loc.start.line === b.loc.start.line + && a.loc.start.column < b.loc.start.column + ); + } + + function right(a, b) + { + return a + && a.loc.start.line === b.loc.start.line + && a.loc.start.column > b.loc.start.column; + } + + function comment_content(comment) + { + return comment.type === 'Block' + ? '/*' + comment.value + '*/' + : '//' + comment.value; + } + +} + +parse.defaultOptions = { + //debug: false, + unsafe: false, +}; + +class JsonSyntaxError extends SyntaxError { + constructor(message, callback) + { + super(message); + + if (typeof callback == "function") + { + callback.call(this) + } + } +} + +class JsonRangeError extends RangeError { + constructor(message, callback) + { + super(message); + + if (typeof callback == "function") + { + callback.call(this) + } + } +} diff --git a/src/tokenize.js b/src/tokenize.js new file mode 100644 index 0000000..88233d2 --- /dev/null +++ b/src/tokenize.js @@ -0,0 +1,41 @@ +/** + * Created by user on 2017/1/18. + */ + +const esprima = require('esprima'); + +exports.tokenize = function tokenize(code) +{ + var list = esprima.tokenize(code, { + comment: true, + loc: true + } + ); + if (list.comment) + { + return list; + } + + var result = []; + var comments = []; + list.map(function (t) + { + if (t.type === 'LineComment') + { + t.type = 'Line'; + comments.push(t); + } + else if (t.type === 'BlockComment') + { + t.type = 'Block'; + comments.push(t); + } + else + { + result.push(t); + } + } + ); + result.comments = comments; + return result; +} diff --git a/test/benchmark.perf.js b/test/benchmark.perf.js new file mode 100644 index 0000000..a4708b3 --- /dev/null +++ b/test/benchmark.perf.js @@ -0,0 +1,53 @@ +/** + * Created by user on 2017/1/18. + */ + +const myjson = require('../src'); + +var perf = require('mocha-benchmark').create({ + Benchmark: require('benchmark'), + versions: [ + /* order can be important when we introduce test failing options */ + ['JSON', (JSON.__name = 'JSON', JSON)], + //['old', require('../')], + ['new', (myjson.__name = 'json-parser', myjson)] + ], + /* could be describe */ +// suite: describe, +// /* or it */ +// test: it, +}); + +// perf.suite only runs the last version (usually your latest) +perf.suite('libGlobal', function(perf, libGlobal) { + + const fs = require('fs'); + const str = fs.readFileSync("./test/file/" + "unsafe_json.json.dist").toString() + + const options = { + unsafe: true, + debug: true, + }; + +// let p = require('../') + +// console.log(p.parse(str, options.reviver, options.remove_comments, options)) + +// perf.test('my lib should be fast', function() { +// //libGlobal.doStuff(); +// +// p.parse(str, options.reviver, options.remove_comments, options); +// +// }); + + // all tests inside compare run for each version + perf.compare(function(perf, libGlobal) { + + perf.test(libGlobal.__name, function() { + // libGlobal will be previous then latest + + libGlobal.parse(str, options.reviver, options.remove_comments, options); + + }); + }); +}); diff --git a/test/file/unsafe_array.json b/test/file/unsafe_array.json new file mode 100644 index 0000000..fbf6d3b --- /dev/null +++ b/test/file/unsafe_array.json @@ -0,0 +1,18 @@ +/** + blah + */ +// comment at top +{ + // comment for a + /* block comment */ + "a": 1, // comment at right + + + + "ignore": [ + "node_modules", + "dist", + ], + +} +// comment at bottom diff --git a/test/file/unsafe_array.json.dist b/test/file/unsafe_array.json.dist new file mode 100644 index 0000000..843a78e --- /dev/null +++ b/test/file/unsafe_array.json.dist @@ -0,0 +1,7 @@ +{ + "a": 1, + "ignore": [ + "node_modules", + "dist" + ] +} \ No newline at end of file diff --git a/test/file/unsafe_json.json b/test/file/unsafe_json.json new file mode 100644 index 0000000..0fe6bd9 --- /dev/null +++ b/test/file/unsafe_json.json @@ -0,0 +1,22 @@ +/** + blah + */ +// comment at top +{ + // comment for a + /* block comment */ + "a": 1, // comment at right + + "development": { + "retainLines": true, + "sourceMaps": true, + "auxiliaryCommentAfter": true + }, + + "ignore": [ + "node_modules", + "dist", + ], + +} +// comment at bottom diff --git a/test/file/unsafe_json.json.dist b/test/file/unsafe_json.json.dist new file mode 100644 index 0000000..5349b15 --- /dev/null +++ b/test/file/unsafe_json.json.dist @@ -0,0 +1,12 @@ +{ + "a": 1, + "development": { + "retainLines": true, + "sourceMaps": true, + "auxiliaryCommentAfter": true + }, + "ignore": [ + "node_modules", + "dist" + ] +} \ No newline at end of file diff --git a/test/file/unsafe_object.json b/test/file/unsafe_object.json new file mode 100644 index 0000000..bc9908c --- /dev/null +++ b/test/file/unsafe_object.json @@ -0,0 +1,19 @@ +/** + blah + */ +// comment at top +{ + // comment for a + /* block comment */ + "a": 1, // comment at right + + "development": { + "retainLines": true, + "sourceMaps": true, + "auxiliaryCommentAfter": true, + }, + + + +} +// comment at bottom diff --git a/test/file/unsafe_object.json.dist b/test/file/unsafe_object.json.dist new file mode 100644 index 0000000..ae05692 --- /dev/null +++ b/test/file/unsafe_object.json.dist @@ -0,0 +1,8 @@ +{ + "a": 1, + "development": { + "retainLines": true, + "sourceMaps": true, + "auxiliaryCommentAfter": true + } +} \ No newline at end of file diff --git a/test/file/unsafe_quoted.json b/test/file/unsafe_quoted.json new file mode 100644 index 0000000..213e705 --- /dev/null +++ b/test/file/unsafe_quoted.json @@ -0,0 +1,21 @@ +/** + blah + */ +// comment at top +{ + // comment for a + /* block comment */ + "a": 1, // comment at right + + 'ig"n\"ore': [ + "node_m'odules", + "dist", + ], + + 'ignore': [ + "node_modules", + "dist", + ], + +} +// comment at bottom diff --git a/test/file/unsafe_quoted.json.dist b/test/file/unsafe_quoted.json.dist new file mode 100644 index 0000000..65351b8 --- /dev/null +++ b/test/file/unsafe_quoted.json.dist @@ -0,0 +1,11 @@ +{ + "a": 1, + "ig\"n\"ore": [ + "node_m'odules", + "dist" + ], + "ignore": [ + "node_modules", + "dist" + ] +} \ No newline at end of file diff --git a/test/file/unsafe_quoted2.json b/test/file/unsafe_quoted2.json new file mode 100644 index 0000000..1065efa --- /dev/null +++ b/test/file/unsafe_quoted2.json @@ -0,0 +1,21 @@ +/** + blah + */ +// comment at top +{ + // comment for a + /* block comment */ + "a": 1, // comment at right + + 'ig"n\"o\'r\'e': [ + "node_m'odules", + 'dist', + ], + + 'ignore': [ + "node_modules", + "dist", + ], + +} +// comment at bottom diff --git a/test/file/unsafe_quoted2.json.dist b/test/file/unsafe_quoted2.json.dist new file mode 100644 index 0000000..6173d1d --- /dev/null +++ b/test/file/unsafe_quoted2.json.dist @@ -0,0 +1,11 @@ +{ + "a": 1, + "ig\"n\"o'r'e": [ + "node_m'odules", + "dist" + ], + "ignore": [ + "node_modules", + "dist" + ] +} \ No newline at end of file diff --git a/test/json-parser.js b/test/json-parser.test.js similarity index 98% rename from test/json-parser.js rename to test/json-parser.test.js index b7db424..1b92885 100644 --- a/test/json-parser.js +++ b/test/json-parser.test.js @@ -1,7 +1,7 @@ 'use strict'; var expect = require('chai').expect; -var parser = require('../'); +var parser = require('../src'); // var a = parser.parse('//top\n{// top a\n/* abc */"a":1,//right\n/* bcd */"b":{"a":1}}//bottom'); // // var a = parser.parse('{/*top*/"a":1,//right\n/*abc*/"b":{"a":1}}'); diff --git a/test/unsafe_json.test.js b/test/unsafe_json.test.js new file mode 100644 index 0000000..4869e60 --- /dev/null +++ b/test/unsafe_json.test.js @@ -0,0 +1,56 @@ +/** + * Created by user on 2017/1/17. + */ + +'use strict'; + +var expect = require('chai').expect; +var parser = require('../src') + +const fs = require('fs'); + +const options = { + unsafe: true, + debug: true, + }; + +const dir = "./test/file/"; + +describe("parse(unsafe)", function () + { + let list = fs.readdirSync(dir) + +//let file = 'unsafe_object.json'; + + list.forEach(function (file) + { + if (file.match(/^unsafe_.+\.json$/)) + { + it(file, function (done) + { + let j = parser.parse(fs.readFileSync(dir + file).toString(), null, true, options); + let d = ''; + + try + { + d = JSON.parse(fs.readFileSync(dir + file + '.dist').toString()); + } + catch (e) + {} + + let o = JSON.stringify(j, null, "\t"); + + fs.writeFileSync(dir + '/../temp/' + file + '.dist', o) + + expect(JSON.parse(o)).to.deep.equal(d); + + done() + } + ) + } + } + ) + + } +) +