From 0005d7e1e72090d83c4de73d85475d08d2c34e14 Mon Sep 17 00:00:00 2001 From: shahafal Date: Sun, 23 Jul 2017 19:47:37 +0300 Subject: [PATCH 1/3] bbCodeParser written in TypeScript Hi, I found out about this JavaScript parser from this article: http://coursesweb.net/javascript/convert-bbcode-html-javascript_cs I needed this integrated with an Angular 4 app I'm working on, so I converted it into TypeScript for easier integration. In case someone else is searching for a BBCode parser written in TypeScript, I thought this would be a good place to publish it. --- bbCodeParser.ts | 158 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 bbCodeParser.ts diff --git a/bbCodeParser.ts b/bbCodeParser.ts new file mode 100644 index 0000000..dbbacfb --- /dev/null +++ b/bbCodeParser.ts @@ -0,0 +1,158 @@ +export class BBCodeParser { + token_match: any = /{[A-Z_]+[0-9]*}/ig; + + // regular expressions for the different bbcode tokens + tokens: any = { + URL: '((?:(?:[a-z][a-z\\d+\\-.]*:\\/{2}(?:(?:[a-z0-9\\-._~\\!$&\'*+,;=:@|]+|%[\\dA-F]{2})+|[0-9.]+|\\[[a-z0-9.]+:[a-z0-9.]+:[a-z0-9.:]+\\])(?::\\d*)?(?:\\/(?:[a-z0-9\\-._~\\!$&\'*+,;=:@|]+|%[\\dA-F]{2})*)*(?:\\?(?:[a-z0-9\\-._~\\!$&\'*+,;=:@\\/?|]+|%[\\dA-F]{2})*)?(?:#(?:[a-z0-9\\-._~\\!$&\'*+,;=:@\\/?|]+|%[\\dA-F]{2})*)?)|(?:www\\.(?:[a-z0-9\\-._~\\!$&\'*+,;=:@|]+|%[\\dA-F]{2})+(?::\\d*)?(?:\\/(?:[a-z0-9\\-._~\\!$&\'*+,;=:@|]+|%[\\dA-F]{2})*)*(?:\\?(?:[a-z0-9\\-._~\\!$&\'*+,;=:@\\/?|]+|%[\\dA-F]{2})*)?(?:#(?:[a-z0-9\\-._~\\!$&\'*+,;=:@\\/?|]+|%[\\dA-F]{2})*)?)))', + LINK: '([a-z0-9\-\./]+[^"\' ]*)', + TEXT: '(.*?)' + }; + + bbcode_matches: any[] = []; // matches for bbcode to html + + html_tpls: any[] = []; // html templates for html to bbcode + + html_matches: any[] = []; // matches for html to bbcode + + bbcode_tpls: any[] = []; // bbcode templates for bbcode to html + + constructor() { + this.addBBCode('[b]{TEXT}[/b]', '{TEXT}'); + this.addBBCode('[i]{TEXT}[/i]', 
'{TEXT}'); + this.addBBCode('[u]{TEXT}[/u]', '{TEXT}'); + this.addBBCode('[url={URL}]{TEXT}[/url]', '{TEXT}'); + this.addBBCode('[url]{URL}[/url]', '{URL}'); + this.addBBCode('[url={LINK}]{TEXT}[/url]', '{TEXT}'); + this.addBBCode('[url]{LINK}[/url]', '{LINK}'); + this.addBBCode('[img]{URL}[/img]', '{URL}'); + this.addBBCode('[img]{LINK}[/img]', '{LINK}'); + this.addBBCode('[quote]{TEXT}[/quote]', '
{TEXT}
'); + } + + /** + * Turns a bbcode into a regular rexpression by changing the tokens into + * their regex form + */ + private _getRegEx(str: string) { + var matches = str.match(this.token_match); + var nrmatches = matches.length; + var i = 0; + var replacement = ''; + + if (nrmatches <= 0) { + return new RegExp(this.preg_quote(str), 'g'); // no tokens so return the escaped string + } + + for (; i < nrmatches; i += 1) { + // Remove {, } and numbers from the token so it can match the + // keys in tokens + var token = matches[i].replace(/[{}0-9]/g, ''); + + if (this.tokens[token]) { + // Escape everything before the token + replacement += this.preg_quote(str.substr(0, str.indexOf(matches[i]))) + this.tokens[token]; + + // Remove everything before the end of the token so it can be used + // with the next token. Doing this so that parts can be escaped + str = str.substr(str.indexOf(matches[i]) + matches[i].length); + } + } + + replacement += this.preg_quote(str); // add whatever is left to the string + + return new RegExp(replacement, 'gi'); + }; + + /** + * Turns a bbcode template into the replacement form used in regular expressions + * by turning the tokens in $1, $2, etc. 
+ */ + private _getTpls(str: string) { + var matches = str.match(this.token_match); + var nrmatches = matches.length; + var i = 0; + var replacement = ''; + var positions = {}; + var next_position = 0; + + if (nrmatches <= 0) { + return str; // no tokens so return the string + } + + for (; i < nrmatches; i += 1) { + // Remove {, } and numbers from the token so it can match the + // keys in tokens + var token = matches[i].replace(/[{}0-9]/g, ''); + var position; + + // figure out what $# to use ($1, $2) + if (positions[matches[i]]) { + position = positions[matches[i]]; // if the token already has a position then use that + } else { + // token doesn't have a position so increment the next position + // and record this token's position + next_position += 1; + position = next_position; + positions[matches[i]] = position; + } + + if (this.tokens[token]) { + replacement += str.substr(0, str.indexOf(matches[i])) + '$' + position; + str = str.substr(str.indexOf(matches[i]) + matches[i].length); + } + } + + replacement += str; + + return replacement; + }; + + /** + * Adds a bbcode to the list + */ + private addBBCode(bbcode_match: any, bbcode_tpl: any) { + // add the regular expressions and templates for bbcode to html + this.bbcode_matches.push(this._getRegEx(bbcode_match)); + this.html_tpls.push(this._getTpls(bbcode_tpl)); + + // add the regular expressions and templates for html to bbcode + this.html_matches.push(this._getRegEx(bbcode_tpl)); + this.bbcode_tpls.push(this._getTpls(bbcode_match)); + }; + + /** + * Turns all of the added bbcodes into html + */ + public bbcodeToHtml(str: string) { + var nrbbcmatches = this.bbcode_matches.length; + var i = 0; + + for (; i < nrbbcmatches; i += 1) { + str = str.replace(this.bbcode_matches[i], this.html_tpls[i]); + } + + return str; + }; + + /** + * Turns html into bbcode + */ + public htmlToBBCode(str: string) { + var nrhtmlmatches = this.html_matches.length; + var i = 0; + + for (; i < nrhtmlmatches; i += 1) { + str = 
str.replace(this.html_matches[i], this.bbcode_tpls[i]); + } + + return str; + } + + /** + * Quote regular expression characters plus an optional character + * taken from phpjs.org + */ + private preg_quote(str: string, delimiter: string = null) { + return (str + '').replace(new RegExp('[.\\\\+*?\\[\\^\\]$(){}=!<>|:\\' + (delimiter || '') + '-]', 'g'), '\\$&'); + } +} From cb1ab94132f1afcc898369f6267d6a8983d5c73c Mon Sep 17 00:00:00 2001 From: shahafal Date: Sun, 23 Jul 2017 21:16:54 +0300 Subject: [PATCH 2/3] text regex miss multilines; image regex timeouts 1) regex did not match multilines, had to add [\\S\\s] to TEXT token 2) image html regex was getting timeouts due to an extra " and the ending /, had to take them out --- bbCodeParser.ts | 159 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 159 insertions(+) diff --git a/bbCodeParser.ts b/bbCodeParser.ts index dbbacfb..74a386f 100644 --- a/bbCodeParser.ts +++ b/bbCodeParser.ts @@ -1,6 +1,165 @@ export class BBCodeParser { token_match: any = /{[A-Z_]+[0-9]*}/ig; + // regular expressions for the different bbcode tokens + tokens: any = { + URL: '((?:(?:[a-z][a-z\\d+\\-.]*:\\/{2}(?:(?:[a-z0-9\\-._~\\!$&\'*+,;=:@|]+|%[\\dA-F]{2})+|[0-9.]+|\\[[a-z0-9.]+:[a-z0-9.]+:[a-z0-9.:]+\\])(?::\\d*)?(?:\\/(?:[a-z0-9\\-._~\\!$&\'*+,;=:@|]+|%[\\dA-F]{2})*)*(?:\\?(?:[a-z0-9\\-._~\\!$&\'*+,;=:@\\/?|]+|%[\\dA-F]{2})*)?(?:#(?:[a-z0-9\\-._~\\!$&\'*+,;=:@\\/?|]+|%[\\dA-F]{2})*)?)|(?:www\\.(?:[a-z0-9\\-._~\\!$&\'*+,;=:@|]+|%[\\dA-F]{2})+(?::\\d*)?(?:\\/(?:[a-z0-9\\-._~\\!$&\'*+,;=:@|]+|%[\\dA-F]{2})*)*(?:\\?(?:[a-z0-9\\-._~\\!$&\'*+,;=:@\\/?|]+|%[\\dA-F]{2})*)?(?:#(?:[a-z0-9\\-._~\\!$&\'*+,;=:@\\/?|]+|%[\\dA-F]{2})*)?)))', + LINK: '([a-z0-9\-\./]+[^"\' ]*)', + TEXT: '([\\S\\s]*?)' + }; + + bbcode_matches: any[] = []; // matches for bbcode to html + + html_tpls: any[] = []; // html templates for html to bbcode + + html_matches: any[] = []; // matches for html to bbcode + + bbcode_tpls: any[] = []; // bbcode templates for 
bbcode to html + + constructor() { + this.addBBCode('\n', '
'); + this.addBBCode('[b]{TEXT}[/b]', '{TEXT}'); + this.addBBCode('[i]{TEXT}[/i]', '{TEXT}'); + this.addBBCode('[u]{TEXT}[/u]', '{TEXT}'); + this.addBBCode('[code]{TEXT}[/code]', '
{TEXT}
'); + this.addBBCode('[url={URL}]{TEXT}[/url]', '{TEXT}'); + this.addBBCode('[url]{URL}[/url]', '{URL}'); + this.addBBCode('[url={LINK}]{TEXT}[/url]', '{TEXT}'); + this.addBBCode('[url]{LINK}[/url]', '{LINK}'); + this.addBBCode('[img]{URL}[/img]', ''); + this.addBBCode('[img]{LINK}[/img]', ''); + this.addBBCode('[quote]{TEXT}[/quote]', '
{TEXT}
'); + } + + /** + * Turns a bbcode into a regular rexpression by changing the tokens into + * their regex form + */ + private _getRegEx(str: string) { + var matches = str.match(this.token_match); + var nrmatches = matches === null ? 0 : matches.length; + var i = 0; + var replacement = ''; + + if (nrmatches <= 0) { + return new RegExp(this.preg_quote(str), 'g'); // no tokens so return the escaped string + } + + for (; i < nrmatches; i += 1) { + // Remove {, } and numbers from the token so it can match the + // keys in tokens + var token = matches[i].replace(/[{}0-9]/g, ''); + + if (this.tokens[token]) { + // Escape everything before the token + replacement += this.preg_quote(str.substr(0, str.indexOf(matches[i]))) + this.tokens[token]; + + // Remove everything before the end of the token so it can be used + // with the next token. Doing this so that parts can be escaped + str = str.substr(str.indexOf(matches[i]) + matches[i].length); + } + } + + replacement += this.preg_quote(str); // add whatever is left to the string + + return new RegExp(replacement, 'gi'); + }; + + /** + * Turns a bbcode template into the replacement form used in regular expressions + * by turning the tokens in $1, $2, etc. + */ + private _getTpls(str: string) { + var matches = str.match(this.token_match); + var nrmatches = matches === null ? 
0 : matches.length; + var i = 0; + var replacement = ''; + var positions = {}; + var next_position = 0; + + if (nrmatches <= 0) { + return str; // no tokens so return the string + } + + for (; i < nrmatches; i += 1) { + // Remove {, } and numbers from the token so it can match the + // keys in tokens + var token = matches[i].replace(/[{}0-9]/g, ''); + var position; + + // figure out what $# to use ($1, $2) + if (positions[matches[i]]) { + position = positions[matches[i]]; // if the token already has a position then use that + } else { + // token doesn't have a position so increment the next position + // and record this token's position + next_position += 1; + position = next_position; + positions[matches[i]] = position; + } + + if (this.tokens[token]) { + replacement += str.substr(0, str.indexOf(matches[i])) + '$' + position; + str = str.substr(str.indexOf(matches[i]) + matches[i].length); + } + } + + replacement += str; + + return replacement; + }; + + /** + * Adds a bbcode to the list + */ + private addBBCode(bbcode_match: any, bbcode_tpl: any) { + // add the regular expressions and templates for bbcode to html + this.bbcode_matches.push(this._getRegEx(bbcode_match)); + this.html_tpls.push(this._getTpls(bbcode_tpl)); + + // add the regular expressions and templates for html to bbcode + this.html_matches.push(this._getRegEx(bbcode_tpl)); + this.bbcode_tpls.push(this._getTpls(bbcode_match)); + }; + + /** + * Turns all of the added bbcodes into html + */ + public bbcodeToHtml(str: string) { + var nrbbcmatches = this.bbcode_matches.length; + var i = 0; + + for (; i < nrbbcmatches; i += 1) { + str = str.replace(this.bbcode_matches[i], this.html_tpls[i]); + } + + return str; + }; + + /** + * Turns html into bbcode + */ + public htmlToBBCode(str: string) { + var nrhtmlmatches = this.html_matches.length; + var i = 0; + + for (; i < nrhtmlmatches; i += 1) { + str = str.replace(this.html_matches[i], this.bbcode_tpls[i]); + } + + return str; + } + + /** + * Quote regular 
expression characters plus an optional character + * taken from phpjs.org + */ + private preg_quote(str: string, delimiter: string = null) { + return (str + '').replace(new RegExp('[.\\\\+*?\\[\\^\\]$(){}=!<>|:\\' + (delimiter || '') + '-]', 'g'), '\\$&'); + } +} + token_match: any = /{[A-Z_]+[0-9]*}/ig; + // regular expressions for the different bbcode tokens tokens: any = { URL: '((?:(?:[a-z][a-z\\d+\\-.]*:\\/{2}(?:(?:[a-z0-9\\-._~\\!$&\'*+,;=:@|]+|%[\\dA-F]{2})+|[0-9.]+|\\[[a-z0-9.]+:[a-z0-9.]+:[a-z0-9.:]+\\])(?::\\d*)?(?:\\/(?:[a-z0-9\\-._~\\!$&\'*+,;=:@|]+|%[\\dA-F]{2})*)*(?:\\?(?:[a-z0-9\\-._~\\!$&\'*+,;=:@\\/?|]+|%[\\dA-F]{2})*)?(?:#(?:[a-z0-9\\-._~\\!$&\'*+,;=:@\\/?|]+|%[\\dA-F]{2})*)?)|(?:www\\.(?:[a-z0-9\\-._~\\!$&\'*+,;=:@|]+|%[\\dA-F]{2})+(?::\\d*)?(?:\\/(?:[a-z0-9\\-._~\\!$&\'*+,;=:@|]+|%[\\dA-F]{2})*)*(?:\\?(?:[a-z0-9\\-._~\\!$&\'*+,;=:@\\/?|]+|%[\\dA-F]{2})*)?(?:#(?:[a-z0-9\\-._~\\!$&\'*+,;=:@\\/?|]+|%[\\dA-F]{2})*)?)))', From 75a80a96147a9d83943ce9768ca0b085a3dc904b Mon Sep 17 00:00:00 2001 From: shahafal Date: Sun, 23 Jul 2017 21:18:47 +0300 Subject: [PATCH 3/3] remove duplicate lines file duplicated in last commit --- bbCodeParser.ts | 157 ------------------------------------------------ 1 file changed, 157 deletions(-) diff --git a/bbCodeParser.ts b/bbCodeParser.ts index 74a386f..dd0a656 100644 --- a/bbCodeParser.ts +++ b/bbCodeParser.ts @@ -158,160 +158,3 @@ export class BBCodeParser { return (str + '').replace(new RegExp('[.\\\\+*?\\[\\^\\]$(){}=!<>|:\\' + (delimiter || '') + '-]', 'g'), '\\$&'); } } - token_match: any = /{[A-Z_]+[0-9]*}/ig; - - // regular expressions for the different bbcode tokens - tokens: any = { - URL: 
'((?:(?:[a-z][a-z\\d+\\-.]*:\\/{2}(?:(?:[a-z0-9\\-._~\\!$&\'*+,;=:@|]+|%[\\dA-F]{2})+|[0-9.]+|\\[[a-z0-9.]+:[a-z0-9.]+:[a-z0-9.:]+\\])(?::\\d*)?(?:\\/(?:[a-z0-9\\-._~\\!$&\'*+,;=:@|]+|%[\\dA-F]{2})*)*(?:\\?(?:[a-z0-9\\-._~\\!$&\'*+,;=:@\\/?|]+|%[\\dA-F]{2})*)?(?:#(?:[a-z0-9\\-._~\\!$&\'*+,;=:@\\/?|]+|%[\\dA-F]{2})*)?)|(?:www\\.(?:[a-z0-9\\-._~\\!$&\'*+,;=:@|]+|%[\\dA-F]{2})+(?::\\d*)?(?:\\/(?:[a-z0-9\\-._~\\!$&\'*+,;=:@|]+|%[\\dA-F]{2})*)*(?:\\?(?:[a-z0-9\\-._~\\!$&\'*+,;=:@\\/?|]+|%[\\dA-F]{2})*)?(?:#(?:[a-z0-9\\-._~\\!$&\'*+,;=:@\\/?|]+|%[\\dA-F]{2})*)?)))', - LINK: '([a-z0-9\-\./]+[^"\' ]*)', - TEXT: '(.*?)' - }; - - bbcode_matches: any[] = []; // matches for bbcode to html - - html_tpls: any[] = []; // html templates for html to bbcode - - html_matches: any[] = []; // matches for html to bbcode - - bbcode_tpls: any[] = []; // bbcode templates for bbcode to html - - constructor() { - this.addBBCode('[b]{TEXT}[/b]', '{TEXT}'); - this.addBBCode('[i]{TEXT}[/i]', '{TEXT}'); - this.addBBCode('[u]{TEXT}[/u]', '{TEXT}'); - this.addBBCode('[url={URL}]{TEXT}[/url]', '{TEXT}'); - this.addBBCode('[url]{URL}[/url]', '{URL}'); - this.addBBCode('[url={LINK}]{TEXT}[/url]', '{TEXT}'); - this.addBBCode('[url]{LINK}[/url]', '{LINK}'); - this.addBBCode('[img]{URL}[/img]', '{URL}'); - this.addBBCode('[img]{LINK}[/img]', '{LINK}'); - this.addBBCode('[quote]{TEXT}[/quote]', '
{TEXT}
'); - } - - /** - * Turns a bbcode into a regular rexpression by changing the tokens into - * their regex form - */ - private _getRegEx(str: string) { - var matches = str.match(this.token_match); - var nrmatches = matches.length; - var i = 0; - var replacement = ''; - - if (nrmatches <= 0) { - return new RegExp(this.preg_quote(str), 'g'); // no tokens so return the escaped string - } - - for (; i < nrmatches; i += 1) { - // Remove {, } and numbers from the token so it can match the - // keys in tokens - var token = matches[i].replace(/[{}0-9]/g, ''); - - if (this.tokens[token]) { - // Escape everything before the token - replacement += this.preg_quote(str.substr(0, str.indexOf(matches[i]))) + this.tokens[token]; - - // Remove everything before the end of the token so it can be used - // with the next token. Doing this so that parts can be escaped - str = str.substr(str.indexOf(matches[i]) + matches[i].length); - } - } - - replacement += this.preg_quote(str); // add whatever is left to the string - - return new RegExp(replacement, 'gi'); - }; - - /** - * Turns a bbcode template into the replacement form used in regular expressions - * by turning the tokens in $1, $2, etc. 
- */ - private _getTpls(str: string) { - var matches = str.match(this.token_match); - var nrmatches = matches.length; - var i = 0; - var replacement = ''; - var positions = {}; - var next_position = 0; - - if (nrmatches <= 0) { - return str; // no tokens so return the string - } - - for (; i < nrmatches; i += 1) { - // Remove {, } and numbers from the token so it can match the - // keys in tokens - var token = matches[i].replace(/[{}0-9]/g, ''); - var position; - - // figure out what $# to use ($1, $2) - if (positions[matches[i]]) { - position = positions[matches[i]]; // if the token already has a position then use that - } else { - // token doesn't have a position so increment the next position - // and record this token's position - next_position += 1; - position = next_position; - positions[matches[i]] = position; - } - - if (this.tokens[token]) { - replacement += str.substr(0, str.indexOf(matches[i])) + '$' + position; - str = str.substr(str.indexOf(matches[i]) + matches[i].length); - } - } - - replacement += str; - - return replacement; - }; - - /** - * Adds a bbcode to the list - */ - private addBBCode(bbcode_match: any, bbcode_tpl: any) { - // add the regular expressions and templates for bbcode to html - this.bbcode_matches.push(this._getRegEx(bbcode_match)); - this.html_tpls.push(this._getTpls(bbcode_tpl)); - - // add the regular expressions and templates for html to bbcode - this.html_matches.push(this._getRegEx(bbcode_tpl)); - this.bbcode_tpls.push(this._getTpls(bbcode_match)); - }; - - /** - * Turns all of the added bbcodes into html - */ - public bbcodeToHtml(str: string) { - var nrbbcmatches = this.bbcode_matches.length; - var i = 0; - - for (; i < nrbbcmatches; i += 1) { - str = str.replace(this.bbcode_matches[i], this.html_tpls[i]); - } - - return str; - }; - - /** - * Turns html into bbcode - */ - public htmlToBBCode(str: string) { - var nrhtmlmatches = this.html_matches.length; - var i = 0; - - for (; i < nrhtmlmatches; i += 1) { - str = 
str.replace(this.html_matches[i], this.bbcode_tpls[i]); - } - - return str; - } - - /** - * Quote regular expression characters plus an optional character - * taken from phpjs.org - */ - private preg_quote(str: string, delimiter: string = null) { - return (str + '').replace(new RegExp('[.\\\\+*?\\[\\^\\]$(){}=!<>|:\\' + (delimiter || '') + '-]', 'g'), '\\$&'); - } -}