diff --git a/package-lock.json b/package-lock.json index 774b6c6..d16194c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -22,7 +22,7 @@ "@babel/preset-env": "^7.23.7", "eslint": "^8.28.0", "jest-cli": "^29.3.1", - "prettier": "^1.11.1", + "prettier": "^3.0.0", "shelljs": "^0.8.5" } }, @@ -7138,15 +7138,19 @@ } }, "node_modules/prettier": { - "version": "1.12.1", - "resolved": "https://registry.npmjs.org/prettier/-/prettier-1.12.1.tgz", - "integrity": "sha1-wa0g6APndJ+vkFpAnSNn4Gu+cyU=", + "version": "3.7.4", + "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.7.4.tgz", + "integrity": "sha512-v6UNi1+3hSlVvv8fSaoUbggEM5VErKmmpGA7Pl3HF8V6uKY7rvClBOJlH6yNwQtfTueNkGVpOv/mtWL9L4bgRA==", "dev": true, + "license": "MIT", "bin": { - "prettier": "bin-prettier.js" + "prettier": "bin/prettier.cjs" }, "engines": { - "node": ">=4" + "node": ">=14" + }, + "funding": { + "url": "https://github.com/prettier/prettier?sponsor=1" } }, "node_modules/pretty-format": { diff --git a/package.json b/package.json index a71a0a5..67b50fc 100644 --- a/package.json +++ b/package.json @@ -45,6 +45,6 @@ "shelljs": "^0.8.5", "jest-cli": "^29.3.1", "eslint": "^8.28.0", - "prettier": "^1.11.1" + "prettier": "^3.0.0" } } diff --git a/src/__tests__/sets-generator-test.js b/src/__tests__/sets-generator-test.js index c8cfb63..535049a 100644 --- a/src/__tests__/sets-generator-test.js +++ b/src/__tests__/sets-generator-test.js @@ -85,6 +85,34 @@ describe('sets-generator', () => { }); }); + it('self-epsilon', () => { + const setsGenerator = new SetsGenerator({ + grammar: Grammar.fromString(` + %% + S : 'a' | B 'c'; + B : B 'b' | /* empty */; + `), + }); + + expect(setsGenerator.firstOf(new GrammarSymbol('S'))) + // No ε from B, since 'c' stops the sets. 
+ .toEqual({"'a'": true, "'b'": true, "'c'": true}); + + expect(setsGenerator.firstOf(new GrammarSymbol(`B`))).toEqual({ + // ε is derived from #2 RHS, B -> 'b' + "'b'": true, + ε: true, + }); + + expect(setsGenerator.firstOf(new GrammarSymbol(`'a'`))).toEqual({ + "'a'": true, + }); + + expect(setsGenerator.firstOf(new GrammarSymbol(`'b'`))).toEqual({ + "'b'": true, + }); + }); + it('RHS', () => { const grammar = Grammar.fromString(` %% diff --git a/src/grammar/grammar-symbol.js b/src/grammar/grammar-symbol.js index bb772c7..fbc07d3 100644 --- a/src/grammar/grammar-symbol.js +++ b/src/grammar/grammar-symbol.js @@ -34,7 +34,7 @@ export default class GrammarSymbol { /** * Returns original symbol from an extended name. 1X3 => X */ - getOrignialSymbol() { + getOriginalSymbol() { if (!this._originalSymbol) { this._originalSymbol = this._symbol .replace(/^\d+\|/, '') diff --git a/src/grammar/grammar.js b/src/grammar/grammar.js index dc396c8..10f84a7 100644 --- a/src/grammar/grammar.js +++ b/src/grammar/grammar.js @@ -10,6 +10,7 @@ import LexGrammar from './lex-grammar'; import LexRule from './lex-rule'; import LexParser from '../generated/lex-parser.gen.js'; import Production from './production'; +import {EPSILON} from '../special-symbols.js'; import colors from 'colors'; import fs from 'fs'; @@ -168,7 +169,7 @@ export default class Grammar { * for the specific options. */ static fromGrammarFile(grammarFile, options = {}, grammarType = 'bnf') { - const grammarData = Grammar.dataFromGrammarFile(grammarFile, { grammarType }); + const grammarData = Grammar.dataFromGrammarFile(grammarFile, {grammarType}); return Grammar.fromData(grammarData, options); } @@ -176,7 +177,10 @@ export default class Grammar { * Reads grammar file data. Supports reading `bnf`, * and `lex` grammars based on mode. 
*/ - static dataFromGrammarFile(grammarFile, { grammarType = 'bnf', useLocation = false }) { + static dataFromGrammarFile( + grammarFile, + {grammarType = 'bnf', useLocation = false} + ) { const grammar = fs.readFileSync(grammarFile, 'utf8'); // check if the bnf grammar contains location capture characters @@ -195,10 +199,12 @@ export default class Grammar { .replace(/%{[\n\s\S]*?%}/g, ''); if (/@\w+/.test(bnf)) { - console.info(colors.red( - 'The grammar file contains location capture characters (@), which require the ' + - '"--loc" option, but it has not been provided. The generated parser will throw an error.' - )); + console.info( + colors.red( + 'The grammar file contains location capture characters (@), which require the ' + + '"--loc" option, but it has not been provided. The generated parser will throw an error.' + ) + ); } } @@ -357,8 +363,8 @@ export default class Grammar { this._terminalsMap = {}; - this._bnf.forEach(production => { - production.getRHS().forEach(symbol => { + this._bnf.forEach((production) => { + production.getRHS().forEach((symbol) => { if ( symbol.isTerminal() && !this._terminalsMap.hasOwnProperty(symbol.getSymbol()) @@ -378,7 +384,7 @@ export default class Grammar { */ getTerminalSymbols() { if (!this._terminalSymbols) { - this._terminalSymbols = this.getTerminals().map(symbol => + this._terminalSymbols = this.getTerminals().map((symbol) => symbol.getSymbol() ); } @@ -391,21 +397,29 @@ export default class Grammar { getNonTerminals() { if (!this._nonTerminals) { this._nonTerminals = []; - this._nonTerminalsMap = {}; - - this._bnf.forEach(production => { + this._bnf.forEach((production) => { if (production.isAugmented()) { return; } let nonTerminal = production.getLHS(); + // Function Helper + const isEpsilon = (production) => + production._RHS.length === 1 && + production._RHS[0].getSymbol() === EPSILON; + // Mark Non-Terminal And Check If It Contains Direct Epsilon RHS if 
(!this._nonTerminalsMap.hasOwnProperty(nonTerminal.getSymbol())) { - this._nonTerminalsMap[nonTerminal.getSymbol()] = true; + this._nonTerminalsMap[nonTerminal.getSymbol()] = { + hasDirectEpsilon: isEpsilon(production), + }; this._nonTerminals.push(nonTerminal); + } else if (isEpsilon(production)) { + this._nonTerminalsMap[nonTerminal.getSymbol()] = { + hasDirectEpsilon: true, + }; } }); } - return this._nonTerminals; } @@ -414,7 +428,7 @@ export default class Grammar { */ getNonTerminalSymbols() { if (!this._nonTerminalSymbols) { - this._nonTerminalSymbols = this.getNonTerminals().map(symbol => + this._nonTerminalSymbols = this.getNonTerminals().map((symbol) => symbol.getSymbol() ); } @@ -431,11 +445,11 @@ export default class Grammar { this._tokensMap = {}; - this._bnf.forEach(production => { + this._bnf.forEach((production) => { if (production.isAugmented() || production.isEpsilon()) { return; } - production.getRHS().forEach(symbol => { + production.getRHS().forEach((symbol) => { let rawSymbol = symbol.getSymbol(); if ( !symbol.isTerminal() && @@ -457,7 +471,7 @@ export default class Grammar { */ getTokenSymbols() { if (!this._tokenSymbols) { - this._tokenSymbols = this.getTokens().map(symbol => symbol.getSymbol()); + this._tokenSymbols = this.getTokens().map((symbol) => symbol.getSymbol()); } return this._tokenSymbols; } @@ -474,7 +488,7 @@ export default class Grammar { */ getProductionsForSymbol(symbol) { if (!this._productionsForSymbol.hasOwnProperty(symbol)) { - this._productionsForSymbol[symbol] = this._bnf.filter(production => { + this._productionsForSymbol[symbol] = this._bnf.filter((production) => { return production.getLHS().isSymbol(symbol); }); } @@ -486,7 +500,7 @@ export default class Grammar { */ getProductionsWithSymbol(symbol) { if (!this._productionsWithSymbol.hasOwnProperty(symbol)) { - this._productionsWithSymbol[symbol] = this._bnf.filter(production => { + this._productionsWithSymbol[symbol] = this._bnf.filter((production) => { return 
production.getRHSSymbolsMap().hasOwnProperty(symbol); }); } @@ -549,7 +563,7 @@ export default class Grammar { let productions = this.getProductions(); let numberPad = productions.length.toString().length; - productions.forEach(production => { + productions.forEach((production) => { let productionOutput = `${pad}${this._padLeft(production.getNumber(), numberPad)}. ` + production.toString(); @@ -574,7 +588,7 @@ export default class Grammar { if (operators) { operators.forEach((opData, i) => { - opData.slice(1).forEach(op => { + opData.slice(1).forEach((op) => { processedOperators[op] = { precedence: i + 1, assoc: opData[0], @@ -590,7 +604,7 @@ export default class Grammar { * Generates data arrays for lex rules inferred from terminals. */ _generateLexRulesDataForTerminals() { - return this.getTerminals().map(terminal => [ + return this.getTerminals().map((terminal) => [ LexRule.matcherFromTerminal(terminal.getSymbol()), // matcher `return ${terminal.quotedTerminal()}`, // token handler ]); @@ -624,7 +638,7 @@ export default class Grammar { this._tokensMap = {}; return Array.isArray(tokens) - ? tokens.map(token => { + ? 
tokens.map((token) => { this._tokensMap[token] = true; return GrammarSymbol.get(token); }) @@ -658,7 +672,7 @@ export default class Grammar { processedBnf[0] = augmentedProduction; } - nonTerminals.forEach(LHS => { + nonTerminals.forEach((LHS) => { originalBnf[LHS].forEach((RHS, k) => { let semanticAction = null; let precedence = null; diff --git a/src/lr/canonical-collection.js b/src/lr/canonical-collection.js index 7e94c4b..de23f76 100644 --- a/src/lr/canonical-collection.js +++ b/src/lr/canonical-collection.js @@ -156,7 +156,7 @@ export default class CanonicalCollection { const LHS = production.getLHS(); const RHS = production.getRHS(); const lastSymbol = RHS[RHS.length - 1]; - const originalLHS = LHS.getOrignialSymbol(); + const originalLHS = LHS.getOriginalSymbol(); const finalSet = lastSymbol.getEndContext(); if (!this._groupedFinalSets.hasOwnProperty(finalSet)) { diff --git a/src/sets-generator.js b/src/sets-generator.js index aa26610..8f9f871 100644 --- a/src/sets-generator.js +++ b/src/sets-generator.js @@ -84,7 +84,7 @@ export default class SetsGenerator { let productionsForSymbol = this._grammar.getProductionsForSymbol(symbol); - productionsForSymbol.forEach(production => { + productionsForSymbol.forEach((production) => { let RHS = production.getRHS(); this._mergeSets(firstSet, this.firstOfRHS(RHS)); }); @@ -97,8 +97,8 @@ export default class SetsGenerator { */ firstOfRHS(RHS) { let firstSet = {}; - - for (let i = 0; i < RHS.length; i++) { + let i = 0; + for (; i < RHS.length; i++) { let productionSymbol = RHS[i]; // Direct epsilon goes to the First set. @@ -114,17 +114,25 @@ export default class SetsGenerator { // excluding the EPSILON. this._mergeSets(firstSet, firstOfCurrent, EXCLUDE_EPSILON); - // And if there was no EPSILON, we're done (otherwise, we - // don't break the loop, and proceed to the next symbol of the RHS. 
- if (!firstOfCurrent.hasOwnProperty(EPSILON)) { - break; - } + const nonTerminal = + this._grammar._nonTerminalsMap[productionSymbol.getSymbol()]; - // If all symbols on RHS are eliminated, or the last - // symbol contains EPSILON, add it to the set. - else if (i === RHS.length - 1) { - firstSet[EPSILON] = true; + // And if there was EPSILON, don't break the loop + // proceed to the next symbol of the RHS (otherwise, we + // are done). + if ( + firstOfCurrent.hasOwnProperty(EPSILON) || + (nonTerminal && nonTerminal.hasDirectEpsilon) + ) { + continue; } + + break; + } + // If all symbols on RHS are eliminated, or the last + // symbol contains EPSILON, add it to the set. + if (i === RHS.length) { + firstSet[EPSILON] = true; } return firstSet; @@ -173,7 +181,7 @@ export default class SetsGenerator { // symbol is used (i.e. where it appears on RHS). let productionsWithSymbol = this._grammar.getProductionsWithSymbol(symbol); - productionsWithSymbol.forEach(production => { + productionsWithSymbol.forEach((production) => { let RHS = production.getRHSSymbols(); let symbolIndex; @@ -229,7 +237,7 @@ export default class SetsGenerator { this._predictSets = {}; debug.time('Building Predict sets'); - this._grammar.getProductions().forEach(production => { + this._grammar.getProductions().forEach((production) => { let LHS = production.getLHS(); let RHS = production.getRHS(); @@ -303,7 +311,7 @@ export default class SetsGenerator { * Builds a set based on the `builder` function. */ _buildSet(builder) { - this._grammar.getProductions().forEach(production => { + this._grammar.getProductions().forEach((production) => { builder.call(this, production.getLHS()); }); }