From 128daed53d7e6317b457eed6d3aec438eb3f73db Mon Sep 17 00:00:00 2001 From: MammarDr <140546722+MammarDr@users.noreply.github.com> Date: Sat, 20 Dec 2025 18:07:36 +0100 Subject: [PATCH 1/3] Fix Issue #138 --- src/__tests__/sets-generator-test.js | 28 ++++++++++++++++++++++++++++ src/grammar/grammar-symbol.js | 2 +- src/grammar/grammar.js | 20 ++++++++++++++------ src/lr/canonical-collection.js | 2 +- src/sets-generator.js | 22 ++++++++++++---------- 5 files changed, 56 insertions(+), 18 deletions(-) diff --git a/src/__tests__/sets-generator-test.js b/src/__tests__/sets-generator-test.js index c8cfb63..535049a 100644 --- a/src/__tests__/sets-generator-test.js +++ b/src/__tests__/sets-generator-test.js @@ -85,6 +85,34 @@ describe('sets-generator', () => { }); }); + it('self-epsilon', () => { + const setsGenerator = new SetsGenerator({ + grammar: Grammar.fromString(` + %% + S : 'a' | B 'c'; + B : B 'b' | /* empty */; + `), + }); + + expect(setsGenerator.firstOf(new GrammarSymbol('S'))) + // No ε from B, since 'c' stops the sets. + .toEqual({"'a'": true, "'b'": true, "'c'": true}); + + expect(setsGenerator.firstOf(new GrammarSymbol(`B`))).toEqual({ + // ε is derived from #2 RHS, B -> 'b' + "'b'": true, + ε: true, + }); + + expect(setsGenerator.firstOf(new GrammarSymbol(`'a'`))).toEqual({ + "'a'": true, + }); + + expect(setsGenerator.firstOf(new GrammarSymbol(`'b'`))).toEqual({ + "'b'": true, + }); + }); + it('RHS', () => { const grammar = Grammar.fromString(` %% diff --git a/src/grammar/grammar-symbol.js b/src/grammar/grammar-symbol.js index bb772c7..fbc07d3 100644 --- a/src/grammar/grammar-symbol.js +++ b/src/grammar/grammar-symbol.js @@ -34,7 +34,7 @@ export default class GrammarSymbol { /** * Returns original symbol from an extended name. 
1X3 => X */ - getOrignialSymbol() { + getOriginalSymbol() { if (!this._originalSymbol) { this._originalSymbol = this._symbol .replace(/^\d+\|/, '') diff --git a/src/grammar/grammar.js b/src/grammar/grammar.js index dc396c8..c5dbaff 100644 --- a/src/grammar/grammar.js +++ b/src/grammar/grammar.js @@ -10,6 +10,7 @@ import LexGrammar from './lex-grammar'; import LexRule from './lex-rule'; import LexParser from '../generated/lex-parser.gen.js'; import Production from './production'; +import { EPSILON } from '../special-symbols.js'; import colors from 'colors'; import fs from 'fs'; @@ -391,18 +392,25 @@ export default class Grammar { getNonTerminals() { if (!this._nonTerminals) { this._nonTerminals = []; - + this._nonTerminalsMap = {}; - + this._bnf.forEach(production => { - if (production.isAugmented()) { + if (production.isAugmented()) return; - } + let nonTerminal = production.getLHS(); + + // Function Helper + const isEpsilon = (production) => production._RHS.length === 1 && production._RHS[0].getSymbol() === EPSILON; + + // Mark Non-Terminal And Check If It Contains Direct Epsilon RHS if (!this._nonTerminalsMap.hasOwnProperty(nonTerminal.getSymbol())) { - this._nonTerminalsMap[nonTerminal.getSymbol()] = true; + this._nonTerminalsMap[nonTerminal.getSymbol()] = {hasDirectEpsilon: isEpsilon(production)}; this._nonTerminals.push(nonTerminal); - } + } else + if(isEpsilon(production)) + this._nonTerminalsMap[nonTerminal.getSymbol()] = {hasDirectEpsilon: true}; }); } diff --git a/src/lr/canonical-collection.js b/src/lr/canonical-collection.js index 7e94c4b..de23f76 100644 --- a/src/lr/canonical-collection.js +++ b/src/lr/canonical-collection.js @@ -156,7 +156,7 @@ export default class CanonicalCollection { const LHS = production.getLHS(); const RHS = production.getRHS(); const lastSymbol = RHS[RHS.length - 1]; - const originalLHS = LHS.getOrignialSymbol(); + const originalLHS = LHS.getOriginalSymbol(); const finalSet = lastSymbol.getEndContext(); if 
(!this._groupedFinalSets.hasOwnProperty(finalSet)) { diff --git a/src/sets-generator.js b/src/sets-generator.js index aa26610..3fc608b 100644 --- a/src/sets-generator.js +++ b/src/sets-generator.js @@ -98,9 +98,10 @@ export default class SetsGenerator { firstOfRHS(RHS) { let firstSet = {}; - for (let i = 0; i < RHS.length; i++) { - let productionSymbol = RHS[i]; - + var i = 0; + for (; i < RHS.length; i++) { + var productionSymbol = RHS[i]; + // Direct epsilon goes to the First set. if (productionSymbol.isEpsilon()) { firstSet[EPSILON] = true; @@ -108,24 +109,25 @@ export default class SetsGenerator { } // Calculate First of current symbol on RHS. - let firstOfCurrent = this.firstOf(productionSymbol); + var firstOfCurrent = this.firstOf(productionSymbol); // Put the First set of this non-terminal in our set, // excluding the EPSILON. this._mergeSets(firstSet, firstOfCurrent, EXCLUDE_EPSILON); + + const hasEpsilon = this._grammar._nonTerminalsMap[productionSymbol.getSymbol()]?.hasDirectEpsilon; // And if there was no EPSILON, we're done (otherwise, we // don't break the loop, and proceed to the next symbol of the RHS. - if (!firstOfCurrent.hasOwnProperty(EPSILON)) { + if (!firstOfCurrent.hasOwnProperty(EPSILON) && !hasEpsilon) { break; } - // If all symbols on RHS are eliminated, or the last - // symbol contains EPSILON, add it to the set. - else if (i === RHS.length - 1) { - firstSet[EPSILON] = true; - } } + // If all symbols on RHS are eliminated, or the last + // symbol contains EPSILON, add it to the set. 
+ if(i === RHS.length) + firstSet[EPSILON] = true; return firstSet; } From 3b6d4fce38762dff9c867facd42f9af7d33ea058 Mon Sep 17 00:00:00 2001 From: MammarDr <140546722+MammarDr@users.noreply.github.com> Date: Mon, 22 Dec 2025 22:01:33 +0100 Subject: [PATCH 2/3] Following review feedback --- package-lock.json | 16 +++++---- package.json | 2 +- src/grammar/grammar.js | 74 +++++++++++++++++++++++------------------- src/sets-generator.js | 27 +++++++-------- 4 files changed, 65 insertions(+), 54 deletions(-) diff --git a/package-lock.json b/package-lock.json index 774b6c6..d16194c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -22,7 +22,7 @@ "@babel/preset-env": "^7.23.7", "eslint": "^8.28.0", "jest-cli": "^29.3.1", - "prettier": "^1.11.1", + "prettier": "^3.0.0", "shelljs": "^0.8.5" } }, @@ -7138,15 +7138,19 @@ } }, "node_modules/prettier": { - "version": "1.12.1", - "resolved": "https://registry.npmjs.org/prettier/-/prettier-1.12.1.tgz", - "integrity": "sha1-wa0g6APndJ+vkFpAnSNn4Gu+cyU=", + "version": "3.7.4", + "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.7.4.tgz", + "integrity": "sha512-v6UNi1+3hSlVvv8fSaoUbggEM5VErKmmpGA7Pl3HF8V6uKY7rvClBOJlH6yNwQtfTueNkGVpOv/mtWL9L4bgRA==", "dev": true, + "license": "MIT", "bin": { - "prettier": "bin-prettier.js" + "prettier": "bin/prettier.cjs" }, "engines": { - "node": ">=4" + "node": ">=14" + }, + "funding": { + "url": "https://github.com/prettier/prettier?sponsor=1" } }, "node_modules/pretty-format": { diff --git a/package.json b/package.json index a71a0a5..67b50fc 100644 --- a/package.json +++ b/package.json @@ -45,6 +45,6 @@ "shelljs": "^0.8.5", "jest-cli": "^29.3.1", "eslint": "^8.28.0", - "prettier": "^1.11.1" + "prettier": "^3.0.0" } } diff --git a/src/grammar/grammar.js b/src/grammar/grammar.js index c5dbaff..10f84a7 100644 --- a/src/grammar/grammar.js +++ b/src/grammar/grammar.js @@ -10,7 +10,7 @@ import LexGrammar from './lex-grammar'; import LexRule from './lex-rule'; import LexParser from 
'../generated/lex-parser.gen.js'; import Production from './production'; -import { EPSILON } from '../special-symbols.js'; +import {EPSILON} from '../special-symbols.js'; import colors from 'colors'; import fs from 'fs'; @@ -169,7 +169,7 @@ export default class Grammar { * for the specific options. */ static fromGrammarFile(grammarFile, options = {}, grammarType = 'bnf') { - const grammarData = Grammar.dataFromGrammarFile(grammarFile, { grammarType }); + const grammarData = Grammar.dataFromGrammarFile(grammarFile, {grammarType}); return Grammar.fromData(grammarData, options); } @@ -177,7 +177,10 @@ export default class Grammar { * Reads grammar file data. Supports reading `bnf`, * and `lex` grammars based on mode. */ - static dataFromGrammarFile(grammarFile, { grammarType = 'bnf', useLocation = false }) { + static dataFromGrammarFile( + grammarFile, + {grammarType = 'bnf', useLocation = false} + ) { const grammar = fs.readFileSync(grammarFile, 'utf8'); // check if the bnf grammar contains location capture characters @@ -196,10 +199,12 @@ export default class Grammar { .replace(/%{[\n\s\S]*?%}/g, ''); if (/@\w+/.test(bnf)) { - console.info(colors.red( - 'The grammar file contains location capture characters (@), which require the ' + - '"--loc" option, but it has not been provided. The generated parser will throw an error.' - )); + console.info( + colors.red( + 'The grammar file contains location capture characters (@), which require the ' + + '"--loc" option, but it has not been provided. The generated parser will throw an error.' 
+ ) + ); } } @@ -358,8 +363,8 @@ export default class Grammar { this._terminalsMap = {}; - this._bnf.forEach(production => { - production.getRHS().forEach(symbol => { + this._bnf.forEach((production) => { + production.getRHS().forEach((symbol) => { if ( symbol.isTerminal() && !this._terminalsMap.hasOwnProperty(symbol.getSymbol()) @@ -379,7 +384,7 @@ export default class Grammar { */ getTerminalSymbols() { if (!this._terminalSymbols) { - this._terminalSymbols = this.getTerminals().map(symbol => + this._terminalSymbols = this.getTerminals().map((symbol) => symbol.getSymbol() ); } @@ -392,28 +397,29 @@ export default class Grammar { getNonTerminals() { if (!this._nonTerminals) { this._nonTerminals = []; - this._nonTerminalsMap = {}; - - this._bnf.forEach(production => { - if (production.isAugmented()) + this._bnf.forEach((production) => { + if (production.isAugmented()) { return; - + } let nonTerminal = production.getLHS(); - // Function Helper - const isEpsilon = (production) => production._RHS.length === 1 && production._RHS[0].getSymbol() === EPSILON; - + const isEpsilon = (production) => + production._RHS.length === 1 && + production._RHS[0].getSymbol() === EPSILON; // Mark Non-Terminal And Check If It Contains Direct Epsilon RHS if (!this._nonTerminalsMap.hasOwnProperty(nonTerminal.getSymbol())) { - this._nonTerminalsMap[nonTerminal.getSymbol()] = {hasDirectEpsilon: isEpsilon(production)}; + this._nonTerminalsMap[nonTerminal.getSymbol()] = { + hasDirectEpsilon: isEpsilon(production), + }; this._nonTerminals.push(nonTerminal); - } else - if(isEpsilon(production)) - this._nonTerminalsMap[nonTerminal.getSymbol()] = {hasDirectEpsilon: true}; + } else if (isEpsilon(production)) { + this._nonTerminalsMap[nonTerminal.getSymbol()] = { + hasDirectEpsilon: true, + }; + } }); } - return this._nonTerminals; } @@ -422,7 +428,7 @@ export default class Grammar { */ getNonTerminalSymbols() { if (!this._nonTerminalSymbols) { - this._nonTerminalSymbols = 
this.getNonTerminals().map(symbol => + this._nonTerminalSymbols = this.getNonTerminals().map((symbol) => symbol.getSymbol() ); } @@ -439,11 +445,11 @@ export default class Grammar { this._tokensMap = {}; - this._bnf.forEach(production => { + this._bnf.forEach((production) => { if (production.isAugmented() || production.isEpsilon()) { return; } - production.getRHS().forEach(symbol => { + production.getRHS().forEach((symbol) => { let rawSymbol = symbol.getSymbol(); if ( !symbol.isTerminal() && @@ -465,7 +471,7 @@ export default class Grammar { */ getTokenSymbols() { if (!this._tokenSymbols) { - this._tokenSymbols = this.getTokens().map(symbol => symbol.getSymbol()); + this._tokenSymbols = this.getTokens().map((symbol) => symbol.getSymbol()); } return this._tokenSymbols; } @@ -482,7 +488,7 @@ export default class Grammar { */ getProductionsForSymbol(symbol) { if (!this._productionsForSymbol.hasOwnProperty(symbol)) { - this._productionsForSymbol[symbol] = this._bnf.filter(production => { + this._productionsForSymbol[symbol] = this._bnf.filter((production) => { return production.getLHS().isSymbol(symbol); }); } @@ -494,7 +500,7 @@ export default class Grammar { */ getProductionsWithSymbol(symbol) { if (!this._productionsWithSymbol.hasOwnProperty(symbol)) { - this._productionsWithSymbol[symbol] = this._bnf.filter(production => { + this._productionsWithSymbol[symbol] = this._bnf.filter((production) => { return production.getRHSSymbolsMap().hasOwnProperty(symbol); }); } @@ -557,7 +563,7 @@ export default class Grammar { let productions = this.getProductions(); let numberPad = productions.length.toString().length; - productions.forEach(production => { + productions.forEach((production) => { let productionOutput = `${pad}${this._padLeft(production.getNumber(), numberPad)}. 
` + production.toString(); @@ -582,7 +588,7 @@ export default class Grammar { if (operators) { operators.forEach((opData, i) => { - opData.slice(1).forEach(op => { + opData.slice(1).forEach((op) => { processedOperators[op] = { precedence: i + 1, assoc: opData[0], @@ -598,7 +604,7 @@ export default class Grammar { * Generates data arrays for lex rules inferred from terminals. */ _generateLexRulesDataForTerminals() { - return this.getTerminals().map(terminal => [ + return this.getTerminals().map((terminal) => [ LexRule.matcherFromTerminal(terminal.getSymbol()), // matcher `return ${terminal.quotedTerminal()}`, // token handler ]); @@ -632,7 +638,7 @@ export default class Grammar { this._tokensMap = {}; return Array.isArray(tokens) - ? tokens.map(token => { + ? tokens.map((token) => { this._tokensMap[token] = true; return GrammarSymbol.get(token); }) @@ -666,7 +672,7 @@ export default class Grammar { processedBnf[0] = augmentedProduction; } - nonTerminals.forEach(LHS => { + nonTerminals.forEach((LHS) => { originalBnf[LHS].forEach((RHS, k) => { let semanticAction = null; let precedence = null; diff --git a/src/sets-generator.js b/src/sets-generator.js index 3fc608b..5839a0d 100644 --- a/src/sets-generator.js +++ b/src/sets-generator.js @@ -84,7 +84,7 @@ export default class SetsGenerator { let productionsForSymbol = this._grammar.getProductionsForSymbol(symbol); - productionsForSymbol.forEach(production => { + productionsForSymbol.forEach((production) => { let RHS = production.getRHS(); this._mergeSets(firstSet, this.firstOfRHS(RHS)); }); @@ -97,11 +97,10 @@ export default class SetsGenerator { */ firstOfRHS(RHS) { let firstSet = {}; - - var i = 0; + let i = 0; for (; i < RHS.length; i++) { - var productionSymbol = RHS[i]; - + let productionSymbol = RHS[i]; + // Direct epsilon goes to the First set. if (productionSymbol.isEpsilon()) { firstSet[EPSILON] = true; @@ -109,25 +108,27 @@ export default class SetsGenerator { } // Calculate First of current symbol on RHS. 
- var firstOfCurrent = this.firstOf(productionSymbol); + let firstOfCurrent = this.firstOf(productionSymbol); // Put the First set of this non-terminal in our set, // excluding the EPSILON. this._mergeSets(firstSet, firstOfCurrent, EXCLUDE_EPSILON); - - const hasEpsilon = this._grammar._nonTerminalsMap[productionSymbol.getSymbol()]?.hasDirectEpsilon; + + const hasEpsilon = + this._grammar._nonTerminalsMap[productionSymbol.getSymbol()] + ?.hasDirectEpsilon; // And if there was no EPSILON, we're done (otherwise, we // don't break the loop, and proceed to the next symbol of the RHS. if (!firstOfCurrent.hasOwnProperty(EPSILON) && !hasEpsilon) { break; } - } // If all symbols on RHS are eliminated, or the last // symbol contains EPSILON, add it to the set. - if(i === RHS.length) + if (i === RHS.length) { firstSet[EPSILON] = true; + } return firstSet; } @@ -175,7 +176,7 @@ export default class SetsGenerator { // symbol is used (i.e. where it appears on RHS). let productionsWithSymbol = this._grammar.getProductionsWithSymbol(symbol); - productionsWithSymbol.forEach(production => { + productionsWithSymbol.forEach((production) => { let RHS = production.getRHSSymbols(); let symbolIndex; @@ -231,7 +232,7 @@ export default class SetsGenerator { this._predictSets = {}; debug.time('Building Predict sets'); - this._grammar.getProductions().forEach(production => { + this._grammar.getProductions().forEach((production) => { let LHS = production.getLHS(); let RHS = production.getRHS(); @@ -305,7 +306,7 @@ export default class SetsGenerator { * Builds a set based on the `builder` function. 
*/ _buildSet(builder) { - this._grammar.getProductions().forEach(production => { + this._grammar.getProductions().forEach((production) => { builder.call(this, production.getLHS()); }); } From c1ec069858fddf1f9e9df454e6cf331ed135729b Mon Sep 17 00:00:00 2001 From: MammarDr <140546722+MammarDr@users.noreply.github.com> Date: Wed, 24 Dec 2025 12:44:03 +0100 Subject: [PATCH 3/3] Improve transpiled output readability --- src/sets-generator.js | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/sets-generator.js b/src/sets-generator.js index 5839a0d..8f9f871 100644 --- a/src/sets-generator.js +++ b/src/sets-generator.js @@ -114,15 +114,20 @@ export default class SetsGenerator { // excluding the EPSILON. this._mergeSets(firstSet, firstOfCurrent, EXCLUDE_EPSILON); - const hasEpsilon = - this._grammar._nonTerminalsMap[productionSymbol.getSymbol()] - ?.hasDirectEpsilon; - - // And if there was no EPSILON, we're done (otherwise, we - // don't break the loop, and proceed to the next symbol of the RHS. - if (!firstOfCurrent.hasOwnProperty(EPSILON) && !hasEpsilon) { - break; + const nonTerminal = + this._grammar._nonTerminalsMap[productionSymbol.getSymbol()]; + + // And if there was EPSILON, don't break the loop + // proceed to the next symbol of the RHS (otherwise, we + // are done). + if ( + firstOfCurrent.hasOwnProperty(EPSILON) || + (nonTerminal && nonTerminal.hasDirectEpsilon) + ) { + continue; } + + break; } // If all symbols on RHS are eliminated, or the last // symbol contains EPSILON, add it to the set.