Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,6 @@
"shelljs": "^0.8.5",
"jest-cli": "^29.3.1",
"eslint": "^8.28.0",
"prettier": "^1.11.1"
"prettier": "^3.0.0"
}
}
28 changes: 28 additions & 0 deletions src/__tests__/sets-generator-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,34 @@ describe('sets-generator', () => {
});
});

// Checks First sets for a grammar in which B is left-recursive and also has
// an empty alternative.
it('self-epsilon', () => {
const setsGenerator = new SetsGenerator({
grammar: Grammar.fromString(`
%%
S : 'a' | B 'c';
B : B 'b' | /* empty */;
`),
});

expect(setsGenerator.firstOf(new GrammarSymbol('S')))
// No ε in First(S): in `B 'c'` the terminal 'c' always follows B, so it ends the set.
.toEqual({"'a'": true, "'b'": true, "'c'": true});

expect(setsGenerator.firstOf(new GrammarSymbol(`B`))).toEqual({
// ε is derived from the #2 RHS of B (the empty alternative); 'b' comes from B -> B 'b'.
"'b'": true,
ε: true,
});

// The First set of a terminal is expected to contain just that terminal.
expect(setsGenerator.firstOf(new GrammarSymbol(`'a'`))).toEqual({
"'a'": true,
});

expect(setsGenerator.firstOf(new GrammarSymbol(`'b'`))).toEqual({
"'b'": true,
});
});

it('RHS', () => {
const grammar = Grammar.fromString(`
%%
Expand Down
2 changes: 1 addition & 1 deletion src/grammar/grammar-symbol.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ export default class GrammarSymbol {
/**
* Returns original symbol from an extended name. 1X3 => X
*/
getOrignialSymbol() {
getOriginalSymbol() {
if (!this._originalSymbol) {
this._originalSymbol = this._symbol
.replace(/^\d+\|/, '')
Expand Down
64 changes: 39 additions & 25 deletions src/grammar/grammar.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import LexGrammar from './lex-grammar';
import LexRule from './lex-rule';
import LexParser from '../generated/lex-parser.gen.js';
import Production from './production';
import {EPSILON} from '../special-symbols.js';

import colors from 'colors';
import fs from 'fs';
Expand Down Expand Up @@ -168,15 +169,18 @@ export default class Grammar {
* for the specific options.
*/
static fromGrammarFile(grammarFile, options = {}, grammarType = 'bnf') {
const grammarData = Grammar.dataFromGrammarFile(grammarFile, { grammarType });
const grammarData = Grammar.dataFromGrammarFile(grammarFile, {grammarType});
return Grammar.fromData(grammarData, options);
}

/**
* Reads grammar file data. Supports reading `bnf`,
* and `lex` grammars based on mode.
*/
static dataFromGrammarFile(grammarFile, { grammarType = 'bnf', useLocation = false }) {
static dataFromGrammarFile(
grammarFile,
{grammarType = 'bnf', useLocation = false}
) {
const grammar = fs.readFileSync(grammarFile, 'utf8');

// check if the bnf grammar contains location capture characters
Expand All @@ -195,10 +199,12 @@ export default class Grammar {
.replace(/%{[\n\s\S]*?%}/g, '');

if (/@\w+/.test(bnf)) {
console.info(colors.red(
'The grammar file contains location capture characters (@), which require the ' +
'"--loc" option, but it has not been provided. The generated parser will throw an error.'
));
console.info(
colors.red(
'The grammar file contains location capture characters (@), which require the ' +
'"--loc" option, but it has not been provided. The generated parser will throw an error.'
)
);
}
}

Expand Down Expand Up @@ -357,8 +363,8 @@ export default class Grammar {

this._terminalsMap = {};

this._bnf.forEach(production => {
production.getRHS().forEach(symbol => {
this._bnf.forEach((production) => {
production.getRHS().forEach((symbol) => {
if (
symbol.isTerminal() &&
!this._terminalsMap.hasOwnProperty(symbol.getSymbol())
Expand All @@ -378,7 +384,7 @@ export default class Grammar {
*/
getTerminalSymbols() {
if (!this._terminalSymbols) {
this._terminalSymbols = this.getTerminals().map(symbol =>
this._terminalSymbols = this.getTerminals().map((symbol) =>
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not against any formatting style (and we can reformat this whole codebase to the latest standard or AI-generated coding), but let's make a separate commit for formatting, and keep this change limited to its semantics.

symbol.getSymbol()
);
}
Expand All @@ -391,21 +397,29 @@ export default class Grammar {
getNonTerminals() {
if (!this._nonTerminals) {
this._nonTerminals = [];

this._nonTerminalsMap = {};

this._bnf.forEach(production => {
this._bnf.forEach((production) => {
if (production.isAugmented()) {
return;
}
let nonTerminal = production.getLHS();
// Function Helper
const isEpsilon = (production) =>
production._RHS.length === 1 &&
production._RHS[0].getSymbol() === EPSILON;
// Mark Non-Terminal And Check If It Contains Direct Epsilon RHS
if (!this._nonTerminalsMap.hasOwnProperty(nonTerminal.getSymbol())) {
this._nonTerminalsMap[nonTerminal.getSymbol()] = true;
this._nonTerminalsMap[nonTerminal.getSymbol()] = {
hasDirectEpsilon: isEpsilon(production),
};
this._nonTerminals.push(nonTerminal);
} else if (isEpsilon(production)) {
this._nonTerminalsMap[nonTerminal.getSymbol()] = {
hasDirectEpsilon: true,
};
}
});
}

return this._nonTerminals;
}

Expand All @@ -414,7 +428,7 @@ export default class Grammar {
*/
getNonTerminalSymbols() {
if (!this._nonTerminalSymbols) {
this._nonTerminalSymbols = this.getNonTerminals().map(symbol =>
this._nonTerminalSymbols = this.getNonTerminals().map((symbol) =>
symbol.getSymbol()
);
}
Expand All @@ -431,11 +445,11 @@ export default class Grammar {

this._tokensMap = {};

this._bnf.forEach(production => {
this._bnf.forEach((production) => {
if (production.isAugmented() || production.isEpsilon()) {
return;
}
production.getRHS().forEach(symbol => {
production.getRHS().forEach((symbol) => {
let rawSymbol = symbol.getSymbol();
if (
!symbol.isTerminal() &&
Expand All @@ -457,7 +471,7 @@ export default class Grammar {
*/
getTokenSymbols() {
if (!this._tokenSymbols) {
this._tokenSymbols = this.getTokens().map(symbol => symbol.getSymbol());
this._tokenSymbols = this.getTokens().map((symbol) => symbol.getSymbol());
}
return this._tokenSymbols;
}
Expand All @@ -474,7 +488,7 @@ export default class Grammar {
*/
getProductionsForSymbol(symbol) {
if (!this._productionsForSymbol.hasOwnProperty(symbol)) {
this._productionsForSymbol[symbol] = this._bnf.filter(production => {
this._productionsForSymbol[symbol] = this._bnf.filter((production) => {
return production.getLHS().isSymbol(symbol);
});
}
Expand All @@ -486,7 +500,7 @@ export default class Grammar {
*/
getProductionsWithSymbol(symbol) {
if (!this._productionsWithSymbol.hasOwnProperty(symbol)) {
this._productionsWithSymbol[symbol] = this._bnf.filter(production => {
this._productionsWithSymbol[symbol] = this._bnf.filter((production) => {
return production.getRHSSymbolsMap().hasOwnProperty(symbol);
});
}
Expand Down Expand Up @@ -549,7 +563,7 @@ export default class Grammar {
let productions = this.getProductions();
let numberPad = productions.length.toString().length;

productions.forEach(production => {
productions.forEach((production) => {
let productionOutput =
`${pad}${this._padLeft(production.getNumber(), numberPad)}. ` +
production.toString();
Expand All @@ -574,7 +588,7 @@ export default class Grammar {

if (operators) {
operators.forEach((opData, i) => {
opData.slice(1).forEach(op => {
opData.slice(1).forEach((op) => {
processedOperators[op] = {
precedence: i + 1,
assoc: opData[0],
Expand All @@ -590,7 +604,7 @@ export default class Grammar {
* Generates data arrays for lex rules inferred from terminals.
*/
_generateLexRulesDataForTerminals() {
return this.getTerminals().map(terminal => [
return this.getTerminals().map((terminal) => [
LexRule.matcherFromTerminal(terminal.getSymbol()), // matcher
`return ${terminal.quotedTerminal()}`, // token handler
]);
Expand Down Expand Up @@ -624,7 +638,7 @@ export default class Grammar {
this._tokensMap = {};

return Array.isArray(tokens)
? tokens.map(token => {
? tokens.map((token) => {
this._tokensMap[token] = true;
return GrammarSymbol.get(token);
})
Expand Down Expand Up @@ -658,7 +672,7 @@ export default class Grammar {
processedBnf[0] = augmentedProduction;
}

nonTerminals.forEach(LHS => {
nonTerminals.forEach((LHS) => {
originalBnf[LHS].forEach((RHS, k) => {
let semanticAction = null;
let precedence = null;
Expand Down
2 changes: 1 addition & 1 deletion src/lr/canonical-collection.js
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ export default class CanonicalCollection {
const LHS = production.getLHS();
const RHS = production.getRHS();
const lastSymbol = RHS[RHS.length - 1];
const originalLHS = LHS.getOrignialSymbol();
const originalLHS = LHS.getOriginalSymbol();
const finalSet = lastSymbol.getEndContext();

if (!this._groupedFinalSets.hasOwnProperty(finalSet)) {
Expand Down
38 changes: 23 additions & 15 deletions src/sets-generator.js
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ export default class SetsGenerator {

let productionsForSymbol = this._grammar.getProductionsForSymbol(symbol);

productionsForSymbol.forEach(production => {
productionsForSymbol.forEach((production) => {
let RHS = production.getRHS();
this._mergeSets(firstSet, this.firstOfRHS(RHS));
});
Expand All @@ -97,8 +97,8 @@ export default class SetsGenerator {
*/
firstOfRHS(RHS) {
let firstSet = {};

for (let i = 0; i < RHS.length; i++) {
let i = 0;
for (; i < RHS.length; i++) {
let productionSymbol = RHS[i];

// Direct epsilon goes to the First set.
Expand All @@ -114,17 +114,25 @@ export default class SetsGenerator {
// excluding the EPSILON.
this._mergeSets(firstSet, firstOfCurrent, EXCLUDE_EPSILON);

// And if there was no EPSILON, we're done (otherwise, we
// don't break the loop, and proceed to the next symbol of the RHS.
if (!firstOfCurrent.hasOwnProperty(EPSILON)) {
break;
}
const nonTerminal =
this._grammar._nonTerminalsMap[productionSymbol.getSymbol()];

// If all symbols on RHS are eliminated, or the last
// symbol contains EPSILON, add it to the set.
else if (i === RHS.length - 1) {
firstSet[EPSILON] = true;
// And if there was EPSILON, don't break the loop
// proceed to the next symbol of the RHS (otherwise, we
// are done).
if (
firstOfCurrent.hasOwnProperty(EPSILON) ||
(nonTerminal && nonTerminal.hasDirectEpsilon)
) {
continue;
}

break;
}
// If all symbols on RHS are eliminated, or the last
// symbol contains EPSILON, add it to the set.
if (i === RHS.length) {
firstSet[EPSILON] = true;
}

return firstSet;
Expand Down Expand Up @@ -173,7 +181,7 @@ export default class SetsGenerator {
// symbol is used (i.e. where it appears on RHS).
let productionsWithSymbol = this._grammar.getProductionsWithSymbol(symbol);

productionsWithSymbol.forEach(production => {
productionsWithSymbol.forEach((production) => {
let RHS = production.getRHSSymbols();
let symbolIndex;

Expand Down Expand Up @@ -229,7 +237,7 @@ export default class SetsGenerator {
this._predictSets = {};
debug.time('Building Predict sets');

this._grammar.getProductions().forEach(production => {
this._grammar.getProductions().forEach((production) => {
let LHS = production.getLHS();
let RHS = production.getRHS();

Expand Down Expand Up @@ -303,7 +311,7 @@ export default class SetsGenerator {
* Builds a set based on the `builder` function.
*/
_buildSet(builder) {
this._grammar.getProductions().forEach(production => {
this._grammar.getProductions().forEach((production) => {
builder.call(this, production.getLHS());
});
}
Expand Down