From d747c17d613ebd275469668bf900d8903b9f1c14 Mon Sep 17 00:00:00 2001 From: "theo.hollender" Date: Tue, 15 Oct 2024 11:56:45 +0200 Subject: [PATCH 1/8] add-parser: added primitive parser --- polyparser/io/position.py | 18 +- polyparser/io/reader.py | 23 +- polyparser/languages/json.py | 120 +++++++++ polyparser/languages/language.py | 22 ++ polyparser/lexer/token/__init__.py | 7 + polyparser/parser/__init__.py | 36 +++ polyparser/parser/context.py | 21 ++ polyparser/parser/cursor.py | 70 ++++++ polyparser/parser/node.py | 8 + polyparser/parser/primitives/augmented.py | 56 +++++ polyparser/parser/primitives/branch.py | 28 +++ polyparser/parser/primitives/call.py | 25 ++ polyparser/parser/primitives/list.py | 29 +++ polyparser/parser/primitives/token.py | 37 +++ polyparser/parser/result.py | 14 ++ polyparser/parser/stream.py | 16 ++ tests/alphabet/test_json_language.py | 157 ++++++++++++ tests/alphabet/test_language.py | 12 + tests/io/test_position.py | 20 +- tests/io/test_reader.py | 12 +- tests/lexer/rules/test_ignore.py | 2 +- tests/lexer/rules/test_keyword.py | 4 +- tests/lexer/rules/test_name.py | 2 +- tests/lexer/rules/test_string.py | 4 +- tests/lexer/test_lexer.py | 44 +++- .../primitives/test_primitive_augmented.py | 237 ++++++++++++++++++ .../parser/primitives/test_primitive_call.py | 36 +++ .../parser/primitives/test_primitive_list.py | 69 +++++ tests/parser/primitives/test_primitive_or.py | 70 ++++++ .../parser/primitives/test_primitive_token.py | 67 +++++ tests/parser/test_context.py | 34 +++ tests/parser/test_node.py | 15 ++ tests/parser/test_parser_init.py | 9 + tests/parser/test_result.py | 22 ++ tests/parser/test_stream.py | 56 +++++ 35 files changed, 1371 insertions(+), 31 deletions(-) create mode 100644 polyparser/languages/json.py create mode 100644 polyparser/languages/language.py create mode 100644 polyparser/parser/__init__.py create mode 100644 polyparser/parser/context.py create mode 100644 polyparser/parser/cursor.py create mode 100644 
polyparser/parser/node.py create mode 100644 polyparser/parser/primitives/augmented.py create mode 100644 polyparser/parser/primitives/branch.py create mode 100644 polyparser/parser/primitives/call.py create mode 100644 polyparser/parser/primitives/list.py create mode 100644 polyparser/parser/primitives/token.py create mode 100644 polyparser/parser/result.py create mode 100644 polyparser/parser/stream.py create mode 100644 tests/alphabet/test_json_language.py create mode 100644 tests/alphabet/test_language.py create mode 100644 tests/parser/primitives/test_primitive_augmented.py create mode 100644 tests/parser/primitives/test_primitive_call.py create mode 100644 tests/parser/primitives/test_primitive_list.py create mode 100644 tests/parser/primitives/test_primitive_or.py create mode 100644 tests/parser/primitives/test_primitive_token.py create mode 100644 tests/parser/test_context.py create mode 100644 tests/parser/test_node.py create mode 100644 tests/parser/test_parser_init.py create mode 100644 tests/parser/test_result.py create mode 100644 tests/parser/test_stream.py diff --git a/polyparser/io/position.py b/polyparser/io/position.py index 13060cb..62ee05b 100644 --- a/polyparser/io/position.py +++ b/polyparser/io/position.py @@ -22,9 +22,12 @@ class PositionRange: __height : int __last_column : int + __offset : int + __size : int + __reader : "FileReader" - def __init__(self, reader: "FileReader", line: int, column: int, height: int, last_column: int): + def __init__(self, reader: "FileReader", line: int, column: int, height: int, last_column: int, offset: int, size: int): self.__reader = reader self.__line = line @@ -33,6 +36,9 @@ def __init__(self, reader: "FileReader", line: int, column: int, height: int, la self.__height = height self.__last_column = last_column + self.__offset = offset + self.__size = size + @property def line (self): return self.__line @@ -46,4 +52,14 @@ def last_column (self): def height (self): return self.__height + @property + def 
offset (self): + return self.__offset + @property + def size (self): + return self.__size + @property + def value (self): + return self.__reader.content[self.__offset:self.__offset + self.__size] + from polyparser.io.reader import FileReader diff --git a/polyparser/io/reader.py b/polyparser/io/reader.py index 23c307f..35edd73 100644 --- a/polyparser/io/reader.py +++ b/polyparser/io/reader.py @@ -1,7 +1,6 @@ from typing import Self -from polyparser.io.position import PositionRange from polyparser.io.savestream import SaveStream, SavedState """ @@ -23,6 +22,7 @@ class FileReaderState(SavedState): __start_line : int __start_column : int + __start_offset : int __current_line : int __current_column : int @@ -48,6 +48,7 @@ def copy_into(self, other: "FileReaderState"): other.__start_line = self.__current_line other.__start_column = self.__current_column + other.__start_offset = self.__offset other.__offset = self.__offset other.__current_line = self.__current_line @@ -71,7 +72,10 @@ def as_position (self) -> "PositionRange": self.__reader, self.__start_line, self.__start_column, self.__current_line - self.__start_line + 1, - self.__current_column ) + self.__current_column, + self.__start_offset, + self.__offset - self.__start_offset + ) @property def size (self): @@ -92,17 +96,22 @@ class FileReader(SaveStream[FileReaderState]): __path : str __content : str - def __init__(self, path: str): + def __init__(self, path: str, content: str): super().__init__( FileReaderState, self ) - self.__path = path - + self.__path = path + self.__content = content + @staticmethod + def open (path: str): with open(path, "r") as file: - self.__content = file.read() + content = file.read() + return FileReader(path, content) @property def content (self): return self.__content @property def path (self): - return self.__path \ No newline at end of file + return self.__path + +from polyparser.io.position import PositionRange \ No newline at end of file diff --git a/polyparser/languages/json.py 
b/polyparser/languages/json.py new file mode 100644 index 0000000..ecee132 --- /dev/null +++ b/polyparser/languages/json.py @@ -0,0 +1,120 @@ + +import enum +import string +from polyparser.languages.language import Language +from polyparser.lexer import Lexer +from polyparser.lexer.rules.ignore import IgnoreLexerRule +from polyparser.lexer.rules.keyword import KeywordLexerRule +from polyparser.lexer.rules.string import StringLexerRule +from polyparser.lexer.token.factory import TokenTypeFactory +from polyparser.parser import FixedContextParser, Parser +from polyparser.parser.context import ParserContext +from polyparser.parser.primitives.augmented import AugmentedPrimitive, AugmentedType +from polyparser.parser.primitives.branch import OrPrimitive +from polyparser.parser.primitives.call import CallPrimitive +from polyparser.parser.primitives.list import ListPrimitive +from polyparser.parser.primitives.token import TokenPrimitive +from ast import literal_eval + + +class JsonLanguage(Language): + alphabet: None | enum.Enum + def __init__(self): + self.alphabet = None + + super().__init__() + + def get_alphabet (self): + if self.alphabet is None: + type_factory = TokenTypeFactory( "json-type-factory" ) + type_factory.add_token_type( "LCB" ) # Left Curly Bracket '{' + type_factory.add_token_type( "RCB" ) # Right Curly Bracket '}' + type_factory.add_token_type( "LSB" ) # Left Squared Bracket '[' + type_factory.add_token_type( "RSB" ) # Right Squared Bracket ']' + + type_factory.add_token_type( "COMMA" ) # COMMA ',' + type_factory.add_token_type( "EQUIV" ) # EQUIV ':' + + type_factory.add_token_type( "STRING" ) # String + + # TODO the language is incomplete, add number, true, false and null + + self.alphabet = type_factory.as_enumeration() + return self.alphabet + + def get_lexer(self) -> Lexer: + alphabet = self.get_alphabet() + + lexer = Lexer([ + StringLexerRule( "\"", alphabet.STRING ), + StringLexerRule( "'", alphabet.STRING ), + KeywordLexerRule({ + '{': 
alphabet.LCB, + '}': alphabet.RCB, + '[': alphabet.LSB, + ']': alphabet.RSB, + ',': alphabet.COMMA, + ':': alphabet.EQUIV + }), + IgnoreLexerRule(string.whitespace) + ]) + + return lexer + def get_parser(self) -> Parser: + alphabet = self.get_alphabet() + + context = ParserContext() + + context.set_element( "string", AugmentedPrimitive( + TokenPrimitive("STRING", True), + prim_type=lambda x: literal_eval(x.value))) + context.set_element( "list", AugmentedPrimitive( + ListPrimitive( + TokenPrimitive( "LSB" ), + AugmentedPrimitive( + ListPrimitive( + CallPrimitive( "main" ), + AugmentedPrimitive( + ListPrimitive( + TokenPrimitive("COMMA"), + CallPrimitive("main")), + augment=AugmentedType.ANY_AMOUNT), + AugmentedPrimitive( + TokenPrimitive("COMMA"), + augment=AugmentedType.OPTIONAL)), + augment=AugmentedType.OPTIONAL), + TokenPrimitive( "RSB" ) + ), + prim_type=lambda *args: list(args))) + context.set_element( "dict.equiv", ListPrimitive( + CallPrimitive("string"), + TokenPrimitive("EQUIV"), + CallPrimitive("main"))) + context.set_element( "dict", AugmentedPrimitive( + ListPrimitive( + TokenPrimitive( "LCB" ), + AugmentedPrimitive( + ListPrimitive( + CallPrimitive( "dict.equiv" ), + AugmentedPrimitive( + ListPrimitive( + TokenPrimitive("COMMA"), + CallPrimitive("dict.equiv")), + augment=AugmentedType.ANY_AMOUNT), + AugmentedPrimitive( + TokenPrimitive("COMMA"), + augment=AugmentedType.OPTIONAL)), + augment=AugmentedType.OPTIONAL), + TokenPrimitive( "RCB" ) + ), + prim_type=lambda *args: { + args[i]:args[i + 1] + for i in range(0, len(args), 2) + })) + context.set_element( "main", OrPrimitive( + CallPrimitive( "list" ), + CallPrimitive( "dict" ), + CallPrimitive( "string" ) + ) ) + + return FixedContextParser(context) \ No newline at end of file diff --git a/polyparser/languages/language.py b/polyparser/languages/language.py new file mode 100644 index 0000000..4844643 --- /dev/null +++ b/polyparser/languages/language.py @@ -0,0 +1,22 @@ + +from polyparser.io.reader 
import FileReader +from polyparser.lexer import Lexer +from polyparser.parser import Parser + +class Language: + __lexer : Lexer + __parser : Parser + + def __init__(self): + self.__lexer = self.get_lexer () + self.__parser = self.get_parser () + + def get_lexer (self) -> Lexer: + raise NotImplementedError() + def get_parser (self) -> Parser: + raise NotImplementedError() + + def parse (self, reader: "FileReader"): + tokens = self.__lexer.try_lexing(reader) + + return self.__parser.try_parsing(tokens) \ No newline at end of file diff --git a/polyparser/lexer/token/__init__.py b/polyparser/lexer/token/__init__.py index ea881fc..892dc6d 100644 --- a/polyparser/lexer/token/__init__.py +++ b/polyparser/lexer/token/__init__.py @@ -25,3 +25,10 @@ def __setattr__(self, name: str, value: Any) -> None: raise AttributeError("A token is immutable once created") super().__setattr__(name, value) + + @property + def name (self): + return self.token_type.name + @property + def value (self): + return self.position.value diff --git a/polyparser/parser/__init__.py b/polyparser/parser/__init__.py new file mode 100644 index 0000000..427f5f2 --- /dev/null +++ b/polyparser/parser/__init__.py @@ -0,0 +1,36 @@ + +from typing import List + +from polyparser.lexer.token import Token +from polyparser.parser.context import ParserContext +from polyparser.parser.primitives.call import CallPrimitive +from polyparser.parser.stream import ParserStream + +class Parser: + __context: ParserContext + + def __init__(self) -> None: + self.__context = self.get_context() + + def get_context (self) -> ParserContext: + raise NotImplementedError() + def try_parsing (self, tokens: List[Token]): + stream = ParserStream( tokens ) + + primitive = CallPrimitive( "main" ) + context = ParserContext( self.__context ) + + with stream as (atomic, state): + primitive.evaluate(stream, context) + + assert state.size == 0, "Could not parse everything" + return state.poll_stored() + +class FixedContextParser(Parser): + 
__context: ParserContext + def __init__(self, context: ParserContext) -> None: + self.__context = context + + super().__init__() + def get_context(self) -> ParserContext: + return self.__context diff --git a/polyparser/parser/context.py b/polyparser/parser/context.py new file mode 100644 index 0000000..f951b25 --- /dev/null +++ b/polyparser/parser/context.py @@ -0,0 +1,21 @@ + +from typing import Any, Dict, Tuple + +class ParserContext: + __next : "ParserContext | None" + __ctx : Dict[str, Any] + + def __init__(self, next: "ParserContext | None" = None) -> None: + self.__next = next + self.__ctx = {} + + def is_in_current (self, name: str): + return name in self.__ctx + def get_element (self, name: str) -> Tuple[Any, bool]: + if self.is_in_current(name): + return (self.__ctx[name], True) + if self.__next is not None: + return self.__next.get_element(name) + return (None, False) + def set_element (self, name: str, value: Any): + self.__ctx[name] = value diff --git a/polyparser/parser/cursor.py b/polyparser/parser/cursor.py new file mode 100644 index 0000000..d505798 --- /dev/null +++ b/polyparser/parser/cursor.py @@ -0,0 +1,70 @@ + +from typing import Any, List, Self + +from polyparser.io.savestream import SavedState + +class ParserCursor (SavedState): + __offset: int + __stream: "ParserStream" + + __arguments : List[Any] + __arg_size : int + __cur_size : int + + __new : bool + + def store (self, arg: Any): + if self.__arg_size == len(self.__arguments): + self.__arguments.append(arg) + else: + self.__arguments[self.__arg_size] = arg + self.__arg_size += 1 + def poll_stored (self): + result = self.__arguments[self.__cur_size:self.__arg_size].copy() + + self.__arg_size = self.__cur_size + return result + + def __init__(self, stream: "ParserStream") -> None: + super().__init__() + + self.__offset = 0 + self.__stream = stream + + self.__arguments = [] + self.__arg_size = 0 + self.__cur_size = 0 + + self.__new = True + + def __len__ (self): + return 
len(self.__stream.tokens) - self.__offset + @property + def size (self): + return len(self) + + def peek (self): + return self.__stream.tokens[self.__offset] + def poll (self): + result = self.__stream.tokens[self.__offset] + self.__offset += 1 + return result + + @staticmethod + def empty(stream: "ParserStream") -> Self: + return ParserCursor(stream) + def copy_into(self, other: Self): + self.__new = False + + other.__offset = self.__offset + other.__stream = self.__stream + + other.__arguments = self.__arguments + other.__arg_size = self.__arg_size + + if other.__new: + other.__new = False + other.__cur_size = self.__arg_size + + +from polyparser.parser.stream import ParserStream diff --git a/polyparser/parser/node.py b/polyparser/parser/node.py new file mode 100644 index 0000000..c31439f --- /dev/null +++ b/polyparser/parser/node.py @@ -0,0 +1,8 @@ + +from polyparser.parser.context import ParserContext +from polyparser.parser.result import ParsingResult +from polyparser.parser.stream import ParserStream + +class ParserNode: + def evaluate (self, stream: ParserStream, context: "ParserContext") -> ParsingResult: + raise NotImplementedError() diff --git a/polyparser/parser/primitives/augmented.py b/polyparser/parser/primitives/augmented.py new file mode 100644 index 0000000..685c002 --- /dev/null +++ b/polyparser/parser/primitives/augmented.py @@ -0,0 +1,56 @@ + +import enum +from typing import Any +from polyparser.parser.context import ParserContext +from polyparser.parser.node import ParserNode +from polyparser.parser.result import ParsingResult +from polyparser.parser.stream import ParserStream + +class AugmentedType(enum.Enum): + NONE = 0 + OPTIONAL = 1 + AT_LEAST_ONE = 2 + ANY_AMOUNT = 3 + +class AugmentedPrimitive (ParserNode): + def __init__(self, subprimitive: ParserNode, augment: AugmentedType = AugmentedType.NONE, prim_type: Any = None) -> None: + self.__sub_primitive = subprimitive + self.__augment = augment + self.__prim_type = prim_type + + def 
evaluate(self, stream: ParserStream, context: ParserContext) -> ParsingResult: + min_amount = 0 + if (self.__augment.value & 1) == 0: + min_amount = 1 + max_amount = -1 + if (self.__augment.value & 2) == 0: + max_amount = 1 + + with stream as (atomic, state): + amount = 0 + last_res = None + + while amount != max_amount: + with stream as (subatomic, substate): + last_res = self.__sub_primitive.evaluate(stream, context) + if not last_res.is_success (): + break + + if self.__prim_type is not None: + args = substate.poll_stored() + + try: + substate.store( self.__prim_type(*args) ) + except Exception as exception: + print(exception) + subatomic.rollback() + last_res = ParsingResult.FAILED + break + + amount += 1 + + if amount == 0: + if min_amount == 0: + return ParsingResult.IGNORED + return last_res + return ParsingResult.SUCCESS diff --git a/polyparser/parser/primitives/branch.py b/polyparser/parser/primitives/branch.py new file mode 100644 index 0000000..bb06ead --- /dev/null +++ b/polyparser/parser/primitives/branch.py @@ -0,0 +1,28 @@ + +from typing import List +from polyparser.parser.context import ParserContext +from polyparser.parser.node import ParserNode +from polyparser.parser.result import ParsingResult +from polyparser.parser.stream import ParserStream + + +class OrPrimitive (ParserNode): + __primitives: List[ParserNode] + + def __init__(self, *primitives: ParserNode) -> None: + super().__init__() + + self.__primitives = list(primitives) + def evaluate(self, stream: ParserStream, context: ParserContext) -> ParsingResult: + with stream as (atomic, state): + had_one_ignored = False + + for subprimitive in self.__primitives: + subresult = subprimitive.evaluate(stream, context) + + if subresult.is_success(): return ParsingResult.SUCCESS + if subresult.is_ignored(): had_one_ignored = True + + if had_one_ignored: + return ParsingResult.IGNORED + return ParsingResult.FAILED diff --git a/polyparser/parser/primitives/call.py b/polyparser/parser/primitives/call.py 
new file mode 100644 index 0000000..acba777 --- /dev/null +++ b/polyparser/parser/primitives/call.py @@ -0,0 +1,25 @@ + +from typing import List +from polyparser.parser.context import ParserContext +from polyparser.parser.node import ParserNode +from polyparser.parser.primitives.list import ListPrimitive +from polyparser.parser.result import ParsingResult +from polyparser.parser.stream import ParserStream + + +class CallPrimitive(ParserNode): + __name: str + __args: List[ParserNode] + + def __init__(self, name: str, *arguments: ParserNode) -> None: + self.__name = name + + self.__args = list(arguments) + + def evaluate(self, stream: ParserStream, context: ParserContext) -> ParsingResult: + target, exists = context.get_element(self.__name) + + if exists and isinstance(target, ParserNode): + # TODO instantiate with self context + return target.evaluate(stream, context) + return ParsingResult.FAILED diff --git a/polyparser/parser/primitives/list.py b/polyparser/parser/primitives/list.py new file mode 100644 index 0000000..ba3067c --- /dev/null +++ b/polyparser/parser/primitives/list.py @@ -0,0 +1,29 @@ + +from typing import List +from polyparser.parser.context import ParserContext +from polyparser.parser.node import ParserNode +from polyparser.parser.result import ParsingResult +from polyparser.parser.stream import ParserStream + + +class ListPrimitive(ParserNode): + primitives: List[ParserNode] + + def __init__(self, *primitives: List[ParserNode]) -> None: + super().__init__() + + self.primitives = primitives + def evaluate(self, stream: ParserStream, context: ParserContext) -> ParsingResult: + with stream as (atomic, state): + result = ParsingResult.IGNORED + + for subprimitive in self.primitives: + next_result = subprimitive.evaluate(stream, context) + + if next_result == ParsingResult.SUCCESS: + result = next_result + elif next_result == ParsingResult.FAILED: + atomic.rollback() + return ParsingResult.FAILED + + return result \ No newline at end of file diff --git 
a/polyparser/parser/primitives/token.py b/polyparser/parser/primitives/token.py new file mode 100644 index 0000000..ea70232 --- /dev/null +++ b/polyparser/parser/primitives/token.py @@ -0,0 +1,37 @@ + +from polyparser.lexer.token import Token +from polyparser.parser.context import ParserContext +from polyparser.parser.node import ParserNode +from polyparser.parser.result import ParsingResult +from polyparser.parser.stream import ParserStream + + +class TokenPrimitive(ParserNode): + __name : str + __expects : str | None + __stored : bool + + def __init__(self, name: str, stored: bool = False, expects: str | None = None) -> None: + super().__init__() + + self.__name = name + self.__expects = expects + self.__stored = stored + + def is_valid (self, token: Token): + return token.name == self.__name \ + and (self.__expects is None \ + or self.__expects == token.value) + def evaluate(self, stream: ParserStream, context: ParserContext) -> ParsingResult: + with stream as (atomic, state): + if state.size == 0: return ParsingResult.FAILED + + token = state.poll() + + if self.is_valid (token): + if self.__stored: + state.store( token ) + return ParsingResult.SUCCESS + else: + atomic.rollback() + return ParsingResult.FAILED diff --git a/polyparser/parser/result.py b/polyparser/parser/result.py new file mode 100644 index 0000000..9f552a9 --- /dev/null +++ b/polyparser/parser/result.py @@ -0,0 +1,14 @@ + +import enum + +class ParsingResult(enum.Enum): + FAILED = 0 + IGNORED = 1 + SUCCESS = 2 + + def is_failed (self) -> bool: + return self == ParsingResult.FAILED + def is_ignored (self) -> bool: + return self == ParsingResult.IGNORED + def is_success (self) -> bool: + return self == ParsingResult.SUCCESS diff --git a/polyparser/parser/stream.py b/polyparser/parser/stream.py new file mode 100644 index 0000000..e7c66b1 --- /dev/null +++ b/polyparser/parser/stream.py @@ -0,0 +1,16 @@ + +from typing import List, Type + +from polyparser.lexer.token import Token +from 
polyparser.io.savestream import SaveStream + + +class ParserStream(SaveStream["ParserCursor"]): + tokens: List[Token] + + def __init__(self, tokens: List[Token]) -> None: + super().__init__(ParserCursor, self) + + self.tokens = tokens + +from polyparser.parser.cursor import ParserCursor \ No newline at end of file diff --git a/tests/alphabet/test_json_language.py b/tests/alphabet/test_json_language.py new file mode 100644 index 0000000..4686df4 --- /dev/null +++ b/tests/alphabet/test_json_language.py @@ -0,0 +1,157 @@ + +from ast import literal_eval +import enum +import json +import random +import string + +from polyparser.io.reader import FileReader +from polyparser.languages.json import JsonLanguage +from polyparser.languages.language import Language +from polyparser.lexer import Lexer +from polyparser.lexer.rules.ignore import IgnoreLexerRule +from polyparser.lexer.rules.keyword import KeywordLexerRule +from polyparser.lexer.rules.string import StringLexerRule +from polyparser.lexer.token.factory import TokenTypeFactory +from polyparser.parser import FixedContextParser, Parser +from polyparser.parser.context import ParserContext +from polyparser.parser.primitives.augmented import AugmentedPrimitive, AugmentedType +from polyparser.parser.primitives.branch import OrPrimitive +from polyparser.parser.primitives.call import CallPrimitive +from polyparser.parser.primitives.list import ListPrimitive +from polyparser.parser.primitives.token import TokenPrimitive + + +class JsonLanguage_V1(Language): + alphabet: None | enum.Enum + def __init__(self): + self.alphabet = None + + super().__init__() + + def get_alphabet (self): + if self.alphabet is None: + type_factory = TokenTypeFactory( "json-type-factory" ) + type_factory.add_token_type( "LCB" ) # Left Curly Bracket '{' + type_factory.add_token_type( "RCB" ) # Right Curly Bracket '}' + type_factory.add_token_type( "LSB" ) # Left Squared Bracket '[' + type_factory.add_token_type( "RSB" ) # Right Squared Bracket ']' + + 
type_factory.add_token_type( "COMMA" ) # COMMA ',' + type_factory.add_token_type( "EQUIV" ) # EQUIV ':' + + type_factory.add_token_type( "STRING" ) # String + + # TODO the language is incomplete, add number, true, false and null + + self.alphabet = type_factory.as_enumeration() + return self.alphabet + + def get_lexer(self) -> Lexer: + alphabet = self.get_alphabet() + + lexer = Lexer([ + StringLexerRule( "\"", alphabet.STRING ), + StringLexerRule( "'", alphabet.STRING ), + KeywordLexerRule({ + '{': alphabet.LCB, + '}': alphabet.RCB, + '[': alphabet.LSB, + ']': alphabet.RSB, + ',': alphabet.COMMA, + ':': alphabet.EQUIV + }), + IgnoreLexerRule(string.whitespace) + ]) + + return lexer + def get_parser(self) -> Parser: + alphabet = self.get_alphabet() + + context = ParserContext() + + context.set_element( "string", AugmentedPrimitive( + TokenPrimitive("STRING", True), + prim_type=lambda x: literal_eval(x.value))) + context.set_element( "list", AugmentedPrimitive( + ListPrimitive( + TokenPrimitive( "LSB" ), + AugmentedPrimitive( + ListPrimitive( + CallPrimitive( "main" ), + AugmentedPrimitive( + ListPrimitive( + TokenPrimitive("COMMA"), + CallPrimitive("main")), + augment=AugmentedType.ANY_AMOUNT), + AugmentedPrimitive( + TokenPrimitive("COMMA"), + augment=AugmentedType.OPTIONAL)), + augment=AugmentedType.OPTIONAL), + TokenPrimitive( "RSB" ) + ), + prim_type=lambda *args: list(args))) + context.set_element( "dict.equiv", ListPrimitive( + CallPrimitive("string"), + TokenPrimitive("EQUIV"), + CallPrimitive("main"))) + context.set_element( "dict", AugmentedPrimitive( + ListPrimitive( + TokenPrimitive( "LCB" ), + AugmentedPrimitive( + ListPrimitive( + CallPrimitive( "dict.equiv" ), + AugmentedPrimitive( + ListPrimitive( + TokenPrimitive("COMMA"), + CallPrimitive("dict.equiv")), + augment=AugmentedType.ANY_AMOUNT), + AugmentedPrimitive( + TokenPrimitive("COMMA"), + augment=AugmentedType.OPTIONAL)), + augment=AugmentedType.OPTIONAL), + TokenPrimitive( "RCB" ) + ), + 
prim_type=lambda *args: { + args[i]:args[i + 1] + for i in range(0, len(args), 2) + })) + context.set_element( "main", OrPrimitive( + CallPrimitive( "list" ), + CallPrimitive( "dict" ), + CallPrimitive( "string" ) + ) ) + + return FixedContextParser(context) + +ALPHABET = string.ascii_letters + string.digits + +def random_string (): + return "".join([ random.choice(ALPHABET) for _ in range(10) ]) + +def random_json (maxdepth=3, edepth=1, maxamount=3): + chr = random.choice([ 'L' ] * maxdepth + [ 'D' ] * maxdepth + [ 'S' ] * (0 if maxdepth > edepth else 1)) + + if chr == 'L': + size = random.randint(2 - (0 if maxdepth > edepth else 1), maxamount) + return [ random_json(maxdepth - 1, edepth, maxamount) for _ in range(size) ] + elif chr == 'D': + size = random.randint(2 - (0 if maxdepth > edepth else 1), maxamount) + return { random_string(): random_json(maxdepth - 1, edepth, maxamount) for _ in range(size) } + return random_string() + +def test_simple_json (): + for _ in range(10): + _json = random_json() + + string = json.dumps(_json) + + if random.choice([False, True]): + string = string.replace("'", "\"") + + json_langs = [ JsonLanguage(), JsonLanguage_V1() ] + + for json_lang in json_langs: + json_result = json_lang.parse( FileReader( "", string ) ) + + assert json_result[0] == _json diff --git a/tests/alphabet/test_language.py b/tests/alphabet/test_language.py new file mode 100644 index 0000000..a7dcfb0 --- /dev/null +++ b/tests/alphabet/test_language.py @@ -0,0 +1,12 @@ + +import pytest +from polyparser.languages.language import Language + + +def test_exceptions (): + with pytest.raises(NotImplementedError): + language = Language() + with pytest.raises(NotImplementedError): + Language.get_lexer(object()) + with pytest.raises(NotImplementedError): + Language.get_parser(object()) diff --git a/tests/io/test_position.py b/tests/io/test_position.py index 1624f9d..9e8dd92 100644 --- a/tests/io/test_position.py +++ b/tests/io/test_position.py @@ -4,16 +4,30 @@ from 
tests.utils.immutable import check_immutable def test_simple_position (): - reader = FileReader("tests/io/file_tests/01.txt") + reader = FileReader.open("tests/io/file_tests/01.txt") - pos = PositionRange(reader, 0, 0, 1, 10) + pos = PositionRange(reader, 0, 0, 1, 10, 0, 10) assert pos.line == 0 assert pos.column == 0 assert pos.last_column == 10 assert pos.height == 1 + assert pos.offset == 0 + assert pos.size == 10 + + assert pos.value == "Lorem ipsu" + check_immutable( pos, "line", 11 ) check_immutable( pos, "column", 11 ) check_immutable( pos, "last_column", 11 ) - check_immutable( pos, "height", 11 ) \ No newline at end of file + check_immutable( pos, "height", 11 ) + check_immutable( pos, "offset", 11 ) + check_immutable( pos, "size", 11 ) + check_immutable( pos, "value", "Lorem ipsa" ) +def test_offset_position (): + reader = FileReader.open("tests/io/file_tests/01.txt") + + pos = PositionRange(reader, 0, 0, 4, 10, 3, 10) + + assert pos.value == "em ipsum d" \ No newline at end of file diff --git a/tests/io/test_reader.py b/tests/io/test_reader.py index 69c4e5e..850e0f1 100644 --- a/tests/io/test_reader.py +++ b/tests/io/test_reader.py @@ -10,7 +10,7 @@ def test_simple_reader (): PATH = PATHS[0] CONTENT = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed non risus. Suspendisse lectus tortor, dignissim sit amet, adipiscing nec, ultricies sed, dolor. Cras elementum ultrices diam. Maecenas ligula massa, varius a, semper congue, euismod non, mi. Proin porttitor, orci nec nonummy molestie, enim est eleifend mi, non fermentum diam nisl sit amet erat. Duis semper. Duis arcu massa, scelerisque vitae, consequat in, pretium a, enim. Pellentesque congue. Ut in risus volutpat libero pharetra tempor. Cras vestibulum bibendum augue. Praesent egestas leo in pede. Praesent blandit odio eu enim. Pellentesque sed dui ut augue blandit sodales. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Aliquam nibh. 
Mauris ac mauris sed pede pellentesque fermentum. Maecenas adipiscing ante non diam sodales hendrerit." - reader = FileReader(PATH) + reader = FileReader.open(PATH) assert reader.path == PATH assert reader.content == CONTENT @@ -21,13 +21,13 @@ def test_simple_reader (): assert isinstance(reader.path, str) assert isinstance(reader.content, str) def test_reader_immutable (): - reader = FileReader(PATHS[0]) + reader = FileReader.open(PATHS[0]) check_immutable(reader, "path", "new_path.txt") check_immutable(reader, "content", "Hello, World !") def test_reader_alphabet (): - reader = FileReader(PATHS[1]) + reader = FileReader.open(PATHS[1]) with reader as (atomic, state): for i in range(26): @@ -44,7 +44,7 @@ def test_reader_alphabet (): assert state._FileReaderState__offset == 26 assert state.size == 0 def test_reader_alphabet_rollback (): - reader = FileReader(PATHS[1]) + reader = FileReader.open(PATHS[1]) with reader as (atomic, state): with reader as (atomic2, state2): @@ -55,7 +55,7 @@ def test_reader_alphabet_rollback (): for i in range(26): assert chr(i + ord('a')) == state.poll() def test_reader_with_newlines (): - reader = FileReader(PATHS[2]) + reader = FileReader.open(PATHS[2]) with open(PATHS[2], 'r') as file: text = file.read() @@ -74,7 +74,7 @@ def test_reader_with_newlines (): atomic.rollback() def test_reader_with_newlines (): - reader = FileReader(PATHS[2]) + reader = FileReader.open(PATHS[2]) content = reader.content diff --git a/tests/lexer/rules/test_ignore.py b/tests/lexer/rules/test_ignore.py index e0ee8f7..f9bc94b 100644 --- a/tests/lexer/rules/test_ignore.py +++ b/tests/lexer/rules/test_ignore.py @@ -6,7 +6,7 @@ import string def test_ignore_rule (): - reader = FileReader( "tests/lexer/rules/file_tests/simple-name-working.txt" ) + reader = FileReader.open( "tests/lexer/rules/file_tests/simple-name-working.txt" ) lexer = Lexer( [ NameLexerRule("NAME"), IgnoreLexerRule(string.whitespace + ".,;") ] ) tokens = lexer.try_lexing(reader) diff --git 
a/tests/lexer/rules/test_keyword.py b/tests/lexer/rules/test_keyword.py index dd5beb8..6a94b40 100644 --- a/tests/lexer/rules/test_keyword.py +++ b/tests/lexer/rules/test_keyword.py @@ -6,7 +6,7 @@ def test_empty_rulebook (): rule = KeywordLexerRule({ }) - reader = FileReader( "tests/lexer/rules/file_tests/eq-neq-set.txt" ) + reader = FileReader.open( "tests/lexer/rules/file_tests/eq-neq-set.txt" ) assert rule.try_lexing( reader ) is None @@ -14,7 +14,7 @@ def test_empty_rulebook (): def test_simple_equals_rulebook (): rule = KeywordLexerRule({ "==": "EQ", "=": "SET", "!=": "NEQ" }) - reader = FileReader( "tests/lexer/rules/file_tests/eq-neq-set.txt" ) + reader = FileReader.open( "tests/lexer/rules/file_tests/eq-neq-set.txt" ) eq1 = rule.try_lexing(reader) assert eq1.exists \ and eq1.value.token_type == "EQ" \ diff --git a/tests/lexer/rules/test_name.py b/tests/lexer/rules/test_name.py index fa3762f..ae7ab8d 100644 --- a/tests/lexer/rules/test_name.py +++ b/tests/lexer/rules/test_name.py @@ -24,7 +24,7 @@ def test_name_valid (): ) def test_name_rule (): - reader = FileReader( "tests/lexer/rules/file_tests/simple-name.txt" ) + reader = FileReader.open( "tests/lexer/rules/file_tests/simple-name.txt" ) rule = NameLexerRule("NAME") diff --git a/tests/lexer/rules/test_string.py b/tests/lexer/rules/test_string.py index b7e5a37..91d06cf 100644 --- a/tests/lexer/rules/test_string.py +++ b/tests/lexer/rules/test_string.py @@ -14,7 +14,7 @@ def test_strings (): "\"abc\\nhi\"" ] - reader = FileReader( "tests/lexer/rules/file_tests/strings.txt" ) + reader = FileReader.open( "tests/lexer/rules/file_tests/strings.txt" ) lexer = Lexer( [ StringLexerRule( "\"", "STRING" ), @@ -33,7 +33,7 @@ def test_strings (): assert word.position.line == index index += 1 def test_non_finished_string (): - reader = FileReader( "tests/lexer/rules/file_tests/strings-false.txt" ) + reader = FileReader.open( "tests/lexer/rules/file_tests/strings-false.txt" ) lexer = Lexer( [ StringLexerRule( 
"\"\"\"", "STRING" ), diff --git a/tests/lexer/test_lexer.py b/tests/lexer/test_lexer.py index e99f408..50a3814 100644 --- a/tests/lexer/test_lexer.py +++ b/tests/lexer/test_lexer.py @@ -11,9 +11,17 @@ def test_lexer_eq_neq_set (): - lexer = Lexer( [ KeywordLexerRule({ "==": "EQ", "=": "SET", "!=": "NEQ" }) ] ) + class F: + def __init__(self, value: str): + self.value = value + def __eq__(self, value: object) -> bool: + return value == self.value + @property + def name (self): + return self.value + lexer = Lexer( [ KeywordLexerRule({ "==": F("EQ"), "=": F("SET"), "!=": F("NEQ") }) ] ) - reader = FileReader( "tests/lexer/rules/file_tests/eq-neq-set.txt" ) + reader = FileReader.open( "tests/lexer/rules/file_tests/eq-neq-set.txt" ) tokens = lexer.try_lexing( reader ) @@ -27,28 +35,48 @@ def test_lexer_eq_neq_set (): and eq1.position.line == 1 \ and eq1.position.column == 1 \ and eq1.position.last_column == 3 \ - and eq1.position.height == 1 + and eq1.position.height == 1 \ + and eq1.position.offset == 0 \ + and eq1.position.size == 2 \ + and eq1.position.value == "==" \ + and eq1.value == "==" \ + and eq1.name == "EQ" eq2 = tokens[1] assert eq2.token_type == "SET" \ and eq2.position.line == 1 \ and eq2.position.column == 3 \ and eq2.position.last_column == 4 \ - and eq2.position.height == 1 + and eq2.position.height == 1 \ + and eq2.position.offset == 2 \ + and eq2.position.size == 1 \ + and eq2.position.value == "=" \ + and eq2.value == "=" \ + and eq2.name == "SET" eq3 = tokens[2] assert eq3.token_type == "NEQ" \ and eq3.position.line == 1 \ and eq3.position.column == 4 \ and eq3.position.last_column == 6 \ - and eq3.position.height == 1 + and eq3.position.height == 1 \ + and eq3.position.offset == 3 \ + and eq3.position.size == 2 \ + and eq3.position.value == "!=" \ + and eq3.value == "!=" \ + and eq3.name == "NEQ" eq4 = tokens[3] assert eq4.token_type == "SET" \ and eq4.position.line == 1 \ and eq4.position.column == 6 \ and eq4.position.last_column == 7 \ - and 
eq4.position.height == 1 + and eq4.position.height == 1\ + and eq4.position.offset == 5 \ + and eq4.position.size == 1 \ + and eq4.position.value == "=" \ + and eq4.value == "=" \ + and eq4.name == "SET" def test_lexer_fail (): - reader = FileReader( "tests/lexer/rules/file_tests/simple-name.txt" ) + reader = FileReader.open( "tests/lexer/rules/file_tests/simple-name.txt" ) lexer = Lexer( [ NameLexerRule("NAME"), IgnoreLexerRule(string.whitespace + ".,;") ] ) @@ -56,7 +84,7 @@ def test_lexer_fail (): tokens = lexer.try_lexing(reader) def test_json_lexer (): - reader = FileReader( "tests/lexer/rules/file_tests/json-file-test.json" ) + reader = FileReader.open( "tests/lexer/rules/file_tests/json-file-test.json" ) lexer = Lexer( [ KeywordLexerRule( { diff --git a/tests/parser/primitives/test_primitive_augmented.py b/tests/parser/primitives/test_primitive_augmented.py new file mode 100644 index 0000000..ecc2923 --- /dev/null +++ b/tests/parser/primitives/test_primitive_augmented.py @@ -0,0 +1,237 @@ + +from polyparser.parser.context import ParserContext +from polyparser.parser.node import ParserNode +from polyparser.parser.primitives.augmented import AugmentedPrimitive, AugmentedType +from polyparser.parser.primitives.token import TokenPrimitive +from polyparser.parser.result import ParsingResult +from polyparser.parser.stream import ParserStream +from tests.parser.primitives.test_primitive_token import MockToken + +def test_simple_none (): + primitive1 = AugmentedPrimitive( TokenPrimitive( "NAME", expects="if" ) ) + primitive2 = AugmentedPrimitive( TokenPrimitive( "EOL", stored=True ) ) + primitive3 = AugmentedPrimitive( TokenPrimitive( "NAME" ) ) + primitive4 = AugmentedPrimitive( TokenPrimitive( "NAME", expects="!if", stored=True ) ) + + T = [ MockToken("EOL", "\n"), MockToken("NAME", "if"), MockToken("NAME", "!if") ] + context = ParserContext() + stream = ParserStream(T) + + def check_eq (arr): + with stream as (atomic, state): + args = 
state._ParserCursor__arguments + size = state._ParserCursor__arg_size + + assert size == len(arr) + assert args[:size] == arr + def evaluate (prim: TokenPrimitive, result: ParsingResult, arr, rollback = True): + with stream as (atomic, state): + assert prim.evaluate(stream, context) == result + + check_eq(arr) + if rollback: atomic.rollback() + def advance (): + with stream as (atomic, state): + state.poll() + + evaluate( primitive1, ParsingResult.FAILED, [] ) + evaluate( primitive2, ParsingResult.SUCCESS, [ T[0] ] ) + evaluate( primitive3, ParsingResult.FAILED, [] ) + evaluate( primitive4, ParsingResult.FAILED, [] ) + advance() + + evaluate( primitive1, ParsingResult.SUCCESS, [] ) + evaluate( primitive2, ParsingResult.FAILED, [] ) + evaluate( primitive3, ParsingResult.SUCCESS, [] ) + evaluate( primitive4, ParsingResult.FAILED, [] ) + advance() + + evaluate( primitive1, ParsingResult.FAILED, [] ) + evaluate( primitive2, ParsingResult.FAILED, [] ) + evaluate( primitive3, ParsingResult.SUCCESS, [] ) + evaluate( primitive4, ParsingResult.SUCCESS, [ T[2] ] ) + advance() + +def test_simple_optional (): + primitive1 = AugmentedPrimitive( TokenPrimitive( "NAME", expects="if" ), AugmentedType.OPTIONAL ) + primitive2 = AugmentedPrimitive( TokenPrimitive( "EOL", stored=True ), AugmentedType.OPTIONAL ) + primitive3 = AugmentedPrimitive( TokenPrimitive( "NAME" ), AugmentedType.OPTIONAL ) + primitive4 = AugmentedPrimitive( TokenPrimitive( "NAME", expects="!if", stored=True ), AugmentedType.OPTIONAL ) + + T = [ MockToken("EOL", "\n"), MockToken("NAME", "if"), MockToken("NAME", "!if") ] + context = ParserContext() + stream = ParserStream(T) + + def check_eq (arr): + with stream as (atomic, state): + args = state._ParserCursor__arguments + size = state._ParserCursor__arg_size + + assert size == len(arr) + assert args[:size] == arr + def evaluate (prim: TokenPrimitive, result: ParsingResult, arr, rollback = True): + with stream as (atomic, state): + assert prim.evaluate(stream, 
context) == result + + check_eq(arr) + if rollback: atomic.rollback() + def advance (): + with stream as (atomic, state): + state.poll() + + evaluate( primitive1, ParsingResult.IGNORED, [] ) + evaluate( primitive2, ParsingResult.SUCCESS, [ T[0] ] ) + evaluate( primitive3, ParsingResult.IGNORED, [] ) + evaluate( primitive4, ParsingResult.IGNORED, [] ) + advance() + + evaluate( primitive1, ParsingResult.SUCCESS, [] ) + evaluate( primitive2, ParsingResult.IGNORED, [] ) + evaluate( primitive3, ParsingResult.SUCCESS, [] ) + evaluate( primitive4, ParsingResult.IGNORED, [] ) + advance() + + evaluate( primitive1, ParsingResult.IGNORED, [] ) + evaluate( primitive2, ParsingResult.IGNORED, [] ) + evaluate( primitive3, ParsingResult.SUCCESS, [] ) + evaluate( primitive4, ParsingResult.SUCCESS, [ T[2] ] ) + advance() + +def test_simple_at_least_one (): + primitive1 = AugmentedPrimitive( TokenPrimitive( "NAME", stored=True ), augment=AugmentedType.AT_LEAST_ONE ) + primitive2 = AugmentedPrimitive( TokenPrimitive( "EOL", stored=True ), augment=AugmentedType.AT_LEAST_ONE ) + + tokens = [ + MockToken( "NAME", "A" ), + MockToken( "NAME", "A" ), + MockToken( "EOL", "A" ), + MockToken( "EOL", "A" ), + MockToken( "EOL", "A" ), + MockToken( "NAME", "A" ) + ] + stream = ParserStream(tokens) + context = ParserContext() + + def check (prim: ParserNode, should_work: bool, res: str, amount: int): + with stream as (atomic, state): + result = prim.evaluate(stream, context) + if should_work: assert result == ParsingResult.SUCCESS + else : assert result == ParsingResult.FAILED + + args = state.poll_stored() + assert len(args) == amount + for i, a in enumerate(args): + assert a.name == res + + check(primitive2, False, "EOL", 0) + check(primitive1, True, "NAME", 2) + check(primitive1, False, "NAME", 0) + check(primitive2, True, "EOL", 3) + check(primitive2, False, "EOL", 0) + check(primitive1, True, "NAME", 1) + check(primitive1, False, "NAME", 0) + check(primitive2, False, "EOL", 0) +def 
test_simple_any_amount (): + primitive1 = AugmentedPrimitive( TokenPrimitive( "NAME", stored=True ), augment=AugmentedType.ANY_AMOUNT ) + primitive2 = AugmentedPrimitive( TokenPrimitive( "EOL", stored=True ), augment=AugmentedType.ANY_AMOUNT ) + + tokens = [ + MockToken( "NAME", "A" ), + MockToken( "NAME", "A" ), + MockToken( "EOL", "A" ), + MockToken( "EOL", "A" ), + MockToken( "EOL", "A" ), + MockToken( "NAME", "A" ) + ] + stream = ParserStream(tokens) + context = ParserContext() + + def check (prim: ParserNode, should_work: bool, res: str, amount: int): + with stream as (atomic, state): + result = prim.evaluate(stream, context) + if should_work: assert result == ParsingResult.SUCCESS + else : assert result == ParsingResult.IGNORED + + args = state.poll_stored() + assert len(args) == amount + for i, a in enumerate(args): + assert a.name == res + + check(primitive2, False, "EOL", 0) + check(primitive1, True, "NAME", 2) + check(primitive1, False, "NAME", 0) + check(primitive2, True, "EOL", 3) + check(primitive2, False, "EOL", 0) + check(primitive1, True, "NAME", 1) + check(primitive1, False, "NAME", 0) + check(primitive2, False, "EOL", 0) + +def test_prim_type_err (): + def prim_type__f (*args): + if args[0].value == "!if": + raise NotImplementedError() + else: return args[0] + prim = AugmentedPrimitive( TokenPrimitive("NAME", True), prim_type=prim_type__f ) + + stream = ParserStream([ MockToken("NAME", "if"), MockToken("NAME", "!if") ]) + ctx = ParserContext() + + with stream as (atomic, state): + assert state.size == 2 + assert prim.evaluate(stream, ctx) == ParsingResult.SUCCESS + stored = state.poll_stored() + assert state.size == 1 + assert len(stored) == 1 + assert stored[0].name == "NAME" and stored[0].value == "if" + assert prim.evaluate(stream, ctx) == ParsingResult.FAILED + stored = state.poll_stored() + assert len(stored) == 0 + assert state.size == 1 +def test_prim_type_none (): + def prim_type__f (*args): + return args + + primitive1 = 
AugmentedPrimitive( TokenPrimitive( "NAME", expects="if" ), prim_type=prim_type__f ) + primitive2 = AugmentedPrimitive( TokenPrimitive( "EOL", stored=True ), prim_type=prim_type__f ) + primitive3 = AugmentedPrimitive( TokenPrimitive( "NAME" ), prim_type=prim_type__f ) + primitive4 = AugmentedPrimitive( TokenPrimitive( "NAME", expects="!if", stored=True ), prim_type=prim_type__f ) + + T = [ MockToken("EOL", "\n"), MockToken("NAME", "if"), MockToken("NAME", "!if") ] + context = ParserContext() + stream = ParserStream(T) + + def check_eq (arr): + with stream as (atomic, state): + args = state._ParserCursor__arguments + size = state._ParserCursor__arg_size + + assert size == len(arr) + assert args[:size] == arr + def evaluate (prim: TokenPrimitive, result: ParsingResult, arr, rollback = True): + with stream as (atomic, state): + assert prim.evaluate(stream, context) == result + + check_eq([tuple(arr)] if result == ParsingResult.SUCCESS else []) + if rollback: atomic.rollback() + def advance (): + with stream as (atomic, state): + state.poll() + + evaluate( primitive1, ParsingResult.FAILED, [] ) + evaluate( primitive2, ParsingResult.SUCCESS, [ T[0] ] ) + evaluate( primitive3, ParsingResult.FAILED, [] ) + evaluate( primitive4, ParsingResult.FAILED, [] ) + advance() + + evaluate( primitive1, ParsingResult.SUCCESS, [] ) + evaluate( primitive2, ParsingResult.FAILED, [] ) + evaluate( primitive3, ParsingResult.SUCCESS, [] ) + evaluate( primitive4, ParsingResult.FAILED, [] ) + advance() + + evaluate( primitive1, ParsingResult.FAILED, [] ) + evaluate( primitive2, ParsingResult.FAILED, [] ) + evaluate( primitive3, ParsingResult.SUCCESS, [] ) + evaluate( primitive4, ParsingResult.SUCCESS, [ T[2] ] ) + advance() + diff --git a/tests/parser/primitives/test_primitive_call.py b/tests/parser/primitives/test_primitive_call.py new file mode 100644 index 0000000..db1daad --- /dev/null +++ b/tests/parser/primitives/test_primitive_call.py @@ -0,0 +1,36 @@ + +from polyparser.parser.context 
import ParserContext +from polyparser.parser.primitives.call import CallPrimitive +from polyparser.parser.primitives.token import TokenPrimitive +from polyparser.parser.result import ParsingResult +from polyparser.parser.stream import ParserStream +from tests.parser.primitives.test_primitive_token import MockToken + + +def test_error_calls (): + context = ParserContext() + + context.set_element( "name2", "not a node" ) + + parser = ParserStream ( [ "abc", "def" ] ) + + assert CallPrimitive( "name1" ).evaluate( parser, context ) == ParsingResult.FAILED + assert CallPrimitive( "name2" ).evaluate( parser, context ) == ParsingResult.FAILED +def test_simple_call (): + context = ParserContext() + + context.set_element("if", TokenPrimitive( "if", True )) + + parser = ParserStream( + [ + MockToken("if", "if"), + MockToken("fi", "fi") + ] + ) + + primitive = CallPrimitive( "if" ) + assert primitive.evaluate( parser, context ) == ParsingResult.SUCCESS + assert primitive.evaluate( parser, context ) == ParsingResult.FAILED + + with parser as (atomic, state): + assert state._ParserCursor__arguments == [ parser.tokens[0] ] diff --git a/tests/parser/primitives/test_primitive_list.py b/tests/parser/primitives/test_primitive_list.py new file mode 100644 index 0000000..5bd844c --- /dev/null +++ b/tests/parser/primitives/test_primitive_list.py @@ -0,0 +1,69 @@ + +from polyparser.parser.context import ParserContext +from polyparser.parser.node import ParserNode +from polyparser.parser.primitives.list import ListPrimitive +from polyparser.parser.primitives.token import TokenPrimitive +from polyparser.parser.result import ParsingResult +from polyparser.parser.stream import ParserStream +from tests.parser.primitives.test_primitive_token import MockToken + + +class AlwaysIgnore (ParserNode): + def evaluate(self, stream: ParserStream, context: ParserContext) -> ParsingResult: + return ParsingResult.IGNORED + +def test_simple_list (): + primitive = ListPrimitive( + TokenPrimitive( "DASH" ), 
+ TokenPrimitive( "NAME", True ), + TokenPrimitive( "DASH" ) + ) + + def test_tokens (*tokens: MockToken, should_work = False): + stream = ParserStream(tokens) + ctx = ParserContext() + + with stream as (atomic, state): + result = primitive.evaluate(stream, ctx) + if should_work: + assert result == ParsingResult.SUCCESS + + args = state.poll_stored() + assert len(args) == 1 and args[0] is tokens[1] + else: + assert result == ParsingResult.FAILED + + args = state.poll_stored() + assert len(args) == 0 + + test_tokens( + MockToken( "DASH", "/" ), + MockToken( "NAME", "DASH" ), + MockToken( "DASH", "/" ), + should_work=True + ) + test_tokens( + MockToken( "DASH", "/" ), + MockToken( "NAME2", "DASH" ), + MockToken( "DASH", "/" ) + ) +def test_ignored_list (): + primitive = ListPrimitive( AlwaysIgnore() ) + + stream = ParserStream([ MockToken("A", "B") ]) + ctx = ParserContext() + + with stream as (atomic, state): + assert primitive.evaluate(stream, ctx) == ParsingResult.IGNORED + assert state.poll_stored() == [] + assert state.poll().name == "A" + primitive = ListPrimitive( AlwaysIgnore(), TokenPrimitive( "A", True ) ) + + stream = ParserStream([ MockToken("A", "B") ]) + ctx = ParserContext() + + with stream as (atomic, state): + assert primitive.evaluate(stream, ctx) == ParsingResult.SUCCESS + stored = state.poll_stored() + assert len(stored) == 1 and stored[0].name == "A" and state.size == 0 + diff --git a/tests/parser/primitives/test_primitive_or.py b/tests/parser/primitives/test_primitive_or.py new file mode 100644 index 0000000..c1114ad --- /dev/null +++ b/tests/parser/primitives/test_primitive_or.py @@ -0,0 +1,70 @@ + +from polyparser.parser.context import ParserContext +from polyparser.parser.primitives.branch import OrPrimitive +from polyparser.parser.primitives.list import ListPrimitive +from polyparser.parser.primitives.token import TokenPrimitive +from polyparser.parser.result import ParsingResult +from polyparser.parser.stream import ParserStream +from 
tests.parser.primitives.test_primitive_list import AlwaysIgnore +from tests.parser.primitives.test_primitive_token import MockToken + + +def test_trivial_or (): + primitive = OrPrimitive( + TokenPrimitive( "NAME", True, "true" ), + TokenPrimitive( "NAME", True, "false" ) + ) + + def parse (string: str, expects: bool): + stream = ParserStream( [ MockToken( "NAME", string ) ] ) + context = ParserContext() + + with stream as (atomic, state): + assert state.size == 1 + result = primitive.evaluate( stream, context ) + stored = state.poll_stored() + + if expects: + assert state.size == 0 + assert result == ParsingResult.SUCCESS + assert len(stored) == 1 + assert stored[0].name == "NAME" and stored[0].value == string + else: + assert state.size == 1 + assert result == ParsingResult.FAILED + assert len(stored) == 0 + + parse ("true", True) + parse ("false", True) + parse ("none", False) + parse ("nothing", False) +def test_trivial_or_with_ignored (): + primitive = OrPrimitive( + AlwaysIgnore(), + TokenPrimitive( "NAME", True, "true" ), + TokenPrimitive( "NAME", True, "false" ) + ) + + def parse (string: str, expects: bool): + stream = ParserStream( [ MockToken( "NAME", string ) ] ) + context = ParserContext() + + with stream as (atomic, state): + assert state.size == 1 + result = primitive.evaluate( stream, context ) + stored = state.poll_stored() + + if expects: + assert state.size == 0 + assert result == ParsingResult.SUCCESS + assert len(stored) == 1 + assert stored[0].name == "NAME" and stored[0].value == string + else: + assert state.size == 1 + assert result == ParsingResult.IGNORED + assert len(stored) == 0 + + parse ("true", True) + parse ("false", True) + parse ("none", False) + parse ("nothing", False) diff --git a/tests/parser/primitives/test_primitive_token.py b/tests/parser/primitives/test_primitive_token.py new file mode 100644 index 0000000..5ee482c --- /dev/null +++ b/tests/parser/primitives/test_primitive_token.py @@ -0,0 +1,67 @@ + +from 
polyparser.parser.context import ParserContext +from polyparser.parser.primitives.token import TokenPrimitive +from polyparser.parser.result import ParsingResult +from polyparser.parser.stream import ParserStream + +class MockToken: + def __init__(self, name: str, value: str): + self.name = name + self.value = value + +def test_is_valid_primitive (): + primitive = TokenPrimitive( "if" ) + + assert primitive.is_valid( MockToken( "if", "if(expr) {}" ) ) + assert not primitive.is_valid( MockToken( "!if", "if(expr) {}" ) ) + + primitive = TokenPrimitive( "if", expects="if(expr) {}" ) + + assert primitive.is_valid( MockToken( "if", "if(expr) {}" ) ) + assert not primitive.is_valid( MockToken( "!if", "if(expr) {}" ) ) + assert not primitive.is_valid( MockToken( "if", "if(expr2) {}" ) ) +def test_primitive (): + primitive1 = TokenPrimitive( "NAME", expects="if" ) + primitive2 = TokenPrimitive( "EOL", stored=True ) + primitive3 = TokenPrimitive( "NAME" ) + primitive4 = TokenPrimitive( "NAME", expects="!if", stored=True ) + + T = [ MockToken("EOL", "\n"), MockToken("NAME", "if"), MockToken("NAME", "!if") ] + context = ParserContext() + stream = ParserStream(T) + + def check_eq (arr): + with stream as (atomic, state): + args = state._ParserCursor__arguments + size = state._ParserCursor__arg_size + + assert size == len(arr) + assert args[:size] == arr + def evaluate (prim: TokenPrimitive, result: ParsingResult, arr, rollback = True): + with stream as (atomic, state): + assert prim.evaluate(stream, context) == result + + check_eq(arr) + if rollback: atomic.rollback() + def advance (): + with stream as (atomic, state): + state.poll() + + evaluate( primitive1, ParsingResult.FAILED, [] ) + evaluate( primitive2, ParsingResult.SUCCESS, [ T[0] ] ) + evaluate( primitive3, ParsingResult.FAILED, [] ) + evaluate( primitive4, ParsingResult.FAILED, [] ) + advance() + + evaluate( primitive1, ParsingResult.SUCCESS, [] ) + evaluate( primitive2, ParsingResult.FAILED, [] ) + evaluate( 
primitive3, ParsingResult.SUCCESS, [] ) + evaluate( primitive4, ParsingResult.FAILED, [] ) + advance() + + evaluate( primitive1, ParsingResult.FAILED, [] ) + evaluate( primitive2, ParsingResult.FAILED, [] ) + evaluate( primitive3, ParsingResult.SUCCESS, [] ) + evaluate( primitive4, ParsingResult.SUCCESS, [ T[2] ] ) + advance() + diff --git a/tests/parser/test_context.py b/tests/parser/test_context.py new file mode 100644 index 0000000..030260b --- /dev/null +++ b/tests/parser/test_context.py @@ -0,0 +1,34 @@ + +from typing import Any, Dict, List, Tuple +from polyparser.parser.context import ParserContext + +def check_context (context: ParserContext, names: List[str], values: Dict[str, Any] = {}): + for name in names: + if name in values: + assert context.get_element(name) == (values[name], True) + else : + assert context.get_element(name) == (None, False) + +def test_simple_context (): + context = ParserContext() + + names = [ "abc", "def", "rgb" ] + + check_context(context, names) + context.set_element( "abc", "Hi !" ) + check_context(context, names, { "abc": "Hi !" }) + context.set_element( "def", "def2" ) + check_context(context, names, { "abc": "Hi !", "def": "def2" }) +def test_double_context (): + context1 = ParserContext() + context2 = ParserContext(context1) + + names = [ "abc", "def", "rgb" ] + check_context(context2, names) + check_context(context1, names) + context2.set_element( "abc", "Hi !" ) + check_context(context2, names, { "abc": "Hi !" 
}) + check_context(context1, names) + context1.set_element( "def", "def2" ) + check_context(context2, names, { "abc": "Hi !", "def": "def2" }) + check_context(context1, names, { "def": "def2" }) diff --git a/tests/parser/test_node.py b/tests/parser/test_node.py new file mode 100644 index 0000000..aec6de0 --- /dev/null +++ b/tests/parser/test_node.py @@ -0,0 +1,15 @@ + +import pytest + +from polyparser.parser.context import ParserContext +from polyparser.parser.node import ParserNode +from polyparser.parser.stream import ParserStream + + +def test_node_interface (): + with pytest.raises(NotImplementedError): + context = ParserContext() + stream = ParserStream ([]) + + node = ParserNode() + node.evaluate( stream, context ) \ No newline at end of file diff --git a/tests/parser/test_parser_init.py b/tests/parser/test_parser_init.py new file mode 100644 index 0000000..d9c15cb --- /dev/null +++ b/tests/parser/test_parser_init.py @@ -0,0 +1,9 @@ + +import pytest + +from polyparser.parser import Parser + + +def test_parser_error (): + with pytest.raises(NotImplementedError): + Parser() diff --git a/tests/parser/test_result.py b/tests/parser/test_result.py new file mode 100644 index 0000000..490fdb9 --- /dev/null +++ b/tests/parser/test_result.py @@ -0,0 +1,22 @@ + +from polyparser.parser.result import ParsingResult + + +def test_enum (): + assert set(ParsingResult._member_names_) == set([ "SUCCESS", "FAILED", "IGNORED" ]) + + assert ParsingResult.FAILED .value == 0 + assert ParsingResult.IGNORED.value == 1 + assert ParsingResult.SUCCESS.value == 2 + + assert ParsingResult.FAILED .is_failed () + assert not ParsingResult.FAILED .is_ignored() + assert not ParsingResult.FAILED .is_success() + + assert not ParsingResult.IGNORED.is_failed () + assert ParsingResult.IGNORED.is_ignored() + assert not ParsingResult.IGNORED.is_success() + + assert not ParsingResult.SUCCESS.is_failed () + assert not ParsingResult.SUCCESS.is_ignored() + assert ParsingResult.SUCCESS.is_success() diff 
--git a/tests/parser/test_stream.py b/tests/parser/test_stream.py new file mode 100644 index 0000000..db4d132 --- /dev/null +++ b/tests/parser/test_stream.py @@ -0,0 +1,56 @@ + +import pytest +from polyparser.parser.stream import ParserStream + + +def test_parser_stream (): + L = [ "a", "b", "c", "d" ] + + stream = ParserStream(L) + + for i in range(len(L)): + with stream as (atomic, state): + assert state.size == len(state) == len(L) - i + assert L[i] == state.peek() + assert L[i] == state.poll() + assert state.size == len(state) == len(L) - i - 1 + + with stream as (atomic, state): + with pytest.raises(Exception): + state.peek() + with pytest.raises(Exception): + state.poll() + +def test_parser_stream_arglist (): + L = [ "a", "b", "c", "d" ] + + stream = ParserStream(L) + + with stream as (atomic1, state1): + state1.store( "H" ) + + with stream as (atomic2, state2): + state2.store("ello") + data = state2.poll_stored() + + state2.store("ello, World !") + assert data == [ "ello" ] + + assert state1.poll_stored() == [ "H", "ello, World !" ] + +def test_parser_stream_arglist_rollback (): + L = [ "a", "b", "c", "d" ] + + stream = ParserStream(L) + + with stream as (atomic1, state1): + state1.store( "H" ) + + with stream as (atomic2, state2): + state2.store("ello") + atomic2.rollback() + + with stream as (atomic2, state2): + state2.store("ello, World !") + + assert state1.poll_stored() == [ "H", "ello, World !" 
] From 02679906d33fe82b283f722881c3e7b6201c6679 Mon Sep 17 00:00:00 2001 From: "theo.hollender" Date: Tue, 15 Oct 2024 12:02:32 +0200 Subject: [PATCH 2/8] add-parser: fixed doc --- .github/workflows/documentation.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/documentation.yaml b/.github/workflows/documentation.yaml index 41c8d2f..45e4283 100644 --- a/.github/workflows/documentation.yaml +++ b/.github/workflows/documentation.yaml @@ -18,7 +18,7 @@ jobs: - uses: actions/setup-python@v5 - name: Install dependencies run: | - pip install sphinx + sudo apt-get install python3-sphinx - name: Sphinx build run: | branches=`git branch -r | cut -c 3- | sed 's/origin\///g'` From 27a6c223c5ef1c01644073da1156e422d7220a56 Mon Sep 17 00:00:00 2001 From: "theo.hollender" Date: Tue, 3 Dec 2024 18:45:45 +0100 Subject: [PATCH 3/8] add-parser: added functions into the parser --- polyparser/parser/node.py | 24 ++++ polyparser/parser/primitives/augmented.py | 8 +- polyparser/parser/primitives/branch.py | 6 +- polyparser/parser/primitives/call.py | 16 ++- polyparser/parser/primitives/function.py | 27 ++++ polyparser/parser/primitives/list.py | 27 +++- polyparser/parser/primitives/token.py | 7 +- tests/parser/primitives/test_function.py | 134 ++++++++++++++++++ .../primitives/test_primitive_augmented.py | 10 ++ .../parser/primitives/test_primitive_list.py | 87 ++++++++++++ tests/parser/primitives/test_primitive_or.py | 4 + .../parser/primitives/test_primitive_token.py | 14 +- tests/parser/test_node.py | 39 ++++- 13 files changed, 384 insertions(+), 19 deletions(-) create mode 100644 polyparser/parser/primitives/function.py create mode 100644 tests/parser/primitives/test_function.py diff --git a/polyparser/parser/node.py b/polyparser/parser/node.py index c31439f..5057dc5 100644 --- a/polyparser/parser/node.py +++ b/polyparser/parser/node.py @@ -1,8 +1,32 @@ +from typing import Any, List, Tuple from polyparser.parser.context import ParserContext 
from polyparser.parser.result import ParsingResult from polyparser.parser.stream import ParserStream +import enum + +class ParserNodeType(enum.Enum): + PRIMITIVE = 0 + FUNCTION = 1 + class ParserNode: + def call (self, stream: ParserStream, context: ParserContext, arguments: List[Any]): + raise NotImplementedError() def evaluate (self, stream: ParserStream, context: "ParserContext") -> ParsingResult: raise NotImplementedError() + +class BoundNode(ParserNode): + __sub_node: ParserNode + __context : ParserContext + + def __init__(self, node: ParserNode, context: ParserContext) -> None: + super().__init__() + + self.__sub_node = node + self.__context = context + + def call(self, stream: ParserStream, context: ParserContext, arguments: List[Any]): + return self.__sub_node.call(stream, self.__context, arguments) + def evaluate(self, stream: ParserStream, context: ParserContext) -> ParsingResult: + return self.__sub_node.evaluate(stream, self.__context) diff --git a/polyparser/parser/primitives/augmented.py b/polyparser/parser/primitives/augmented.py index 685c002..2805223 100644 --- a/polyparser/parser/primitives/augmented.py +++ b/polyparser/parser/primitives/augmented.py @@ -1,6 +1,6 @@ import enum -from typing import Any +from typing import Any, List from polyparser.parser.context import ParserContext from polyparser.parser.node import ParserNode from polyparser.parser.result import ParsingResult @@ -18,6 +18,12 @@ def __init__(self, subprimitive: ParserNode, augment: AugmentedType = AugmentedT self.__augment = augment self.__prim_type = prim_type + def call(self, stream: ParserStream, context: ParserContext, arguments: List[Any]): + if len(arguments) == 0: + return self.evaluate(stream, context) + if self.__augment != AugmentedType.NONE or self.__prim_type is not None: + raise NotImplementedError( "A non empty augmented primitive cannot be called" ) + return self.__sub_primitive.call( stream, context, arguments ) def evaluate(self, stream: ParserStream, context: 
ParserContext) -> ParsingResult: min_amount = 0 if (self.__augment.value & 1) == 0: diff --git a/polyparser/parser/primitives/branch.py b/polyparser/parser/primitives/branch.py index bb06ead..792e260 100644 --- a/polyparser/parser/primitives/branch.py +++ b/polyparser/parser/primitives/branch.py @@ -1,5 +1,5 @@ -from typing import List +from typing import Any, List from polyparser.parser.context import ParserContext from polyparser.parser.node import ParserNode from polyparser.parser.result import ParsingResult @@ -13,6 +13,10 @@ def __init__(self, *primitives: ParserNode) -> None: super().__init__() self.__primitives = list(primitives) + def call(self, stream: ParserStream, context: ParserContext, arguments: List[Any]): + if len(arguments) == 0: + return self.evaluate(stream, context) + raise NotImplementedError( "An or primitive cannot be called" ) def evaluate(self, stream: ParserStream, context: ParserContext) -> ParsingResult: with stream as (atomic, state): had_one_ignored = False diff --git a/polyparser/parser/primitives/call.py b/polyparser/parser/primitives/call.py index acba777..02d5394 100644 --- a/polyparser/parser/primitives/call.py +++ b/polyparser/parser/primitives/call.py @@ -1,12 +1,11 @@ -from typing import List +from typing import Any, List from polyparser.parser.context import ParserContext -from polyparser.parser.node import ParserNode +from polyparser.parser.node import BoundNode, ParserNode, ParserNodeType from polyparser.parser.primitives.list import ListPrimitive from polyparser.parser.result import ParsingResult from polyparser.parser.stream import ParserStream - class CallPrimitive(ParserNode): __name: str __args: List[ParserNode] @@ -16,10 +15,13 @@ def __init__(self, name: str, *arguments: ParserNode) -> None: self.__args = list(arguments) - def evaluate(self, stream: ParserStream, context: ParserContext) -> ParsingResult: + def call(self, stream: ParserStream, context: ParserContext, arguments: List[Any]): target, exists = 
context.get_element(self.__name) - + if exists and isinstance(target, ParserNode): - # TODO instantiate with self context - return target.evaluate(stream, context) + new_args = list(map(lambda arg : BoundNode( arg, context ), self.__args)) + print(new_args, arguments) + return target.call(stream, context, new_args + arguments) return ParsingResult.FAILED + def evaluate(self, stream: ParserStream, context: ParserContext) -> ParsingResult: + return self.call( stream, context, [] ) diff --git a/polyparser/parser/primitives/function.py b/polyparser/parser/primitives/function.py new file mode 100644 index 0000000..d6dd8bd --- /dev/null +++ b/polyparser/parser/primitives/function.py @@ -0,0 +1,27 @@ + +from typing import Any, List +from polyparser.parser.context import ParserContext +from polyparser.parser.node import BoundNode, ParserNode +from polyparser.parser.result import ParsingResult +from polyparser.parser.stream import ParserStream + + +class FunctionNode(ParserNode): + def __init__(self, name: str, target: ParserNode, args_names: List[str]): + self.__name = name + self.__target = target + + self.__args_names = args_names + + def call(self, stream: ParserStream, context: ParserContext, arguments: List[Any]): + if len(arguments) == len(self.__args_names): + new_context = ParserContext( context ) + for arg_name, arg in zip(self.__args_names, arguments): + new_context.set_element( arg_name, arg ) + + return self.__target.evaluate( stream, new_context ) + else: + raise NotImplementedError( f"Argument count for method '{self.__name}' is wrong, expected {len(self.__args_names)} got {len(arguments)}." 
) + def evaluate(self, stream: ParserStream, context: ParserContext) -> ParsingResult: + context.set_element( self.__name, BoundNode( self, context ) ) + return ParsingResult.IGNORED diff --git a/polyparser/parser/primitives/list.py b/polyparser/parser/primitives/list.py index ba3067c..d24f045 100644 --- a/polyparser/parser/primitives/list.py +++ b/polyparser/parser/primitives/list.py @@ -1,23 +1,40 @@ -from typing import List +from typing import Any, List, Tuple from polyparser.parser.context import ParserContext -from polyparser.parser.node import ParserNode +from polyparser.parser.node import ParserNode, ParserNodeType from polyparser.parser.result import ParsingResult from polyparser.parser.stream import ParserStream class ListPrimitive(ParserNode): - primitives: List[ParserNode] + __primitives: List[ParserNode] def __init__(self, *primitives: List[ParserNode]) -> None: super().__init__() - self.primitives = primitives + self.__primitives = primitives + def call(self, stream: ParserStream, context: ParserContext, arguments: List[Any]): + if len(arguments) == 0: + return self.evaluate(stream, context) + + with stream as (atomic, state): + result = ParsingResult.IGNORED + + for subprimitive in self.__primitives: + next_result = subprimitive.call(stream, context, arguments) + + if next_result == ParsingResult.SUCCESS: + result = next_result + elif next_result == ParsingResult.FAILED: + atomic.rollback() + return ParsingResult.FAILED + + return result def evaluate(self, stream: ParserStream, context: ParserContext) -> ParsingResult: with stream as (atomic, state): result = ParsingResult.IGNORED - for subprimitive in self.primitives: + for subprimitive in self.__primitives: next_result = subprimitive.evaluate(stream, context) if next_result == ParsingResult.SUCCESS: diff --git a/polyparser/parser/primitives/token.py b/polyparser/parser/primitives/token.py index ea70232..cf704cb 100644 --- a/polyparser/parser/primitives/token.py +++ 
b/polyparser/parser/primitives/token.py @@ -1,7 +1,8 @@ +from typing import Any, List, Tuple from polyparser.lexer.token import Token from polyparser.parser.context import ParserContext -from polyparser.parser.node import ParserNode +from polyparser.parser.node import ParserNode, ParserNodeType from polyparser.parser.result import ParsingResult from polyparser.parser.stream import ParserStream @@ -22,6 +23,10 @@ def is_valid (self, token: Token): return token.name == self.__name \ and (self.__expects is None \ or self.__expects == token.value) + def call(self, stream: ParserStream, context: ParserContext, arguments: List[Any]): + if len(arguments) == 0: + return self.evaluate(stream, context) + raise NotImplementedError( "A token primitive cannot be called" ) def evaluate(self, stream: ParserStream, context: ParserContext) -> ParsingResult: with stream as (atomic, state): if state.size == 0: return ParsingResult.FAILED diff --git a/tests/parser/primitives/test_function.py b/tests/parser/primitives/test_function.py new file mode 100644 index 0000000..3362517 --- /dev/null +++ b/tests/parser/primitives/test_function.py @@ -0,0 +1,134 @@ + +import pytest +from polyparser.parser.context import ParserContext +from polyparser.parser.node import BoundNode, ParserNode +from polyparser.parser.primitives.call import CallPrimitive +from polyparser.parser.primitives.function import FunctionNode +from polyparser.parser.result import ParsingResult +from polyparser.parser.stream import ParserStream + +def test_simple_function (): + gcontext = ParserContext() + gcontext.set_element( "name1", "value" ) + vstream = ParserStream([]) + + class T (ParserNode): + visit = 0 + def __init__(self, res): + self.res = res + def evaluate(self, stream: ParserStream, context: ParserContext) -> ParsingResult: + self.visit += 1 + + assert context != gcontext + assert context.get_element("name1") == ("value", True) + assert context.get_element("name2") == (None, False) + assert stream == vstream + 
return self.res + + succ = T( ParsingResult.SUCCESS ); FunctionNode( "succ", succ, [] ).evaluate( vstream, gcontext ) + fail = T( ParsingResult.FAILED ); FunctionNode( "fail", fail, [] ).evaluate( vstream, gcontext ) + ignr = T( ParsingResult.IGNORED ); FunctionNode( "ignr", ignr, [] ).evaluate( vstream, gcontext ) + + def check (name: str): + value, exists = gcontext.get_element(name) + assert exists + assert isinstance(value, BoundNode) + check("succ") + check("fail") + check("ignr") + + assert CallPrimitive( "succ" ).evaluate( vstream, gcontext ) == ParsingResult.SUCCESS + assert CallPrimitive( "succ" ).evaluate( vstream, gcontext ) == ParsingResult.SUCCESS + assert CallPrimitive( "fail" ).evaluate( vstream, gcontext ) == ParsingResult.FAILED + assert CallPrimitive( "ignr" ).evaluate( vstream, gcontext ) == ParsingResult.IGNORED + + assert succ.visit == 2 + assert fail.visit == 1 + assert ignr.visit == 1 + + with pytest.raises(NotImplementedError, match="Argument count for method 'succ' is wrong, expected 0 got 1."): + assert CallPrimitive( "succ", "arg1" ).evaluate( vstream, gcontext ) == ParsingResult.SUCCESS + +def test_function_with_arguments (): + gcontext = ParserContext() + gcontext.set_element( "name1", "value" ) + vstream = ParserStream([]) + + class T (ParserNode): + visit = 0 + def __init__(self, res): + self.res = res + def call(self, stream: ParserStream, context: ParserContext, arguments) -> ParsingResult: + assert arguments == [] + self.visit += 1 + + assert context == gcontext + assert context.get_element("name1") == ("value", True) + assert context.get_element("name2") == (None, False) + assert stream == vstream + return self.res + + func = FunctionNode("func", CallPrimitive("target"), [ "target" ]) + func.evaluate( vstream, gcontext ) + + succ = T( ParsingResult.SUCCESS ) + fail = T( ParsingResult.FAILED ) + ignr = T( ParsingResult.IGNORED ) + + def check (name: str): + value, exists = gcontext.get_element(name) + assert exists + assert 
isinstance(value, BoundNode) + check("func") + + assert CallPrimitive( "func", succ ).evaluate( vstream, gcontext ) == ParsingResult.SUCCESS + assert CallPrimitive( "func", succ ).evaluate( vstream, gcontext ) == ParsingResult.SUCCESS + assert CallPrimitive( "func", fail ).evaluate( vstream, gcontext ) == ParsingResult.FAILED + assert CallPrimitive( "func", ignr ).evaluate( vstream, gcontext ) == ParsingResult.IGNORED + + assert succ.visit == 2 + assert fail.visit == 1 + assert ignr.visit == 1 + + with pytest.raises(NotImplementedError, match="Argument count for method 'func' is wrong, expected 1 got 0."): + assert CallPrimitive( "func" ).evaluate( vstream, gcontext ) == ParsingResult.SUCCESS + with pytest.raises(NotImplementedError, match="Argument count for method 'func' is wrong, expected 1 got 2."): + assert CallPrimitive( "func", succ, fail ).evaluate( vstream, gcontext ) == ParsingResult.SUCCESS + +def test_higher_order_function (): + gcontext = ParserContext() + gcontext.set_element( "name1", "value" ) + vstream = ParserStream([]) + + class T(ParserNode): + visit = 0 + def __init__(self, res): + self.res = res + def call(self, stream: ParserStream, context: ParserContext, arguments) -> ParsingResult: + assert arguments == [] + self.visit += 1 + + assert context == gcontext + assert context.get_element("name1") == ("value", True) + assert context.get_element("name2") == (None, False) + assert stream == vstream + return self.res + + # + # def func2 (target, target2) = + # target(target2) + # def func (target) = + # target() + # + # // Run the following program + # func2(func, T) + # + + func = FunctionNode("func", CallPrimitive("target"), [ "target" ]) + func.evaluate( vstream, gcontext ) + func2 = FunctionNode("func2", CallPrimitive("target", CallPrimitive("target2")), [ "target", "target2" ]) + func2.evaluate( vstream, gcontext ) + + value = T(ParsingResult.SUCCESS) + assert CallPrimitive( "func2", CallPrimitive("func"), value ).evaluate( vstream, gcontext ) 
+ assert value.visit == 1 diff --git a/tests/parser/primitives/test_primitive_augmented.py b/tests/parser/primitives/test_primitive_augmented.py index ecc2923..9fba006 100644 --- a/tests/parser/primitives/test_primitive_augmented.py +++ b/tests/parser/primitives/test_primitive_augmented.py @@ -1,4 +1,5 @@ +import pytest from polyparser.parser.context import ParserContext from polyparser.parser.node import ParserNode from polyparser.parser.primitives.augmented import AugmentedPrimitive, AugmentedType @@ -30,6 +31,9 @@ def evaluate (prim: TokenPrimitive, result: ParsingResult, arr, rollback = True) check_eq(arr) if rollback: atomic.rollback() + with pytest.raises(NotImplementedError, match="A token primitive cannot be called"): + with stream as (atomic, state): + prim.call(stream, context, [ "some args" ]) def advance (): with stream as (atomic, state): state.poll() @@ -75,6 +79,9 @@ def evaluate (prim: TokenPrimitive, result: ParsingResult, arr, rollback = True) check_eq(arr) if rollback: atomic.rollback() + with pytest.raises(NotImplementedError, match="A non empty augmented primitive cannot be called"): + with stream as (atomic, state): + prim.call(stream, context, [ "some args" ]) def advance (): with stream as (atomic, state): state.poll() @@ -213,6 +220,9 @@ def evaluate (prim: TokenPrimitive, result: ParsingResult, arr, rollback = True) check_eq([tuple(arr)] if result == ParsingResult.SUCCESS else []) if rollback: atomic.rollback() + with pytest.raises(NotImplementedError, match="A non empty augmented primitive cannot be called"): + with stream as (atomic, state): + prim.call(stream, context, [ "some args" ]) def advance (): with stream as (atomic, state): state.poll() diff --git a/tests/parser/primitives/test_primitive_list.py b/tests/parser/primitives/test_primitive_list.py index 5bd844c..39a843e 100644 --- a/tests/parser/primitives/test_primitive_list.py +++ b/tests/parser/primitives/test_primitive_list.py @@ -1,4 +1,6 @@ +from typing import Any, List 
+import pytest from polyparser.parser.context import ParserContext from polyparser.parser.node import ParserNode from polyparser.parser.primitives.list import ListPrimitive @@ -23,6 +25,11 @@ def test_tokens (*tokens: MockToken, should_work = False): stream = ParserStream(tokens) ctx = ParserContext() + with stream as (_a, _s): + with pytest.raises(NotImplementedError, match="A token primitive cannot be called"): + with stream as (atomic, state): + result = primitive.call(stream, ctx, [ "args" ]) + _a.rollback() with stream as (atomic, state): result = primitive.evaluate(stream, ctx) if should_work: @@ -35,6 +42,20 @@ def test_tokens (*tokens: MockToken, should_work = False): args = state.poll_stored() assert len(args) == 0 + atomic.rollback() + with stream as (atomic, state): + result = primitive.call(stream, ctx, []) + if should_work: + assert result == ParsingResult.SUCCESS + + args = state.poll_stored() + assert len(args) == 1 and args[0] is tokens[1] + else: + assert result == ParsingResult.FAILED + + args = state.poll_stored() + assert len(args) == 0 + atomic.rollback() test_tokens( MockToken( "DASH", "/" ), @@ -67,3 +88,69 @@ def test_ignored_list (): stored = state.poll_stored() assert len(stored) == 1 and stored[0].name == "A" and state.size == 0 +def test_list_pass_args (): + vargs = "some passed args" + vcontext = ParserContext() + vstream = ParserStream([]) + class C (ParserNode): + def __init__(self) -> None: + self.visit = 0 + def call(self, stream: ParserStream, context: ParserContext, arguments: List[Any]): + self.visit += 1 + assert stream == vstream + assert context == vcontext + assert arguments == vargs + return ParsingResult.SUCCESS + def evaluate(self, stream: ParserStream, context: ParserContext) -> ParsingResult: + raise NotImplementedError() + + p1, p2 = C(), C() + prim = ListPrimitive( p1, p2 ) + assert prim.call( vstream, vcontext, vargs ) == ParsingResult.SUCCESS + + assert p1.visit == 1 + assert p2.visit == 1 +def 
test_list_pass_args_ignored (): + vargs = "some passed args" + vcontext = ParserContext() + vstream = ParserStream([]) + class C (ParserNode): + def __init__(self) -> None: + self.visit = 0 + def call(self, stream: ParserStream, context: ParserContext, arguments: List[Any]): + self.visit += 1 + assert stream == vstream + assert context == vcontext + assert arguments == vargs + return ParsingResult.IGNORED + def evaluate(self, stream: ParserStream, context: ParserContext) -> ParsingResult: + raise NotImplementedError() + + p1, p2 = C(), C() + prim = ListPrimitive( p1, p2 ) + assert prim.call( vstream, vcontext, vargs ) == ParsingResult.IGNORED + + assert p1.visit == 1 + assert p2.visit == 1 +def test_list_pass_args_failed (): + vargs = "some passed args" + vcontext = ParserContext() + vstream = ParserStream([]) + class C (ParserNode): + def __init__(self, res) -> None: + self.visit = 0 + self.res = res + def call(self, stream: ParserStream, context: ParserContext, arguments: List[Any]): + self.visit += 1 + assert stream == vstream + assert context == vcontext + assert arguments == vargs + return self.res + def evaluate(self, stream: ParserStream, context: ParserContext) -> ParsingResult: + raise NotImplementedError() + + ps = C( ParsingResult.IGNORED ), C( ParsingResult.SUCCESS ), C( ParsingResult.IGNORED ), C(ParsingResult.FAILED), C(ParsingResult.SUCCESS) + prim = ListPrimitive( *ps ) + assert prim.call( vstream, vcontext, vargs ) == ParsingResult.FAILED + + assert list(map(lambda x: x.visit, ps)) == [ 1, 1, 1, 1, 0 ] diff --git a/tests/parser/primitives/test_primitive_or.py b/tests/parser/primitives/test_primitive_or.py index c1114ad..9a5c1d0 100644 --- a/tests/parser/primitives/test_primitive_or.py +++ b/tests/parser/primitives/test_primitive_or.py @@ -1,4 +1,5 @@ +import pytest from polyparser.parser.context import ParserContext from polyparser.parser.primitives.branch import OrPrimitive from polyparser.parser.primitives.list import ListPrimitive @@ -33,6 +34,9 
@@ def parse (string: str, expects: bool): assert state.size == 1 assert result == ParsingResult.FAILED assert len(stored) == 0 + with pytest.raises(NotImplementedError, match="An or primitive cannot be called"): + with stream as (atomic, state): + primitive.call(stream, context, [ "some args" ]) parse ("true", True) parse ("false", True) diff --git a/tests/parser/primitives/test_primitive_token.py b/tests/parser/primitives/test_primitive_token.py index 5ee482c..24dcf28 100644 --- a/tests/parser/primitives/test_primitive_token.py +++ b/tests/parser/primitives/test_primitive_token.py @@ -43,6 +43,17 @@ def evaluate (prim: TokenPrimitive, result: ParsingResult, arr, rollback = True) check_eq(arr) if rollback: atomic.rollback() + with stream as (atomic, state): + assert prim.call(stream, context, []) == result + + check_eq(arr) + if rollback: atomic.rollback() + with stream as (atomic, state): + try: + assert prim.call(stream, context, [ None ]) == result + except NotImplementedError as error: + assert error.args[0] == "A token primitive cannot be called" + if rollback: atomic.rollback() def advance (): with stream as (atomic, state): state.poll() @@ -63,5 +74,4 @@ def advance (): evaluate( primitive2, ParsingResult.FAILED, [] ) evaluate( primitive3, ParsingResult.SUCCESS, [] ) evaluate( primitive4, ParsingResult.SUCCESS, [ T[2] ] ) - advance() - + advance() \ No newline at end of file diff --git a/tests/parser/test_node.py b/tests/parser/test_node.py index aec6de0..1f030fa 100644 --- a/tests/parser/test_node.py +++ b/tests/parser/test_node.py @@ -1,8 +1,10 @@ +from typing import Any, List import pytest from polyparser.parser.context import ParserContext -from polyparser.parser.node import ParserNode +from polyparser.parser.node import BoundNode, ParserNode +from polyparser.parser.result import ParsingResult from polyparser.parser.stream import ParserStream @@ -12,4 +14,37 @@ def test_node_interface (): stream = ParserStream ([]) node = ParserNode() - node.evaluate( 
stream, context ) \ No newline at end of file + node.evaluate( stream, context ) + with pytest.raises(NotImplementedError): + context = ParserContext() + stream = ParserStream ([]) + + node = ParserNode() + node.call( stream, context, [] ) + +def test_bound_node (): + vcontext = ParserContext() + vstream = ParserStream ([]) + + vargs = "some arguments, not a list but should go through" + + class BVN (ParserNode): + def __init__(self, v1, v2): + self.v1, self.v2 = v1, v2 + def call(self, stream: ParserStream, context: ParserContext, arguments: List[Any]): + assert stream == vstream + assert context == self.v1 + assert context != self.v2 + assert context != vcontext + + assert arguments == vargs + def evaluate(self, stream: ParserStream, context: ParserContext) -> ParsingResult: + assert stream == vstream + assert context == self.v2 + assert context != self.v1 + assert context != vcontext + + bvn = BVN( ParserContext(), ParserContext() ) + + BoundNode( bvn, bvn.v1 ).call( vstream, vcontext, vargs ) + BoundNode( bvn, bvn.v2 ).evaluate( vstream, vcontext ) \ No newline at end of file From 55d4e6dede8271ff029a46bbecb72ea9d3c08025 Mon Sep 17 00:00:00 2001 From: "theo.hollender" Date: Wed, 4 Dec 2024 23:40:48 +0100 Subject: [PATCH 4/8] add-parser: added poly language and finished polyparser --- polyparser/languages/language.py | 340 +++++++++- polyparser/lexer/__init__.py | 5 +- polyparser/lexer/rules/abstract.py | 3 +- polyparser/lexer/rules/indentation.py | 37 ++ polyparser/parser/__init__.py | 11 +- polyparser/parser/primitives/augmented.py | 2 +- polyparser/parser/primitives/call.py | 2 +- tests/alphabet/test_poly_language.py | 610 ++++++++++++++++++ tests/lexer/rules/file_tests/indentation.txt | 8 + .../rules/file_tests/wrong-indentation.txt | 3 + tests/lexer/rules/test_indentation.py | 37 ++ 11 files changed, 1048 insertions(+), 10 deletions(-) create mode 100644 polyparser/lexer/rules/indentation.py create mode 100644 tests/alphabet/test_poly_language.py create 
mode 100644 tests/lexer/rules/file_tests/indentation.txt create mode 100644 tests/lexer/rules/file_tests/wrong-indentation.txt create mode 100644 tests/lexer/rules/test_indentation.py diff --git a/polyparser/languages/language.py b/polyparser/languages/language.py index 4844643..acb60aa 100644 --- a/polyparser/languages/language.py +++ b/polyparser/languages/language.py @@ -1,7 +1,26 @@ +from ast import literal_eval +import enum +from typing import Callable, Dict, List from polyparser.io.reader import FileReader from polyparser.lexer import Lexer -from polyparser.parser import Parser +from polyparser.lexer.rules.ignore import IgnoreLexerRule +from polyparser.lexer.rules.indentation import IndentationLexingRule +from polyparser.lexer.rules.keyword import KeywordLexerRule +from polyparser.lexer.rules.name import NameLexerRule +from polyparser.lexer.rules.string import StringLexerRule +from polyparser.lexer.token import Token +from polyparser.lexer.token.factory import TokenTypeFactory +from polyparser.parser import FixedContextParser, Parser +from polyparser.parser.context import ParserContext +from polyparser.parser.node import BoundNode, ParserNode +from polyparser.parser.primitives.augmented import AugmentedPrimitive, AugmentedType +from polyparser.parser.primitives.branch import OrPrimitive +from polyparser.parser.primitives.call import CallPrimitive +from polyparser.parser.primitives.function import FunctionNode +from polyparser.parser.primitives.list import ListPrimitive +from polyparser.parser.primitives.token import TokenPrimitive +from polyparser.parser.stream import ParserStream class Language: __lexer : Lexer @@ -19,4 +38,321 @@ def get_parser (self) -> Parser: def parse (self, reader: "FileReader"): tokens = self.__lexer.try_lexing(reader) - return self.__parser.try_parsing(tokens) \ No newline at end of file + return self.__parser.try_parsing(tokens) + +class PolyLanguage(Language): + alphabet: None | enum.Enum + + augments: Dict[str, Callable] + def 
__init__(self, augments: Dict[str, Callable] = {}): + self.alphabet = None + + self.augments = augments + + super().__init__() + + def get_alphabet (self): + if self.alphabet is None: + type_factory = TokenTypeFactory( "pll-type-factory" ) + type_factory.add_token_type( "NAME" ) # Name + type_factory.add_token_type( "INDENT" ) # End of Line + type_factory.add_token_type( "STRING" ) + type_factory.add_token_type( "DASH" ) + type_factory.add_token_type( "DEFINE" ) + type_factory.add_token_type( "TWODOTS" ) + type_factory.add_token_type( "L_SQ_B" ) + type_factory.add_token_type( "R_SQ_B" ) + type_factory.add_token_type( "L_B" ) + type_factory.add_token_type( "R_B" ) + type_factory.add_token_type( "EQUALS" ) + type_factory.add_token_type( "COMMA" ) + type_factory.add_token_type( "QMARK" ) + type_factory.add_token_type( "HAT" ) + type_factory.add_token_type( "STAR" ) + type_factory.add_token_type( "PIPE" ) + type_factory.add_token_type( "PLUS" ) + + self.alphabet = type_factory.as_enumeration() + return self.alphabet + def get_lexer(self) -> Lexer: + alphabet = self.get_alphabet() + + lexer = Lexer([ + KeywordLexerRule({ + "def": alphabet.DEFINE, + + "=": alphabet.EQUALS, + "/": alphabet.DASH, + ":": alphabet.TWODOTS, + "[": alphabet.L_SQ_B, + "]": alphabet.R_SQ_B, + "(": alphabet.L_B, + ")": alphabet.R_B, + ",": alphabet.COMMA, + "?": alphabet.QMARK, + + "|": alphabet.PIPE, + "*": alphabet.STAR, + "^": alphabet.HAT, + "+": alphabet.PLUS + }), + NameLexerRule(alphabet.NAME), + StringLexerRule("\"", alphabet.STRING), + IndentationLexingRule(alphabet.INDENT), + IgnoreLexerRule(" \r") + ]) + + return lexer + + def token_parser (self) -> ParserNode: + def as_token_primitive (*args: List[Token]): + assert 1 <= len(args) <= 4 + + stored = ((len(args) - 1) & 1) == 1 + expects = ((len(args) - 1) & 2) == 2 + + name = args[1 if stored else 0].value + + return TokenPrimitive( name, stored, literal_eval( args[-1].value ) if expects else None ) + + return AugmentedPrimitive( + 
ListPrimitive( + TokenPrimitive( "DASH" ), + AugmentedPrimitive( + TokenPrimitive( "DASH", stored = True ), + AugmentedType.OPTIONAL + ), + TokenPrimitive( "NAME", stored = True ), + AugmentedPrimitive( + ListPrimitive( + TokenPrimitive("TWODOTS", stored = True), + TokenPrimitive("STRING", stored = True) + ), + AugmentedType.OPTIONAL + ), + TokenPrimitive( "DASH" ) + ), + prim_type = as_token_primitive + ) + def call_parser (self) -> ParserNode: + def as_call_primitive (name: Token, *args: List[ParserNode]): + return CallPrimitive( + name.value, + *args + ) + + return AugmentedPrimitive( + ListPrimitive( + TokenPrimitive( "NAME", stored = True ), + AugmentedPrimitive( + ListPrimitive( + TokenPrimitive("L_B"), + self.list_primitive_parser(False), + AugmentedPrimitive( + ListPrimitive( + TokenPrimitive("COMMA"), + self.list_primitive_parser(False) + ), + AugmentedType.ANY_AMOUNT + ), + TokenPrimitive("R_B") + ), + AugmentedType.ANY_AMOUNT + ) + ), + prim_type = as_call_primitive + ) + def list_primitive_parser (self, brackets: bool = True): + def as_list_primitive (*primitives): + return ListPrimitive( *primitives ) + + primitives = [ + TokenPrimitive("L_SQ_B"), + AugmentedPrimitive( + CallPrimitive("primitive"), + AugmentedType.ANY_AMOUNT + ), + TokenPrimitive("R_SQ_B") + ] + if not brackets: + primitives = primitives[1:-1] + return AugmentedPrimitive( + ListPrimitive( + *primitives + ), + prim_type=as_list_primitive + ) + def simple_primitive_parser (self) -> ParserNode: + return OrPrimitive( + CallPrimitive( "token_primitive" ), + CallPrimitive( "call_primitive" ), + CallPrimitive( "list_primitive" ) + ) + def augmented_primitive_parser (self) -> ParserNode: + def as_augmented_primitive (*args: List[Token | ParserNode]): + assert 1 <= len(args) <= 4 + if len(args) == 1: + return args[0] + position = 0 + + augment_type = None + if ((len(args) - 1) & 1) == 1: + type = args[0].name + position = 1 + if type == "QMARK": + augment_type = AugmentedType.OPTIONAL + if type 
== "STAR" : + augment_type = AugmentedType.ANY_AMOUNT + if type == "PLUS" : + augment_type = AugmentedType.AT_LEAST_ONE + prim_type = None + if ((len(args) - 1) & 2) == 2: + prim_type = self.augments[args[-1].value] + return AugmentedPrimitive( args[position], augment_type, prim_type ) + + return AugmentedPrimitive( + ListPrimitive( + AugmentedPrimitive( + OrPrimitive( + TokenPrimitive("QMARK", True), + TokenPrimitive("STAR", True), + TokenPrimitive("PLUS", True) + ), + AugmentedType.OPTIONAL + ), + CallPrimitive("simple_primitive"), + AugmentedPrimitive( + ListPrimitive( + TokenPrimitive("HAT", True), + TokenPrimitive("NAME", True) + ), + AugmentedType.OPTIONAL + ) + ), + prim_type=as_augmented_primitive + ) + def or_primitive_parser (self) -> ParserNode: + def as_or_primitive (*primitives): + if len(primitives) == 1: + return primitives[0] + return OrPrimitive(*primitives) + + return AugmentedPrimitive( + ListPrimitive( + CallPrimitive( "augmented_primitive" ), + AugmentedPrimitive( + ListPrimitive( + TokenPrimitive("PIPE"), + CallPrimitive( "augmented_primitive" ) + ), + AugmentedType.ANY_AMOUNT + ), + ), + prim_type=as_or_primitive + ) + def primitive_parser (self) -> ParserNode: + return CallPrimitive( "or_primitive" ) + + def block_parser (self, can_be_empty = False) -> FunctionNode: + def as_block (*prim): + return ListPrimitive(*prim) + + target = AugmentedType.AT_LEAST_ONE + if can_be_empty: + target = AugmentedType.ANY_AMOUNT + + return FunctionNode( + "block", + AugmentedPrimitive( + AugmentedPrimitive( + OrPrimitive( + ListPrimitive( + CallPrimitive( "indent" ), + AugmentedPrimitive( CallPrimitive("primitive"), AugmentedType.AT_LEAST_ONE ) + ), + CallPrimitive( "function", CallPrimitive("indent") ) + ), + target + ), + prim_type=as_block + ), + [ "indent" ] + ) + def function_parser (self): + def as_function (name: Token, *args: List[Token | ParserNode]): + name = name.value + target = args[-1] + + args = args[:-1] + + has_target_type = False + argnames 
= [] + for token in args: + if token.name == "TWODOTS": + has_target_type = True + break + argnames.append( token.value ) + + if has_target_type: + target = AugmentedPrimitive( + target, + prim_type=self.augments[args[-1].value] + ) + + return FunctionNode( name, target, argnames ) + return FunctionNode( + "function", + AugmentedPrimitive( + ListPrimitive( + CallPrimitive("indent"), + TokenPrimitive( "DEFINE", False ), + TokenPrimitive( "NAME", True ), + AugmentedPrimitive( + ListPrimitive( + TokenPrimitive("L_B"), + TokenPrimitive("NAME", True), + AugmentedPrimitive( + ListPrimitive( + TokenPrimitive("COMMA"), + TokenPrimitive("NAME", True) + ), + AugmentedType.ANY_AMOUNT + ), + TokenPrimitive("R_B") + ), + AugmentedType.ANY_AMOUNT + ), + AugmentedPrimitive( + ListPrimitive( + TokenPrimitive("TWODOTS", True), + TokenPrimitive("NAME", True) + ), + AugmentedType.OPTIONAL + ), + TokenPrimitive( "EQUALS", False ), + CallPrimitive( "block", ListPrimitive( CallPrimitive("indent"), TokenPrimitive("INDENT") ) ) + ), + prim_type=as_function + ), + [ "indent" ] + ) + + def get_parser(self) -> Parser: + context = ParserContext() + stream = ParserStream ([]) + + FunctionNode( "token_primitive", self.token_parser(), [] ).evaluate( stream, context ) + FunctionNode( "call_primitive", self.call_parser (), [] ).evaluate( stream, context ) + FunctionNode( "list_primitive", self.list_primitive_parser(), [] ).evaluate( stream, context ) + + FunctionNode( "simple_primitive", self.simple_primitive_parser(), [] ).evaluate( stream, context ) + FunctionNode( "augmented_primitive", self.augmented_primitive_parser(), [] ).evaluate( stream, context ) + FunctionNode( "or_primitive", self.or_primitive_parser(), [] ).evaluate( stream, context ) + FunctionNode( "primitive", self.primitive_parser(), [] ).evaluate( stream, context ) + + self.block_parser().evaluate( stream, context ) + self.function_parser().evaluate( stream, context ) + + FunctionNode( "main", CallPrimitive( "block", 
ListPrimitive() ), [] ).evaluate( stream, context ) + + return FixedContextParser(context) diff --git a/polyparser/lexer/__init__.py b/polyparser/lexer/__init__.py index d81d54c..97edc78 100644 --- a/polyparser/lexer/__init__.py +++ b/polyparser/lexer/__init__.py @@ -36,7 +36,10 @@ def try_lexing (self, reader: "FileReader") -> List[Token]: assert False if next_token.exists: - array.append( next_token.value ) + if isinstance(next_token.value, list): + array.extend( next_token.value ) + else: + array.append( next_token.value ) return array diff --git a/polyparser/lexer/rules/abstract.py b/polyparser/lexer/rules/abstract.py index c6cc94f..4547fe2 100644 --- a/polyparser/lexer/rules/abstract.py +++ b/polyparser/lexer/rules/abstract.py @@ -1,4 +1,5 @@ +from typing import List from polyparser.io.reader import FileReader from polyparser.lexer.token import Token from polyparser.utils.optional import Optional @@ -13,5 +14,5 @@ class LexerRule: """ try_lexing should return None in case of an error, or Optional[Token] in case it parsed anything """ - def try_lexing (self, reader: "FileReader") -> Optional[Token]: + def try_lexing (self, reader: "FileReader") -> Optional[Token | List[Token]]: assert False, "Not implemented" diff --git a/polyparser/lexer/rules/indentation.py b/polyparser/lexer/rules/indentation.py new file mode 100644 index 0000000..d9424dd --- /dev/null +++ b/polyparser/lexer/rules/indentation.py @@ -0,0 +1,37 @@ + +from typing import List +from polyparser.io.reader import FileReader +from polyparser.lexer.rules.abstract import LexerRule +from polyparser.lexer.token import Token +from polyparser.lexer.token.type import TokenType +from polyparser.utils.optional import Optional + + +class IndentationLexingRule (LexerRule): + def __init__(self, token_type: TokenType) -> None: + self.token_type = token_type + def try_lexing(self, reader: FileReader) -> Optional[List[Token]]: + with reader as (atomic, state): + if state.peek() != '\n': return None + state.poll() + 
if len(state) == 0: return Optional() + + res = [] + while len(state) != 0 and state.peek() in [ ' ', '\t' ]: + with reader as (atomic2, state2): + fchar = state2.poll() + + if fchar == ' ': + for _ in range(3): + if len(state2) == 0 or state2.peek() == '\n': + return Optional() + if state2.poll() != ' ': + assert False, "Inconsistent use of tabs in the beginning of the line" + + res.append( Token( self.token_type, state2.as_position() ) ) + + if len(state) != 0 and state.peek() == '\n': + return Optional() + if len(res) == 0: + return Optional() + return Optional( res ) diff --git a/polyparser/parser/__init__.py b/polyparser/parser/__init__.py index 427f5f2..1004706 100644 --- a/polyparser/parser/__init__.py +++ b/polyparser/parser/__init__.py @@ -8,16 +8,19 @@ class Parser: __context: ParserContext + __main : str - def __init__(self) -> None: + def __init__(self, main: str = "main") -> None: self.__context = self.get_context() + self.__main = main + def get_context (self) -> ParserContext: raise NotImplementedError() def try_parsing (self, tokens: List[Token]): stream = ParserStream( tokens ) - primitive = CallPrimitive( "main" ) + primitive = CallPrimitive( self.__main ) context = ParserContext( self.__context ) with stream as (atomic, state): @@ -28,9 +31,9 @@ def try_parsing (self, tokens: List[Token]): class FixedContextParser(Parser): __context: ParserContext - def __init__(self, context: ParserContext) -> None: + def __init__(self, context: ParserContext, *args, **kwargs) -> None: self.__context = context - super().__init__() + super().__init__(*args, **kwargs) def get_context(self) -> ParserContext: return self.__context diff --git a/polyparser/parser/primitives/augmented.py b/polyparser/parser/primitives/augmented.py index 2805223..7a5e6cc 100644 --- a/polyparser/parser/primitives/augmented.py +++ b/polyparser/parser/primitives/augmented.py @@ -48,7 +48,7 @@ def evaluate(self, stream: ParserStream, context: ParserContext) -> ParsingResul try: 
substate.store( self.__prim_type(*args) ) except Exception as exception: - print(exception) + print("Exception in augmented primitive", exception) subatomic.rollback() last_res = ParsingResult.FAILED break diff --git a/polyparser/parser/primitives/call.py b/polyparser/parser/primitives/call.py index 02d5394..5dcb785 100644 --- a/polyparser/parser/primitives/call.py +++ b/polyparser/parser/primitives/call.py @@ -20,7 +20,7 @@ def call(self, stream: ParserStream, context: ParserContext, arguments: List[Any if exists and isinstance(target, ParserNode): new_args = list(map(lambda arg : BoundNode( arg, context ), self.__args)) - print(new_args, arguments) + return target.call(stream, context, new_args + arguments) return ParsingResult.FAILED def evaluate(self, stream: ParserStream, context: ParserContext) -> ParsingResult: diff --git a/tests/alphabet/test_poly_language.py b/tests/alphabet/test_poly_language.py new file mode 100644 index 0000000..d61c304 --- /dev/null +++ b/tests/alphabet/test_poly_language.py @@ -0,0 +1,610 @@ + +from ast import Tuple +from typing import List +from polyparser.io.reader import FileReader +from polyparser.languages.language import PolyLanguage +from polyparser.lexer import Lexer +from polyparser.parser.context import ParserContext +from polyparser.parser.node import BoundNode, ParserNode +from polyparser.parser.primitives.augmented import AugmentedPrimitive, AugmentedType +from polyparser.parser.primitives.branch import OrPrimitive +from polyparser.parser.primitives.call import CallPrimitive +from polyparser.parser.primitives.function import FunctionNode +from polyparser.parser.primitives.list import ListPrimitive +from polyparser.parser.primitives.token import TokenPrimitive +from polyparser.parser.result import ParsingResult +from polyparser.parser.stream import ParserStream + +POLY_LANGUAGE_SCRIPT = """ +def token_primitive(a, b)(c): TokenPrimitive = + /SLASH/ ?//SLASH/ //NAME/ ?[/DOT/ //STRING/]^Aug | */SLASH/ +""" + 
+POLY_LANGUAGE_TOKENS = [ + # def token_primitive: TokenPrimitive = + ("DEFINE", "def"), ("NAME", "token_primitive"), + ("L_B", "("), ("NAME", "a"), ("COMMA", ","), ("NAME", "b"), ("R_B", ")"), + ("L_B", "("), ("NAME", "c"), ("R_B", ")"), + ("TWODOTS", ":"), ("NAME", "TokenPrimitive"), ("EQUALS", "="), + ("INDENT", " "), + + # /SLASH/ + ("DASH", "/"), ("NAME", "SLASH"), ("DASH", "/"), + + # ?//SLASH/ + ("QMARK", "?"), ("DASH", "/"), ("DASH", "/"), ("NAME", "SLASH"), ("DASH", "/"), + + # //NAME/ + ("DASH", "/"), ("DASH", "/"), ("NAME", "NAME"), ("DASH", "/"), + + # ?[/DOT/ //STRING/] + ("QMARK", "?"), ("L_SQ_B", "["), ("DASH", "/"), ("NAME", "DOT"), ("DASH", "/"), + ("DASH", "/"), ("DASH", "/"), ("NAME", "STRING"), ("DASH", "/"), ("R_SQ_B", "]"), + ("HAT", "^"), ("NAME", "Aug"), ("PIPE", "|"), ("STAR", "*"), + + # /SLASH/ + ("DASH", "/"), ("NAME", "SLASH"), ("DASH", "/"), +] + +def test_poly_language_lexer (): + reader = FileReader( + "", + POLY_LANGUAGE_SCRIPT + ) + + lang = PolyLanguage() + lexr = lang.get_lexer() + + tokens = lexr.try_lexing( reader ) + + results = POLY_LANGUAGE_TOKENS + for index, token in enumerate(tokens): + assert token.name == results[index][0] + assert token.value == results[index][1] + +def wrap_runner (lexr: Lexer, prsr: ParserNode, augments = {}): + def run (string: str, args: List | None = None, expects: ParsingResult | None = None): + reader = FileReader("", string) + tokens = lexr.try_lexing(reader) + + stream = ParserStream( tokens ) + context = PolyLanguage(augments).get_parser().get_context() + + with stream as (atomic, state): + if args is None: + result = prsr.evaluate( stream, context ) + else: + result = prsr.call( stream, context, args ) + if expects is not None: + assert result == expects + + return state.poll_stored() + return run + +def readable_context (ctx: ParserContext): + dct = ctx._ParserContext__ctx + return { o: as_readable(dct[o], True) for o in dct.keys() } +def as_readable (node: ParserNode, can_be_anything = 
False): + if isinstance(node, list) and len(node) == 1: node = node[0] + if can_be_anything and not isinstance(node, ParserNode): + return node + assert isinstance(node, ParserNode) + if isinstance(node, BoundNode): + return (as_readable(node._BoundNode__sub_node), readable_context(node._BoundNode__context)) + if isinstance(node, TokenPrimitive): + return (node._TokenPrimitive__name, node._TokenPrimitive__stored, node._TokenPrimitive__expects) + if isinstance(node, ListPrimitive): + return list(map(as_readable, node._ListPrimitive__primitives)) + if isinstance(node, CallPrimitive): + return (node._CallPrimitive__name, list(map(as_readable, node._CallPrimitive__args))) + if isinstance(node, OrPrimitive): + return tuple(map(as_readable, node._OrPrimitive__primitives)) + if isinstance(node, AugmentedPrimitive): + return (as_readable( node._AugmentedPrimitive__sub_primitive ), node._AugmentedPrimitive__augment, node._AugmentedPrimitive__prim_type) + if isinstance (node, FunctionNode): + return (node._FunctionNode__name, node._FunctionNode__args_names, as_readable( node._FunctionNode__target )) +def verify_equality (node: ParserNode, data): + if isinstance(node, list) and len(node) == 1: node = node[0] + assert isinstance(node, ParserNode) + def verify_helper (n: ParserNode, d): + if isinstance(n, TokenPrimitive): + type, stored, expects = d + assert n._TokenPrimitive__name == type + assert n._TokenPrimitive__stored == stored + assert n._TokenPrimitive__expects == expects + if isinstance(n, ListPrimitive): + assert isinstance(d, list) + + assert len(n._ListPrimitive__primitives) == len(d) + for prim, _d in zip(n._ListPrimitive__primitives, d): + verify_helper(prim, _d) + if isinstance(n, OrPrimitive): + assert isinstance(d, tuple) + + assert len(n._OrPrimitive__primitives) == len(d) + for prim, _d in zip(n._OrPrimitive__primitives, d): + verify_helper(prim, _d) + if isinstance(n, CallPrimitive): + name, args = d + assert n._CallPrimitive__name == name + + assert 
len(n._CallPrimitive__args) == len(args) + + for prim, _d in zip(n._CallPrimitive__args, args): + verify_helper(prim, _d) + if isinstance(n, AugmentedPrimitive): + subp, aug, typ = d + + verify_helper( n._AugmentedPrimitive__sub_primitive, subp ) + assert n._AugmentedPrimitive__augment == aug + assert n._AugmentedPrimitive__prim_type == typ + + verify_helper(node, data) +def test_poly_language_token_parser (): + lang = PolyLanguage() + lexr = lang.get_lexer () + prsr = lang.token_parser () + + run = wrap_runner(lexr, prsr) + + def is_same (prim: "Tuple[TokenPrimitive]", type: str, stored = False, expects = None): + prim = prim[0] + + verify_equality(prim, (type, stored, expects)) + + is_same( run( "/DASH/" ), "DASH" ) + is_same( run( "//DASH/" ), "DASH", True ) + is_same( run( "//NAME:\"if\"/" ), "NAME", True, "if" ) + is_same( run( "/NAME:\"if\"/" ), "NAME", False, "if" ) + +def test_poly_language_simple_call_parser (): + lang = PolyLanguage() + lexr = lang.get_lexer () + prsr = lang.call_parser () + + run = wrap_runner(lexr, prsr) + + verify_equality( run("func")[0], ("func", []) ) +def test_poly_language_simple_primitive (): + lang = PolyLanguage() + lexr = lang.get_lexer () + for prsr in [ lang.simple_primitive_parser(), lang.primitive_parser() ]: + prsr = lang.simple_primitive_parser () + + run = wrap_runner(lexr, prsr) + + verify_equality( run( "/DASH/" ), ("DASH", False, None) ) + verify_equality( run( "//DASH/" ), ("DASH", True, None) ) + verify_equality( run( "//NAME:\"if\"/" ), ("NAME", True, "if") ) + verify_equality( run( "/NAME:\"if\"/" ), ("NAME", False, "if") ) + + verify_equality( run( "func" ), ("func", []) ) + + verify_equality( run( "[func [/L_B/ //NAME/ /R_B/]]" ), [ + ("func", []), + [ + ("L_B", False, None), + ("NAME", True, None), + ("R_B", False, None) + ] + ] ) + +def test_poly_language_list_primitive (): + lang = PolyLanguage() + lexr = lang.get_lexer() + prsr = lang.list_primitive_parser() + + run = wrap_runner(lexr, prsr) + + 
verify_equality( run("[]"), [] ) + verify_equality( run("[/DASH/]"), [("DASH", False, None)] ) + verify_equality( + run("[/SLASH/ //NAME/ /SLASH/]"), + [ ("SLASH", False, None), ("NAME", True, None), ("SLASH", False, None) ] ) + verify_equality( + run("[/L_SQ_B:\"[\"/ func //R_SQ_B:\"]\"/]"), + [ ("L_SQ_B", False, "["), ("func", []), ("R_SQ_B", True, "]") ] ) + +def test_poly_language_augmented_primitive (): + def a(): return None + def b(): return None + lang = PolyLanguage({ "a": a, "b": b }) + lexr = lang.get_lexer() + prsr = lang.augmented_primitive_parser() + + run = wrap_runner(lexr, prsr, { "a": a, "b": b }) + + verify_equality( run(" /DASH/"), ( "DASH", False, None ) ) + verify_equality( run("?/DASH/"), (( "DASH", False, None ), AugmentedType.OPTIONAL, None) ) + verify_equality( run("*/DASH/"), (( "DASH", False, None ), AugmentedType.ANY_AMOUNT, None) ) + verify_equality( run("+/DASH/"), (( "DASH", False, None ), AugmentedType.AT_LEAST_ONE, None) ) + verify_equality( run(" /DASH/^a"), (( "DASH", False, None ), None, a ) ) + verify_equality( run("?/DASH/^a"), (( "DASH", False, None ), AugmentedType.OPTIONAL, a) ) + verify_equality( run("*/DASH/^a"), (( "DASH", False, None ), AugmentedType.ANY_AMOUNT, a) ) + verify_equality( run("+/DASH/^a"), (( "DASH", False, None ), AugmentedType.AT_LEAST_ONE, a) ) + verify_equality( run(" [/DASH/^b]"), [(( "DASH", False, None ), None, b)]) + verify_equality( run("?[/DASH/^b]"), ([(( "DASH", False, None ), None, b)], AugmentedType.OPTIONAL, None) ) + verify_equality( run("*[/DASH/^b]"), ([(( "DASH", False, None ), None, b)], AugmentedType.ANY_AMOUNT, None) ) + verify_equality( run("+[/DASH/^b]"), ([(( "DASH", False, None ), None, b)], AugmentedType.AT_LEAST_ONE, None) ) +def test_poly_language_or_primitive (): + def a(): return None + def b(): return None + lang = PolyLanguage({ "a": a, "b": b }) + lexr = lang.get_lexer() + prsr = lang.or_primitive_parser() + + run = wrap_runner(lexr, prsr, { "a": a, "b": b }) + + 
verify_equality( run("//DASH/"), ("DASH", True, None) ) + verify_equality( run("//DASH/ | //SLASH/"), (("DASH", True, None), ("SLASH", True, None)) ) + verify_equality( run("//DASH/ | ?//SLASH/^a"), (("DASH", True, None), (("SLASH", True, None), AugmentedType.OPTIONAL, a)) ) + verify_equality( run("//DASH/ | [//DASH/ | //DASH/]"), + (("DASH", True, None), [(("DASH", True, None), ("DASH", True, None))]) ) + +def test_poly_language_call_one_argument (): + def a(): return None + def b(): return None + lang = PolyLanguage({ "a": a, "b": b }) + lexr = lang.get_lexer() + prsr = lang.or_primitive_parser() + + run = wrap_runner(lexr, prsr, { "a": a, "b": b }) + + verify_equality( run("function(indent)"), ('function', [[('indent', [])]]) ) + verify_equality( run( "function(indent /INDENTATION/)" ), + ('function', [[('indent', []), ('INDENTATION', False, None)]]) ) +def test_poly_language_call_mutli_argument (): + def a(): return None + def b(): return None + + lang = PolyLanguage({ "a": a, "b": b }) + lexr = lang.get_lexer() + prsr = lang.or_primitive_parser() + + run = wrap_runner(lexr, prsr, { "a": a, "b": b }) + + verify_equality(run("f(x y, z)([r(//G/)] | //H/)"), ( + 'f', + [ + [('x', []), ('y', [])], + [('z', [])], + [ + ( + [ + ( + 'r', + [ + [('G', True, None)] + ] + ) + ], + ('H', True, None) + ) + ] + ] + )) + +def test_poly_language_block (): + def a(): return None + def b(): return None + + lang = PolyLanguage({ "a": a, "b": b }) + lexr = lang.get_lexer() + prsr = lang.block_parser() + + run = wrap_runner(lexr, prsr, { "a": a, "b": b }) + + indentation = TokenPrimitive( "INDENT" ) + verify_equality( + run( "\n\tname f(h^b)\n\t//G/", [ indentation ] ), + [ + ('name', []), + ('f', [[ (('h', []), None, b) ]]), + ('G', True, None) + ] + ) + + run( "a", [ indentation ], ParsingResult.FAILED ) + prsr = lang.block_parser(True) + + run = wrap_runner(lexr, prsr, { "a": a, "b": b }) + + indentation = TokenPrimitive( "INDENT" ) + assert run( "a", [ indentation ], 
ParsingResult.IGNORED ) == [] + verify_equality( run( "a", [ ListPrimitive() ], ParsingResult.SUCCESS ), [ ("a", []) ] ) + +def test_poly_language_function (): + def a(): return None + def b(): return None + + lang = PolyLanguage({ "a": a, "b": b }) + lexr = lang.get_lexer() + prsr = lang.function_parser() + + run = wrap_runner(lexr, prsr, { "a": a, "b": b }) + + verify_equality( run("\ndef f = \n\tname", [ ListPrimitive() ]), ('f', [], [('name', [])]) ) + verify_equality( run("\n\tdef f = \n\t\tname", [ TokenPrimitive("INDENT") ]), ('f', [], [('name', [])]) ) + verify_equality( run("def f(x, z)(y) =\n\tname", [ ListPrimitive() ] ), ('f', ['x', 'z', 'y'], [('name', [])]) ) + verify_equality( run("def f: a =\n\tname", [ ListPrimitive() ]), + ('f', [], ([('name', [])], None, a) ) ) + verify_equality( run("def f(x, z)(y): a =\n\tname", [ ListPrimitive() ]), + ('f', ['x', 'z', 'y'], ([('name', [])], None, a)) ) + + verify_equality( + run("def f(x, z): a =\n\tdef g(y) =\n\t\tname", [ ListPrimitive() ]), + ( + 'f', ['x', 'z'], + ( + [('g', ['y'], [('name', [])])], + None, + a + ) + ) + ) + +def test_poly_language_primitives (): + lang = PolyLanguage({ + "TokenPrimitive" : "TokenPrimitive", + "CallPrimitive" : "CallPrimitive", + "ListPrimitive" : "ListPrimitive", + "OrPrimitive" : "OrPrimitive", + "AugmentedPrimitive" : "AugmentedPrimitive" + }) + lexr = lang.get_lexer() + prsr = lang.get_parser() + + def run (s: str): + return prsr.try_parsing( lexr.try_lexing(FileReader("", s)) ) + + verify_equality( + run("[/SLASH/ ?//SLASH/ //NAME/ ?[//TWODOTS/ //STRING/] /SLASH/]^TokenPrimitive"), + [ + ( + [ + ("SLASH", False, None), + (("SLASH", True, None), AugmentedType.OPTIONAL, None), + ("NAME", True, None), + ([ ("TWODOTS", True, None), ("STRING", True, None) ], AugmentedType.OPTIONAL, None), + ("SLASH", False, None) + ], + None, + "TokenPrimitive" + ) + ] + ) + verify_equality( + run("[/L_SQ_B/ *primitive /R_SQ_B/]^ListPrimitive"), + [ + ( + [ + ("L_SQ_B", False, None), + 
(("primitive", []), AugmentedType.ANY_AMOUNT, None), + ("R_SQ_B", False, None) + ], + None, + "ListPrimitive" + ) + ] + ) + verify_equality( + run("token_primitive | call_primitive | list_primitive"), + [( + ( "token_primitive", [] ), + ( "call_primitive", [] ), + ( "list_primitive", [] ) + )] + ) + verify_equality( + run("[?[//QMARK/ | //PLUS/ | //STAR/] simple_primitive ?[//BIND/ //NAME/]]^AugmentedPrimitive"), + [( + [ + ([ (("QMARK", True, None), ("PLUS", True, None), ("STAR", True, None)) ], AugmentedType.OPTIONAL, None), + ("simple_primitive", []), + ([ ("BIND", True, None), ("NAME", True, None) ], AugmentedType.OPTIONAL, None) + ], + None, + "AugmentedPrimitive" + )] + ) + + verify_equality( + run("[augmented_primitive *[/OR/ augmented_primitive]]^OrPrimitive"), + [( + [ + ('augmented_primitive', []), + ( + [('OR', False, None), ('augmented_primitive', [])], + AugmentedType.ANY_AMOUNT, + None + ) + ], + None, + 'OrPrimitive' + )] + ) + + +POLY_LANGUAGE_IN_POLY_LANGUAGE = """ +def token_primitive: TokenPrimitive = + /SLASH/ ?//SLASH/ //NAME/ ?[//TWODOTS/ //STRING/] /SLASH/ + +def list_primitive: ListPrimitive = + /L_SQ_B/ *primitive /R_SQ_B/ + +def call_primitive: CallPrimitive = + //NAME/ *[/L_B/ primitive *[/COMMA/ primitive] /R_B/] + +def simple_primitive = + token_primitive | call_primitive | list_primitive +def augmented_primitive: AugmentedPrimitive = + ?[//QMARK/ | //PLUS/ | //STAR/] + simple_primitive + ?[//BIND/ //NAME/] + +def or_primitive: OrPrimitive = + augmented_primitive *[/PIPE/ augmented_primitive] +def primitive = + or_primitive + +def block(indent): ListPrimitive = + +[[indent +primitive] | function(indent)] + +def function(indent): Function = + indent + /DEFINE/ //NAME/ + *[/L_B/ //NAME/ *[/COMMA/ //NAME/] /R_B/] + ?[//HAT/ //NAME/] + /EQUALS/ + block(indent /INDENTATION/) + +def main = + block([]) +""" +POLY_LANGUAGE_REPRESENTATION = [ + ( + 'token_primitive', [], + ( + [ + ('SLASH', False, None), (('SLASH', True, None), 
AugmentedType.OPTIONAL, None), + ('NAME', True, None), ([('TWODOTS', True, None), ('STRING', True, None)], AugmentedType.OPTIONAL, None), + ('SLASH', False, None) + ], + None, + 'TokenPrimitive' + ) + ), + ( + 'list_primitive', [], + ( + [ + ('L_SQ_B', False, None), (('primitive', []), AugmentedType.ANY_AMOUNT, None), ('R_SQ_B', False, None) + ], + None, + 'ListPrimitive' + ) + ), + ( + 'call_primitive', [], + ( + [ + ('NAME', True, None), + ( + [ + ('L_B', False, None), + ('primitive', []), + ( + [('COMMA', False, None), ('primitive', [])], + AugmentedType.ANY_AMOUNT, + None + ), + ('R_B', False, None) + ], + AugmentedType.ANY_AMOUNT, + None + ) + ], + None, + 'CallPrimitive' + ) + ), + ( + 'simple_primitive', [], + [ + (('token_primitive', []), ('call_primitive', []), ('list_primitive', [])) + ] + ), + ( + 'augmented_primitive', [], + ( + [ + ( + [ + (('QMARK', True, None), ('PLUS', True, None), ('STAR', True, None)) + ], + AugmentedType.OPTIONAL, + None + ), + ('simple_primitive', []), + ([('BIND', True, None), ('NAME', True, None)], AugmentedType.OPTIONAL, None) + ], + None, + 'AugmentedPrimitive' + ) + ), + ( + 'or_primitive', [], + ( + [ + ('augmented_primitive', []), + ([('OR', False, None), ('augmented_primitive', [])], AugmentedType.ANY_AMOUNT, None) + ], + None, + 'OrPrimitive' + ) + ), + ('primitive', [], [('or_primitive', [])]), + ( + 'block', ['indent'], + ( + [ + ( + [ + ( + [ + ('indent', []), + (('primitive', []), AugmentedType.AT_LEAST_ONE, None) + ], + ('function', [[('indent', [])]]) + ) + ], + AugmentedType.AT_LEAST_ONE, + None + ) + ], + None, + 'ListPrimitive' + ) + ), + ( + 'function', ['indent'], + ( + [ + ('indent', []), ('DEFINE', False, None), ('NAME', True, None), + ( + [ + ( + [ + ('L_B', False, None), + ([('NAME', True, None)], AugmentedType.OPTIONAL, None), + ('R_B', False, None) + ], + AugmentedType.ANY_AMOUNT, + None + ) + ], + AugmentedType.ANY_AMOUNT, + None + ), + ('EQUALS', False, None), + ('block', [[('indent', []), 
('INDENTATION', False, None)]]) + ], + None, + 'Function' + ) + ), + ('main', [], [('block', [[]])]) +] + +def test_poly_language_full (): + language = PolyLanguage({ + "TokenPrimitive" : "TokenPrimitive", + "CallPrimitive" : "CallPrimitive", + "ListPrimitive" : "ListPrimitive", + "OrPrimitive" : "OrPrimitive", + "AugmentedPrimitive" : "AugmentedPrimitive", + "Function" : "Function" + }) + result = language.parse( FileReader("polylanguage", POLY_LANGUAGE_IN_POLY_LANGUAGE) ) + + verify_equality(result, POLY_LANGUAGE_REPRESENTATION) \ No newline at end of file diff --git a/tests/lexer/rules/file_tests/indentation.txt b/tests/lexer/rules/file_tests/indentation.txt new file mode 100644 index 0000000..d32d342 --- /dev/null +++ b/tests/lexer/rules/file_tests/indentation.txt @@ -0,0 +1,8 @@ + + aaa + ccc + bbb + + +aaaa + bbbb diff --git a/tests/lexer/rules/file_tests/wrong-indentation.txt b/tests/lexer/rules/file_tests/wrong-indentation.txt new file mode 100644 index 0000000..349e96d --- /dev/null +++ b/tests/lexer/rules/file_tests/wrong-indentation.txt @@ -0,0 +1,3 @@ + + + aaaa diff --git a/tests/lexer/rules/test_indentation.py b/tests/lexer/rules/test_indentation.py new file mode 100644 index 0000000..cc0b3c2 --- /dev/null +++ b/tests/lexer/rules/test_indentation.py @@ -0,0 +1,37 @@ + +import pytest +from polyparser.io.reader import FileReader +from polyparser.lexer.rules.indentation import IndentationLexingRule + + +def test_indentation (): + reader = FileReader.open( "tests/lexer/rules/file_tests/indentation.txt" ) + + rule = IndentationLexingRule( "INDENTATION" ) + + expects = [ 4, 1, 2, 0, 0, 0, 1, 0 ] + offset = 0 + + with reader as (atomic, state): + while len(state) != 0: + result = rule.try_lexing(reader) + + if result is None: + state.poll() + else: + count = len(result.value) if result.exists else 0 + assert count == expects[offset] + offset += 1 +def test_wrong_indentation (): + reader = FileReader.open( "tests/lexer/rules/file_tests/wrong-indentation.txt" 
) + rule = IndentationLexingRule( "INDENTATION" ) + + with pytest.raises(AssertionError, match = "Inconsistent use of tabs in the beginning of the line"): + with reader as (atomic, state): + while len(state) != 0: + result = rule.try_lexing(reader) + + if result is None: + state.poll() + elif result.exists: + assert False From ebf7c13a159b1df25d5ca7d35fed5ceebf4863ab Mon Sep 17 00:00:00 2001 From: "theo.hollender" Date: Thu, 5 Dec 2024 12:15:28 +0100 Subject: [PATCH 5/8] add-parser: added source language and switched json parser to poly language json --- polyparser/languages/json.py | 84 ++++++++-------------------- polyparser/languages/language.py | 27 +++++++++ polyparser/parser/primitives/list.py | 2 + tests/alphabet/test_json_language.py | 7 ++- tests/alphabet/test_language.py | 7 ++- 5 files changed, 63 insertions(+), 64 deletions(-) diff --git a/polyparser/languages/json.py b/polyparser/languages/json.py index ecee132..79ac55d 100644 --- a/polyparser/languages/json.py +++ b/polyparser/languages/json.py @@ -1,7 +1,9 @@ import enum import string -from polyparser.languages.language import Language +from typing import Callable, Dict +from polyparser.io.reader import FileReader +from polyparser.languages.language import SourceLanguage from polyparser.lexer import Lexer from polyparser.lexer.rules.ignore import IgnoreLexerRule from polyparser.lexer.rules.keyword import KeywordLexerRule @@ -16,8 +18,20 @@ from polyparser.parser.primitives.token import TokenPrimitive from ast import literal_eval +JSON_POLY_LANGUAGE_SOURCE = """ +def string: String = + //STRING/ +def list: List = + /LSB/ ?[primitive *[/COMMA/ primitive]] /RSB/ +def map: Map = + /LCB/ ?[string /EQUIV/ primitive *[/COMMA/ string /EQUIV/ primitive]] /RCB/ +def primitive = + map | list | string +def main = + primitive +""" -class JsonLanguage(Language): +class JsonLanguage(SourceLanguage): alphabet: None | enum.Enum def __init__(self): self.alphabet = None @@ -60,61 +74,11 @@ def get_lexer(self) -> Lexer: 
]) return lexer - def get_parser(self) -> Parser: - alphabet = self.get_alphabet() - - context = ParserContext() - - context.set_element( "string", AugmentedPrimitive( - TokenPrimitive("STRING", True), - prim_type=lambda x: literal_eval(x.value))) - context.set_element( "list", AugmentedPrimitive( - ListPrimitive( - TokenPrimitive( "LSB" ), - AugmentedPrimitive( - ListPrimitive( - CallPrimitive( "main" ), - AugmentedPrimitive( - ListPrimitive( - TokenPrimitive("COMMA"), - CallPrimitive("main")), - augment=AugmentedType.ANY_AMOUNT), - AugmentedPrimitive( - TokenPrimitive("COMMA"), - augment=AugmentedType.OPTIONAL)), - augment=AugmentedType.OPTIONAL), - TokenPrimitive( "RSB" ) - ), - prim_type=lambda *args: list(args))) - context.set_element( "dict.equiv", ListPrimitive( - CallPrimitive("string"), - TokenPrimitive("EQUIV"), - CallPrimitive("main"))) - context.set_element( "dict", AugmentedPrimitive( - ListPrimitive( - TokenPrimitive( "LCB" ), - AugmentedPrimitive( - ListPrimitive( - CallPrimitive( "dict.equiv" ), - AugmentedPrimitive( - ListPrimitive( - TokenPrimitive("COMMA"), - CallPrimitive("dict.equiv")), - augment=AugmentedType.ANY_AMOUNT), - AugmentedPrimitive( - TokenPrimitive("COMMA"), - augment=AugmentedType.OPTIONAL)), - augment=AugmentedType.OPTIONAL), - TokenPrimitive( "RCB" ) - ), - prim_type=lambda *args: { - args[i]:args[i + 1] - for i in range(0, len(args), 2) - })) - context.set_element( "main", OrPrimitive( - CallPrimitive( "list" ), - CallPrimitive( "dict" ), - CallPrimitive( "string" ) - ) ) - - return FixedContextParser(context) \ No newline at end of file + def get_poly_language_source(self) -> FileReader: + return FileReader("", JSON_POLY_LANGUAGE_SOURCE) + def get_transcripts(self) -> Dict[str, Callable]: + return { + "String": lambda arg : literal_eval( arg.value ), + "List" : lambda *args : list( args ), + "Map" : lambda *args : { args[i] : args[i + 1] for i in range(0, len(args), 2) } + } \ No newline at end of file diff --git 
a/polyparser/languages/language.py b/polyparser/languages/language.py index acb60aa..6a10bb3 100644 --- a/polyparser/languages/language.py +++ b/polyparser/languages/language.py @@ -356,3 +356,30 @@ def get_parser(self) -> Parser: FunctionNode( "main", CallPrimitive( "block", ListPrimitive() ), [] ).evaluate( stream, context ) return FixedContextParser(context) + +class SourceLanguage (Language): + def __init__(self): + super().__init__() + def get_entry_point (self) -> str: + return "main" + def get_transcripts (self) -> Dict[str, Callable]: + raise NotImplementedError() + def get_poly_language_source (self) -> FileReader: + raise NotImplementedError() + def get_parser(self) -> Parser: + source = self.get_poly_language_source() + augments = self.get_transcripts() + + poly_language = PolyLanguage( augments ) + result = poly_language.parse( source ) + if len(result) == 1 and isinstance(result[0], ListPrimitive): + result = result[0].get_primitives() + + context = ParserContext() + stream = ParserStream ([]) + + for primitive in result: + assert isinstance(primitive, FunctionNode), "Top level primitives can only be functions" + + primitive.evaluate( stream, context ) + return FixedContextParser( context, self.get_entry_point() ) diff --git a/polyparser/parser/primitives/list.py b/polyparser/parser/primitives/list.py index d24f045..bd07f52 100644 --- a/polyparser/parser/primitives/list.py +++ b/polyparser/parser/primitives/list.py @@ -13,6 +13,8 @@ def __init__(self, *primitives: List[ParserNode]) -> None: super().__init__() self.__primitives = primitives + def get_primitives (self): + return list(self.__primitives) def call(self, stream: ParserStream, context: ParserContext, arguments: List[Any]): if len(arguments) == 0: return self.evaluate(stream, context) diff --git a/tests/alphabet/test_json_language.py b/tests/alphabet/test_json_language.py index 4686df4..0dea6fa 100644 --- a/tests/alphabet/test_json_language.py +++ b/tests/alphabet/test_json_language.py @@ -4,6 
+4,7 @@ import json import random import string +import time from polyparser.io.reader import FileReader from polyparser.languages.json import JsonLanguage @@ -141,6 +142,8 @@ def random_json (maxdepth=3, edepth=1, maxamount=3): return random_string() def test_simple_json (): + json_langs = [ JsonLanguage(), JsonLanguage_V1() ] + for _ in range(10): _json = random_json() @@ -149,9 +152,7 @@ def test_simple_json (): if random.choice([False, True]): string = string.replace("'", "\"") - json_langs = [ JsonLanguage(), JsonLanguage_V1() ] - for json_lang in json_langs: json_result = json_lang.parse( FileReader( "", string ) ) - + assert json_result[0] == _json diff --git a/tests/alphabet/test_language.py b/tests/alphabet/test_language.py index a7dcfb0..62f7720 100644 --- a/tests/alphabet/test_language.py +++ b/tests/alphabet/test_language.py @@ -1,6 +1,6 @@ import pytest -from polyparser.languages.language import Language +from polyparser.languages.language import Language, SourceLanguage def test_exceptions (): @@ -10,3 +10,8 @@ def test_exceptions (): Language.get_lexer(object()) with pytest.raises(NotImplementedError): Language.get_parser(object()) + with pytest.raises(NotImplementedError): + SourceLanguage.get_poly_language_source(object()) + with pytest.raises(NotImplementedError): + SourceLanguage.get_transcripts(object()) + assert SourceLanguage.get_entry_point(object()) == "main" \ No newline at end of file From f26406fad829d7aa2967e7ce90f6d3ae2ebf7b19 Mon Sep 17 00:00:00 2001 From: "theo.hollender" Date: Thu, 5 Dec 2024 16:15:04 +0100 Subject: [PATCH 6/8] add-parser: added documentation for parsing tools --- doc/reference/api/index.rst | 7 +- doc/reference/api/parser.rst | 132 +++++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+), 3 deletions(-) create mode 100644 doc/reference/api/parser.rst diff --git a/doc/reference/api/index.rst b/doc/reference/api/index.rst index 4105212..4f7e975 100644 --- a/doc/reference/api/index.rst +++ 
b/doc/reference/api/index.rst @@ -6,9 +6,10 @@ API Reference This page documents the inner API of the ``polyparser`` project. The project is separated into the following modules -#. :ref:`Input / Output ` - ```polyparser.io`` : responsible for file handling and generic stream objects.` -#. :ref:`Lexer ` - ```polyparser.lexer`` : handles tokenization of files.` -#. :ref:`Utils ` - ```polyparser.utils`` : contains tools that can be used in multiple other packages.` +#. :ref:`Input / Output ` - ``polyparser.io`` : responsible for file handling and generic stream objects. +#. :ref:`Lexer ` - ``polyparser.lexer`` : handles tokenization of files. +#. :ref:`Utils ` - ``polyparser.utils`` : contains tools that can be used in multiple other packages. +#. :ref:`Parser ` - ``polyparser.parser`` : contains the generic parsing framework. We will be using the following guidelines regarding the documentation : diff --git a/doc/reference/api/parser.rst b/doc/reference/api/parser.rst new file mode 100644 index 0000000..cc97f2e --- /dev/null +++ b/doc/reference/api/parser.rst @@ -0,0 +1,132 @@ +:tocdepth: 4 + +.. _`parser`: + +Parser +====== + +This page documents the inner API of the ``polyparser.parser`` package. +The parser is structured mostly around the following objects : + +#. :ref:`Context ` - Parsing context +#. :ref:`Cursor / Stream ` - Parser cursor and parser stream +#. :ref:`Nodes ` - Parser nodes (rules for parsing) + +.. _polyparser_parser_context: + +Module ``polyparser.parser.context`` +------------------------------------ + +``class ParserContext`` +~~~~~~~~~~~~~~~~~~~~~~~ + +A parser context contains the current variables, +it is similar to a classical variable container, which can be +linked to a parent variable container. It has both ``get_element`` +and ``set_element`` variables to allow for getting and setting variables. + +.. 
_polyparser_parser_cursor: + +Module ``polyparser.parser.stream`` +----------------------------------- + +``class ParserStream(SaveStream[ParserCursor])`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This object represents a stream of tokens as well as the +values parsed with the prefix already parsed. + +Module ``polyparser.parser.cursor`` +----------------------------------- + +``class ParserCursor(SavedState)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``ParserCursor`` is the ``SavedState`` implementation used +in the :ref:`save stream `. +It contains a list of tokens that can be polled +and peeked like a queue using the ``poll`` and ``peek`` methods. +It also contains the list of values that have been stored inside the state +as well as a way to clear this list and retrieve all of the data inside using +the ``store`` and ``poll_stored`` methods. + +.. _polyparser_parser_node: + +A ``ParserNode`` object represents a rule to parse the tokens inside the ``ParserStream``. +The set of primitives is described by the ``PLY-BCK 002 - PolyParser`` specification. Each +``ParserNode`` implements both the ``call`` and ``evaluate`` methods. These two methods +take ``stream`` and ``context`` parameters for the current state of the parsing, and the +``call`` method simulates a function being called and thus has a list of ``arguments`` that +are themselves primitives or functions. They both return a parsing result, either ``FAILED``, +``SUCCESS`` or ``IGNORED``. + +When the ``call`` method is called on a primitive that +does not implement a function-based architecture, if the list of arguments is empty, then it calls +the ``evaluate`` method. Otherwise, it raises an exception. This exception might not have a good +stack trace, so you need to be careful when you write the generic parser. 
+ +Module ``polyparser.parser.primitives`` +--------------------------------------- + +``class TokenPrimitive`` +~~~~~~~~~~~~~~~~~~~~~~~~ + +A token primitive parses a single token with a specific name. +The constructor of the primitive takes the name of the token type +it expects, whether to store it in the state (default is ``False``) and +the expected value of the token (default is ``None`` as the value can be anything). + +``class ListPrimitive`` +~~~~~~~~~~~~~~~~~~~~~~~~ + +The list primitive has a list of sub primitives, and both methods forward +arguments to the sub primitives in order. The parser result will then be +``FAILED`` if at least one of the results is ``FAILED``, ``IGNORED`` if all of them are ``IGNORED``, +and ``SUCCESS`` if at least one of them is ``SUCCESS``. If the result is failed, the state is rolled back. + +``class OrPrimitive`` +~~~~~~~~~~~~~~~~~~~~~ + +The or primitive allows to have a branch in the parsing. +It has a list of subprimitives and tries to parse them in order, +searching for the first that is successful, and in that case +it returns ``SUCCESS``. If none of them is successful, +and one of them is ignored, then the branch is ignored. +Otherwise, the or primitive fails. + +``class AugmentedPrimitive`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The augmented primitive allows to enhance the behaviour of +a single primitive by passing its stored data in a python function +and replacing the stored data by the result of this function. +It also allows to repeat this behaviour once, making it optional, repeating +it an arbitrary number of times, or repeating it at least once. + +``class CallPrimitive`` +~~~~~~~~~~~~~~~~~~~~~~~ + +A call primitive allows to call a function with arguments. +If we evaluate the primitive, then it is equivalent to calling the +primitive with an empty list of arguments. 
If we call the primitive +with a list of arguments, then it calls the target primitive (retrieved +from the context with the given name), with the arguments from the current +call primitive bound to the current context (using a ``BoundNode``) extended by +the old arguments. + +Module ``polyparser.parser.function`` +------------------------------------- + +``class FunctionNode`` +~~~~~~~~~~~~~~~~~~~~~~ + +This primitive allows to represent a repetitive or recursive procedure that +can have arguments. It is described by the name of the function, its sub procedure +and the list of argument names. When being evaluated, it would bind itself to the context +and would be stored inside the context to be called later. When being called, it would +generate a new context from the bound context and adding the arguments to the context using the argument +names in the same order. + +.. toctree:: + :hidden: + From e37a7b33a47b78e3085e7d30bc0fcfc6c04efd90 Mon Sep 17 00:00:00 2001 From: "theo.hollender" Date: Thu, 5 Dec 2024 17:26:25 +0100 Subject: [PATCH 7/8] add-parser: added poly language documentation --- doc/conf.py | 28 +++- doc/reference/api/index.rst | 5 +- doc/reference/api/language.rst | 233 +++++++++++++++++++++++++++++++++ 3 files changed, 264 insertions(+), 2 deletions(-) create mode 100644 doc/reference/api/language.rst diff --git a/doc/conf.py b/doc/conf.py index 0849da9..d52f722 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -39,4 +39,30 @@ html_sidebars = { '**': ['about.html', 'searchbox.html', 'navigation.html', 'versions.html'] } -html_context = module.generate_gitdata(html_version_root) +#html_context = module.generate_gitdata(html_version_root) + +# PolyLanguage + +from pygments.lexer import RegexLexer +from pygments import token +from sphinx.highlighting import lexers + +class PolyLanguageLexer(RegexLexer): + name = 'PolyLanguage' + + tokens = { + 'root': [ + (r'\.\.\.', token.Keyword), + (r': *[a-zA-Z_][a-zA-Z0-9_]*', token.Keyword), + 
(r'(\/(\/)?([a-zA-Z_][a-zA-Z0-9_]*(:"[^"]*")?)\/)|,|=|:|\(|\)', token.Literal), + (r'\^ *[a-zA-Z_][a-zA-Z0-9_]*', token.Keyword), + (r'def', token.Keyword), + (r'[a-zA-Z_]', token.Literal), + (r'\[|\]', token.Keyword), + (r'\+|\?|\*|\^|\|', token.Keyword), + (r'#[^\n]*', token.Comment), + (r'\s', token.Text) + ] + } + +lexers['polylanguage'] = PolyLanguageLexer(startinline=True) diff --git a/doc/reference/api/index.rst b/doc/reference/api/index.rst index 4f7e975..d8bc11d 100644 --- a/doc/reference/api/index.rst +++ b/doc/reference/api/index.rst @@ -10,6 +10,7 @@ This page documents the inner API of the ``polyparser`` project. The project is #. :ref:`Lexer ` - ``polyparser.lexer`` : handles tokenization of files. #. :ref:`Utils ` - ``polyparser.utils`` : contains tools that can be used in multiple other packages. #. :ref:`Parser ` - ``polyparser.parser`` : contains the generic parsing framework. +#. :ref:`Language ` - ``polyparser.language`` : contains the generic language framework to create custom languages. We will be using the following guidelines regarding the documentation : @@ -21,4 +22,6 @@ We will be using the following guidelines regarding the documentation : io lexer - utils \ No newline at end of file + utils + parser + language \ No newline at end of file diff --git a/doc/reference/api/language.rst b/doc/reference/api/language.rst new file mode 100644 index 0000000..69ae62f --- /dev/null +++ b/doc/reference/api/language.rst @@ -0,0 +1,233 @@ +:tocdepth: 4 + +.. _`language`: + +Language Definition +=================== + +This page documents the inner API of the ``polyparser.language`` package. It possesses the following interesting modules and classes + +#. :ref:`class Language ` - Class representing a generic language +#. :ref:`class PolyLanguage ` - Class representing the poly language. +#. :ref:`class SourceLanguage ` - Class representing a language with a source from poly language. + +.. 
_polyparser_language: + +``class Language`` +~~~~~~~~~~~~~~~~~~ + +A language is defined by its two main methods, ``get_lexer`` and ``get_parser``, +generating the lexer and parser for the specific language. It also defines a ``parse`` +method taking a ``FileReader`` that handles all the computations to parse for the language. + +.. _polyparser_poly_language: + +``class PolyLanguage`` +~~~~~~~~~~~~~~~~~~~~~~ + +This language is already defined and allows you to define the parser for a language +in a simple programming language. The programming language has a simple syntax inspired +from the Scala syntax. The primitive types used by augmented primitives can be passed as +an argument to the constructor of the class. For a language to be valid, the only top level +primitives are functions. The language will then automatically call the ``main`` function that +should not take any arguments. + +The respective primitives can be written in the following ways : + +.. code-block:: polylanguage + + # Token Primitives + + /TOKEN/ # Expects token of type "TOKEN" + //NAME/ # Expects token of type "NAME" and stores it in the state + /NAME:"if"/ # Expects token of type "NAME" and value "if" + //NAME:"if"/ # Expects token of type "NAME" and value "if" + # and stores it in the state + + # List Primitive + + [] # Empty primitive + [ /IF/ /LB/ /RB/ ] # Primitive matching "if ()" + + # Call Primitive + + name # Call the "name" function + block(/INDENT/) # Call the "block" function with a /INDENT/ + f(f, f(g)) # Call the function "f", with arguments + # f and f(g) that can then be called + + # Augmented Primitive + + ?//NAME/ # An optional name + +//NAME/ # A name that can be parsed an infinite amount of + # times, but needs to be parsed once. + *[/COMMA/ //NAME/] # Parse as many times as you want + # a comma followed by a name + //STRING/^String # Find a STRING token and then preprocess it + # to remove the escape characters using the + # String primitive type given to poly language. 
+ +//STRING/^String # Find many strings and preprocess all of them. + + # Or Primitive + + # An or primitive can be created by multiple augmented primitives + # separated by pipes to represent the choice. + + a | b | c # Choose the call primitive a or the call primitive b + # or the primitive c. + + //STRING/^String | [] # Choose a string or an empty primitive, + # it is equivalent to an optional string. + + # Function + + # Function with no arguments and that parses a simple name + def f = + //NAME/ + + # Function that parses a string and preprocesses it + def string: String = + //STRING/ + + # Function that parses a string after an indentation + # specified as an argument, and that preprocesses the string. + def block (indent): String = + indent //STRING/ + + # Example on how to do a dependency / context injection + def f(callback) = + def g = + ... + callback(g) + +One can write Poly Language easily in Poly Language in the following way : + +.. code-block:: polylanguage + + def token_primitive: TokenPrimitive = + /SLASH/ ?//SLASH/ //NAME/ ?[//TWODOTS/ //STRING/] /SLASH/ + + def list_primitive: ListPrimitive = + [/L_SQ_B/ *primitive /R_SQ_B/] + + def call_primitive: CallPrimitive = + //NAME/ *[/L_B/ primitive *[/COMMA/ primitive] /R_B/] + + def simple_primitive = + token_primitive | call_primitive | list_primitive + def augmented_primitive: AugmentedPrimitive = + ?[//QMARK/ | //PLUS/ | //STAR/] # ? | + | * + simple_primitive # sub primitive + ?[//BIND/ //NAME/] # + + def or_primitive: OrPrimitive = + augmented_primitive *[/OR/ augmented_primitive] + def primitive = or_primitive + + def block(indent*): ListPrimitive = + +[[indent +primitive] | function(indent)] + + def function(indent*): Function = + indent + /DEFINE/ //NAME/ + *[/LB/ //NAME/ *[/COMMA/ //NAME/] /RB/] + /SET/ + block(indent /INDENTATION/) + + def main = + block([]) + +.. 
_polyparser_source_language: + +``class SourceLanguage`` +~~~~~~~~~~~~~~~~~~~~~~~~ + +A source language is a language that generates its parser using Poly Language. The +language extending this abstract class should implement the ``get_poly_language_source`` +and ``get_transcripts``, returning respectively the ``FileReader`` for the language and +the dictionary for the primitive types of the augmented primitives of the language. One +can also implement ``get_entry_point`` returning the entry point of the parser, by default +``"main"``. + + +``class JsonLanguage`` +~~~~~~~~~~~~~~~~~~~~~~ + +WARNING, this language isn't complete and does not contain the implementations for JSON +numbers, booleans or null values. +This is the main example of a ``SourceLanguage``, which is defined in the following way : + +.. code-block:: python + + class JsonLanguage(SourceLanguage): + alphabet: None | enum.Enum + def __init__(self): + self.alphabet = None + + super().__init__() + + def get_alphabet (self): + if self.alphabet is None: + type_factory = TokenTypeFactory( "json-type-factory" ) + type_factory.add_token_type( "LCB" ) # Left Curly Bracket '{' + type_factory.add_token_type( "RCB" ) # Right Curly Bracket '}' + type_factory.add_token_type( "LSB" ) # Left Squared Bracket '[' + type_factory.add_token_type( "RSB" ) # Right Squared Bracket ']' + + type_factory.add_token_type( "COMMA" ) # COMMA ',' + type_factory.add_token_type( "EQUIV" ) # EQUIV ':' + + type_factory.add_token_type( "STRING" ) # String + + self.alphabet = type_factory.as_enumeration() + return self.alphabet + + def get_lexer(self) -> Lexer: + alphabet = self.get_alphabet() + + lexer = Lexer([ + StringLexerRule( "\"", alphabet.STRING ), + StringLexerRule( "'", alphabet.STRING ), + KeywordLexerRule({ + '{': alphabet.LCB, + '}': alphabet.RCB, + '[': alphabet.LSB, + ']': alphabet.RSB, + ',': alphabet.COMMA, + ':': alphabet.EQUIV + }), + IgnoreLexerRule(string.whitespace) + ]) + + return lexer + def 
get_poly_language_source(self) -> FileReader: + return FileReader("", JSON_POLY_LANGUAGE_SOURCE) + def get_transcripts(self) -> Dict[str, Callable]: + return { + "String": lambda arg : literal_eval( arg.value ), + "List" : lambda *args : list( args ), + "Map" : lambda *args : { args[i] : args[i + 1] for i in range(0, len(args), 2) } + } + +The ``JSON_POLY_LANGUAGE_SOURCE`` contains the source code for the parser and contains the following data : + +.. code-block:: polylanguage + + def string: String = + //STRING/ + def list: List = + /LSB/ ?[primitive *[/COMMA/ primitive]] /RSB/ + def map: Map = + /LCB/ + ?[string /EQUIV/ primitive *[/COMMA/ string /EQUIV/ primitive]] + /RCB/ + + def primitive = + map | list | string + + def main = + primitive + +.. toctree:: + :hidden: + From b44af067185dcfe106a005ebdf61326778b82203 Mon Sep 17 00:00:00 2001 From: "theo.hollender" Date: Thu, 5 Dec 2024 17:29:53 +0100 Subject: [PATCH 8/8] add-parser: fixed version documentation --- doc/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/conf.py b/doc/conf.py index d52f722..294b4fc 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -39,7 +39,7 @@ html_sidebars = { '**': ['about.html', 'searchbox.html', 'navigation.html', 'versions.html'] } -#html_context = module.generate_gitdata(html_version_root) +html_context = module.generate_gitdata(html_version_root) # PolyLanguage