From b8be80cb8b275ce440cfc960dfa0fb13e3192f90 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Sat, 22 Mar 2025 00:48:41 +0100 Subject: [PATCH 01/11] transformer overhaul --- hcl2/api.py | 2 +- hcl2/{transformer.py => dict_transformer.py} | 4 + hcl2/rule_transformer.py | 101 ++++ hcl2/serialization.py | 496 +++++++++++++++++++ test/helpers/hcl2_helper.py | 2 +- test/unit/test_dict_transformer.py | 2 +- 6 files changed, 604 insertions(+), 3 deletions(-) rename hcl2/{transformer.py => dict_transformer.py} (99%) create mode 100644 hcl2/rule_transformer.py create mode 100644 hcl2/serialization.py diff --git a/hcl2/api.py b/hcl2/api.py index 399ba929..1cec02a2 100644 --- a/hcl2/api.py +++ b/hcl2/api.py @@ -3,7 +3,7 @@ from lark.tree import Tree from hcl2.parser import parser, reconstruction_parser -from hcl2.transformer import DictTransformer +from hcl2.dict_transformer import DictTransformer from hcl2.reconstructor import HCLReconstructor, HCLReverseTransformer diff --git a/hcl2/transformer.py b/hcl2/dict_transformer.py similarity index 99% rename from hcl2/transformer.py rename to hcl2/dict_transformer.py index 382092d6..64c58bcb 100644 --- a/hcl2/transformer.py +++ b/hcl2/dict_transformer.py @@ -277,6 +277,10 @@ def heredoc_template_trim(self, args: List) -> str: def new_line_or_comment(self, args: List) -> _DiscardType: return Discard + # def EQ(self, args: List): + # print("EQ", args) + # return args + def for_tuple_expr(self, args: List) -> str: args = self.strip_new_line_tokens(args) for_expr = " ".join([self.to_tf_inline(arg) for arg in args[1:-1]]) diff --git a/hcl2/rule_transformer.py b/hcl2/rule_transformer.py new file mode 100644 index 00000000..8f0b922a --- /dev/null +++ b/hcl2/rule_transformer.py @@ -0,0 +1,101 @@ +# pylint: disable=missing-function-docstring,unused-argument +from typing import List, Union + +from lark import Transformer, Tree, Token +from lark.visitors import _Leaf_T, _Return_T, Discard + +from hcl2.serialization import ( + LarkRule, + LarkToken, + StartRule, + BodyRule, + BlockRule, + IdentifierRule, + IntLitRule, + FloatLitRule, + StringLitRule, + ExprTermRule, + ConditionalRule, + BinaryOpRule, + BinaryOperatorRule, + BinaryTermRule, + UnaryOpRule, + AttributeRule, + NewLineOrCommentRule, +) + +ArgsType = List[Union[Token, Tree]] + + +class RuleTransformer(Transformer): + """Takes a syntax tree generated by the parser and + transforms it to a tree of LarkRule instances + """ + + with_meta: bool + + @staticmethod + def is_type_keyword(value: str) -> bool: + return value in {"bool", "number", "string"} + + def __init__(self, with_meta: bool = False, with_comments: bool = True): + """ + :param with_meta: If set to true then adds `__start_line__` and `__end_line__` + parameters to the output dict. Default to false. 
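+ :param with_comments: If set to true then comment nodes
+ (new_line_or_comment rules) are kept in the transformed tree,
+ otherwise they are discarded. Defaults to true.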
+ """ + self._with_meta = with_meta + self._with_comments = with_comments + super().__init__() + + def start(self, args: ArgsType) -> StartRule: + return StartRule(args) + + def body(self, args: ArgsType) -> BodyRule: + return BodyRule(args) + + def block(self, args: ArgsType) -> BlockRule: + return BlockRule(args) + + def identifier(self, args: ArgsType) -> IdentifierRule: + return IdentifierRule(args) + + def int_lit(self, args: ArgsType) -> IntLitRule: + return IntLitRule(args) + + def float_lit(self, args: ArgsType) -> FloatLitRule: + return FloatLitRule(args) + + def string_lit(self, args: ArgsType) -> StringLitRule: + return StringLitRule(args) + + def expr_term(self, args: ArgsType) -> ExprTermRule: + return ExprTermRule(args) + + def conditional(self, args: ArgsType) -> ConditionalRule: + return ConditionalRule(args) + + def binary_operator(self, args: ArgsType) -> BinaryOperatorRule: + return BinaryOperatorRule(args) + + def binary_term(self, args: ArgsType) -> BinaryTermRule: + return BinaryTermRule(args) + + def unary_op(self, args: ArgsType) -> UnaryOpRule: + return UnaryOpRule(args) + + def binary_op(self, args: ArgsType) -> BinaryOpRule: + return BinaryOpRule(args) + + def attribute(self, args: ArgsType) -> AttributeRule: + return AttributeRule(args) + + def new_line_or_comment(self, args: ArgsType) -> NewLineOrCommentRule: + if self._with_comments: + return NewLineOrCommentRule(args) + return Discard + + def transform(self, tree: Tree[_Leaf_T]) -> LarkRule: + return super().transform(tree) + + def __default_token__(self, token: Token) -> LarkToken: + return LarkToken(token.type, token.value) diff --git a/hcl2/serialization.py b/hcl2/serialization.py new file mode 100644 index 00000000..15d75caa --- /dev/null +++ b/hcl2/serialization.py @@ -0,0 +1,496 @@ +from abc import ABC, abstractmethod +from json import JSONEncoder +from typing import List, Any, Union, Tuple, Optional + +from lark import Tree, Token + +ArgsType = List["LarkElement"] + + +def is_dollar_string(value: str) -> bool: + return value.startswith("${") and value.endswith("}") + + +def to_dollar_string(value: str) -> str: + if not is_dollar_string(value): + return f"${{{value}}}" + return value + + +def unwrap_dollar_string(value: str) -> str: + if is_dollar_string(value): + return value[2:-1] + return value + + +def wrap_into_parentheses(value: str) -> str: + if is_dollar_string(value): + value = unwrap_dollar_string(value) + return to_dollar_string(f"({value})") + + return f"({value})" + + +class LarkEncoder(JSONEncoder): + def default(self, obj: Any): + if isinstance(obj, LarkRule): + return obj.serialize() + else: + return super().default(obj) + + +class LarkElement(ABC): + @abstractmethod + def tree(self) -> Token: + raise NotImplementedError() + + @abstractmethod + def serialize(self) -> Any: + raise NotImplementedError() + + +class LarkToken(LarkElement): + def __init__(self, name: str, value: Union[str, int]): + self._name = name + self._value = value + + @property + def name(self) -> str: + return self._name + + @property + def value(self): + return self._value + + def serialize(self) -> Any: + return self._value + + def tree(self) -> Token: + return Token(self.name, self.value) + + def __str__(self) -> str: + return str(self._value) + + def __repr__(self) -> str: + return f"" + + +EQ_Token = LarkToken + + +class TokenSequence: + def __init__(self, tokens: List[LarkToken]): + self.tokens = tokens + + def tree(self) -> List[Token]: + return [token.tree() for token in self.tokens] + + def joined(self): + 
return "".join(str(token) for token in self.tokens) + + +class LarkRule(ABC): + _classes = [] + + @staticmethod + @abstractmethod + def rule_name() -> str: + raise NotImplementedError() + + @abstractmethod + def serialize(self) -> Any: + raise NotImplementedError() + + def tree(self) -> Tree: + result_children = [] + for child in self._children: + if child is None: + continue + + if isinstance(child, TokenSequence): + result_children.extend(child.tree()) + else: + result_children.append(child.tree()) + + return Tree(self.rule_name(), result_children) + + def __init__(self, children): + self._children: List[LarkElement] = children + + def __init_subclass__(cls, **kwargs): + cls._classes.append(cls) + + def __repr__(self): + return f"" + + +class StartRule(LarkRule): + + _children: Tuple["BodyRule"] + + @staticmethod + def rule_name() -> str: + return "start" + + @property + def body(self) -> "BodyRule": + return self._children[0] + + def serialize(self) -> Any: + return self.body.serialize() + + +class BodyRule(LarkRule): + + _children: List[ + Union[ + "NewLineOrCommentRule", + "AttributeRule", + "BlockRule", + ] + ] + + @staticmethod + def rule_name() -> str: + return "body" + + def serialize(self) -> Any: + blocks: List[BlockRule] = [] + attributes: List[AttributeRule] = [] + comments = [] + + for child in self._children: + if isinstance(child, BlockRule): + blocks.append(child) + if isinstance(child, AttributeRule): + attributes.append(child) + if isinstance(child, NewLineOrCommentRule): + child_comments = child.actual_comments() + if child_comments: + comments.extend(child_comments) + + result = {} + + for attribute in attributes: + result.update( + {attribute.identifier.serialize(): attribute.expression.serialize()} + ) + + result.update( + {block.labels[0].serialize(): block.serialize() for block in blocks} + ) + + if comments: + result["__comments__"] = comments + + return result + + +class BlockRule(LarkRule): + @staticmethod + def rule_name() -> str: + return "block" + + def __init__(self, children): + super().__init__(children) + *self._labels, self._body = children + + @property + def labels(self) -> List["IdentifierRule"]: + return list(filter(lambda label: label is not None, self._labels)) + + @property + def body(self) -> BodyRule: + return self._body + + def serialize(self) -> BodyRule: + result = self._body.serialize() + labels = self._labels + for label in reversed(labels[1:]): + result = {label.serialize(): result} + return result + + +class IdentifierRule(LarkRule): + + _children: Tuple[TokenSequence] + + @staticmethod + def rule_name() -> str: + return "identifier" + + def __init__(self, children): + children = [TokenSequence(children)] + super().__init__(children) + + def serialize(self) -> Any: + return self._children[0].joined() + + +class IntLitRule(LarkRule): + + _children: Tuple[TokenSequence] + + @staticmethod + def rule_name() -> str: + return "int_lit" + + def __init__(self, children): + children = [TokenSequence(children)] + super().__init__(children) + + def serialize(self) -> Any: + return self._children[0].joined() + + +class FloatLitRule(LarkRule): + + _children: Tuple[TokenSequence] + + @staticmethod + def rule_name() -> str: + return "float_lit" + + def __init__(self, children): + print("float_lit", children) + children = [TokenSequence(children)] + super().__init__(children) + + def serialize(self) -> Any: + return self._children[0].joined() + + +class StringLitRule(LarkRule): + + _children: List[LarkToken] + + @staticmethod + def rule_name() -> str: + 
return "STRING_LIT" + + def serialize(self) -> Any: + return TokenSequence(self._children).joined()[1:-1] + + +class Expression(LarkRule, ABC): + @staticmethod + def rule_name() -> str: + return "expression" + + +class ExprTermRule(Expression): + @staticmethod + def rule_name() -> str: + return "expr_term" + + def __init__(self, children): + self._parentheses = False + if ( + isinstance(children[0], LarkToken) + and children[0].name == "LPAR" + and isinstance(children[-1], LarkToken) + and children[-1].name == "RPAR" + ): + self._parentheses = True + children = children[1:-1] + super().__init__(children) + + @property + def parentheses(self) -> bool: + return self._parentheses + + def serialize(self) -> Any: + result = self._children[0].serialize() + if self._parentheses: + result = wrap_into_parentheses(result) + result = to_dollar_string(result) + return result + + def tree(self) -> Tree: + tree = super().tree() + if self.parentheses: + return Tree( + tree.data, [Token("LPAR", "("), *tree.children, Token("RPAR", ")")] + ) + return tree + + +class ConditionalRule(ExprTermRule): + + _children: Tuple[ + Expression, + Expression, + Expression, + ] + + @staticmethod + def rule_name(): + return "conditional" + + @property + def condition(self) -> Expression: + return self._children[0] + + @property + def if_true(self) -> Expression: + return self._children[1] + + @property + def if_false(self) -> Expression: + return self._children[2] + + def __init__(self, children): + super().__init__(children) + + def serialize(self) -> Any: + result = f"{self.condition.serialize()} ? {self.if_true.serialize()} : {self.if_false.serialize()}" + return to_dollar_string(result) + + +class BinaryOperatorRule(LarkRule): + _children: List[LarkToken] + + @staticmethod + def rule_name() -> str: + return "binary_operator" + + def serialize(self) -> Any: + return TokenSequence(self._children).joined() + + +class BinaryTermRule(LarkRule): + _children: Tuple[ + BinaryOperatorRule, + Optional["NewLineOrCommentRule"], + ExprTermRule, + ] + + @staticmethod + def rule_name() -> str: + return "binary_term" + + def __init__(self, children): + if len(children) == 2: + children.insert(1, None) + super().__init__(children) + + @property + def binary_operator(self) -> BinaryOperatorRule: + return self._children[0] + + @property + def comment(self) -> Optional["NewLineOrCommentRule"]: + return self._children[1] + + @property + def has_comment(self) -> bool: + return self.comment is not None + + @property + def expr_term(self) -> ExprTermRule: + return self._children[2] + + def serialize(self) -> Any: + return f"{self.binary_operator.serialize()} {self.expr_term.serialize()}" + + +class UnaryOpRule(Expression): + _children: Tuple[LarkToken, ExprTermRule] + + @staticmethod + def rule_name() -> str: + return "unary_op" + + @property + def unary_operator(self) -> str: + return str(self._children[0]) + + @property + def expr_term(self): + return self._children[1] + + def serialize(self) -> Any: + return to_dollar_string(f"{self.unary_operator}{self.expr_term.serialize()}") + + +class BinaryOpRule(Expression): + _children: Tuple[ + ExprTermRule, + BinaryTermRule, + "NewLineOrCommentRule", + ] + + @staticmethod + def rule_name() -> str: + return "binary_op" + + @property + def expr_term(self) -> ExprTermRule: + return self._children[0] + + @property + def binary_term(self) -> BinaryTermRule: + return self._children[1] + + def serialize(self) -> Any: + lhs = self.expr_term.serialize() + operator = 
self.binary_term.binary_operator.serialize() + rhs = self.binary_term.expr_term.serialize() + rhs = unwrap_dollar_string(rhs) + return to_dollar_string(f"{lhs} {operator} {rhs}") + + +class AttributeRule(LarkRule): + _children: Tuple[ + IdentifierRule, + EQ_Token, + Expression, + ] + + @staticmethod + def rule_name() -> str: + return "attribute" + + @property + def identifier(self) -> IdentifierRule: + return self._children[0] + + @property + def expression(self) -> Expression: + return self._children[2] + + def serialize(self) -> Any: + return {self.identifier.serialize(): self.expression.serialize()} + + +class NewLineOrCommentRule(LarkRule): + + _children: List[LarkToken] + + @staticmethod + def rule_name() -> str: + return "new_line_or_comment" + + def serialize(self) -> Any: + return TokenSequence(self._children).joined() + + def actual_comments(self) -> Optional[List[str]]: + comment = self.serialize() + if comment == "\n": + return None + + comment = comment.strip() + comments = comment.split("\n") + + result = [] + for comment in comments: + if comment.startswith("//"): + comment = comment[2:] + + elif comment.startswith("#"): + comment = comment[1:] + + if comment != "": + result.append(comment.strip()) + + return result diff --git a/test/helpers/hcl2_helper.py b/test/helpers/hcl2_helper.py index 5acee1e7..c39ee7fb 100644 --- a/test/helpers/hcl2_helper.py +++ b/test/helpers/hcl2_helper.py @@ -3,7 +3,7 @@ from lark import Tree from hcl2.parser import parser -from hcl2.transformer import DictTransformer +from hcl2.dict_transformer import DictTransformer class Hcl2Helper: diff --git a/test/unit/test_dict_transformer.py b/test/unit/test_dict_transformer.py index 122332eb..baad5ba9 100644 --- a/test/unit/test_dict_transformer.py +++ b/test/unit/test_dict_transformer.py @@ -2,7 +2,7 @@ from unittest import TestCase -from hcl2.transformer import DictTransformer +from hcl2.dict_transformer import DictTransformer class TestDictTransformer(TestCase): From e39b42918b4f6dca5694bd836faa5ee649b8e560 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Wed, 26 Mar 2025 21:28:54 +0100 Subject: [PATCH 02/11] reorganize code --- hcl2/rule_transformer.py | 101 ---- hcl2/rule_transformer/__init__.py | 0 hcl2/rule_transformer/json.py | 12 + hcl2/rule_transformer/rules/__init__.py | 0 hcl2/rule_transformer/rules/abstract.py | 93 ++++ hcl2/rule_transformer/rules/base.py | 122 +++++ hcl2/rule_transformer/rules/expression.py | 187 +++++++ hcl2/rule_transformer/rules/token_sequence.py | 63 +++ hcl2/rule_transformer/rules/whitespace.py | 36 ++ hcl2/rule_transformer/transformer.py | 118 +++++ hcl2/rule_transformer/utils.py | 23 + hcl2/serialization.py | 496 ------------------ 12 files changed, 654 insertions(+), 597 deletions(-) delete mode 100644 hcl2/rule_transformer.py create mode 100644 hcl2/rule_transformer/__init__.py create mode 100644 hcl2/rule_transformer/json.py create mode 100644 hcl2/rule_transformer/rules/__init__.py create mode 100644 hcl2/rule_transformer/rules/abstract.py create mode 100644 hcl2/rule_transformer/rules/base.py create mode 100644 hcl2/rule_transformer/rules/expression.py create mode 100644 hcl2/rule_transformer/rules/token_sequence.py create mode 100644 hcl2/rule_transformer/rules/whitespace.py create mode 100644 hcl2/rule_transformer/transformer.py create mode 100644 hcl2/rule_transformer/utils.py delete mode 100644 hcl2/serialization.py diff --git a/hcl2/rule_transformer.py b/hcl2/rule_transformer.py deleted file mode 100644 index 8f0b922a..00000000 --- a/hcl2/rule_transformer.py 
+++ /dev/null @@ -1,101 +0,0 @@ -# pylint: disable=missing-function-docstring,unused-argument -from typing import List, Union - -from lark import Transformer, Tree, Token -from lark.visitors import _Leaf_T, _Return_T, Discard - -from hcl2.serialization import ( - LarkRule, - LarkToken, - StartRule, - BodyRule, - BlockRule, - IdentifierRule, - IntLitRule, - FloatLitRule, - StringLitRule, - ExprTermRule, - ConditionalRule, - BinaryOpRule, - BinaryOperatorRule, - BinaryTermRule, - UnaryOpRule, - AttributeRule, - NewLineOrCommentRule, -) - -ArgsType = List[Union[Token, Tree]] - - -class RuleTransformer(Transformer): - """Takes a syntax tree generated by the parser and - transforms it to a tree of LarkRule instances - """ - - with_meta: bool - - @staticmethod - def is_type_keyword(value: str) -> bool: - return value in {"bool", "number", "string"} - - def __init__(self, with_meta: bool = False, with_comments: bool = True): - """ - :param with_meta: If set to true then adds `__start_line__` and `__end_line__` - parameters to the output dict. Default to false. - """ - self._with_meta = with_meta - self._with_comments = with_comments - super().__init__() - - def start(self, args: ArgsType) -> StartRule: - return StartRule(args) - - def body(self, args: ArgsType) -> BodyRule: - return BodyRule(args) - - def block(self, args: ArgsType) -> BlockRule: - return BlockRule(args) - - def identifier(self, args: ArgsType) -> IdentifierRule: - return IdentifierRule(args) - - def int_lit(self, args: ArgsType) -> IntLitRule: - return IntLitRule(args) - - def float_lit(self, args: ArgsType) -> FloatLitRule: - return FloatLitRule(args) - - def string_lit(self, args: ArgsType) -> StringLitRule: - return StringLitRule(args) - - def expr_term(self, args: ArgsType) -> ExprTermRule: - return ExprTermRule(args) - - def conditional(self, args: ArgsType) -> ConditionalRule: - return ConditionalRule(args) - - def binary_operator(self, args: ArgsType) -> BinaryOperatorRule: - return BinaryOperatorRule(args) - - def binary_term(self, args: ArgsType) -> BinaryTermRule: - return BinaryTermRule(args) - - def unary_op(self, args: ArgsType) -> UnaryOpRule: - return UnaryOpRule(args) - - def binary_op(self, args: ArgsType) -> BinaryOpRule: - return BinaryOpRule(args) - - def attribute(self, args: ArgsType) -> AttributeRule: - return AttributeRule(args) - - def new_line_or_comment(self, args: ArgsType) -> NewLineOrCommentRule: - if self._with_comments: - return NewLineOrCommentRule(args) - return Discard - - def transform(self, tree: Tree[_Leaf_T]) -> LarkRule: - return super().transform(tree) - - def __default_token__(self, token: Token) -> LarkToken: - return LarkToken(token.type, token.value) diff --git a/hcl2/rule_transformer/__init__.py b/hcl2/rule_transformer/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hcl2/rule_transformer/json.py b/hcl2/rule_transformer/json.py new file mode 100644 index 00000000..647b6683 --- /dev/null +++ b/hcl2/rule_transformer/json.py @@ -0,0 +1,12 @@ +from json import JSONEncoder +from typing import Any + +from hcl2.rule_transformer.rules.abstract import LarkRule + + +class LarkEncoder(JSONEncoder): + def default(self, obj: Any): + if isinstance(obj, LarkRule): + return obj.serialize() + else: + return super().default(obj) diff --git a/hcl2/rule_transformer/rules/__init__.py b/hcl2/rule_transformer/rules/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rule_transformer/rules/abstract.py new file mode 
100644 index 00000000..37f63a03 --- /dev/null +++ b/hcl2/rule_transformer/rules/abstract.py @@ -0,0 +1,93 @@ +from abc import ABC, abstractmethod +from typing import Any, Union, List, Optional + +from lark import Token, Tree +from lark.tree import Meta + + +class LarkElement(ABC): + @abstractmethod + def tree(self) -> Token: + raise NotImplementedError() + + @abstractmethod + def serialize(self) -> Any: + raise NotImplementedError() + + +class LarkToken(LarkElement): + def __init__(self, name: str, value: Union[str, int]): + self._name = name + self._value = value + + @property + def name(self) -> str: + return self._name + + @property + def value(self): + return self._value + + def serialize(self) -> Any: + return self._value + + def tree(self) -> Token: + return Token(self.name, self.value) + + def __str__(self) -> str: + return str(self._value) + + def __repr__(self) -> str: + return f"" + + +EQ_Token = LarkToken +COLON_TOKEN = LarkToken +LPAR_TOKEN = LarkToken # left parenthesis +RPAR_TOKEN = LarkToken # right parenthesis + + +class TokenSequence: + def __init__(self, tokens: List[LarkToken]): + self.tokens = tokens + + def tree(self) -> List[Token]: + return [token.tree() for token in self.tokens] + + def joined(self): + return "".join(str(token) for token in self.tokens) + + +class LarkRule(ABC): + @staticmethod + @abstractmethod + def rule_name() -> str: + raise NotImplementedError() + + @abstractmethod + def serialize(self) -> Any: + raise NotImplementedError() + + @property + def children(self) -> List[LarkElement]: + return self._children + + def tree(self) -> Tree: + result_children = [] + for child in self._children: + if child is None: + continue + + if isinstance(child, TokenSequence): + result_children.extend(child.tree()) + else: + result_children.append(child.tree()) + + return Tree(self.rule_name(), result_children) + + def __init__(self, children, meta: Optional[Meta] = None): + self._children = children + self._meta = meta + + def __repr__(self): + return f"" diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rule_transformer/rules/base.py new file mode 100644 index 00000000..f46d8039 --- /dev/null +++ b/hcl2/rule_transformer/rules/base.py @@ -0,0 +1,122 @@ +from typing import Tuple, Any, List, Union, Optional + +from lark.tree import Meta + +from hcl2.rule_transformer.rules.abstract import LarkRule, EQ_Token +from hcl2.rule_transformer.rules.expression import Expression +from hcl2.rule_transformer.rules.token_sequence import IdentifierRule + +from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule + + +class AttributeRule(LarkRule): + _children: Tuple[ + IdentifierRule, + EQ_Token, + Expression, + ] + + @staticmethod + def rule_name() -> str: + return "attribute" + + @property + def identifier(self) -> IdentifierRule: + return self._children[0] + + @property + def expression(self) -> Expression: + return self._children[2] + + def serialize(self) -> Any: + return {self.identifier.serialize(): self.expression.serialize()} + + +class BodyRule(LarkRule): + + _children: List[ + Union[ + NewLineOrCommentRule, + AttributeRule, + "BlockRule", + ] + ] + + @staticmethod + def rule_name() -> str: + return "body" + + def serialize(self) -> Any: + blocks: List[BlockRule] = [] + attributes: List[AttributeRule] = [] + comments = [] + + for child in self._children: + if isinstance(child, BlockRule): + blocks.append(child) + if isinstance(child, AttributeRule): + attributes.append(child) + if isinstance(child, NewLineOrCommentRule): + child_comments = 
child.actual_comments() + if child_comments: + comments.extend(child_comments) + + result = {} + + for attribute in attributes: + result.update( + {attribute.identifier.serialize(): attribute.expression.serialize()} + ) + + result.update( + {block.labels[0].serialize(): block.serialize() for block in blocks} + ) + + if comments: + result["__comments__"] = comments + + return result + + +class StartRule(LarkRule): + + _children: Tuple[BodyRule] + + @staticmethod + def rule_name() -> str: + return "start" + + @property + def body(self) -> BodyRule: + return self._children[0] + + def serialize(self) -> Any: + return self.body.serialize() + + +class BlockRule(LarkRule): + + _children: Tuple[BodyRule] + + @staticmethod + def rule_name() -> str: + return "block" + + def __init__(self, children, meta: Optional[Meta] = None): + super().__init__(children) + *self._labels, self._body = children + + @property + def labels(self) -> List[IdentifierRule]: + return list(filter(lambda label: label is not None, self._labels)) + + @property + def body(self) -> BodyRule: + return self._body + + def serialize(self) -> BodyRule: + result = self._body.serialize() + labels = self._labels + for label in reversed(labels[1:]): + result = {label.serialize(): result} + return result diff --git a/hcl2/rule_transformer/rules/expression.py b/hcl2/rule_transformer/rules/expression.py new file mode 100644 index 00000000..2a38912a --- /dev/null +++ b/hcl2/rule_transformer/rules/expression.py @@ -0,0 +1,187 @@ +from abc import ABC +from typing import Any, Tuple, Optional, List + +from lark import Tree, Token +from lark.tree import Meta + +from hcl2.rule_transformer.rules.abstract import ( + LarkRule, + LarkToken, + LPAR_TOKEN, + RPAR_TOKEN, +) +from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule +from hcl2.rule_transformer.rules.token_sequence import BinaryOperatorRule +from hcl2.rule_transformer.utils import ( + wrap_into_parentheses, + to_dollar_string, + unwrap_dollar_string, +) + + +class Expression(LarkRule, ABC): + @staticmethod + def rule_name() -> str: + return "expression" + + +class ExprTermRule(Expression): + + _children: Tuple[ + Optional[LPAR_TOKEN], + Optional[NewLineOrCommentRule], + Expression, + Optional[NewLineOrCommentRule], + Optional[RPAR_TOKEN], + ] + + @staticmethod + def rule_name() -> str: + return "expr_term" + + def __init__(self, children, meta: Optional[Meta] = None): + self._parentheses = False + if ( + isinstance(children[0], LarkToken) + and children[0].name == "LPAR" + and isinstance(children[-1], LarkToken) + and children[-1].name == "RPAR" + ): + self._parentheses = True + children = children[1:-1] + super().__init__(children, meta) + + @property + def parentheses(self) -> bool: + return self._parentheses + + def serialize(self) -> Any: + result = self._children[0].serialize() + if self.parentheses: + result = wrap_into_parentheses(result) + result = to_dollar_string(result) + return result + + def tree(self) -> Tree: + tree = super().tree() + if self.parentheses: + return Tree( + tree.data, [Token("LPAR", "("), *tree.children, Token("RPAR", ")")] + ) + return tree + + +class ConditionalRule(LarkRule): + + _children: Tuple[ + Expression, + Expression, + Expression, + ] + + @staticmethod + def rule_name(): + return "conditional" + + @property + def condition(self) -> Expression: + return self._children[0] + + @property + def if_true(self) -> Expression: + return self._children[1] + + @property + def if_false(self) -> Expression: + return self._children[2] + + def 
__init__(self, children, meta: Optional[Meta] = None): + super().__init__(children, meta) + + def serialize(self) -> Any: + result = f"{self.condition.serialize()} ? {self.if_true.serialize()} : {self.if_false.serialize()}" + return to_dollar_string(result) + + +class BinaryTermRule(LarkRule): + + _children: Tuple[ + BinaryOperatorRule, + Optional[NewLineOrCommentRule], + ExprTermRule, + ] + + @staticmethod + def rule_name() -> str: + return "binary_term" + + def __init__(self, children, meta: Optional[Meta] = None): + if len(children) == 2: + children.insert(1, None) + super().__init__(children, meta) + + @property + def binary_operator(self) -> BinaryOperatorRule: + return self._children[0] + + @property + def comment(self) -> Optional[NewLineOrCommentRule]: + return self._children[1] + + @property + def has_comment(self) -> bool: + return self.comment is not None + + @property + def expr_term(self) -> ExprTermRule: + return self._children[2] + + def serialize(self) -> Any: + return f"{self.binary_operator.serialize()} {self.expr_term.serialize()}" + + +class BinaryOpRule(Expression): + _children: Tuple[ + ExprTermRule, + BinaryTermRule, + NewLineOrCommentRule, + ] + + @staticmethod + def rule_name() -> str: + return "binary_op" + + @property + def expr_term(self) -> ExprTermRule: + return self._children[0] + + @property + def binary_term(self) -> BinaryTermRule: + return self._children[1] + + def serialize(self) -> Any: + lhs = self.expr_term.serialize() + operator = self.binary_term.binary_operator.serialize() + rhs = self.binary_term.expr_term.serialize() + rhs = unwrap_dollar_string(rhs) + return to_dollar_string(f"{lhs} {operator} {rhs}") + + +class UnaryOpRule(Expression): + + _children: Tuple[LarkToken, ExprTermRule] + + @staticmethod + def rule_name() -> str: + return "unary_op" + + @property + def operator(self) -> str: + return str(self._children[0]) + + @property + def expr_term(self): + return self._children[1] + + def serialize(self) -> Any: + return to_dollar_string(f"{self.operator}{self.expr_term.serialize()}") diff --git a/hcl2/rule_transformer/rules/token_sequence.py b/hcl2/rule_transformer/rules/token_sequence.py new file mode 100644 index 00000000..66e22e2f --- /dev/null +++ b/hcl2/rule_transformer/rules/token_sequence.py @@ -0,0 +1,63 @@ +from abc import ABC +from typing import Tuple, Any, List, Optional + +from lark.tree import Meta + +from hcl2.rule_transformer.rules.abstract import TokenSequence, LarkRule, LarkToken + + +class TokenSequenceRule(LarkRule, ABC): + + _children: Tuple[TokenSequence] + + def __init__(self, children: List[LarkToken], meta: Optional[Meta] = None): + children = [TokenSequence(children)] + super().__init__(children) + + def serialize(self) -> Any: + return self._children[0].joined() + + +class IdentifierRule(TokenSequenceRule): + @staticmethod + def rule_name() -> str: + return "identifier" + + def serialize(self) -> str: + return str(super().serialize()) + + +class IntLitRule(TokenSequenceRule): + @staticmethod + def rule_name() -> str: + return "int_lit" + + def serialize(self) -> float: + return int(super().serialize()) + + +class FloatLitRule(TokenSequenceRule): + @staticmethod + def rule_name() -> str: + return "float_lit" + + def serialize(self) -> float: + return float(super().serialize()) + + +class StringLitRule(TokenSequenceRule): + @staticmethod + def rule_name() -> str: + return "STRING_LIT" + + def serialize(self) -> str: + return str(super().serialize()) + + +class BinaryOperatorRule(TokenSequenceRule): + @staticmethod + def 
rule_name() -> str: + return "binary_operator" + + def serialize(self) -> str: + return str(super().serialize()) diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rule_transformer/rules/whitespace.py new file mode 100644 index 00000000..f56a386e --- /dev/null +++ b/hcl2/rule_transformer/rules/whitespace.py @@ -0,0 +1,36 @@ +from typing import Optional, List, Any + +from hcl2.rule_transformer.rules.abstract import TokenSequence, LarkToken, LarkRule + + +class NewLineOrCommentRule(LarkRule): + + _children: List[LarkToken] + + @staticmethod + def rule_name() -> str: + return "new_line_or_comment" + + def serialize(self) -> Any: + return TokenSequence(self._children).joined() + + def actual_comments(self) -> Optional[List[str]]: + comment = self.serialize() + if comment == "\n": + return None + + comment = comment.strip() + comments = comment.split("\n") + + result = [] + for comment in comments: + if comment.startswith("//"): + comment = comment[2:] + + elif comment.startswith("#"): + comment = comment[1:] + + if comment != "": + result.append(comment.strip()) + + return result diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py new file mode 100644 index 00000000..9e6af6ef --- /dev/null +++ b/hcl2/rule_transformer/transformer.py @@ -0,0 +1,118 @@ +# pylint: disable=missing-function-docstring,unused-argument +from typing import List, Union + +from lark import Transformer, Tree, Token +from lark.tree import Meta +from lark.visitors import _Leaf_T, Discard, v_args + +from hcl2.rule_transformer.rules.abstract import LarkToken, LarkRule +from hcl2.rule_transformer.rules.base import ( + StartRule, + BodyRule, + BlockRule, + AttributeRule, +) +from hcl2.rule_transformer.rules.expression import ( + BinaryTermRule, + ConditionalRule, + ExprTermRule, + BinaryOpRule, + UnaryOpRule, +) +from hcl2.rule_transformer.rules.token_sequence import ( + IdentifierRule, + IntLitRule, + FloatLitRule, + StringLitRule, + BinaryOperatorRule, +) +from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule + + +class RuleTransformer(Transformer): + """Takes a syntax tree generated by the parser and + transforms it to a tree of LarkRule instances + """ + + with_meta: bool + + @staticmethod + def is_type_keyword(value: str) -> bool: + return value in {"bool", "number", "string"} + + def __init__(self, with_meta: bool = False, with_comments: bool = True): + """ + :param with_meta: If set to true then adds `__start_line__` and `__end_line__` + parameters to the output dict. Default to false. 
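+ :param with_comments: If set to true then comment nodes
+ (new_line_or_comment rules) are kept in the transformed tree,
+ otherwise they are discarded. Defaults to true.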
+ """ + self._with_meta = with_meta + self._with_comments = with_comments + super().__init__() + + def transform(self, tree: Tree[_Leaf_T]) -> LarkRule: + return super().transform(tree) + + def __default_token__(self, token: Token) -> LarkToken: + return LarkToken(token.type, token.value) + + @v_args(meta=True) + def start(self, meta: Meta, args) -> StartRule: + return StartRule(args, meta) + + @v_args(meta=True) + def body(self, meta: Meta, args) -> BodyRule: + return BodyRule(args, meta) + + @v_args(meta=True) + def block(self, meta: Meta, args) -> BlockRule: + return BlockRule(args, meta) + + @v_args(meta=True) + def identifier(self, meta: Meta, args) -> IdentifierRule: + return IdentifierRule(args, meta) + + @v_args(meta=True) + def int_lit(self, meta: Meta, args) -> IntLitRule: + return IntLitRule(args, meta) + + @v_args(meta=True) + def float_lit(self, meta: Meta, args) -> FloatLitRule: + return FloatLitRule(args, meta) + + @v_args(meta=True) + def string_lit(self, meta: Meta, args) -> StringLitRule: + return StringLitRule(args, meta) + + @v_args(meta=True) + def expr_term(self, meta: Meta, args) -> ExprTermRule: + return ExprTermRule(args, meta) + + @v_args(meta=True) + def conditional(self, meta: Meta, args) -> ConditionalRule: + return ConditionalRule(args, meta) + + @v_args(meta=True) + def binary_operator(self, meta: Meta, args) -> BinaryOperatorRule: + return BinaryOperatorRule(args, meta) + + @v_args(meta=True) + def binary_term(self, meta: Meta, args) -> BinaryTermRule: + return BinaryTermRule(args, meta) + + @v_args(meta=True) + def unary_op(self, meta: Meta, args) -> UnaryOpRule: + return UnaryOpRule(args, meta) + + @v_args(meta=True) + def binary_op(self, meta: Meta, args) -> BinaryOpRule: + return BinaryOpRule(args, meta) + + @v_args(meta=True) + def attribute(self, meta: Meta, args) -> AttributeRule: + return AttributeRule(args, meta) + + @v_args(meta=True) + def new_line_or_comment(self, meta: Meta, args) -> NewLineOrCommentRule: + if self._with_comments: + return NewLineOrCommentRule(args, meta) + return Discard diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py new file mode 100644 index 00000000..060d3b53 --- /dev/null +++ b/hcl2/rule_transformer/utils.py @@ -0,0 +1,23 @@ +def is_dollar_string(value: str) -> bool: + if not isinstance(value, str): + return False + return value.startswith("${") and value.endswith("}") + + +def to_dollar_string(value: str) -> str: + if not is_dollar_string(value): + return f"${{{value}}}" + return value + + +def unwrap_dollar_string(value: str) -> str: + if is_dollar_string(value): + return value[2:-1] + return value + + +def wrap_into_parentheses(value: str) -> str: + if is_dollar_string(value): + value = unwrap_dollar_string(value) + return to_dollar_string(f"({value})") + return f"({value})" diff --git a/hcl2/serialization.py b/hcl2/serialization.py deleted file mode 100644 index 15d75caa..00000000 --- a/hcl2/serialization.py +++ /dev/null @@ -1,496 +0,0 @@ -from abc import ABC, abstractmethod -from json import JSONEncoder -from typing import List, Any, Union, Tuple, Optional - -from lark import Tree, Token - -ArgsType = List["LarkElement"] - - -def is_dollar_string(value: str) -> bool: - return value.startswith("${") and value.endswith("}") - - -def to_dollar_string(value: str) -> str: - if not is_dollar_string(value): - return f"${{{value}}}" - return value - - -def unwrap_dollar_string(value: str) -> str: - if is_dollar_string(value): - return value[2:-1] - return value - - -def 
wrap_into_parentheses(value: str) -> str: - if is_dollar_string(value): - value = unwrap_dollar_string(value) - return to_dollar_string(f"({value})") - - return f"({value})" - - -class LarkEncoder(JSONEncoder): - def default(self, obj: Any): - if isinstance(obj, LarkRule): - return obj.serialize() - else: - return super().default(obj) - - -class LarkElement(ABC): - @abstractmethod - def tree(self) -> Token: - raise NotImplementedError() - - @abstractmethod - def serialize(self) -> Any: - raise NotImplementedError() - - -class LarkToken(LarkElement): - def __init__(self, name: str, value: Union[str, int]): - self._name = name - self._value = value - - @property - def name(self) -> str: - return self._name - - @property - def value(self): - return self._value - - def serialize(self) -> Any: - return self._value - - def tree(self) -> Token: - return Token(self.name, self.value) - - def __str__(self) -> str: - return str(self._value) - - def __repr__(self) -> str: - return f"" - - -EQ_Token = LarkToken - - -class TokenSequence: - def __init__(self, tokens: List[LarkToken]): - self.tokens = tokens - - def tree(self) -> List[Token]: - return [token.tree() for token in self.tokens] - - def joined(self): - return "".join(str(token) for token in self.tokens) - - -class LarkRule(ABC): - _classes = [] - - @staticmethod - @abstractmethod - def rule_name() -> str: - raise NotImplementedError() - - @abstractmethod - def serialize(self) -> Any: - raise NotImplementedError() - - def tree(self) -> Tree: - result_children = [] - for child in self._children: - if child is None: - continue - - if isinstance(child, TokenSequence): - result_children.extend(child.tree()) - else: - result_children.append(child.tree()) - - return Tree(self.rule_name(), result_children) - - def __init__(self, children): - self._children: List[LarkElement] = children - - def __init_subclass__(cls, **kwargs): - cls._classes.append(cls) - - def __repr__(self): - return f"" - - -class StartRule(LarkRule): - - _children: Tuple["BodyRule"] - - @staticmethod - def rule_name() -> str: - return "start" - - @property - def body(self) -> "BodyRule": - return self._children[0] - - def serialize(self) -> Any: - return self.body.serialize() - - -class BodyRule(LarkRule): - - _children: List[ - Union[ - "NewLineOrCommentRule", - "AttributeRule", - "BlockRule", - ] - ] - - @staticmethod - def rule_name() -> str: - return "body" - - def serialize(self) -> Any: - blocks: List[BlockRule] = [] - attributes: List[AttributeRule] = [] - comments = [] - - for child in self._children: - if isinstance(child, BlockRule): - blocks.append(child) - if isinstance(child, AttributeRule): - attributes.append(child) - if isinstance(child, NewLineOrCommentRule): - child_comments = child.actual_comments() - if child_comments: - comments.extend(child_comments) - - result = {} - - for attribute in attributes: - result.update( - {attribute.identifier.serialize(): attribute.expression.serialize()} - ) - - result.update( - {block.labels[0].serialize(): block.serialize() for block in blocks} - ) - - if comments: - result["__comments__"] = comments - - return result - - -class BlockRule(LarkRule): - @staticmethod - def rule_name() -> str: - return "block" - - def __init__(self, children): - super().__init__(children) - *self._labels, self._body = children - - @property - def labels(self) -> List["IdentifierRule"]: - return list(filter(lambda label: label is not None, self._labels)) - - @property - def body(self) -> BodyRule: - return self._body - - def serialize(self) -> 
BodyRule: - result = self._body.serialize() - labels = self._labels - for label in reversed(labels[1:]): - result = {label.serialize(): result} - return result - - -class IdentifierRule(LarkRule): - - _children: Tuple[TokenSequence] - - @staticmethod - def rule_name() -> str: - return "identifier" - - def __init__(self, children): - children = [TokenSequence(children)] - super().__init__(children) - - def serialize(self) -> Any: - return self._children[0].joined() - - -class IntLitRule(LarkRule): - - _children: Tuple[TokenSequence] - - @staticmethod - def rule_name() -> str: - return "int_lit" - - def __init__(self, children): - children = [TokenSequence(children)] - super().__init__(children) - - def serialize(self) -> Any: - return self._children[0].joined() - - -class FloatLitRule(LarkRule): - - _children: Tuple[TokenSequence] - - @staticmethod - def rule_name() -> str: - return "float_lit" - - def __init__(self, children): - print("float_lit", children) - children = [TokenSequence(children)] - super().__init__(children) - - def serialize(self) -> Any: - return self._children[0].joined() - - -class StringLitRule(LarkRule): - - _children: List[LarkToken] - - @staticmethod - def rule_name() -> str: - return "STRING_LIT" - - def serialize(self) -> Any: - return TokenSequence(self._children).joined()[1:-1] - - -class Expression(LarkRule, ABC): - @staticmethod - def rule_name() -> str: - return "expression" - - -class ExprTermRule(Expression): - @staticmethod - def rule_name() -> str: - return "expr_term" - - def __init__(self, children): - self._parentheses = False - if ( - isinstance(children[0], LarkToken) - and children[0].name == "LPAR" - and isinstance(children[-1], LarkToken) - and children[-1].name == "RPAR" - ): - self._parentheses = True - children = children[1:-1] - super().__init__(children) - - @property - def parentheses(self) -> bool: - return self._parentheses - - def serialize(self) -> Any: - result = self._children[0].serialize() - if self._parentheses: - result = wrap_into_parentheses(result) - result = to_dollar_string(result) - return result - - def tree(self) -> Tree: - tree = super().tree() - if self.parentheses: - return Tree( - tree.data, [Token("LPAR", "("), *tree.children, Token("RPAR", ")")] - ) - return tree - - -class ConditionalRule(ExprTermRule): - - _children: Tuple[ - Expression, - Expression, - Expression, - ] - - @staticmethod - def rule_name(): - return "conditional" - - @property - def condition(self) -> Expression: - return self._children[0] - - @property - def if_true(self) -> Expression: - return self._children[1] - - @property - def if_false(self) -> Expression: - return self._children[2] - - def __init__(self, children): - super().__init__(children) - - def serialize(self) -> Any: - result = f"{self.condition.serialize()} ? 
{self.if_true.serialize()} : {self.if_false.serialize()}" - return to_dollar_string(result) - - -class BinaryOperatorRule(LarkRule): - _children: List[LarkToken] - - @staticmethod - def rule_name() -> str: - return "binary_operator" - - def serialize(self) -> Any: - return TokenSequence(self._children).joined() - - -class BinaryTermRule(LarkRule): - _children: Tuple[ - BinaryOperatorRule, - Optional["NewLineOrCommentRule"], - ExprTermRule, - ] - - @staticmethod - def rule_name() -> str: - return "binary_term" - - def __init__(self, children): - if len(children) == 2: - children.insert(1, None) - super().__init__(children) - - @property - def binary_operator(self) -> BinaryOperatorRule: - return self._children[0] - - @property - def comment(self) -> Optional["NewLineOrCommentRule"]: - return self._children[1] - - @property - def has_comment(self) -> bool: - return self.comment is not None - - @property - def expr_term(self) -> ExprTermRule: - return self._children[2] - - def serialize(self) -> Any: - return f"{self.binary_operator.serialize()} {self.expr_term.serialize()}" - - -class UnaryOpRule(Expression): - _children: Tuple[LarkToken, ExprTermRule] - - @staticmethod - def rule_name() -> str: - return "unary_op" - - @property - def unary_operator(self) -> str: - return str(self._children[0]) - - @property - def expr_term(self): - return self._children[1] - - def serialize(self) -> Any: - return to_dollar_string(f"{self.unary_operator}{self.expr_term.serialize()}") - - -class BinaryOpRule(Expression): - _children: Tuple[ - ExprTermRule, - BinaryTermRule, - "NewLineOrCommentRule", - ] - - @staticmethod - def rule_name() -> str: - return "binary_op" - - @property - def expr_term(self) -> ExprTermRule: - return self._children[0] - - @property - def binary_term(self) -> BinaryTermRule: - return self._children[1] - - def serialize(self) -> Any: - lhs = self.expr_term.serialize() - operator = self.binary_term.binary_operator.serialize() - rhs = self.binary_term.expr_term.serialize() - rhs = unwrap_dollar_string(rhs) - return to_dollar_string(f"{lhs} {operator} {rhs}") - - -class AttributeRule(LarkRule): - _children: Tuple[ - IdentifierRule, - EQ_Token, - Expression, - ] - - @staticmethod - def rule_name() -> str: - return "attribute" - - @property - def identifier(self) -> IdentifierRule: - return self._children[0] - - @property - def expression(self) -> Expression: - return self._children[2] - - def serialize(self) -> Any: - return {self.identifier.serialize(): self.expression.serialize()} - - -class NewLineOrCommentRule(LarkRule): - - _children: List[LarkToken] - - @staticmethod - def rule_name() -> str: - return "new_line_or_comment" - - def serialize(self) -> Any: - return TokenSequence(self._children).joined() - - def actual_comments(self) -> Optional[List[str]]: - comment = self.serialize() - if comment == "\n": - return None - - comment = comment.strip() - comments = comment.split("\n") - - result = [] - for comment in comments: - if comment.startswith("//"): - comment = comment[2:] - - elif comment.startswith("#"): - comment = comment[1:] - - if comment != "": - result.append(comment.strip()) - - return result From d9c2eca1f99a7edf9b6e16603755c5113dc8a8d7 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Wed, 2 Apr 2025 16:19:11 +0200 Subject: [PATCH 03/11] batch of different changes --- hcl2/rule_transformer/rules/abstract.py | 32 +++--- hcl2/rule_transformer/rules/base.py | 55 ++++++--- hcl2/rule_transformer/rules/expression.py | 104 +++++++++++------- 
hcl2/rule_transformer/rules/token_sequence.py | 31 +++--- hcl2/rule_transformer/rules/whitespace.py | 11 +- hcl2/rule_transformer/transformer.py | 12 +- hcl2/rule_transformer/utils.py | 9 ++ 7 files changed, 152 insertions(+), 102 deletions(-) diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rule_transformer/rules/abstract.py index 37f63a03..6c650ea3 100644 --- a/hcl2/rule_transformer/rules/abstract.py +++ b/hcl2/rule_transformer/rules/abstract.py @@ -4,14 +4,16 @@ from lark import Token, Tree from lark.tree import Meta +from hcl2.rule_transformer.utils import SerializationOptions + class LarkElement(ABC): @abstractmethod - def tree(self) -> Token: + def reverse(self) -> Any: raise NotImplementedError() @abstractmethod - def serialize(self) -> Any: + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: raise NotImplementedError() @@ -28,10 +30,10 @@ def name(self) -> str: def value(self): return self._value - def serialize(self) -> Any: + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: return self._value - def tree(self) -> Token: + def reverse(self) -> Token: return Token(self.name, self.value) def __str__(self) -> str: @@ -47,45 +49,45 @@ def __repr__(self) -> str: RPAR_TOKEN = LarkToken # right parenthesis -class TokenSequence: +class TokenSequence(LarkElement): def __init__(self, tokens: List[LarkToken]): self.tokens = tokens - def tree(self) -> List[Token]: - return [token.tree() for token in self.tokens] + def reverse(self) -> List[Token]: + return [token.reverse() for token in self.tokens] - def joined(self): + def serialize(self, options: SerializationOptions = SerializationOptions()): return "".join(str(token) for token in self.tokens) -class LarkRule(ABC): +class LarkRule(LarkElement, ABC): @staticmethod @abstractmethod def rule_name() -> str: raise NotImplementedError() @abstractmethod - def serialize(self) -> Any: + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: raise NotImplementedError() @property def children(self) -> List[LarkElement]: return self._children - def tree(self) -> Tree: + def reverse(self) -> Tree: result_children = [] for child in self._children: if child is None: continue if isinstance(child, TokenSequence): - result_children.extend(child.tree()) + result_children.extend(child.reverse()) else: - result_children.append(child.tree()) + result_children.append(child.reverse()) - return Tree(self.rule_name(), result_children) + return Tree(self.rule_name(), result_children, meta=self._meta) - def __init__(self, children, meta: Optional[Meta] = None): + def __init__(self, children: List, meta: Optional[Meta] = None): self._children = children self._meta = meta diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rule_transformer/rules/base.py index f46d8039..76d014e9 100644 --- a/hcl2/rule_transformer/rules/base.py +++ b/hcl2/rule_transformer/rules/base.py @@ -1,3 +1,4 @@ +from collections import defaultdict from typing import Tuple, Any, List, Union, Optional from lark.tree import Meta @@ -7,6 +8,7 @@ from hcl2.rule_transformer.rules.token_sequence import IdentifierRule from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule +from hcl2.rule_transformer.utils import SerializationOptions class AttributeRule(LarkRule): @@ -28,8 +30,8 @@ def identifier(self) -> IdentifierRule: def expression(self) -> Expression: return self._children[2] - def serialize(self) -> Any: - return {self.identifier.serialize(): self.expression.serialize()} + 
def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return {self.identifier.serialize(options): self.expression.serialize(options)} class BodyRule(LarkRule): @@ -46,18 +48,23 @@ class BodyRule(LarkRule): def rule_name() -> str: return "body" - def serialize(self) -> Any: + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: blocks: List[BlockRule] = [] attributes: List[AttributeRule] = [] comments = [] - + inline_comments = [] for child in self._children: + if isinstance(child, BlockRule): blocks.append(child) + if isinstance(child, AttributeRule): attributes.append(child) + # collect in-line comments from attribute assignments, expressions etc + inline_comments.extend(child.expression.inline_comments()) + if isinstance(child, NewLineOrCommentRule): - child_comments = child.actual_comments() + child_comments = child.to_list() if child_comments: comments.extend(child_comments) @@ -65,15 +72,27 @@ def serialize(self) -> Any: for attribute in attributes: result.update( - {attribute.identifier.serialize(): attribute.expression.serialize()} + { + attribute.identifier.serialize( + options + ): attribute.expression.serialize(options) + } ) - result.update( - {block.labels[0].serialize(): block.serialize() for block in blocks} - ) + result_blocks = defaultdict(list) + for block in blocks: + name = block.labels[0].serialize(options) + if name in result.keys(): + raise RuntimeError(f"Attribute {name} is already defined.") + result_blocks[name].append(block.serialize(options)) + + result.update(**result_blocks) - if comments: - result["__comments__"] = comments + if options.with_comments: + if comments: + result["__comments__"] = comments + if inline_comments: + result["__inline_comments__"] = inline_comments return result @@ -90,8 +109,8 @@ def rule_name() -> str: def body(self) -> BodyRule: return self._children[0] - def serialize(self) -> Any: - return self.body.serialize() + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return self.body.serialize(options) class BlockRule(LarkRule): @@ -103,7 +122,7 @@ def rule_name() -> str: return "block" def __init__(self, children, meta: Optional[Meta] = None): - super().__init__(children) + super().__init__(children, meta) *self._labels, self._body = children @property @@ -114,9 +133,11 @@ def labels(self) -> List[IdentifierRule]: def body(self) -> BodyRule: return self._body - def serialize(self) -> BodyRule: - result = self._body.serialize() + def serialize( + self, options: SerializationOptions = SerializationOptions() + ) -> BodyRule: + result = self._body.serialize(options) labels = self._labels for label in reversed(labels[1:]): - result = {label.serialize(): result} + result = {label.serialize(options): result} return result diff --git a/hcl2/rule_transformer/rules/expression.py b/hcl2/rule_transformer/rules/expression.py index 2a38912a..16daf310 100644 --- a/hcl2/rule_transformer/rules/expression.py +++ b/hcl2/rule_transformer/rules/expression.py @@ -16,6 +16,7 @@ wrap_into_parentheses, to_dollar_string, unwrap_dollar_string, + SerializationOptions, ) @@ -24,10 +25,35 @@ class Expression(LarkRule, ABC): def rule_name() -> str: return "expression" + def __init__(self, children, meta: Optional[Meta] = None): + super().__init__(children, meta) + + def inline_comments(self): + result = [] + for child in self._children: + + if isinstance(child, NewLineOrCommentRule): + result.extend(child.to_list()) + + elif isinstance(child, Expression): + 
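+ # recurse so comments nested inside sub-expressions are collected too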
result.extend(child.inline_comments()) + + return result + + def _possibly_insert_null_comments(self, children: List, indexes: List[int] = None): + for index in indexes: + try: + child = children[index] + except IndexError: + children.insert(index, None) + else: + if not isinstance(child, NewLineOrCommentRule): + children.insert(index, None) + class ExprTermRule(Expression): - _children: Tuple[ + type_ = Tuple[ Optional[LPAR_TOKEN], Optional[NewLineOrCommentRule], Expression, @@ -35,6 +61,8 @@ class ExprTermRule(Expression): Optional[RPAR_TOKEN], ] + _children: type_ + @staticmethod def rule_name() -> str: return "expr_term" @@ -48,34 +76,36 @@ def __init__(self, children, meta: Optional[Meta] = None): and children[-1].name == "RPAR" ): self._parentheses = True - children = children[1:-1] + else: + children = [None, *children, None] + + self._possibly_insert_null_comments(children, [1, 3]) super().__init__(children, meta) @property def parentheses(self) -> bool: return self._parentheses - def serialize(self) -> Any: - result = self._children[0].serialize() + @property + def expression(self) -> Expression: + return self._children[2] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + result = self.expression.serialize(options) if self.parentheses: result = wrap_into_parentheses(result) result = to_dollar_string(result) return result - def tree(self) -> Tree: - tree = super().tree() - if self.parentheses: - return Tree( - tree.data, [Token("LPAR", "("), *tree.children, Token("RPAR", ")")] - ) - return tree - -class ConditionalRule(LarkRule): +class ConditionalRule(Expression): _children: Tuple[ Expression, + Optional[NewLineOrCommentRule], Expression, + Optional[NewLineOrCommentRule], + Optional[NewLineOrCommentRule], Expression, ] @@ -83,27 +113,28 @@ class ConditionalRule(LarkRule): def rule_name(): return "conditional" + def __init__(self, children, meta: Optional[Meta] = None): + self._possibly_insert_null_comments(children, [1, 3, 4]) + super().__init__(children, meta) + @property def condition(self) -> Expression: return self._children[0] @property def if_true(self) -> Expression: - return self._children[1] + return self._children[2] @property def if_false(self) -> Expression: - return self._children[2] + return self._children[5] - def __init__(self, children, meta: Optional[Meta] = None): - super().__init__(children, meta) - - def serialize(self) -> Any: - result = f"{self.condition.serialize()} ? {self.if_true.serialize()} : {self.if_false.serialize()}" + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + result = f"{self.condition.serialize(options)} ? 
{self.if_true.serialize(options)} : {self.if_false.serialize(options)}" return to_dollar_string(result) -class BinaryTermRule(LarkRule): +class BinaryTermRule(Expression): _children: Tuple[ BinaryOperatorRule, @@ -116,28 +147,19 @@ def rule_name() -> str: return "binary_term" def __init__(self, children, meta: Optional[Meta] = None): - if len(children) == 2: - children.insert(1, None) + self._possibly_insert_null_comments(children, [1]) super().__init__(children, meta) @property def binary_operator(self) -> BinaryOperatorRule: return self._children[0] - @property - def comment(self) -> Optional[NewLineOrCommentRule]: - return self._children[1] - - @property - def has_comment(self) -> bool: - return self.comment is not None - @property def expr_term(self) -> ExprTermRule: return self._children[2] - def serialize(self) -> Any: - return f"{self.binary_operator.serialize()} {self.expr_term.serialize()}" + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return f"{self.binary_operator.serialize(options)} {self.expr_term.serialize(options)}" class BinaryOpRule(Expression): @@ -159,10 +181,14 @@ def expr_term(self) -> ExprTermRule: def binary_term(self) -> BinaryTermRule: return self._children[1] - def serialize(self) -> Any: - lhs = self.expr_term.serialize() - operator = self.binary_term.binary_operator.serialize() - rhs = self.binary_term.expr_term.serialize() + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + lhs = self.expr_term.serialize(options) + operator = self.binary_term.binary_operator.serialize(options) + rhs = self.binary_term.expr_term.serialize(options) + # below line is to avoid dollar string nested inside another dollar string, e.g.: + # hcl2: 15 + (10 * 12) + # desired json: "${15 + (10 * 12)}" + # undesired json: "${15 + ${(10 * 12)}}" rhs = unwrap_dollar_string(rhs) return to_dollar_string(f"{lhs} {operator} {rhs}") @@ -183,5 +209,5 @@ def operator(self) -> str: def expr_term(self): return self._children[1] - def serialize(self) -> Any: - return to_dollar_string(f"{self.operator}{self.expr_term.serialize()}") + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return to_dollar_string(f"{self.operator}{self.expr_term.serialize(options)}") diff --git a/hcl2/rule_transformer/rules/token_sequence.py b/hcl2/rule_transformer/rules/token_sequence.py index 66e22e2f..174e2510 100644 --- a/hcl2/rule_transformer/rules/token_sequence.py +++ b/hcl2/rule_transformer/rules/token_sequence.py @@ -1,9 +1,10 @@ from abc import ABC -from typing import Tuple, Any, List, Optional +from typing import Tuple, Any, List, Optional, Type from lark.tree import Meta from hcl2.rule_transformer.rules.abstract import TokenSequence, LarkRule, LarkToken +from hcl2.rule_transformer.utils import SerializationOptions class TokenSequenceRule(LarkRule, ABC): @@ -12,10 +13,13 @@ class TokenSequenceRule(LarkRule, ABC): def __init__(self, children: List[LarkToken], meta: Optional[Meta] = None): children = [TokenSequence(children)] - super().__init__(children) + super().__init__(children, meta) - def serialize(self) -> Any: - return self._children[0].joined() + def serialized_type(self) -> Type: + return str + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return self.serialized_type()(self._children[0].serialize(options)) class IdentifierRule(TokenSequenceRule): @@ -23,17 +27,14 @@ class IdentifierRule(TokenSequenceRule): def rule_name() -> str: return "identifier" - 
def serialize(self) -> str: - return str(super().serialize()) - class IntLitRule(TokenSequenceRule): @staticmethod def rule_name() -> str: return "int_lit" - def serialize(self) -> float: - return int(super().serialize()) + def serialized_type(self) -> Type: + return int class FloatLitRule(TokenSequenceRule): @@ -41,23 +42,19 @@ class FloatLitRule(TokenSequenceRule): def rule_name() -> str: return "float_lit" - def serialize(self) -> float: - return float(super().serialize()) + def serialized_type(self) -> Type: + return float class StringLitRule(TokenSequenceRule): @staticmethod def rule_name() -> str: + # TODO actually this is a terminal, but it doesn't matter for lark.Transformer class; + # nevertheless, try to change it to a rule in hcl2.lark return "STRING_LIT" - def serialize(self) -> str: - return str(super().serialize()) - class BinaryOperatorRule(TokenSequenceRule): @staticmethod def rule_name() -> str: return "binary_operator" - - def serialize(self) -> str: - return str(super().serialize()) diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rule_transformer/rules/whitespace.py index f56a386e..b37cedc4 100644 --- a/hcl2/rule_transformer/rules/whitespace.py +++ b/hcl2/rule_transformer/rules/whitespace.py @@ -1,6 +1,7 @@ from typing import Optional, List, Any from hcl2.rule_transformer.rules.abstract import TokenSequence, LarkToken, LarkRule +from hcl2.rule_transformer.utils import SerializationOptions class NewLineOrCommentRule(LarkRule): @@ -11,11 +12,13 @@ class NewLineOrCommentRule(LarkRule): def rule_name() -> str: return "new_line_or_comment" - def serialize(self) -> Any: - return TokenSequence(self._children).joined() + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return TokenSequence(self._children).serialize(options) - def actual_comments(self) -> Optional[List[str]]: - comment = self.serialize() + def to_list( + self, options: SerializationOptions = SerializationOptions() + ) -> Optional[List[str]]: + comment = self.serialize(options) if comment == "\n": return None diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index 9e6af6ef..1c7d6157 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -40,13 +40,7 @@ class RuleTransformer(Transformer): def is_type_keyword(value: str) -> bool: return value in {"bool", "number", "string"} - def __init__(self, with_meta: bool = False, with_comments: bool = True): - """ - :param with_meta: If set to true then adds `__start_line__` and `__end_line__` - parameters to the output dict. Default to false. 
- """ - self._with_meta = with_meta - self._with_comments = with_comments + def __init__(self): super().__init__() def transform(self, tree: Tree[_Leaf_T]) -> LarkRule: @@ -113,6 +107,4 @@ def attribute(self, meta: Meta, args) -> AttributeRule: @v_args(meta=True) def new_line_or_comment(self, meta: Meta, args) -> NewLineOrCommentRule: - if self._with_comments: - return NewLineOrCommentRule(args, meta) - return Discard + return NewLineOrCommentRule(args, meta) diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index 060d3b53..e083d628 100644 --- a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -1,3 +1,12 @@ +from dataclasses import dataclass + + +@dataclass +class SerializationOptions: + with_comments: bool = True + with_meta: bool = False + + def is_dollar_string(value: str) -> bool: if not isinstance(value, str): return False From 448ffd42050489eb92bbc5855a0905b04436c51f Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Fri, 4 Apr 2025 10:29:47 +0200 Subject: [PATCH 04/11] comments --- hcl2/rule_transformer/rules/whitespace.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rule_transformer/rules/whitespace.py index b37cedc4..96fe7c91 100644 --- a/hcl2/rule_transformer/rules/whitespace.py +++ b/hcl2/rule_transformer/rules/whitespace.py @@ -22,16 +22,19 @@ def to_list( if comment == "\n": return None - comment = comment.strip() comments = comment.split("\n") result = [] for comment in comments: - if comment.startswith("//"): - comment = comment[2:] + comment = comment.strip() - elif comment.startswith("#"): - comment = comment[1:] + for delimiter in ("//", "/*", "#"): + + if comment.startswith(delimiter): + comment = comment[len(delimiter) :] + + if comment.endswith("*/"): + comment = comment[:-2] if comment != "": result.append(comment.strip()) From 65f88bc3e7466b09108f4c0504c485d27e164558 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Wed, 2 Jul 2025 17:03:05 +0200 Subject: [PATCH 05/11] various changes --- hcl2/parser.py | 4 +- hcl2/rule_transformer/editor.py | 77 ++++++ hcl2/rule_transformer/hcl2.lark | 166 +++++++++++ hcl2/rule_transformer/processor.py | 258 ++++++++++++++++++ hcl2/rule_transformer/rules/abstract.py | 93 ++++--- hcl2/rule_transformer/rules/base.py | 53 ++-- hcl2/rule_transformer/rules/containers.py | 85 ++++++ hcl2/rule_transformer/rules/expression.py | 102 +++---- hcl2/rule_transformer/rules/indexing.py | 75 +++++ hcl2/rule_transformer/rules/literal_rules.py | 47 ++++ hcl2/rule_transformer/rules/strings.py | 50 ++++ hcl2/rule_transformer/rules/token_sequence.py | 116 ++++---- hcl2/rule_transformer/rules/tokens.py | 66 +++++ hcl2/rule_transformer/rules/tree.py | 106 +++++++ hcl2/rule_transformer/rules/whitespace.py | 46 +++- hcl2/rule_transformer/transformer.py | 103 +++++-- hcl2/rule_transformer/utils.py | 8 +- 17 files changed, 1232 insertions(+), 223 deletions(-) create mode 100644 hcl2/rule_transformer/editor.py create mode 100644 hcl2/rule_transformer/hcl2.lark create mode 100644 hcl2/rule_transformer/processor.py create mode 100644 hcl2/rule_transformer/rules/containers.py create mode 100644 hcl2/rule_transformer/rules/indexing.py create mode 100644 hcl2/rule_transformer/rules/literal_rules.py create mode 100644 hcl2/rule_transformer/rules/strings.py create mode 100644 hcl2/rule_transformer/rules/tokens.py create mode 100644 hcl2/rule_transformer/rules/tree.py diff --git a/hcl2/parser.py b/hcl2/parser.py index 79d50122..a0c87e34 
100644 --- a/hcl2/parser.py +++ b/hcl2/parser.py @@ -12,7 +12,7 @@ def parser() -> Lark: """Build standard parser for transforming HCL2 text into python structures""" return Lark.open( - "hcl2.lark", + "rule_transformer/hcl2.lark.lark", parser="lalr", cache=str(PARSER_FILE), # Disable/Delete file to effect changes to the grammar rel_to=__file__, @@ -29,7 +29,7 @@ def reconstruction_parser() -> Lark: if necessary. """ return Lark.open( - "hcl2.lark", + "rule_transformer/hcl2.lark", parser="lalr", # Caching must be disabled to allow for reconstruction until lark-parser/lark#1472 is fixed: # diff --git a/hcl2/rule_transformer/editor.py b/hcl2/rule_transformer/editor.py new file mode 100644 index 00000000..9efce08f --- /dev/null +++ b/hcl2/rule_transformer/editor.py @@ -0,0 +1,77 @@ +import dataclasses +from copy import copy, deepcopy +from typing import List, Optional, Set, Tuple + +from hcl2.rule_transformer.rules.abstract import LarkRule +from hcl2.rule_transformer.rules.base import BlockRule, StartRule + + +@dataclasses.dataclass +class TreePathElement: + + name: str + index: int = 0 + + +@dataclasses.dataclass +class TreePath: + + elements: List[TreePathElement] = dataclasses.field(default_factory=list) + + @classmethod + def build(cls, elements: List[Tuple[str, Optional[int]] | str]): + results = [] + for element in elements: + if isinstance(element, tuple): + if len(element) == 1: + result = TreePathElement(element[0], 0) + else: + result = TreePathElement(*element) + else: + result = TreePathElement(element, 0) + + results.append(result) + + return cls(results) + + def __iter__(self): + return self.elements.__iter__() + + def __len__(self): + return self.elements.__len__() + + +class Editor: + def __init__(self, rules_tree: LarkRule): + self.rules_tree = rules_tree + + @classmethod + def _find_one(cls, rules_tree: LarkRule, path_element: TreePathElement) -> LarkRule: + return cls._find_all(rules_tree, path_element.name)[path_element.index] + + @classmethod + def _find_all(cls, rules_tree: LarkRule, rule_name: str) -> List[LarkRule]: + children = [] + print("rule", rules_tree) + print("rule children", rules_tree.children) + for child in rules_tree.children: + if isinstance(child, LarkRule) and child.lark_name() == rule_name: + children.append(child) + + return children + + def find_by_path(self, path: TreePath, rule_name: str) -> List[LarkRule]: + path = deepcopy(path.elements) + + current_rule = self.rules_tree + while len(path) > 0: + current_path, *path = path + print(current_path, path) + current_rule = self._find_one(current_rule, current_path) + + return self._find_all(current_rule, rule_name) + + # def visit(self, path: TreePath) -> "Editor": + # + # while len(path) > 1: + # current = diff --git a/hcl2/rule_transformer/hcl2.lark b/hcl2/rule_transformer/hcl2.lark new file mode 100644 index 00000000..a7722118 --- /dev/null +++ b/hcl2/rule_transformer/hcl2.lark @@ -0,0 +1,166 @@ +// ============================================================================ +// Terminals +// ============================================================================ + +// Whitespace and Comments +NL_OR_COMMENT: /\n[ \t]*/ | /#.*\n/ | /\/\/.*\n/ | /\/\*(.|\n)*?(\*\/)/ + +// Keywords +IF : "if" +IN : "in" +FOR : "for" +FOR_EACH : "for_each" + +// Identifiers and Names +NAME : /[a-zA-Z_][a-zA-Z0-9_-]*/ +IDENTIFIER: NAME | IN | FOR | IF | FOR_EACH + +// Literals +ESCAPED_INTERPOLATION.2: /\$\$\{[^}]*\}/ +STRING_CHARS.1: /(?:(?!\$\$\{)(?!\$\{)[^"\\]|\\.|(?:\$(?!\$?\{)))+/ +DECIMAL : "0".."9" 
+NEGATIVE_DECIMAL : "-" DECIMAL +EXP_MARK : ("e" | "E") ("+" | "-")? DECIMAL+ +INT_LITERAL: NEGATIVE_DECIMAL? DECIMAL+ +FLOAT_LITERAL: (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) "." DECIMAL+ (EXP_MARK)? + | (NEGATIVE_DECIMAL? DECIMAL+ | NEGATIVE_DECIMAL+) (EXP_MARK) + +// Operators +BINARY_OP : DOUBLE_EQ | NEQ | LT | GT | LEQ | GEQ | MINUS | ASTERISK | SLASH | PERCENT | DOUBLE_AMP | DOUBLE_PIPE | PLUS +DOUBLE_EQ : "==" +NEQ : "!=" +LT : "<" +GT : ">" +LEQ : "<=" +GEQ : ">=" +MINUS : "-" +ASTERISK : "*" +SLASH : "/" +PERCENT : "%" +DOUBLE_AMP : "&&" +DOUBLE_PIPE : "||" +PLUS : "+" +NOT : "!" +QMARK : "?" + +// Punctuation +LPAR : "(" +RPAR : ")" +LBRACE : "{" +RBRACE : "}" +LSQB : "[" +RSQB : "]" +COMMA : "," +DOT : "." +EQ : /[ \t]*=(?!=|>)/ +COLON : ":" +DBLQUOTE : "\"" + +// Interpolation +INTERP_START : "${" + +// Splat Operators +ATTR_SPLAT : ".*" +FULL_SPLAT_START : "[*]" + +// Special Operators +FOR_OBJECT_ARROW : "=>" +ELLIPSIS : "..." +COLONS: "::" + +// Heredocs +HEREDOC_TEMPLATE : /<<(?P[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc)\n/ +HEREDOC_TEMPLATE_TRIM : /<<-(?P[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc_trim)\n/ + +// Ignore whitespace (but not newlines, as they're significant in HCL) +%ignore /[ \t]+/ + +// ============================================================================ +// Rules +// ============================================================================ + +// Top-level structure +start : body + +// Body and basic constructs +body : (new_line_or_comment? (attribute | block))* new_line_or_comment? +attribute : identifier EQ expression +block : identifier (identifier | string)* new_line_or_comment? LBRACE body RBRACE + +// Whitespace and comments +new_line_or_comment: ( NL_OR_COMMENT )+ + +// Basic literals and identifiers +identifier: IDENTIFIER +int_lit: INT_LITERAL +float_lit: FLOAT_LITERAL +string: DBLQUOTE string_part* DBLQUOTE +string_part: STRING_CHARS + | ESCAPED_INTERPOLATION + | interpolation + +// Expressions +?expression : expr_term | operation | conditional +interpolation: INTERP_START expression RBRACE +conditional : expression QMARK new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? expression + +// Operations +?operation : unary_op | binary_op +!unary_op : (MINUS | NOT) expr_term +binary_op : expression binary_term new_line_or_comment? +binary_term : binary_operator new_line_or_comment? expression +!binary_operator : BINARY_OP + +// Expression terms +expr_term : LPAR new_line_or_comment? expression new_line_or_comment? RPAR + | float_lit + | int_lit + | string + | tuple + | object + | function_call + | index_expr_term + | get_attr_expr_term + | identifier + | provider_function_call + | heredoc_template + | heredoc_template_trim + | attr_splat_expr_term + | full_splat_expr_term + | for_tuple_expr + | for_object_expr + +// Collections +tuple : LSQB (new_line_or_comment* expression new_line_or_comment* COMMA)* (new_line_or_comment* expression)? new_line_or_comment* RSQB +object : LBRACE new_line_or_comment? 
(new_line_or_comment* (object_elem | (object_elem COMMA)) new_line_or_comment*)* RBRACE +object_elem : object_elem_key ( EQ | COLON ) expression +object_elem_key : float_lit | int_lit | identifier | string | object_elem_key_dot_accessor | object_elem_key_expression +object_elem_key_expression : LPAR expression RPAR +object_elem_key_dot_accessor : identifier (DOT identifier)+ + +// Heredocs +heredoc_template : HEREDOC_TEMPLATE +heredoc_template_trim : HEREDOC_TEMPLATE_TRIM + +// Functions +function_call : identifier LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR +arguments : (expression (new_line_or_comment* COMMA new_line_or_comment* expression)* (COMMA | ELLIPSIS)? new_line_or_comment*) +provider_function_call: identifier COLONS identifier COLONS identifier LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR + +// Indexing and attribute access +index_expr_term : expr_term index +get_attr_expr_term : expr_term get_attr +attr_splat_expr_term : expr_term attr_splat +full_splat_expr_term : expr_term full_splat +?index : braces_index | short_index +braces_index : LSQB new_line_or_comment? expression new_line_or_comment? RSQB +short_index : DOT INT_LITERAL +get_attr : DOT identifier +attr_splat : ATTR_SPLAT get_attr* +full_splat : FULL_SPLAT_START (get_attr | index)* + +// For expressions +!for_tuple_expr : LSQB new_line_or_comment? for_intro new_line_or_comment? expression new_line_or_comment? for_cond? new_line_or_comment? RSQB +!for_object_expr : LBRACE new_line_or_comment? for_intro new_line_or_comment? expression FOR_OBJECT_ARROW new_line_or_comment? expression new_line_or_comment? ELLIPSIS? new_line_or_comment? for_cond? new_line_or_comment? RBRACE +!for_intro : FOR new_line_or_comment? identifier (COMMA identifier new_line_or_comment?)? new_line_or_comment? IN new_line_or_comment? expression new_line_or_comment? COLON new_line_or_comment? +!for_cond : IF new_line_or_comment? 
expression diff --git a/hcl2/rule_transformer/processor.py b/hcl2/rule_transformer/processor.py new file mode 100644 index 00000000..b854aff5 --- /dev/null +++ b/hcl2/rule_transformer/processor.py @@ -0,0 +1,258 @@ +from copy import copy, deepcopy +from typing import ( + List, + Optional, + Union, + Callable, + Any, + Tuple, + Generic, + TypeVar, + cast, + Generator, +) + +from hcl2.rule_transformer.rules.abstract import LarkRule, LarkElement +from hcl2.rule_transformer.rules.base import BlockRule, AttributeRule +from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule + +T = TypeVar("T", bound=LarkRule) + + +class RulesProcessor(Generic[T]): + """""" + + @classmethod + def _traverse( + cls, + node: T, + predicate: Callable[[T], bool], + current_depth: int = 0, + max_depth: Optional[int] = None, + ) -> List["RulesProcessor"]: + + results = [] + + if predicate(node): + results.append(cls(node)) + + if max_depth is not None and current_depth >= max_depth: + return results + + for child in node.children: + if child is None or not isinstance(child, LarkRule): + continue + + child_results = cls._traverse( + child, + predicate, + current_depth + 1, + max_depth, + ) + results.extend(child_results) + + return results + + def __init__(self, node: LarkRule): + self.node = node + + @property + def siblings(self): + if self.node.parent is None: + return None + return self.node.parent.children + + @property + def next_siblings(self): + if self.node.parent is None: + return None + return self.node.parent.children[self.node.index + 1 :] + + @property + def previous_siblings(self): + if self.node.parent is None: + return None + return self.node.parent.children[: self.node.index - 1] + + def walk(self) -> Generator[Tuple["RulesProcessor", List["RulesProcessor"]]]: + child_processors = [self.__class__(child) for child in self.node.children] + yield self, child_processors + for processor in child_processors: + if isinstance(processor.node, LarkRule): + for result in processor.walk(): + yield result + + def find_block( + self, + labels: List[str], + exact_match: bool = True, + max_depth: Optional[int] = None, + ) -> "RulesProcessor[BlockRule]": + return self.find_blocks(labels, exact_match, max_depth)[0] + + def find_blocks( + self, + labels: List[str], + exact_match: bool = True, + max_depth: Optional[int] = None, + ) -> List["RulesProcessor[BlockRule]"]: + """ + Find blocks by their labels. + + Args: + labels: List of label strings to match + exact_match: If True, all labels must match exactly. If False, labels can be a subset. + max_depth: Maximum depth to search + + Returns: + ... + """ + + def block_predicate(node: LarkRule) -> bool: + if not isinstance(node, BlockRule): + return False + + node_labels = [label.serialize() for label in node.labels] + + if exact_match: + return node_labels == labels + else: + # Check if labels is a prefix of node_labels + if len(labels) > len(node_labels): + return False + return node_labels[: len(labels)] == labels + + return cast( + List[RulesProcessor[BlockRule]], + self._traverse(self.node, block_predicate, max_depth=max_depth), + ) + + def attribute( + self, name: str, max_depth: Optional[int] = None + ) -> "RulesProcessor[AttributeRule]": + return self.find_attributes(name, max_depth)[0] + + def find_attributes( + self, name: str, max_depth: Optional[int] = None + ) -> List["RulesProcessor[AttributeRule]"]: + """ + Find attributes by their identifier name. 
+ + Args: + name: Attribute name to search for + max_depth: Maximum depth to search + + Returns: + List of TreePath objects for matching attributes + """ + + def attribute_predicate(node: LarkRule) -> bool: + if not isinstance(node, AttributeRule): + return False + return node.identifier.serialize() == name + + return self._traverse(self.node, attribute_predicate, max_depth=max_depth) + + def rule(self, rule_name: str, max_depth: Optional[int] = None): + return self.find_rules(rule_name, max_depth)[0] + + def find_rules( + self, rule_name: str, max_depth: Optional[int] = None + ) -> List["RulesProcessor"]: + """ + Find all rules of a specific type. + + Args: + rule_name: Name of the rule type to find + max_depth: Maximum depth to search + + Returns: + List of TreePath objects for matching rules + """ + + def rule_predicate(node: LarkRule) -> bool: + return node.lark_name() == rule_name + + return self._traverse(self.node, rule_predicate, max_depth=max_depth) + + def find_by_predicate( + self, predicate: Callable[[LarkRule], bool], max_depth: Optional[int] = None + ) -> List["RulesProcessor"]: + """ + Find all rules matching a custom predicate. + + Args: + predicate: Function that returns True for nodes to collect + max_depth: Maximum depth to search + + Returns: + List of TreePath objects for matching rules + """ + return self._traverse(self.node, predicate, max_depth) + + # Convenience methods + def get_all_blocks(self, max_depth: Optional[int] = None) -> List: + """Get all blocks in the tree.""" + return self.find_rules("block", max_depth) + + def get_all_attributes( + self, max_depth: Optional[int] = None + ) -> List["RulesProcessor"]: + """Get all attributes in the tree.""" + return self.find_rules("attribute", max_depth) + + def previous(self, skip_new_line: bool = True) -> Optional["RulesProcessor"]: + """Get the next sibling node.""" + if self.node.parent is None: + return None + + for sibling in reversed(self.previous_siblings): + if sibling is not None and isinstance(sibling, LarkRule): + if skip_new_line and isinstance(sibling, NewLineOrCommentRule): + continue + return self.__class__(sibling) + + def next(self, skip_new_line: bool = True) -> Optional["RulesProcessor"]: + """Get the next sibling node.""" + if self.node.parent is None: + return None + + for sibling in self.next_siblings: + if sibling is not None and isinstance(sibling, LarkRule): + if skip_new_line and isinstance(sibling, NewLineOrCommentRule): + continue + return self.__class__(sibling) + + def append_child( + self, new_node: LarkRule, indentation: bool = True + ) -> "RulesProcessor": + children = self.node.children + if indentation: + if isinstance(children[-1], NewLineOrCommentRule): + children.pop() + children.append(NewLineOrCommentRule.from_string("\n ")) + + new_node = deepcopy(new_node) + new_node.set_parent(self.node) + new_node.set_index(len(children)) + children.append(new_node) + return self.__class__(new_node) + + def replace(self, new_node: LarkRule) -> "RulesProcessor": + new_node = deepcopy(new_node) + + self.node.parent.children.pop(self.node.index) + self.node.parent.children.insert(self.node.index, new_node) + new_node.set_parent(self.node.parent) + new_node.set_index(self.node.index) + return self.__class__(new_node) + + # def insert_before(self, new_node: LarkRule) -> bool: + # """Insert a new node before this one.""" + # if self.parent is None or self.parent_index < 0: + # return False + # + # try: + # self.parent.children.insert(self.parent_index, new_node) + # except (IndexError, 
AttributeError): + # return False diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rule_transformer/rules/abstract.py index 6c650ea3..d3a3b634 100644 --- a/hcl2/rule_transformer/rules/abstract.py +++ b/hcl2/rule_transformer/rules/abstract.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Any, Union, List, Optional +from typing import Any, Union, List, Optional, Tuple, Callable from lark import Token, Tree from lark.tree import Meta @@ -8,8 +8,23 @@ class LarkElement(ABC): + @property + @abstractmethod + def lark_name(self) -> str: + raise NotImplementedError() + + def __init__(self, index: int = -1, parent: "LarkElement" = None): + self._index = index + self._parent = parent + + def set_index(self, i: int): + self._index = i + + def set_parent(self, node: "LarkElement"): + self._parent = node + @abstractmethod - def reverse(self) -> Any: + def to_lark(self) -> Any: raise NotImplementedError() @abstractmethod @@ -17,53 +32,42 @@ def serialize(self, options: SerializationOptions = SerializationOptions()) -> A raise NotImplementedError() -class LarkToken(LarkElement): - def __init__(self, name: str, value: Union[str, int]): - self._name = name +class LarkToken(LarkElement, ABC): + def __init__(self, value: Union[str, int]): self._value = value + super().__init__() @property - def name(self) -> str: - return self._name + @abstractmethod + def lark_name(self) -> str: + raise NotImplementedError() + + @property + @abstractmethod + def serialize_conversion(self) -> Callable: + raise NotImplementedError() @property def value(self): return self._value - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return self._value + def serialize(self, options: SerializationOptions = SerializationOptions()): + return self.serialize_conversion(self.value) - def reverse(self) -> Token: - return Token(self.name, self.value) + def to_lark(self) -> Token: + return Token(self.lark_name, self.value) def __str__(self) -> str: return str(self._value) def __repr__(self) -> str: - return f"" - - -EQ_Token = LarkToken -COLON_TOKEN = LarkToken -LPAR_TOKEN = LarkToken # left parenthesis -RPAR_TOKEN = LarkToken # right parenthesis - - -class TokenSequence(LarkElement): - def __init__(self, tokens: List[LarkToken]): - self.tokens = tokens - - def reverse(self) -> List[Token]: - return [token.reverse() for token in self.tokens] - - def serialize(self, options: SerializationOptions = SerializationOptions()): - return "".join(str(token) for token in self.tokens) + return f"" class LarkRule(LarkElement, ABC): - @staticmethod + @property @abstractmethod - def rule_name() -> str: + def lark_name(self) -> str: raise NotImplementedError() @abstractmethod @@ -74,22 +78,33 @@ def serialize(self, options: SerializationOptions = SerializationOptions()) -> A def children(self) -> List[LarkElement]: return self._children - def reverse(self) -> Tree: + @property + def parent(self): + return self._parent + + @property + def index(self): + return self._index + + def to_lark(self) -> Tree: result_children = [] for child in self._children: if child is None: continue - if isinstance(child, TokenSequence): - result_children.extend(child.reverse()) - else: - result_children.append(child.reverse()) + result_children.append(child.to_lark()) - return Tree(self.rule_name(), result_children, meta=self._meta) + return Tree(self.lark_name, result_children, meta=self._meta) - def __init__(self, children: List, meta: Optional[Meta] = None): + def __init__(self, children: 
List[LarkElement], meta: Optional[Meta] = None): + super().__init__() self._children = children self._meta = meta + for index, child in enumerate(children): + if child is not None: + child.set_index(index) + child.set_parent(self) + def __repr__(self): - return f"" + return f"" diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rule_transformer/rules/base.py index 76d014e9..6d0c4924 100644 --- a/hcl2/rule_transformer/rules/base.py +++ b/hcl2/rule_transformer/rules/base.py @@ -3,9 +3,9 @@ from lark.tree import Meta -from hcl2.rule_transformer.rules.abstract import LarkRule, EQ_Token +from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken from hcl2.rule_transformer.rules.expression import Expression -from hcl2.rule_transformer.rules.token_sequence import IdentifierRule +from hcl2.rule_transformer.rules.tokens import IdentifierToken, EQ_TOKEN from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule from hcl2.rule_transformer.utils import SerializationOptions @@ -13,17 +13,17 @@ class AttributeRule(LarkRule): _children: Tuple[ - IdentifierRule, - EQ_Token, + IdentifierToken, + EQ_TOKEN, Expression, ] - @staticmethod - def rule_name() -> str: + @property + def lark_name(self) -> str: return "attribute" @property - def identifier(self) -> IdentifierRule: + def identifier(self) -> IdentifierToken: return self._children[0] @property @@ -39,13 +39,13 @@ class BodyRule(LarkRule): _children: List[ Union[ NewLineOrCommentRule, - AttributeRule, + # AttributeRule, "BlockRule", ] ] - @staticmethod - def rule_name() -> str: + @property + def lark_name(self) -> str: return "body" def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: @@ -71,13 +71,7 @@ def serialize(self, options: SerializationOptions = SerializationOptions()) -> A result = {} for attribute in attributes: - result.update( - { - attribute.identifier.serialize( - options - ): attribute.expression.serialize(options) - } - ) + result.update(attribute.serialize(options)) result_blocks = defaultdict(list) for block in blocks: @@ -101,14 +95,14 @@ class StartRule(LarkRule): _children: Tuple[BodyRule] - @staticmethod - def rule_name() -> str: - return "start" - @property def body(self) -> BodyRule: return self._children[0] + @property + def lark_name(self) -> str: + return "start" + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: return self.body.serialize(options) @@ -117,16 +111,19 @@ class BlockRule(LarkRule): _children: Tuple[BodyRule] - @staticmethod - def rule_name() -> str: - return "block" - def __init__(self, children, meta: Optional[Meta] = None): super().__init__(children, meta) - *self._labels, self._body = children + + *self._labels, self._body = [ + child for child in children if not isinstance(child, LarkToken) + ] + + @property + def lark_name(self) -> str: + return "block" @property - def labels(self) -> List[IdentifierRule]: + def labels(self) -> List[IdentifierToken]: return list(filter(lambda label: label is not None, self._labels)) @property @@ -138,6 +135,6 @@ def serialize( ) -> BodyRule: result = self._body.serialize(options) labels = self._labels - for label in reversed(labels[1:]): + for label in reversed(labels): result = {label.serialize(options): result} return result diff --git a/hcl2/rule_transformer/rules/containers.py b/hcl2/rule_transformer/rules/containers.py new file mode 100644 index 00000000..c39f3ba2 --- /dev/null +++ b/hcl2/rule_transformer/rules/containers.py @@ -0,0 +1,85 @@ +from typing import Tuple, List, 
Optional, Union, Any + +from hcl2.rule_transformer.rules.abstract import LarkRule +from hcl2.rule_transformer.rules.expression import Expression +from hcl2.rule_transformer.rules.literal_rules import ( + FloatLitRule, + IntLitRule, + IdentifierRule, +) +from hcl2.rule_transformer.rules.strings import StringRule +from hcl2.rule_transformer.rules.tokens import ( + COLON_TOKEN, + EQ_TOKEN, + LBRACE_TOKEN, + COMMA_TOKEN, + RBRACE_TOKEN, +) +from hcl2.rule_transformer.rules.whitespace import ( + NewLineOrCommentRule, + InlineCommentMixIn, +) +from hcl2.rule_transformer.utils import SerializationOptions + + +class ObjectElemKeyRule(LarkRule): + _children: Tuple[Union[FloatLitRule, IntLitRule, IdentifierRule, StringRule]] + + @staticmethod + def lark_name() -> str: + return "object_elem_key" + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return self.children[0].serialize(options) + + +class ObjectElemRule(LarkRule): + + _children: Tuple[ + ObjectElemKeyRule, + Union[EQ_TOKEN, COLON_TOKEN], + Expression, + ] + + @staticmethod + def lark_name() -> str: + return "object_elem" + + @property + def key(self) -> ObjectElemKeyRule: + return self.children[0] + + @property + def expression(self): + return self.children[2] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return { + self.children[0].serialize(options): self.children[2].serialize(options) + } + + +class ObjectRule(InlineCommentMixIn): + + _children: Tuple[ + LBRACE_TOKEN, + Optional[NewLineOrCommentRule], + Tuple[Union[ObjectElemRule, Optional[COMMA_TOKEN], NewLineOrCommentRule], ...], + RBRACE_TOKEN, + ] + + @staticmethod + def lark_name() -> str: + return "object" + + @property + def elements(self) -> List[ObjectElemRule]: + return [ + child for child in self.children[1:-1] if isinstance(child, ObjectElemRule) + ] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + result = {} + for element in self.elements: + result.update(element.serialize()) + return result diff --git a/hcl2/rule_transformer/rules/expression.py b/hcl2/rule_transformer/rules/expression.py index 16daf310..8a03f813 100644 --- a/hcl2/rule_transformer/rules/expression.py +++ b/hcl2/rule_transformer/rules/expression.py @@ -1,17 +1,18 @@ from abc import ABC -from typing import Any, Tuple, Optional, List +from copy import deepcopy +from typing import Any, Tuple, Optional -from lark import Tree, Token from lark.tree import Meta from hcl2.rule_transformer.rules.abstract import ( - LarkRule, LarkToken, - LPAR_TOKEN, - RPAR_TOKEN, ) -from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule -from hcl2.rule_transformer.rules.token_sequence import BinaryOperatorRule +from hcl2.rule_transformer.rules.literal_rules import BinaryOperatorRule +from hcl2.rule_transformer.rules.tokens import LPAR_TOKEN, RPAR_TOKEN, QMARK_TOKEN, COLON_TOKEN +from hcl2.rule_transformer.rules.whitespace import ( + NewLineOrCommentRule, + InlineCommentMixIn, +) from hcl2.rule_transformer.utils import ( wrap_into_parentheses, to_dollar_string, @@ -20,36 +21,14 @@ ) -class Expression(LarkRule, ABC): - @staticmethod - def rule_name() -> str: +class Expression(InlineCommentMixIn, ABC): + @property + def lark_name(self) -> str: return "expression" def __init__(self, children, meta: Optional[Meta] = None): super().__init__(children, meta) - def inline_comments(self): - result = [] - for child in self._children: - - if isinstance(child, NewLineOrCommentRule): - 
result.extend(child.to_list()) - - elif isinstance(child, Expression): - result.extend(child.inline_comments()) - - return result - - def _possibly_insert_null_comments(self, children: List, indexes: List[int] = None): - for index in indexes: - try: - child = children[index] - except IndexError: - children.insert(index, None) - else: - if not isinstance(child, NewLineOrCommentRule): - children.insert(index, None) - class ExprTermRule(Expression): @@ -63,17 +42,17 @@ class ExprTermRule(Expression): _children: type_ - @staticmethod - def rule_name() -> str: + @property + def lark_name(self) -> str: return "expr_term" def __init__(self, children, meta: Optional[Meta] = None): self._parentheses = False if ( isinstance(children[0], LarkToken) - and children[0].name == "LPAR" + and children[0].lark_name == "LPAR" and isinstance(children[-1], LarkToken) - and children[-1].name == "RPAR" + and children[-1].lark_name == "RPAR" ): self._parentheses = True else: @@ -90,11 +69,14 @@ def parentheses(self) -> bool: def expression(self) -> Expression: return self._children[2] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize(self , unwrap: bool = False, options: SerializationOptions = SerializationOptions()) -> Any: result = self.expression.serialize(options) if self.parentheses: result = wrap_into_parentheses(result) result = to_dollar_string(result) + + if options.unwrap_dollar_string: + result = unwrap_dollar_string(result) return result @@ -102,19 +84,21 @@ class ConditionalRule(Expression): _children: Tuple[ Expression, + QMARK_TOKEN, Optional[NewLineOrCommentRule], Expression, Optional[NewLineOrCommentRule], + COLON_TOKEN, Optional[NewLineOrCommentRule], Expression, ] - @staticmethod - def rule_name(): + @property + def lark_name(self) -> str: return "conditional" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [1, 3, 4]) + self._possibly_insert_null_comments(children, [2, 4, 6]) super().__init__(children, meta) @property @@ -123,13 +107,15 @@ def condition(self) -> Expression: @property def if_true(self) -> Expression: - return self._children[2] + return self._children[3] @property def if_false(self) -> Expression: - return self._children[5] + return self._children[7] def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + options = options.replace(unwrap_dollar_string=True) + print(self.condition) result = f"{self.condition.serialize(options)} ? 
{self.if_true.serialize(options)} : {self.if_false.serialize(options)}" return to_dollar_string(result) @@ -142,8 +128,8 @@ class BinaryTermRule(Expression): ExprTermRule, ] - @staticmethod - def rule_name() -> str: + @property + def lark_name(self) -> str: return "binary_term" def __init__(self, children, meta: Optional[Meta] = None): @@ -166,11 +152,11 @@ class BinaryOpRule(Expression): _children: Tuple[ ExprTermRule, BinaryTermRule, - NewLineOrCommentRule, + Optional[NewLineOrCommentRule], ] - @staticmethod - def rule_name() -> str: + @property + def lark_name(self) -> str: return "binary_op" @property @@ -182,23 +168,23 @@ def binary_term(self) -> BinaryTermRule: return self._children[1] def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - lhs = self.expr_term.serialize(options) - operator = self.binary_term.binary_operator.serialize(options) - rhs = self.binary_term.expr_term.serialize(options) - # below line is to avoid dollar string nested inside another dollar string, e.g.: - # hcl2: 15 + (10 * 12) - # desired json: "${15 + (10 * 12)}" - # undesired json: "${15 + ${(10 * 12)}}" - rhs = unwrap_dollar_string(rhs) - return to_dollar_string(f"{lhs} {operator} {rhs}") + children_options = options.replace(unwrap_dollar_string=True) + lhs = self.expr_term.serialize(children_options) + operator = self.binary_term.binary_operator.serialize(children_options) + rhs = self.binary_term.expr_term.serialize(children_options) + + result = f"{lhs} {operator} {rhs}" + if options.unwrap_dollar_string: + return result + return to_dollar_string(result) class UnaryOpRule(Expression): _children: Tuple[LarkToken, ExprTermRule] - @staticmethod - def rule_name() -> str: + @property + def lark_name(self) -> str: return "unary_op" @property diff --git a/hcl2/rule_transformer/rules/indexing.py b/hcl2/rule_transformer/rules/indexing.py new file mode 100644 index 00000000..ce23d040 --- /dev/null +++ b/hcl2/rule_transformer/rules/indexing.py @@ -0,0 +1,75 @@ +from typing import List, Optional, Tuple, Any + +from lark.tree import Meta + +from hcl2.rule_transformer.rules.abstract import LarkRule +from hcl2.rule_transformer.rules.expression import ExprTermRule, Expression +from hcl2.rule_transformer.rules.tokens import ( + DOT_TOKEN, + IntToken, + LSQB_TOKEN, + RSQB_TOKEN, +) +from hcl2.rule_transformer.rules.whitespace import ( + InlineCommentMixIn, + NewLineOrCommentRule, +) +from hcl2.rule_transformer.utils import SerializationOptions, to_dollar_string + + +class ShortIndexRule(LarkRule): + + _children: Tuple[ + DOT_TOKEN, + IntToken, + ] + + @property + def lark_name(self) -> str: + return "short_index" + + @property + def index(self): + return self.children[1] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return f".{self.index.serialize(options)}" + + +class SqbIndex(InlineCommentMixIn): + _children: Tuple[ + LSQB_TOKEN, + Optional[NewLineOrCommentRule], + ExprTermRule, + Optional[NewLineOrCommentRule], + RSQB_TOKEN, + ] + + @property + def lark_name(self) -> str: + return "braces_index" + + @property + def index_expression(self): + return self.children[2] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return f"[{self.index_expression.serialize(options)}]" + + def __init__(self, children, meta: Optional[Meta] = None): + self._possibly_insert_null_comments(children, [1, 3]) + super().__init__(children, meta) + + +class IndexExprTermRule(Expression): + + _children: Tuple[ExprTermRule, SqbIndex] 
+ + @property + def lark_name(self) -> str: + return "index_expr_term" + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return to_dollar_string( + f"{self.children[0].serialize(options)}{self.children[1].serialize(options)}" + ) diff --git a/hcl2/rule_transformer/rules/literal_rules.py b/hcl2/rule_transformer/rules/literal_rules.py new file mode 100644 index 00000000..06ca99ae --- /dev/null +++ b/hcl2/rule_transformer/rules/literal_rules.py @@ -0,0 +1,47 @@ +from abc import ABC +from typing import Any, Tuple + +from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken +from hcl2.rule_transformer.utils import SerializationOptions + + +class TokenRule(LarkRule, ABC): + + _children: Tuple[LarkToken] + + @property + def token(self) -> LarkToken: + return self._children[0] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return self.token.serialize() + + +class IdentifierRule(TokenRule): + @property + def lark_name(self) -> str: + return "identifier" + + +class IntLitRule(TokenRule): + @property + def lark_name(self) -> str: + return "int_lit" + + +class FloatLitRule(TokenRule): + @property + def lark_name(self) -> str: + return "float_lit" + + +class StringPartRule(TokenRule): + @property + def lark_name(self) -> str: + return "string" + + +class BinaryOperatorRule(TokenRule): + @property + def lark_name(self) -> str: + return "binary_operator" diff --git a/hcl2/rule_transformer/rules/strings.py b/hcl2/rule_transformer/rules/strings.py new file mode 100644 index 00000000..0f53c55a --- /dev/null +++ b/hcl2/rule_transformer/rules/strings.py @@ -0,0 +1,50 @@ +from typing import Tuple, Optional, List, Any, Union + +from lark.tree import Meta + +from hcl2.rule_transformer.rules.abstract import LarkRule +from hcl2.rule_transformer.rules.expression import Expression, ExprTermRule +from hcl2.rule_transformer.rules.literal_rules import StringPartRule +from hcl2.rule_transformer.rules.tokens import ( + INTERP_START_TOKEN, + RBRACE_TOKEN, + DBLQUOTE_TOKEN, + STRING_CHARS_TOKEN, +) +from hcl2.rule_transformer.utils import SerializationOptions + + +class StringRule(LarkRule): + + _children: Tuple[DBLQUOTE_TOKEN, List[StringPartRule], DBLQUOTE_TOKEN] + + @property + def lark_name(self) -> str: + return "string" + + @property + def string_parts(self): + return self.children[1:-1] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return '"' + "".join(part.serialize() for part in self.string_parts) + '"' + + +class InterpolationRule(LarkRule): + + _children: Tuple[ + INTERP_START_TOKEN, + Expression, + RBRACE_TOKEN, + ] + + @property + def lark_name(self) -> str: + return "interpolation" + + @property + def expression(self): + return self.children[1] + + def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + return "${" + self.expression.serialize(options) + "}" diff --git a/hcl2/rule_transformer/rules/token_sequence.py b/hcl2/rule_transformer/rules/token_sequence.py index 174e2510..66d780b3 100644 --- a/hcl2/rule_transformer/rules/token_sequence.py +++ b/hcl2/rule_transformer/rules/token_sequence.py @@ -1,60 +1,56 @@ -from abc import ABC -from typing import Tuple, Any, List, Optional, Type - -from lark.tree import Meta - -from hcl2.rule_transformer.rules.abstract import TokenSequence, LarkRule, LarkToken -from hcl2.rule_transformer.utils import SerializationOptions - - -class TokenSequenceRule(LarkRule, ABC): - - _children: Tuple[TokenSequence] - - 
def __init__(self, children: List[LarkToken], meta: Optional[Meta] = None): - children = [TokenSequence(children)] - super().__init__(children, meta) - - def serialized_type(self) -> Type: - return str - - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return self.serialized_type()(self._children[0].serialize(options)) - - -class IdentifierRule(TokenSequenceRule): - @staticmethod - def rule_name() -> str: - return "identifier" - - -class IntLitRule(TokenSequenceRule): - @staticmethod - def rule_name() -> str: - return "int_lit" - - def serialized_type(self) -> Type: - return int - - -class FloatLitRule(TokenSequenceRule): - @staticmethod - def rule_name() -> str: - return "float_lit" - - def serialized_type(self) -> Type: - return float - - -class StringLitRule(TokenSequenceRule): - @staticmethod - def rule_name() -> str: - # TODO actually this is a terminal, but it doesn't matter for lark.Transformer class; - # nevertheless, try to change it to a rule in hcl2.lark - return "STRING_LIT" - - -class BinaryOperatorRule(TokenSequenceRule): - @staticmethod - def rule_name() -> str: - return "binary_operator" +# from abc import ABC +# from typing import Tuple, Any, List, Optional, Type +# +# from lark.tree import Meta +# +# from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken +# from hcl2.rule_transformer.utils import SerializationOptions +# +# +# class TokenSequenceRule(LarkRule, ABC): +# +# _children: Tuple[TokenSequence] +# +# def __init__(self, children: List[LarkToken], meta: Optional[Meta] = None): +# children = [TokenSequence(children)] +# super().__init__(children, meta) +# +# def serialized_type(self) -> Type: +# return str +# +# def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: +# return self.serialized_type()(self._children[0].serialize(options)) +# +# +# class IdentifierRule(TokenSequenceRule): +# @staticmethod +# def lark_name() -> str: +# return "identifier" +# +# +# class IntLitRule(TokenSequenceRule): +# @staticmethod +# def lark_name() -> str: +# return "int_lit" +# +# def serialized_type(self) -> Type: +# return int +# +# +# class FloatLitRule(TokenSequenceRule): +# @staticmethod +# def lark_name() -> str: +# return "float_lit" +# +# def serialized_type(self) -> Type: +# return float +# +# +# class StringLitRule(TokenSequenceRule): +# @staticmethod +# def lark_name() -> str: +# # TODO actually this is a terminal, but it doesn't matter for lark.Transformer class; +# # nevertheless, try to change it to a rule in the grammar +# return "STRING_LIT" +# +# diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rule_transformer/rules/tokens.py new file mode 100644 index 00000000..18e1ac07 --- /dev/null +++ b/hcl2/rule_transformer/rules/tokens.py @@ -0,0 +1,66 @@ +from typing import Callable, Any + +from hcl2.rule_transformer.rules.abstract import LarkToken + + +class StringToken(LarkToken): + def __init__(self, name: str, value: Any): + super().__init__(value) + self._name = name + + @property + def lark_name(self) -> str: + return self._name + + @property + def serialize_conversion(self) -> Callable: + return str + + +# explicitly define various kinds of string-based tokens +STRING_CHARS_TOKEN = StringToken +ESCAPED_INTERPOLATION_TOKEN = StringToken +BINARY_OP_TOKEN = StringToken +EQ_TOKEN = StringToken +COLON_TOKEN = StringToken +LPAR_TOKEN = StringToken # ( +RPAR_TOKEN = StringToken # ) +LBRACE_TOKEN = StringToken # { +RBRACE_TOKEN = StringToken # } +DOT_TOKEN = StringToken +COMMA_TOKEN = 
StringToken +QMARK_TOKEN = StringToken +LSQB_TOKEN = StringToken # [ +RSQB_TOKEN = StringToken # ] +INTERP_START_TOKEN = StringToken # ${ +DBLQUOTE_TOKEN = StringToken # " + + +class IdentifierToken(LarkToken): + @property + def lark_name(self) -> str: + return "IDENTIFIER" + + @property + def serialize_conversion(self) -> Callable: + return str + + +class IntToken(LarkToken): + @property + def lark_name(self) -> str: + return "INT_LITERAL" + + @property + def serialize_conversion(self) -> Callable: + return int + + +class FloatToken(LarkToken): + @property + def lark_name(self) -> str: + return "FLOAT_LITERAL" + + @property + def serialize_conversion(self) -> Callable: + return float diff --git a/hcl2/rule_transformer/rules/tree.py b/hcl2/rule_transformer/rules/tree.py new file mode 100644 index 00000000..e39d2077 --- /dev/null +++ b/hcl2/rule_transformer/rules/tree.py @@ -0,0 +1,106 @@ +from abc import ABC, abstractmethod +from typing import List, Optional, Any, Union + + +class LarkNode(ABC): + """Base class for all nodes in the tree""" + + def __init__(self, index: int = -1, parent: Optional["Node"] = None): + self._index = index + self._parent = parent + + @property + def parent(self) -> Optional["Node"]: + return self._parent + + @property + def index(self) -> int: + return self._index + + def set_parent(self, parent: "Node"): + self._parent = parent + + def set_index(self, index: int): + self._index = index + + @abstractmethod + def serialize(self, options=None) -> Any: + pass + + @abstractmethod + def to_lark(self) -> Any: + """Convert back to Lark representation""" + pass + + def is_leaf(self) -> bool: + """Check if this is a leaf node (atomic token)""" + return isinstance(self, LeafNode) + + def is_sequence(self) -> bool: + """Check if this is a token sequence node""" + return isinstance(self, SequenceNode) + + def is_internal(self) -> bool: + """Check if this is an internal node (grammar rule)""" + return isinstance(self, InternalNode) + + def is_atomic(self) -> bool: + """Check if this represents an atomic value (leaf or sequence)""" + return self.is_leaf() or self.is_sequence() + + +class LarkLeaf(Node, ABC): + """""" + + def __init__(self, value: Any, index: int = -1, parent: Optional[TreeNode] = None): + super().__init__(index, parent) + self._value = value + + @property + def value(self) -> Any: + return self._value + + def serialize(self, options=None) -> Any: + return self._value + + +class InternalNode(Node): + def __init__( + self, children: List[Node], index: int = -1, parent: Optional[Node] = None + ): + super().__init__(index, parent) + self._children = children or [] + + # Set parent and index for all children + for i, child in enumerate(self._children): + if child is not None: + child.set_parent(self) + child.set_index(i) + + @property + def children(self) -> List[Node]: + return self._children + + def add_child(self, child: Node): + """Add a child to this internal node""" + child.set_parent(self) + child.set_index(len(self._children)) + self._children.append(child) + + def remove_child(self, index: int) -> Optional[Node]: + """Remove child at given index""" + if 0 <= index < len(self._children): + child = self._children.pop(index) + if child: + child.set_parent(None) + # Update indices for remaining children + for i in range(index, len(self._children)): + if self._children[i]: + self._children[i].set_index(i) + return child + return None + + @abstractmethod + def rule_name(self) -> str: + """The name of the grammar rule this represents""" + pass diff --git 
a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rule_transformer/rules/whitespace.py index 96fe7c91..65d5dd9c 100644 --- a/hcl2/rule_transformer/rules/whitespace.py +++ b/hcl2/rule_transformer/rules/whitespace.py @@ -1,19 +1,19 @@ -from typing import Optional, List, Any +from abc import ABC +from typing import Optional, List, Any, Tuple -from hcl2.rule_transformer.rules.abstract import TokenSequence, LarkToken, LarkRule +from hcl2.rule_transformer.rules.abstract import LarkToken, LarkRule +from hcl2.rule_transformer.rules.literal_rules import TokenRule from hcl2.rule_transformer.utils import SerializationOptions -class NewLineOrCommentRule(LarkRule): - - _children: List[LarkToken] - - @staticmethod - def rule_name() -> str: +class NewLineOrCommentRule(TokenRule): + @property + def lark_name(self) -> str: return "new_line_or_comment" - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return TokenSequence(self._children).serialize(options) + @classmethod + def from_string(cls, string: str) -> "NewLineOrCommentRule": + return cls([LarkToken("NL_OR_COMMENT", string)]) def to_list( self, options: SerializationOptions = SerializationOptions() @@ -40,3 +40,29 @@ def to_list( result.append(comment.strip()) return result + + +class InlineCommentMixIn(LarkRule, ABC): + def _possibly_insert_null_comments(self, children: List, indexes: List[int] = None): + for index in indexes: + try: + child = children[index] + except IndexError: + children.insert(index, None) + else: + if not isinstance(child, NewLineOrCommentRule): + children.insert(index, None) + + def inline_comments(self): + result = [] + for child in self._children: + + if isinstance(child, NewLineOrCommentRule): + comments = child.to_list() + if comments is not None: + result.extend(comments) + + elif isinstance(child, InlineCommentMixIn): + result.extend(child.inline_comments()) + + return result diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index 1c7d6157..31e88d61 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -1,30 +1,45 @@ # pylint: disable=missing-function-docstring,unused-argument from typing import List, Union -from lark import Transformer, Tree, Token +from lark import Token, Tree, v_args, Transformer, Discard from lark.tree import Meta -from lark.visitors import _Leaf_T, Discard, v_args -from hcl2.rule_transformer.rules.abstract import LarkToken, LarkRule from hcl2.rule_transformer.rules.base import ( StartRule, BodyRule, BlockRule, AttributeRule, ) +from hcl2.rule_transformer.rules.containers import ( + ObjectRule, + ObjectElemRule, + ObjectElemKeyRule, +) from hcl2.rule_transformer.rules.expression import ( BinaryTermRule, - ConditionalRule, - ExprTermRule, - BinaryOpRule, UnaryOpRule, + BinaryOpRule, + ExprTermRule, + ConditionalRule, ) -from hcl2.rule_transformer.rules.token_sequence import ( - IdentifierRule, - IntLitRule, +from hcl2.rule_transformer.rules.indexing import ( + IndexExprTermRule, + SqbIndex, + ShortIndexRule, +) +from hcl2.rule_transformer.rules.literal_rules import ( FloatLitRule, - StringLitRule, + IntLitRule, + IdentifierRule, BinaryOperatorRule, + StringPartRule, +) +from hcl2.rule_transformer.rules.strings import InterpolationRule, StringRule +from hcl2.rule_transformer.rules.tokens import ( + IdentifierToken, + StringToken, + IntToken, + FloatToken, ) from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule @@ -36,18 +51,24 @@ class RuleTransformer(Transformer): 
with_meta: bool - @staticmethod - def is_type_keyword(value: str) -> bool: - return value in {"bool", "number", "string"} + def transform(self, tree: Tree) -> StartRule: + return super().transform(tree) - def __init__(self): + def __init__(self, discard_new_line_or_comments: bool = False): super().__init__() + self.discard_new_line_or_comments = discard_new_line_or_comments - def transform(self, tree: Tree[_Leaf_T]) -> LarkRule: - return super().transform(tree) + def __default_token__(self, token: Token) -> StringToken: + return StringToken(token.type, token.value) + + def IDENTIFIER(self, token: Token) -> IdentifierToken: + return IdentifierToken(token.value) - def __default_token__(self, token: Token) -> LarkToken: - return LarkToken(token.type, token.value) + def INT_LITERAL(self, token: Token) -> IntToken: + return IntToken(token.value) + + def FLOAT_LITERAL(self, token: Token) -> FloatToken: + return FloatToken(token.value) @v_args(meta=True) def start(self, meta: Meta, args) -> StartRule: @@ -61,6 +82,16 @@ def body(self, meta: Meta, args) -> BodyRule: def block(self, meta: Meta, args) -> BlockRule: return BlockRule(args, meta) + @v_args(meta=True) + def attribute(self, meta: Meta, args) -> AttributeRule: + return AttributeRule(args, meta) + + @v_args(meta=True) + def new_line_or_comment(self, meta: Meta, args) -> NewLineOrCommentRule: + if self.discard_new_line_or_comments: + return Discard + return NewLineOrCommentRule(args, meta) + @v_args(meta=True) def identifier(self, meta: Meta, args) -> IdentifierRule: return IdentifierRule(args, meta) @@ -74,8 +105,16 @@ def float_lit(self, meta: Meta, args) -> FloatLitRule: return FloatLitRule(args, meta) @v_args(meta=True) - def string_lit(self, meta: Meta, args) -> StringLitRule: - return StringLitRule(args, meta) + def string(self, meta: Meta, args) -> StringRule: + return StringRule(args, meta) + + @v_args(meta=True) + def string_part(self, meta: Meta, args) -> StringPartRule: + return StringPartRule(args, meta) + + @v_args(meta=True) + def interpolation(self, meta: Meta, args) -> InterpolationRule: + return InterpolationRule(args, meta) @v_args(meta=True) def expr_term(self, meta: Meta, args) -> ExprTermRule: @@ -102,9 +141,25 @@ def binary_op(self, meta: Meta, args) -> BinaryOpRule: return BinaryOpRule(args, meta) @v_args(meta=True) - def attribute(self, meta: Meta, args) -> AttributeRule: - return AttributeRule(args, meta) + def object(self, meta: Meta, args) -> ObjectRule: + return ObjectRule(args, meta) @v_args(meta=True) - def new_line_or_comment(self, meta: Meta, args) -> NewLineOrCommentRule: - return NewLineOrCommentRule(args, meta) + def object_elem(self, meta: Meta, args) -> ObjectElemRule: + return ObjectElemRule(args, meta) + + @v_args(meta=True) + def object_elem_key(self, meta: Meta, args) -> ObjectElemKeyRule: + return ObjectElemKeyRule(args, meta) + + @v_args(meta=True) + def index_expr_term(self, meta: Meta, args) -> IndexExprTermRule: + return IndexExprTermRule(args, meta) + + @v_args(meta=True) + def braces_index(self, meta: Meta, args) -> SqbIndex: + return SqbIndex(args, meta) + + @v_args(meta=True) + def short_index(self, meta: Meta, args) -> ShortIndexRule: + return ShortIndexRule(args, meta) diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index e083d628..6a6ed661 100644 --- a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -1,11 +1,15 @@ -from dataclasses import dataclass +from dataclasses import dataclass, replace @dataclass class SerializationOptions: 
with_comments: bool = True with_meta: bool = False - + unwrap_dollar_string: bool = False + + def replace(self, **kwargs) -> "SerializationOptions": + return replace(self, **kwargs) + def is_dollar_string(value: str) -> bool: if not isinstance(value, str): From 5a10fece33cf401c4e2b23a1655e983c3c708e55 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Wed, 23 Jul 2025 11:48:44 +0200 Subject: [PATCH 06/11] batch of changes --- hcl2/parser.py | 2 +- hcl2/reconstructor.py | 7 +- hcl2/rule_transformer/deserializer.py | 31 +++ hcl2/rule_transformer/hcl2.lark | 25 +- hcl2/rule_transformer/rules/abstract.py | 36 ++- hcl2/rule_transformer/rules/base.py | 61 +++-- hcl2/rule_transformer/rules/containers.py | 165 ++++++++++++-- .../rules/{expression.py => expressions.py} | 135 ++++++----- hcl2/rule_transformer/rules/functions.py | 104 +++++++++ hcl2/rule_transformer/rules/indexing.py | 215 ++++++++++++++++-- hcl2/rule_transformer/rules/literal_rules.py | 34 +-- hcl2/rule_transformer/rules/strings.py | 42 ++-- hcl2/rule_transformer/rules/token_sequence.py | 56 ----- hcl2/rule_transformer/rules/tokens.py | 99 ++++---- hcl2/rule_transformer/rules/whitespace.py | 4 +- hcl2/rule_transformer/transformer.py | 90 ++++++-- hcl2/rule_transformer/utils.py | 41 +++- 17 files changed, 835 insertions(+), 312 deletions(-) create mode 100644 hcl2/rule_transformer/deserializer.py rename hcl2/rule_transformer/rules/{expression.py => expressions.py} (53%) create mode 100644 hcl2/rule_transformer/rules/functions.py delete mode 100644 hcl2/rule_transformer/rules/token_sequence.py diff --git a/hcl2/parser.py b/hcl2/parser.py index a0c87e34..3e524736 100644 --- a/hcl2/parser.py +++ b/hcl2/parser.py @@ -12,7 +12,7 @@ def parser() -> Lark: """Build standard parser for transforming HCL2 text into python structures""" return Lark.open( - "rule_transformer/hcl2.lark.lark", + "rule_transformer/hcl2.lark", parser="lalr", cache=str(PARSER_FILE), # Disable/Delete file to effect changes to the grammar rel_to=__file__, diff --git a/hcl2/reconstructor.py b/hcl2/reconstructor.py index 7f957d7b..555edcf6 100644 --- a/hcl2/reconstructor.py +++ b/hcl2/reconstructor.py @@ -167,12 +167,17 @@ def _should_add_space(self, rule, current_terminal, is_block_label: bool = False if self._is_equals_sign(current_terminal): return True + if is_block_label: + pass + # print(rule, self._last_rule, current_terminal, self._last_terminal) + if is_block_label and isinstance(rule, Token) and rule.value == "string": if ( current_terminal == self._last_terminal == Terminal("DBLQUOTE") or current_terminal == Terminal("DBLQUOTE") - and self._last_terminal == Terminal("NAME") + and self._last_terminal == Terminal("IDENTIFIER") ): + # print("true") return True # if we're in a ternary or binary operator, add space around the operator diff --git a/hcl2/rule_transformer/deserializer.py b/hcl2/rule_transformer/deserializer.py new file mode 100644 index 00000000..5bdcf775 --- /dev/null +++ b/hcl2/rule_transformer/deserializer.py @@ -0,0 +1,31 @@ +import json +from typing import Any, TextIO, List + +from hcl2.rule_transformer.rules.abstract import LarkElement, LarkRule +from hcl2.rule_transformer.utils import DeserializationOptions + + +class Deserializer: + def __init__(self, options=DeserializationOptions()): + self.options = options + + def load_python(self, value: Any) -> LarkElement: + pass + + def loads(self, value: str) -> LarkElement: + return self.load_python(json.loads(value)) + + def load(self, file: TextIO) -> LarkElement: + return self.loads(file.read()) + + 
def _deserialize(self, value: Any) -> LarkElement: + pass + + def _deserialize_dict(self, value: dict) -> LarkRule: + pass + + def _deserialize_list(self, value: List) -> LarkRule: + pass + + def _deserialize_expression(self, value: str) -> LarkRule: + pass diff --git a/hcl2/rule_transformer/hcl2.lark b/hcl2/rule_transformer/hcl2.lark index a7722118..3f8d913e 100644 --- a/hcl2/rule_transformer/hcl2.lark +++ b/hcl2/rule_transformer/hcl2.lark @@ -11,11 +11,9 @@ IN : "in" FOR : "for" FOR_EACH : "for_each" -// Identifiers and Names -NAME : /[a-zA-Z_][a-zA-Z0-9_-]*/ -IDENTIFIER: NAME | IN | FOR | IF | FOR_EACH // Literals +NAME : /[a-zA-Z_][a-zA-Z0-9_-]*/ ESCAPED_INTERPOLATION.2: /\$\$\{[^}]*\}/ STRING_CHARS.1: /(?:(?!\$\$\{)(?!\$\{)[^"\\]|\\.|(?:\$(?!\$?\{)))+/ DECIMAL : "0".."9" @@ -91,7 +89,8 @@ block : identifier (identifier | string)* new_line_or_comment? LBRACE body RBRAC new_line_or_comment: ( NL_OR_COMMENT )+ // Basic literals and identifiers -identifier: IDENTIFIER +identifier : NAME +keyword: IN | FOR | IF | FOR_EACH int_lit: INT_LITERAL float_lit: FLOAT_LITERAL string: DBLQUOTE string_part* DBLQUOTE @@ -118,21 +117,20 @@ expr_term : LPAR new_line_or_comment? expression new_line_or_comment? RPAR | string | tuple | object - | function_call - | index_expr_term - | get_attr_expr_term | identifier - | provider_function_call + | function_call | heredoc_template | heredoc_template_trim + | index_expr_term + | get_attr_expr_term | attr_splat_expr_term | full_splat_expr_term | for_tuple_expr | for_object_expr // Collections -tuple : LSQB (new_line_or_comment* expression new_line_or_comment* COMMA)* (new_line_or_comment* expression)? new_line_or_comment* RSQB -object : LBRACE new_line_or_comment? (new_line_or_comment* (object_elem | (object_elem COMMA)) new_line_or_comment*)* RBRACE +tuple : LSQB new_line_or_comment? (expression new_line_or_comment? COMMA new_line_or_comment?)* (expression new_line_or_comment? COMMA? new_line_or_comment?)? RSQB +object : LBRACE new_line_or_comment? ((object_elem | (object_elem new_line_or_comment? COMMA)) new_line_or_comment?)* RBRACE object_elem : object_elem_key ( EQ | COLON ) expression object_elem_key : float_lit | int_lit | identifier | string | object_elem_key_dot_accessor | object_elem_key_expression object_elem_key_expression : LPAR expression RPAR @@ -143,9 +141,8 @@ heredoc_template : HEREDOC_TEMPLATE heredoc_template_trim : HEREDOC_TEMPLATE_TRIM // Functions -function_call : identifier LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR -arguments : (expression (new_line_or_comment* COMMA new_line_or_comment* expression)* (COMMA | ELLIPSIS)? new_line_or_comment*) -provider_function_call: identifier COLONS identifier COLONS identifier LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR +function_call : identifier (COLONS identifier COLONS identifier)? LPAR new_line_or_comment? arguments? new_line_or_comment? RPAR +arguments : (expression (new_line_or_comment? COMMA new_line_or_comment? expression)* (COMMA | ELLIPSIS)? new_line_or_comment?) // Indexing and attribute access index_expr_term : expr_term index @@ -156,7 +153,7 @@ full_splat_expr_term : expr_term full_splat braces_index : LSQB new_line_or_comment? expression new_line_or_comment? 
RSQB short_index : DOT INT_LITERAL get_attr : DOT identifier -attr_splat : ATTR_SPLAT get_attr* +attr_splat : ATTR_SPLAT (get_attr | index)* full_splat : FULL_SPLAT_START (get_attr | index)* // For expressions diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rule_transformer/rules/abstract.py index d3a3b634..e32d9ddb 100644 --- a/hcl2/rule_transformer/rules/abstract.py +++ b/hcl2/rule_transformer/rules/abstract.py @@ -2,15 +2,16 @@ from typing import Any, Union, List, Optional, Tuple, Callable from lark import Token, Tree +from lark.exceptions import VisitError from lark.tree import Meta -from hcl2.rule_transformer.utils import SerializationOptions +from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext class LarkElement(ABC): - @property + @staticmethod @abstractmethod - def lark_name(self) -> str: + def lark_name() -> str: raise NotImplementedError() def __init__(self, index: int = -1, parent: "LarkElement" = None): @@ -28,7 +29,9 @@ def to_lark(self) -> Any: raise NotImplementedError() @abstractmethod - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: raise NotImplementedError() @@ -37,11 +40,6 @@ def __init__(self, value: Union[str, int]): self._value = value super().__init__() - @property - @abstractmethod - def lark_name(self) -> str: - raise NotImplementedError() - @property @abstractmethod def serialize_conversion(self) -> Callable: @@ -51,27 +49,26 @@ def serialize_conversion(self) -> Callable: def value(self): return self._value - def serialize(self, options: SerializationOptions = SerializationOptions()): + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return self.serialize_conversion(self.value) def to_lark(self) -> Token: - return Token(self.lark_name, self.value) + return Token(self.lark_name(), self.value) def __str__(self) -> str: return str(self._value) def __repr__(self) -> str: - return f"" + return f"" class LarkRule(LarkElement, ABC): - @property - @abstractmethod - def lark_name(self) -> str: - raise NotImplementedError() - @abstractmethod - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: raise NotImplementedError() @property @@ -94,7 +91,7 @@ def to_lark(self) -> Tree: result_children.append(child.to_lark()) - return Tree(self.lark_name, result_children, meta=self._meta) + return Tree(self.lark_name(), result_children, meta=self._meta) def __init__(self, children: List[LarkElement], meta: Optional[Meta] = None): super().__init__() @@ -103,6 +100,7 @@ def __init__(self, children: List[LarkElement], meta: Optional[Meta] = None): for index, child in enumerate(children): if child is not None: + print(child) child.set_index(index) child.set_parent(self) diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rule_transformer/rules/base.py index 6d0c4924..da74954b 100644 --- a/hcl2/rule_transformer/rules/base.py +++ b/hcl2/rule_transformer/rules/base.py @@ -3,34 +3,37 @@ from lark.tree import Meta +from hcl2.dict_transformer import START_LINE, END_LINE from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken -from hcl2.rule_transformer.rules.expression import Expression -from hcl2.rule_transformer.rules.tokens import IdentifierToken, EQ_TOKEN +from hcl2.rule_transformer.rules.expressions import 
ExpressionRule +from hcl2.rule_transformer.rules.tokens import NAME, EQ from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule -from hcl2.rule_transformer.utils import SerializationOptions +from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext class AttributeRule(LarkRule): _children: Tuple[ - IdentifierToken, - EQ_TOKEN, - Expression, + NAME, + EQ, + ExpressionRule, ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "attribute" @property - def identifier(self) -> IdentifierToken: + def identifier(self) -> NAME: return self._children[0] @property - def expression(self) -> Expression: + def expression(self) -> ExpressionRule: return self._children[2] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return {self.identifier.serialize(options): self.expression.serialize(options)} @@ -44,11 +47,13 @@ class BodyRule(LarkRule): ] ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "body" - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: blocks: List[BlockRule] = [] attributes: List[AttributeRule] = [] comments = [] @@ -99,11 +104,13 @@ class StartRule(LarkRule): def body(self) -> BodyRule: return self._children[0] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "start" - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return self.body.serialize(options) @@ -118,12 +125,12 @@ def __init__(self, children, meta: Optional[Meta] = None): child for child in children if not isinstance(child, LarkToken) ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "block" @property - def labels(self) -> List[IdentifierToken]: + def labels(self) -> List[NAME]: return list(filter(lambda label: label is not None, self._labels)) @property @@ -131,10 +138,18 @@ def body(self) -> BodyRule: return self._body def serialize( - self, options: SerializationOptions = SerializationOptions() - ) -> BodyRule: + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: result = self._body.serialize(options) labels = self._labels - for label in reversed(labels): + for label in reversed(labels[1:]): result = {label.serialize(options): result} + + result.update( + { + START_LINE: self._meta.line, + END_LINE: self._meta.end_line, + } + ) + return result diff --git a/hcl2/rule_transformer/rules/containers.py b/hcl2/rule_transformer/rules/containers.py index c39f3ba2..11ac0f5e 100644 --- a/hcl2/rule_transformer/rules/containers.py +++ b/hcl2/rule_transformer/rules/containers.py @@ -1,7 +1,8 @@ +import json from typing import Tuple, List, Optional, Union, Any from hcl2.rule_transformer.rules.abstract import LarkRule -from hcl2.rule_transformer.rules.expression import Expression +from hcl2.rule_transformer.rules.expressions import ExpressionRule from hcl2.rule_transformer.rules.literal_rules import ( FloatLitRule, IntLitRule, @@ -9,36 +10,135 @@ ) from hcl2.rule_transformer.rules.strings import StringRule from hcl2.rule_transformer.rules.tokens import ( - COLON_TOKEN, - EQ_TOKEN, - LBRACE_TOKEN, - 
COMMA_TOKEN, - RBRACE_TOKEN, + COLON, + EQ, + LBRACE, + COMMA, + RBRACE, LSQB, RSQB, LPAR, RPAR, DOT, ) from hcl2.rule_transformer.rules.whitespace import ( NewLineOrCommentRule, InlineCommentMixIn, ) -from hcl2.rule_transformer.utils import SerializationOptions +from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext, to_dollar_string + + +class TupleRule(InlineCommentMixIn): + + _children: Tuple[ + LSQB, + Optional[NewLineOrCommentRule], + Tuple[ + ExpressionRule, + Optional[NewLineOrCommentRule], + COMMA, + Optional[NewLineOrCommentRule], + ... + ], + ExpressionRule, + Optional[NewLineOrCommentRule], + Optional[COMMA], + Optional[NewLineOrCommentRule], + RSQB, + ] + + @staticmethod + def lark_name() -> str: + return "tuple" + + @property + def elements(self) -> List[ExpressionRule]: + return [ + child for child in self.children[1:-1] if isinstance(child, ExpressionRule) + ] + + def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + if not options.wrap_tuples: + return [element.serialize(options, context) for element in self.elements] + + with context.modify(inside_dollar_string=True): + result = f"[{", ".join( + str(element.serialize(options, context)) for element in self.elements + )}]" + + if not context.inside_dollar_string: + result = to_dollar_string(result) + + return result class ObjectElemKeyRule(LarkRule): - _children: Tuple[Union[FloatLitRule, IntLitRule, IdentifierRule, StringRule]] + + key_T = Union[FloatLitRule, IntLitRule, IdentifierRule, StringRule] + + _children: Tuple[key_T] @staticmethod def lark_name() -> str: return "object_elem_key" - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return self.children[0].serialize(options) + @property + def value(self) -> key_T: + return self._children[0] + + def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + return self.value.serialize(options, context) + + +class ObjectElemKeyExpressionRule(LarkRule): + + _children: Tuple[ + LPAR, + ExpressionRule, + RPAR, + ] + + + @staticmethod + def lark_name() -> str: + return "object_elem_key_expression" + + @property + def expression(self) -> ExpressionRule: + return self._children[1] + + def serialize(self, options=SerializationOptions(), context=SerializationContext()) -> Any: + with context.modify(inside_dollar_string=True): + result = f"({self.expression.serialize(options, context)})" + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result + + +class ObjectElemKeyDotAccessor(LarkRule): + + _children: Tuple[ + IdentifierRule, + Tuple[ + IdentifierRule, + DOT, + ... 
+ ] + ] + + @staticmethod + def lark_name() -> str: + return "object_elem_key_dot_accessor" + + @property + def identifiers(self) -> List[IdentifierRule]: + return [child for child in self._children if isinstance(child, IdentifierRule)] + + def serialize(self, options=SerializationOptions(), context=SerializationContext()) -> Any: + return ".".join(identifier.serialize(options, context) for identifier in self.identifiers) class ObjectElemRule(LarkRule): _children: Tuple[ ObjectElemKeyRule, - Union[EQ_TOKEN, COLON_TOKEN], - Expression, + Union[EQ, COLON], + ExpressionRule, ] @staticmethod @@ -47,25 +147,31 @@ def lark_name() -> str: @property def key(self) -> ObjectElemKeyRule: - return self.children[0] + return self._children[0] @property def expression(self): - return self.children[2] + return self._children[2] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: return { - self.children[0].serialize(options): self.children[2].serialize(options) + self.key.serialize(options, context): self.expression.serialize(options, context) } class ObjectRule(InlineCommentMixIn): _children: Tuple[ - LBRACE_TOKEN, + LBRACE, Optional[NewLineOrCommentRule], - Tuple[Union[ObjectElemRule, Optional[COMMA_TOKEN], NewLineOrCommentRule], ...], - RBRACE_TOKEN, + Tuple[ + ObjectElemRule, + Optional[NewLineOrCommentRule], + Optional[COMMA], + Optional[NewLineOrCommentRule], + ... + ], + RBRACE, ] @staticmethod @@ -78,8 +184,21 @@ def elements(self) -> List[ObjectElemRule]: child for child in self.children[1:-1] if isinstance(child, ObjectElemRule) ] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - result = {} - for element in self.elements: - result.update(element.serialize()) + def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + if not options.wrap_objects: + result = {} + for element in self.elements: + result.update(element.serialize(options, context)) + + return result + + with context.modify(inside_dollar_string=True): + result = f"{{{", ".join( + f"{element.key.serialize(options, context)} = {element.expression.serialize(options,context)}" + for element in self.elements + )}}}" + + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result diff --git a/hcl2/rule_transformer/rules/expression.py b/hcl2/rule_transformer/rules/expressions.py similarity index 53% rename from hcl2/rule_transformer/rules/expression.py rename to hcl2/rule_transformer/rules/expressions.py index 8a03f813..d89f3b3c 100644 --- a/hcl2/rule_transformer/rules/expression.py +++ b/hcl2/rule_transformer/rules/expressions.py @@ -8,7 +8,7 @@ LarkToken, ) from hcl2.rule_transformer.rules.literal_rules import BinaryOperatorRule -from hcl2.rule_transformer.rules.tokens import LPAR_TOKEN, RPAR_TOKEN, QMARK_TOKEN, COLON_TOKEN +from hcl2.rule_transformer.rules.tokens import LPAR, RPAR, QMARK, COLON from hcl2.rule_transformer.rules.whitespace import ( NewLineOrCommentRule, InlineCommentMixIn, @@ -18,46 +18,46 @@ to_dollar_string, unwrap_dollar_string, SerializationOptions, + SerializationContext, ) -class Expression(InlineCommentMixIn, ABC): - @property - def lark_name(self) -> str: +class ExpressionRule(InlineCommentMixIn, ABC): + @staticmethod + def lark_name() -> str: return "expression" def __init__(self, children, meta: Optional[Meta] = None): super().__init__(children, meta) -class 
ExprTermRule(Expression): +class ExprTermRule(ExpressionRule): type_ = Tuple[ - Optional[LPAR_TOKEN], + Optional[LPAR], Optional[NewLineOrCommentRule], - Expression, + ExpressionRule, Optional[NewLineOrCommentRule], - Optional[RPAR_TOKEN], + Optional[RPAR], ] _children: type_ - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "expr_term" def __init__(self, children, meta: Optional[Meta] = None): self._parentheses = False if ( isinstance(children[0], LarkToken) - and children[0].lark_name == "LPAR" + and children[0].lark_name() == "LPAR" and isinstance(children[-1], LarkToken) - and children[-1].lark_name == "RPAR" + and children[-1].lark_name() == "RPAR" ): self._parentheses = True else: children = [None, *children, None] - self._possibly_insert_null_comments(children, [1, 3]) super().__init__(children, meta) @@ -66,35 +66,37 @@ def parentheses(self) -> bool: return self._parentheses @property - def expression(self) -> Expression: + def expression(self) -> ExpressionRule: return self._children[2] - def serialize(self , unwrap: bool = False, options: SerializationOptions = SerializationOptions()) -> Any: - result = self.expression.serialize(options) + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + result = self.expression.serialize(options, context) + if self.parentheses: result = wrap_into_parentheses(result) - result = to_dollar_string(result) - - if options.unwrap_dollar_string: - result = unwrap_dollar_string(result) + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result -class ConditionalRule(Expression): +class ConditionalRule(ExpressionRule): _children: Tuple[ - Expression, - QMARK_TOKEN, + ExpressionRule, + QMARK, Optional[NewLineOrCommentRule], - Expression, + ExpressionRule, Optional[NewLineOrCommentRule], - COLON_TOKEN, + COLON, Optional[NewLineOrCommentRule], - Expression, + ExpressionRule, ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "conditional" def __init__(self, children, meta: Optional[Meta] = None): @@ -102,25 +104,34 @@ def __init__(self, children, meta: Optional[Meta] = None): super().__init__(children, meta) @property - def condition(self) -> Expression: + def condition(self) -> ExpressionRule: return self._children[0] @property - def if_true(self) -> Expression: + def if_true(self) -> ExpressionRule: return self._children[3] @property - def if_false(self) -> Expression: + def if_false(self) -> ExpressionRule: return self._children[7] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - options = options.replace(unwrap_dollar_string=True) - print(self.condition) - result = f"{self.condition.serialize(options)} ? {self.if_true.serialize(options)} : {self.if_false.serialize(options)}" - return to_dollar_string(result) + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + with context.modify(inside_dollar_string=False): + result = ( + f"{self.condition.serialize(options, context)} " + f"? 
{self.if_true.serialize(options, context)} " + f": {self.if_false.serialize(options, context)}" + ) + + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result -class BinaryTermRule(Expression): + +class BinaryTermRule(ExpressionRule): _children: Tuple[ BinaryOperatorRule, @@ -128,8 +139,8 @@ class BinaryTermRule(Expression): ExprTermRule, ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "binary_term" def __init__(self, children, meta: Optional[Meta] = None): @@ -144,19 +155,21 @@ def binary_operator(self) -> BinaryOperatorRule: def expr_term(self) -> ExprTermRule: return self._children[2] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return f"{self.binary_operator.serialize(options)} {self.expr_term.serialize(options)}" + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return f"{self.binary_operator.serialize(options, context)} {self.expr_term.serialize(options, context)}" -class BinaryOpRule(Expression): +class BinaryOpRule(ExpressionRule): _children: Tuple[ ExprTermRule, BinaryTermRule, Optional[NewLineOrCommentRule], ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "binary_op" @property @@ -167,24 +180,28 @@ def expr_term(self) -> ExprTermRule: def binary_term(self) -> BinaryTermRule: return self._children[1] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - children_options = options.replace(unwrap_dollar_string=True) - lhs = self.expr_term.serialize(children_options) - operator = self.binary_term.binary_operator.serialize(children_options) - rhs = self.binary_term.expr_term.serialize(children_options) + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + + with context.modify(inside_dollar_string=True): + lhs = self.expr_term.serialize(options, context) + operator = self.binary_term.binary_operator.serialize(options, context) + rhs = self.binary_term.expr_term.serialize(options, context) result = f"{lhs} {operator} {rhs}" - if options.unwrap_dollar_string: - return result - return to_dollar_string(result) + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result -class UnaryOpRule(Expression): + +class UnaryOpRule(ExpressionRule): _children: Tuple[LarkToken, ExprTermRule] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "unary_op" @property @@ -195,5 +212,9 @@ def operator(self) -> str: def expr_term(self): return self._children[1] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return to_dollar_string(f"{self.operator}{self.expr_term.serialize(options)}") + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return to_dollar_string( + f"{self.operator}{self.expr_term.serialize(options, context)}" + ) diff --git a/hcl2/rule_transformer/rules/functions.py b/hcl2/rule_transformer/rules/functions.py new file mode 100644 index 00000000..412a1667 --- /dev/null +++ b/hcl2/rule_transformer/rules/functions.py @@ -0,0 +1,104 @@ +from functools import lru_cache +from typing import Any, Optional, Tuple, Union, List + +from hcl2.rule_transformer.rules.expressions import ExpressionRule +from hcl2.rule_transformer.rules.literal_rules import IdentifierRule +from hcl2.rule_transformer.rules.tokens import COMMA, ELLIPSIS, StringToken, LPAR, RPAR 
+from hcl2.rule_transformer.rules.whitespace import InlineCommentMixIn, NewLineOrCommentRule +from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext, to_dollar_string + + +class ArgumentsRule(InlineCommentMixIn): + + _children: Tuple[ + ExpressionRule, + Tuple[ + Optional[NewLineOrCommentRule], + COMMA, + Optional[NewLineOrCommentRule], + ExpressionRule, + ... + ], + Optional[Union[COMMA, ELLIPSIS]], + Optional[NewLineOrCommentRule], + ] + + @staticmethod + def lark_name() -> str: + return "arguments" + + @property + @lru_cache(maxsize=None) + def has_ellipsis(self) -> bool: + for child in self._children[-2:]: + if isinstance(child, StringToken) and child.lark_name() == "ELLIPSIS": + return True + return False + + @property + def arguments(self) -> List[ExpressionRule]: + return [child for child in self._children if isinstance(child, ExpressionRule)] + + def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + result = ", ".join([argument.serialize(options, context) for argument in self.arguments]) + if self.has_ellipsis: + result += " ..." + return result + + +class FunctionCallRule(InlineCommentMixIn): + + _children: Tuple[ + IdentifierRule, + Optional[IdentifierRule], + Optional[IdentifierRule], + LPAR, + Optional[NewLineOrCommentRule], + Optional[ArgumentsRule], + Optional[NewLineOrCommentRule], + RPAR, + ] + + @staticmethod + def lark_name() -> str: + return "function_call" + + @property + @lru_cache(maxsize=None) + def identifiers(self) -> List[IdentifierRule]: + return [child for child in self._children if isinstance(child, IdentifierRule)] + + @property + @lru_cache(maxsize=None) + def arguments(self) -> Optional[ArgumentsRule]: + for child in self._children[2:6]: + if isinstance(child, ArgumentsRule): + return child + + + def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + result = ( + f"{":".join(identifier.serialize(options, context) for identifier in self.identifiers)}" + f"({self.arguments.serialize(options, context) if self.arguments else ""})" + ) + if not context.inside_dollar_string: + result = to_dollar_string(result) + + return result + + +# class ProviderFunctionCallRule(FunctionCallRule): +# _children: Tuple[ +# IdentifierRule, +# IdentifierRule, +# IdentifierRule, +# LPAR, +# Optional[NewLineOrCommentRule], +# Optional[ArgumentsRule], +# Optional[NewLineOrCommentRule], +# RPAR, +# ] +# +# @staticmethod +# def lark_name() -> str: +# return "provider_function_call" diff --git a/hcl2/rule_transformer/rules/indexing.py b/hcl2/rule_transformer/rules/indexing.py index ce23d040..7a9b53a5 100644 --- a/hcl2/rule_transformer/rules/indexing.py +++ b/hcl2/rule_transformer/rules/indexing.py @@ -1,59 +1,69 @@ -from typing import List, Optional, Tuple, Any +from typing import List, Optional, Tuple, Any, Union from lark.tree import Meta from hcl2.rule_transformer.rules.abstract import LarkRule -from hcl2.rule_transformer.rules.expression import ExprTermRule, Expression +from hcl2.rule_transformer.rules.expressions import ExprTermRule, ExpressionRule +from hcl2.rule_transformer.rules.literal_rules import IdentifierRule from hcl2.rule_transformer.rules.tokens import ( - DOT_TOKEN, - IntToken, - LSQB_TOKEN, - RSQB_TOKEN, + DOT, + IntLiteral, + LSQB, + RSQB, + ATTR_SPLAT, ) from hcl2.rule_transformer.rules.whitespace import ( InlineCommentMixIn, NewLineOrCommentRule, ) -from hcl2.rule_transformer.utils import SerializationOptions, to_dollar_string +from 
hcl2.rule_transformer.utils import ( + SerializationOptions, + to_dollar_string, + SerializationContext, +) class ShortIndexRule(LarkRule): _children: Tuple[ - DOT_TOKEN, - IntToken, + DOT, + IntLiteral, ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "short_index" @property def index(self): return self.children[1] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return f".{self.index.serialize(options)}" -class SqbIndex(InlineCommentMixIn): +class SqbIndexRule(InlineCommentMixIn): _children: Tuple[ - LSQB_TOKEN, + LSQB, Optional[NewLineOrCommentRule], ExprTermRule, Optional[NewLineOrCommentRule], - RSQB_TOKEN, + RSQB, ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "braces_index" @property def index_expression(self): return self.children[2] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return f"[{self.index_expression.serialize(options)}]" def __init__(self, children, meta: Optional[Meta] = None): @@ -61,15 +71,170 @@ def __init__(self, children, meta: Optional[Meta] = None): super().__init__(children, meta) -class IndexExprTermRule(Expression): +class IndexExprTermRule(ExpressionRule): - _children: Tuple[ExprTermRule, SqbIndex] + _children: Tuple[ExprTermRule, SqbIndexRule] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "index_expr_term" - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return to_dollar_string( - f"{self.children[0].serialize(options)}{self.children[1].serialize(options)}" + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + with context.modify(inside_dollar_string=True): + result = f"{self.children[0].serialize(options)}{self.children[1].serialize(options)}" + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result + + +class GetAttrRule(LarkRule): + + _children: Tuple[ + DOT, + IdentifierRule, + ] + + @staticmethod + def lark_name() -> str: + return "get_attr" + + @property + def identifier(self) -> IdentifierRule: + return self._children[1] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return f".{self.identifier.serialize(options, context)}" + + +class GetAttrExprTermRule(ExpressionRule): + + _children: Tuple[ + ExprTermRule, + GetAttrRule, + ] + + @staticmethod + def lark_name() -> str: + return "get_attr_expr_term" + + @property + def expr_term(self) -> ExprTermRule: + return self._children[0] + + @property + def get_attr(self) -> GetAttrRule: + return self._children[1] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + with context.modify(inside_dollar_string=True): + result = f"{self.expr_term.serialize(options, context)}{self.get_attr.serialize(options, context)}" + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result + + +class AttrSplatRule(LarkRule): + _children: Tuple[ + ATTR_SPLAT, + Tuple[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]], ...], + ] + + @staticmethod + def lark_name() -> str: + return "attr_splat" + + @property + def get_attrs( + self, + ) -> List[Union[GetAttrRule, 
Union[SqbIndexRule, ShortIndexRule]]]: + return self._children[1:] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return ".*" + "".join( + get_attr.serialize(options, context) for get_attr in self.get_attrs + ) + + +class AttrSplatExprTermRule(ExpressionRule): + + _children: Tuple[ExprTermRule, AttrSplatRule] + + @staticmethod + def lark_name() -> str: + return "attr_splat_expr_term" + + @property + def expr_term(self) -> ExprTermRule: + return self._children[0] + + @property + def attr_splat(self) -> AttrSplatRule: + return self._children[1] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + with context.modify(inside_dollar_string=True): + result = f"{self.expr_term.serialize(options, context)}{self.attr_splat.serialize(options, context)}" + + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result + + +class FullSplatRule(LarkRule): + _children: Tuple[ + ATTR_SPLAT, + Tuple[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]], ...], + ] + + @staticmethod + def lark_name() -> str: + return "full_splat" + + @property + def get_attrs( + self, + ) -> List[Union[GetAttrRule, Union[SqbIndexRule, ShortIndexRule]]]: + return self._children[1:] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return "[*]" + "".join( + get_attr.serialize(options, context) for get_attr in self.get_attrs ) + + +class FullSplatExprTermRule(ExpressionRule): + _children: Tuple[ExprTermRule, FullSplatRule] + + @staticmethod + def lark_name() -> str: + return "full_splat_expr_term" + + @property + def expr_term(self) -> ExprTermRule: + return self._children[0] + + @property + def attr_splat(self) -> FullSplatRule: + return self._children[1] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + with context.modify(inside_dollar_string=True): + result = f"{self.expr_term.serialize(options, context)}{self.attr_splat.serialize(options, context)}" + + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result diff --git a/hcl2/rule_transformer/rules/literal_rules.py b/hcl2/rule_transformer/rules/literal_rules.py index 06ca99ae..db7e8289 100644 --- a/hcl2/rule_transformer/rules/literal_rules.py +++ b/hcl2/rule_transformer/rules/literal_rules.py @@ -2,7 +2,7 @@ from typing import Any, Tuple from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken -from hcl2.rule_transformer.utils import SerializationOptions +from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext class TokenRule(LarkRule, ABC): @@ -13,35 +13,43 @@ class TokenRule(LarkRule, ABC): def token(self) -> LarkToken: return self._children[0] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return self.token.serialize() +class KeywordRule(TokenRule): + @staticmethod + def lark_name() -> str: + return "keyword" + + class IdentifierRule(TokenRule): - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "identifier" class IntLitRule(TokenRule): - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "int_lit" class FloatLitRule(TokenRule): - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "float_lit" class 
StringPartRule(TokenRule): - @property - def lark_name(self) -> str: - return "string" + @staticmethod + def lark_name() -> str: + return "string_part" class BinaryOperatorRule(TokenRule): - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "binary_operator" diff --git a/hcl2/rule_transformer/rules/strings.py b/hcl2/rule_transformer/rules/strings.py index 0f53c55a..dc3b85b0 100644 --- a/hcl2/rule_transformer/rules/strings.py +++ b/hcl2/rule_transformer/rules/strings.py @@ -3,48 +3,56 @@ from lark.tree import Meta from hcl2.rule_transformer.rules.abstract import LarkRule -from hcl2.rule_transformer.rules.expression import Expression, ExprTermRule +from hcl2.rule_transformer.rules.expressions import ExpressionRule, ExprTermRule from hcl2.rule_transformer.rules.literal_rules import StringPartRule from hcl2.rule_transformer.rules.tokens import ( - INTERP_START_TOKEN, - RBRACE_TOKEN, - DBLQUOTE_TOKEN, - STRING_CHARS_TOKEN, + INTERP_START, + RBRACE, + DBLQUOTE, + STRING_CHARS, +) +from hcl2.rule_transformer.utils import ( + SerializationOptions, + SerializationContext, + to_dollar_string, ) -from hcl2.rule_transformer.utils import SerializationOptions class StringRule(LarkRule): - _children: Tuple[DBLQUOTE_TOKEN, List[StringPartRule], DBLQUOTE_TOKEN] + _children: Tuple[DBLQUOTE, List[StringPartRule], DBLQUOTE] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "string" @property def string_parts(self): return self.children[1:-1] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return '"' + "".join(part.serialize() for part in self.string_parts) + '"' class InterpolationRule(LarkRule): _children: Tuple[ - INTERP_START_TOKEN, - Expression, - RBRACE_TOKEN, + INTERP_START, + ExpressionRule, + RBRACE, ] - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "interpolation" @property def expression(self): return self.children[1] - def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: - return "${" + self.expression.serialize(options) + "}" + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return to_dollar_string(self.expression.serialize(options)) diff --git a/hcl2/rule_transformer/rules/token_sequence.py b/hcl2/rule_transformer/rules/token_sequence.py deleted file mode 100644 index 66d780b3..00000000 --- a/hcl2/rule_transformer/rules/token_sequence.py +++ /dev/null @@ -1,56 +0,0 @@ -# from abc import ABC -# from typing import Tuple, Any, List, Optional, Type -# -# from lark.tree import Meta -# -# from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken -# from hcl2.rule_transformer.utils import SerializationOptions -# -# -# class TokenSequenceRule(LarkRule, ABC): -# -# _children: Tuple[TokenSequence] -# -# def __init__(self, children: List[LarkToken], meta: Optional[Meta] = None): -# children = [TokenSequence(children)] -# super().__init__(children, meta) -# -# def serialized_type(self) -> Type: -# return str -# -# def serialize(self, options: SerializationOptions = SerializationOptions()) -> Any: -# return self.serialized_type()(self._children[0].serialize(options)) -# -# -# class IdentifierRule(TokenSequenceRule): -# @staticmethod -# def lark_name() -> str: -# return "identifier" -# -# -# class IntLitRule(TokenSequenceRule): -# @staticmethod -# def 
lark_name() -> str: -# return "int_lit" -# -# def serialized_type(self) -> Type: -# return int -# -# -# class FloatLitRule(TokenSequenceRule): -# @staticmethod -# def lark_name() -> str: -# return "float_lit" -# -# def serialized_type(self) -> Type: -# return float -# -# -# class StringLitRule(TokenSequenceRule): -# @staticmethod -# def lark_name() -> str: -# # TODO actually this is a terminal, but it doesn't matter for lark.Transformer class; -# # nevertheless, try to change it to a rule in the grammar -# return "STRING_LIT" -# -# diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rule_transformer/rules/tokens.py index 18e1ac07..7dd79f63 100644 --- a/hcl2/rule_transformer/rules/tokens.py +++ b/hcl2/rule_transformer/rules/tokens.py @@ -1,54 +1,67 @@ -from typing import Callable, Any +from functools import lru_cache +from typing import Callable, Any, Type from hcl2.rule_transformer.rules.abstract import LarkToken class StringToken(LarkToken): - def __init__(self, name: str, value: Any): + """ + Single run-time base class; every `StringToken["..."]` call returns a + cached subclass whose static `lark_name()` yields the given string. + """ + + @staticmethod + @lru_cache(maxsize=None) + def __build_subclass(name: str) -> Type["StringToken"]: + """Create a subclass with a constant `lark_name`.""" + return type( # type: ignore + f"{name}_TOKEN", + (StringToken,), + { + "__slots__": (), + "lark_name": staticmethod(lambda _n=name: _n), + }, + ) + + def __class_getitem__(cls, name: str) -> Type["StringToken"]: + if not isinstance(name, str): + raise TypeError("StringToken[...] expects a single str argument") + return cls.__build_subclass(name) + + def __init__(self, value: Any) -> None: super().__init__(value) - self._name = name @property - def lark_name(self) -> str: - return self._name - - @property - def serialize_conversion(self) -> Callable: - return str - - -# explicitly define various kinds of string-based tokens -STRING_CHARS_TOKEN = StringToken -ESCAPED_INTERPOLATION_TOKEN = StringToken -BINARY_OP_TOKEN = StringToken -EQ_TOKEN = StringToken -COLON_TOKEN = StringToken -LPAR_TOKEN = StringToken # ( -RPAR_TOKEN = StringToken # ) -LBRACE_TOKEN = StringToken # { -RBRACE_TOKEN = StringToken # } -DOT_TOKEN = StringToken -COMMA_TOKEN = StringToken -QMARK_TOKEN = StringToken -LSQB_TOKEN = StringToken # [ -RSQB_TOKEN = StringToken # ] -INTERP_START_TOKEN = StringToken # ${ -DBLQUOTE_TOKEN = StringToken # " - - -class IdentifierToken(LarkToken): - @property - def lark_name(self) -> str: - return "IDENTIFIER" - - @property - def serialize_conversion(self) -> Callable: + def serialize_conversion(self) -> Callable[[Any], str]: return str -class IntToken(LarkToken): - @property - def lark_name(self) -> str: +# explicitly define various kinds of string-based tokens for type hinting +NAME = StringToken["NAME"] +STRING_CHARS = StringToken["STRING_CHARS"] +ESCAPED_INTERPOLATION = StringToken["ESCAPED_INTERPOLATION"] +BINARY_OP = StringToken["BINARY_OP"] +EQ = StringToken["EQ"] +COLON = StringToken["COLON"] +LPAR = StringToken["LPAR"] +RPAR = StringToken["RPAR"] +LBRACE = StringToken["LBRACE"] +RBRACE = StringToken["RBRACE"] +DOT = StringToken["DOT"] +COMMA = StringToken["COMMA"] +ELLIPSIS = StringToken["ELLIPSIS"] +QMARK = StringToken["QMARK"] +LSQB = StringToken["LSQB"] +RSQB = StringToken["RSQB"] +INTERP_START = StringToken["INTERP_START"] +DBLQUOTE = StringToken["DBLQUOTE"] +ATTR_SPLAT = StringToken["ATTR_SPLAT"] +FULL_SPLAT = StringToken["FULL_SPLAT"] + + +class IntLiteral(LarkToken): + 
@staticmethod + def lark_name() -> str: return "INT_LITERAL" @property @@ -56,9 +69,9 @@ def serialize_conversion(self) -> Callable: return int -class FloatToken(LarkToken): - @property - def lark_name(self) -> str: +class FloatLiteral(LarkToken): + @staticmethod + def lark_name() -> str: return "FLOAT_LITERAL" @property diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rule_transformer/rules/whitespace.py index 65d5dd9c..fa24355c 100644 --- a/hcl2/rule_transformer/rules/whitespace.py +++ b/hcl2/rule_transformer/rules/whitespace.py @@ -7,8 +7,8 @@ class NewLineOrCommentRule(TokenRule): - @property - def lark_name(self) -> str: + @staticmethod + def lark_name() -> str: return "new_line_or_comment" @classmethod diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index 31e88d61..41e970d6 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -14,18 +14,28 @@ ObjectRule, ObjectElemRule, ObjectElemKeyRule, + TupleRule, + ObjectElemKeyExpressionRule, + ObjectElemKeyDotAccessor, ) -from hcl2.rule_transformer.rules.expression import ( +from hcl2.rule_transformer.rules.expressions import ( BinaryTermRule, UnaryOpRule, BinaryOpRule, ExprTermRule, ConditionalRule, ) +from hcl2.rule_transformer.rules.functions import ArgumentsRule, FunctionCallRule from hcl2.rule_transformer.rules.indexing import ( IndexExprTermRule, - SqbIndex, + SqbIndexRule, ShortIndexRule, + GetAttrRule, + GetAttrExprTermRule, + AttrSplatExprTermRule, + AttrSplatRule, + FullSplatRule, + FullSplatExprTermRule, ) from hcl2.rule_transformer.rules.literal_rules import ( FloatLitRule, @@ -36,10 +46,10 @@ ) from hcl2.rule_transformer.rules.strings import InterpolationRule, StringRule from hcl2.rule_transformer.rules.tokens import ( - IdentifierToken, + NAME, + IntLiteral, + FloatLiteral, StringToken, - IntToken, - FloatToken, ) from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule @@ -59,16 +69,16 @@ def __init__(self, discard_new_line_or_comments: bool = False): self.discard_new_line_or_comments = discard_new_line_or_comments def __default_token__(self, token: Token) -> StringToken: - return StringToken(token.type, token.value) + return StringToken[token.type](token.value) - def IDENTIFIER(self, token: Token) -> IdentifierToken: - return IdentifierToken(token.value) + def FLOAT_LITERAL(self, token: Token) -> FloatLiteral: + return FloatLiteral(token.value) - def INT_LITERAL(self, token: Token) -> IntToken: - return IntToken(token.value) + def NAME(self, token: Token) -> NAME: + return NAME(token.value) - def FLOAT_LITERAL(self, token: Token) -> FloatToken: - return FloatToken(token.value) + def INT_LITERAL(self, token: Token) -> IntLiteral: + return IntLiteral(token.value) @v_args(meta=True) def start(self, meta: Meta, args) -> StartRule: @@ -140,6 +150,10 @@ def unary_op(self, meta: Meta, args) -> UnaryOpRule: def binary_op(self, meta: Meta, args) -> BinaryOpRule: return BinaryOpRule(args, meta) + @v_args(meta=True) + def tuple(self, meta: Meta, args) -> TupleRule: + return TupleRule(args, meta) + @v_args(meta=True) def object(self, meta: Meta, args) -> ObjectRule: return ObjectRule(args, meta) @@ -152,14 +166,62 @@ def object_elem(self, meta: Meta, args) -> ObjectElemRule: def object_elem_key(self, meta: Meta, args) -> ObjectElemKeyRule: return ObjectElemKeyRule(args, meta) + @v_args(meta=True) + def object_elem_key_expression( + self, meta: Meta, args + ) -> ObjectElemKeyExpressionRule: + return 
ObjectElemKeyExpressionRule(args, meta) + + @v_args(meta=True) + def object_elem_key_dot_accessor( + self, meta: Meta, args + ) -> ObjectElemKeyDotAccessor: + return ObjectElemKeyDotAccessor(args, meta) + + @v_args(meta=True) + def arguments(self, meta: Meta, args) -> ArgumentsRule: + return ArgumentsRule(args, meta) + + @v_args(meta=True) + def function_call(self, meta: Meta, args) -> FunctionCallRule: + return FunctionCallRule(args, meta) + + # @v_args(meta=True) + # def provider_function_call(self, meta: Meta, args) -> ProviderFunctionCallRule: + # return ProviderFunctionCallRule(args, meta) + @v_args(meta=True) def index_expr_term(self, meta: Meta, args) -> IndexExprTermRule: return IndexExprTermRule(args, meta) @v_args(meta=True) - def braces_index(self, meta: Meta, args) -> SqbIndex: - return SqbIndex(args, meta) + def braces_index(self, meta: Meta, args) -> SqbIndexRule: + return SqbIndexRule(args, meta) @v_args(meta=True) def short_index(self, meta: Meta, args) -> ShortIndexRule: return ShortIndexRule(args, meta) + + @v_args(meta=True) + def get_attr(self, meta: Meta, args) -> GetAttrRule: + return GetAttrRule(args, meta) + + @v_args(meta=True) + def get_attr_expr_term(self, meta: Meta, args) -> GetAttrExprTermRule: + return GetAttrExprTermRule(args, meta) + + @v_args(meta=True) + def attr_splat(self, meta: Meta, args) -> AttrSplatRule: + return AttrSplatRule(args, meta) + + @v_args(meta=True) + def attr_splat_expr_term(self, meta: Meta, args) -> AttrSplatExprTermRule: + return AttrSplatExprTermRule(args, meta) + + @v_args(meta=True) + def full_splat(self, meta: Meta, args) -> FullSplatRule: + return FullSplatRule(args, meta) + + @v_args(meta=True) + def full_splat_expr_term(self, meta: Meta, args) -> FullSplatExprTermRule: + return FullSplatExprTermRule(args, meta) diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index 6a6ed661..8ffeab8b 100644 --- a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -1,15 +1,48 @@ +from contextlib import contextmanager from dataclasses import dataclass, replace +from typing import Generator @dataclass class SerializationOptions: with_comments: bool = True with_meta: bool = False - unwrap_dollar_string: bool = False - - def replace(self, **kwargs) -> "SerializationOptions": + wrap_objects: bool = False + wrap_tuples: bool = False + + +@dataclass +class DeserializationOptions: + pass + + +@dataclass +class SerializationContext: + inside_dollar_string: bool = False + + def replace(self, **kwargs) -> "SerializationContext": return replace(self, **kwargs) - + + @contextmanager + def copy(self, **kwargs) -> Generator["SerializationContext", None, None]: + """Context manager that yields a modified copy of the context""" + modified_context = self.replace(**kwargs) + yield modified_context + + @contextmanager + def modify(self, **kwargs): + original_values = {key: getattr(self, key) for key in kwargs} + + for key, value in kwargs.items(): + setattr(self, key, value) + + try: + yield + finally: + # Restore original values + for key, value in original_values.items(): + setattr(self, key, value) + def is_dollar_string(value: str) -> bool: if not isinstance(value, str): From f0f6fc995624fc19878cfa86743aa899c7344b6c Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Tue, 12 Aug 2025 14:48:52 +0200 Subject: [PATCH 07/11] add JSON -> LarkElement deserializer; batch of other changes --- hcl2/const.py | 1 + hcl2/rule_transformer/deserializer.py | 264 ++++++++++++++++++- hcl2/rule_transformer/rules/abstract.py | 3 +- 
hcl2/rule_transformer/rules/base.py | 23 +- hcl2/rule_transformer/rules/functions.py | 2 +- hcl2/rule_transformer/rules/literal_rules.py | 6 - hcl2/rule_transformer/rules/strings.py | 57 ++-- hcl2/rule_transformer/rules/tokens.py | 72 +++-- hcl2/rule_transformer/transformer.py | 7 +- hcl2/rule_transformer/utils.py | 1 + 10 files changed, 365 insertions(+), 71 deletions(-) diff --git a/hcl2/const.py b/hcl2/const.py index 1d46f35a..1bd4a4ce 100644 --- a/hcl2/const.py +++ b/hcl2/const.py @@ -2,3 +2,4 @@ START_LINE_KEY = "__start_line__" END_LINE_KEY = "__end_line__" +IS_BLOCK = "__is_block__" diff --git a/hcl2/rule_transformer/deserializer.py b/hcl2/rule_transformer/deserializer.py index 5bdcf775..7b834968 100644 --- a/hcl2/rule_transformer/deserializer.py +++ b/hcl2/rule_transformer/deserializer.py @@ -1,7 +1,54 @@ import json +from functools import lru_cache from typing import Any, TextIO, List +from regex import regex + +from hcl2 import parses +from hcl2.const import IS_BLOCK from hcl2.rule_transformer.rules.abstract import LarkElement, LarkRule +from hcl2.rule_transformer.rules.base import ( + BlockRule, + AttributeRule, + BodyRule, + StartRule, +) +from hcl2.rule_transformer.rules.containers import ( + TupleRule, + ObjectRule, + ObjectElemRule, + ObjectElemKeyExpressionRule, + ObjectElemKeyDotAccessor, + ObjectElemKeyRule, +) +from hcl2.rule_transformer.rules.expressions import ExprTermRule +from hcl2.rule_transformer.rules.literal_rules import ( + IdentifierRule, + IntLitRule, + FloatLitRule, +) +from hcl2.rule_transformer.rules.strings import ( + StringRule, + InterpolationRule, + StringPartRule, +) +from hcl2.rule_transformer.rules.tokens import ( + NAME, + EQ, + DBLQUOTE, + STRING_CHARS, + ESCAPED_INTERPOLATION, + INTERP_START, + RBRACE, + IntLiteral, + FloatLiteral, + RSQB, + LSQB, + COMMA, + DOT, + LBRACE, +) +from hcl2.rule_transformer.transformer import RuleTransformer from hcl2.rule_transformer.utils import DeserializationOptions @@ -9,8 +56,13 @@ class Deserializer: def __init__(self, options=DeserializationOptions()): self.options = options + @property + @lru_cache + def _transformer(self) -> RuleTransformer: + return RuleTransformer() + def load_python(self, value: Any) -> LarkElement: - pass + return StartRule([self._deserialize(value)]) def loads(self, value: str) -> LarkElement: return self.load_python(json.loads(value)) @@ -19,13 +71,209 @@ def load(self, file: TextIO) -> LarkElement: return self.loads(file.read()) def _deserialize(self, value: Any) -> LarkElement: - pass + if isinstance(value, dict): + if self._contains_block_marker(value): + elements = self._deserialize_block_elements(value) + return BodyRule(elements) + + return self._deserialize_object(value) + + if isinstance(value, list): + return self._deserialize_list(value) + + return self._deserialize_text(value) + + def _deserialize_block_elements(self, value: dict) -> List[LarkRule]: + children = [] + + for key, value in value.items(): + if self._is_block(value): + # this value is a list of blocks, iterate over each block and deserialize them + for block in value: + children.append(self._deserialize_block(key, block)) + else: + + # otherwise it's just an attribute + if key != IS_BLOCK: + children.append(self._deserialize_attribute(key, value)) + + return children + + def _deserialize_text(self, value) -> LarkRule: + try: + int_val = int(value) + return IntLitRule([IntLiteral(int_val)]) + except ValueError: + pass + + try: + float_val = float(value) + return FloatLitRule([FloatLiteral(float_val)]) + except 
ValueError: + pass + + if isinstance(value, str): + if value.startswith('"') and value.endswith('"'): + return self._deserialize_string(value) + + if self._is_expression(value): + return self._deserialize_expression(value) + + return self._deserialize_identifier(value) + + elif isinstance(value, bool): + return self._deserialize_identifier(str(value).lower()) + + return self._deserialize_identifier(str(value)) + + def _deserialize_identifier(self, value: str) -> IdentifierRule: + return IdentifierRule([NAME(value)]) + + def _deserialize_string(self, value: str) -> StringRule: + result = [] + + pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})") + parts = [part for part in pattern.split(value) if part != ""] + # e.g. 'aaa$${bbb}ccc${"ddd-${eee}"}' -> ['aaa', '$${bbb}', 'ccc', '${"ddd-${eee}"}'] + # 'aa-${"bb-${"cc-${"dd-${5 + 5}"}"}"}' -> ['aa-', '${"bb-${"cc-${"dd-${5 + 5}"}"}"}'] + + for part in parts: + if part == '"': + continue + + if part.startswith('"'): + part = part[1:] + if part.endswith('"'): + part = part[:-1] + + e = self._deserialize_string_part(part) + result.append(e) + + return StringRule([DBLQUOTE(), *result, DBLQUOTE()]) + + def _deserialize_string_part(self, value: str) -> StringPartRule: + if value.startswith("$${") and value.endswith("}"): + return StringPartRule([ESCAPED_INTERPOLATION(value)]) + + if value.startswith("${") and value.endswith("}"): + return StringPartRule( + [ + InterpolationRule( + [INTERP_START(), self._deserialize_expression(value), RBRACE()] + ) + ] + ) + + return StringPartRule([STRING_CHARS(value)]) + + def _deserialize_expression(self, value: str) -> ExprTermRule: + """Deserialize an expression string into an ExprTermRule.""" + # instead of processing expression manually and trying to recognize what kind of expression it is, + # turn it into HCL2 code and parse it with lark: + + # unwrap from ${ and } + value = value[2:-1] + # create HCL2 snippet + value = f"temp = {value}" + # parse the above + parsed_tree = parses(value) + # transform parsed tree into LarkElement tree + rules_tree = self._transformer.transform(parsed_tree) + # extract expression from the tree + return rules_tree.body.children[0].expression + + def _deserialize_block(self, first_label: str, value: dict) -> BlockRule: + """Deserialize a block by extracting labels and body""" + labels = [first_label] + body = value + + # Keep peeling off single-key layers until we hit the body (dict with IS_BLOCK) + while isinstance(body, dict) and not body.get(IS_BLOCK): + non_block_keys = [k for k in body.keys() if k != IS_BLOCK] + if len(non_block_keys) == 1: + # This is another label level + label = non_block_keys[0] + labels.append(label) + body = body[label] + else: + # Multiple keys = this is the body + break + + return BlockRule( + [*[self._deserialize(label) for label in labels], self._deserialize(body)] + ) + + def _deserialize_attribute(self, name: str, value: Any) -> AttributeRule: + children = [ + self._deserialize_identifier(name), + EQ(), + ExprTermRule([self._deserialize(value)]), + ] + return AttributeRule(children) + + def _deserialize_list(self, value: List) -> TupleRule: + children = [] + for element in value: + deserialized = self._deserialize(element) + if not isinstance(deserialized, ExprTermRule): + # whatever an element of the list is, it has to be nested inside ExprTermRule + deserialized = ExprTermRule([deserialized]) + children.append(deserialized) + children.append(COMMA()) + + return TupleRule([LSQB(), *children, RSQB()]) + + def _deserialize_object(self, value: 
dict) -> ObjectRule:
+        children = []
+        for key, value in value.items():
+            children.append(self._deserialize_object_elem(key, value))
+        return ObjectRule([LBRACE(), *children, RBRACE()])
+
+    def _deserialize_object_elem(self, key: str, value: Any) -> ObjectElemRule:
+        if self._is_expression(key):
+            key = ObjectElemKeyExpressionRule([self._deserialize_expression(key)])
+        elif "." in key:
+            parts = key.split(".")
+            children = []
+            for part in parts:
+                children.append(self._deserialize_identifier(part))
+                children.append(DOT())
+            key = ObjectElemKeyDotAccessor(children[:-1])  # without the trailing DOT
+        else:
+            key = self._deserialize_text(key)
+
+        return ObjectElemRule(
+            [
+                ObjectElemKeyRule([key]),
+                EQ(),
+                ExprTermRule([self._deserialize_text(value)]),
+            ]
+        )
+
+    def _is_expression(self, value: str) -> bool:
+        return value.startswith("${") and value.endswith("}")
+
+    def _is_block(self, value: Any) -> bool:
+        """Simple check: if it's a list containing dicts with IS_BLOCK markers"""
+        if not isinstance(value, list) or len(value) == 0:
+            return False
 
-    def _deserialize_dict(self, value: dict) -> LarkRule:
-        pass
+        # Check if any item in the list has IS_BLOCK marker (directly or nested)
+        for item in value:
+            if isinstance(item, dict) and self._contains_block_marker(item):
+                return True
 
-    def _deserialize_list(self, value: List) -> LarkRule:
-        pass
+        return False
 
-    def _deserialize_expression(self, value: str) -> LarkRule:
-        pass
+    def _contains_block_marker(self, obj: dict) -> bool:
+        """Recursively check if a dict contains IS_BLOCK marker anywhere"""
+        if obj.get(IS_BLOCK):
+            return True
+        for value in obj.values():
+            if isinstance(value, dict) and self._contains_block_marker(value):
+                return True
+            if isinstance(value, list):
+                for element in value:
+                    if self._contains_block_marker(element):
+                        return True
+        return False
diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rule_transformer/rules/abstract.py
index e32d9ddb..33dcc9ca 100644
--- a/hcl2/rule_transformer/rules/abstract.py
+++ b/hcl2/rule_transformer/rules/abstract.py
@@ -36,7 +36,7 @@ def serialize(
 
 class LarkToken(LarkElement, ABC):
-    def __init__(self, value: Union[str, int]):
+    def __init__(self, value: Union[str, int, float]):
         self._value = value
         super().__init__()
 
@@ -100,7 +100,6 @@ def __init__(self, children: List[LarkElement], meta: Optional[Meta] = None):
 
         for index, child in enumerate(children):
             if child is not None:
-                print(child)
                 child.set_index(index)
                 child.set_parent(self)
 
diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rule_transformer/rules/base.py
index da74954b..5c8468d4 100644
--- a/hcl2/rule_transformer/rules/base.py
+++ b/hcl2/rule_transformer/rules/base.py
@@ -3,9 +3,11 @@
 
 from lark.tree import Meta
 
-from hcl2.dict_transformer import START_LINE, END_LINE
+from hcl2.const import IS_BLOCK
 from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken
 from hcl2.rule_transformer.rules.expressions import ExpressionRule
+from hcl2.rule_transformer.rules.literal_rules import IdentifierRule
+from hcl2.rule_transformer.rules.strings import StringRule
 from hcl2.rule_transformer.rules.tokens import NAME, EQ
 from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule
 
@@ -42,7 +44,7 @@ class BodyRule(LarkRule):
     _children: List[
         Union[
             NewLineOrCommentRule,
-            # AttributeRule,
+            AttributeRule,
             "BlockRule",
         ]
     ]
@@ -58,6 +60,7 @@ def serialize(
         attributes: List[AttributeRule] = []
         comments = []
         inline_comments = []
+
         for child in self._children:
 
             if isinstance(child, BlockRule):
@@ -116,7
+119,11 @@ def serialize( class BlockRule(LarkRule): - _children: Tuple[BodyRule] + _children: Tuple[ + IdentifierRule, + Optional[Union[IdentifierRule, StringRule]], + BodyRule, + ] def __init__(self, children, meta: Optional[Meta] = None): super().__init__(children, meta) @@ -141,15 +148,11 @@ def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: result = self._body.serialize(options) + if options.explicit_blocks: + result.update({IS_BLOCK: True}) + labels = self._labels for label in reversed(labels[1:]): result = {label.serialize(options): result} - result.update( - { - START_LINE: self._meta.line, - END_LINE: self._meta.end_line, - } - ) - return result diff --git a/hcl2/rule_transformer/rules/functions.py b/hcl2/rule_transformer/rules/functions.py index 412a1667..54958514 100644 --- a/hcl2/rule_transformer/rules/functions.py +++ b/hcl2/rule_transformer/rules/functions.py @@ -78,7 +78,7 @@ def arguments(self) -> Optional[ArgumentsRule]: def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: result = ( - f"{":".join(identifier.serialize(options, context) for identifier in self.identifiers)}" + f"{"::".join(identifier.serialize(options, context) for identifier in self.identifiers)}" f"({self.arguments.serialize(options, context) if self.arguments else ""})" ) if not context.inside_dollar_string: diff --git a/hcl2/rule_transformer/rules/literal_rules.py b/hcl2/rule_transformer/rules/literal_rules.py index db7e8289..baf8546f 100644 --- a/hcl2/rule_transformer/rules/literal_rules.py +++ b/hcl2/rule_transformer/rules/literal_rules.py @@ -43,12 +43,6 @@ def lark_name() -> str: return "float_lit" -class StringPartRule(TokenRule): - @staticmethod - def lark_name() -> str: - return "string_part" - - class BinaryOperatorRule(TokenRule): @staticmethod def lark_name() -> str: diff --git a/hcl2/rule_transformer/rules/strings.py b/hcl2/rule_transformer/rules/strings.py index dc3b85b0..769ad5b9 100644 --- a/hcl2/rule_transformer/rules/strings.py +++ b/hcl2/rule_transformer/rules/strings.py @@ -1,15 +1,13 @@ -from typing import Tuple, Optional, List, Any, Union - -from lark.tree import Meta +from typing import Tuple, List, Any, Union from hcl2.rule_transformer.rules.abstract import LarkRule -from hcl2.rule_transformer.rules.expressions import ExpressionRule, ExprTermRule -from hcl2.rule_transformer.rules.literal_rules import StringPartRule +from hcl2.rule_transformer.rules.expressions import ExpressionRule from hcl2.rule_transformer.rules.tokens import ( INTERP_START, RBRACE, DBLQUOTE, STRING_CHARS, + ESCAPED_INTERPOLATION, ) from hcl2.rule_transformer.utils import ( SerializationOptions, @@ -18,41 +16,58 @@ ) -class StringRule(LarkRule): +class InterpolationRule(LarkRule): - _children: Tuple[DBLQUOTE, List[StringPartRule], DBLQUOTE] + _children: Tuple[ + INTERP_START, + ExpressionRule, + RBRACE, + ] @staticmethod def lark_name() -> str: - return "string" + return "interpolation" @property - def string_parts(self): - return self.children[1:-1] + def expression(self): + return self.children[1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - return '"' + "".join(part.serialize() for part in self.string_parts) + '"' + return to_dollar_string(self.expression.serialize(options)) -class InterpolationRule(LarkRule): +class StringPartRule(LarkRule): + _children: Tuple[Union[STRING_CHARS, ESCAPED_INTERPOLATION, InterpolationRule]] - _children: Tuple[ - INTERP_START, - ExpressionRule, 
- RBRACE, - ] + @staticmethod + def lark_name() -> str: + return "string_part" + + @property + def content(self) -> Union[STRING_CHARS, ESCAPED_INTERPOLATION, InterpolationRule]: + return self._children[0] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return self.content.serialize(options, context) + + +class StringRule(LarkRule): + + _children: Tuple[DBLQUOTE, List[StringPartRule], DBLQUOTE] @staticmethod def lark_name() -> str: - return "interpolation" + return "string" @property - def expression(self): - return self.children[1] + def string_parts(self): + return self.children[1:-1] def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - return to_dollar_string(self.expression.serialize(options)) + return '"' + "".join(part.serialize() for part in self.string_parts) + '"' diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rule_transformer/rules/tokens.py index 7dd79f63..59e524f3 100644 --- a/hcl2/rule_transformer/rules/tokens.py +++ b/hcl2/rule_transformer/rules/tokens.py @@ -1,5 +1,5 @@ from functools import lru_cache -from typing import Callable, Any, Type +from typing import Callable, Any, Type, Optional, Tuple from hcl2.rule_transformer.rules.abstract import LarkToken @@ -10,9 +10,9 @@ class StringToken(LarkToken): cached subclass whose static `lark_name()` yields the given string. """ - @staticmethod + @classmethod @lru_cache(maxsize=None) - def __build_subclass(name: str) -> Type["StringToken"]: + def __build_subclass(cls, name: str) -> Type["StringToken"]: """Create a subclass with a constant `lark_name`.""" return type( # type: ignore f"{name}_TOKEN", @@ -28,7 +28,7 @@ def __class_getitem__(cls, name: str) -> Type["StringToken"]: raise TypeError("StringToken[...] 
expects a single str argument") return cls.__build_subclass(name) - def __init__(self, value: Any) -> None: + def __init__(self, value: Optional[Any] = None): super().__init__(value) @property @@ -36,27 +36,59 @@ def serialize_conversion(self) -> Callable[[Any], str]: return str +class StaticStringToken(LarkToken): + @classmethod + @lru_cache(maxsize=None) + def __build_subclass( + cls, name: str, default_value: str = None + ) -> Type["StringToken"]: + """Create a subclass with a constant `lark_name`.""" + + return type( # type: ignore + f"{name}_TOKEN", + (cls,), + { + "__slots__": (), + "lark_name": staticmethod(lambda _n=name: _n), + "_default_value": default_value, + }, + ) + + def __class_getitem__(cls, value: Tuple[str, str]) -> Type["StringToken"]: + name, default_value = value + return cls.__build_subclass(name, default_value) + + def __init__(self): + super().__init__(getattr(self, "_default_value")) + + @property + def serialize_conversion(self) -> Callable[[Any], str]: + return str + + # explicitly define various kinds of string-based tokens for type hinting +# variable value NAME = StringToken["NAME"] STRING_CHARS = StringToken["STRING_CHARS"] ESCAPED_INTERPOLATION = StringToken["ESCAPED_INTERPOLATION"] BINARY_OP = StringToken["BINARY_OP"] -EQ = StringToken["EQ"] -COLON = StringToken["COLON"] -LPAR = StringToken["LPAR"] -RPAR = StringToken["RPAR"] -LBRACE = StringToken["LBRACE"] -RBRACE = StringToken["RBRACE"] -DOT = StringToken["DOT"] -COMMA = StringToken["COMMA"] -ELLIPSIS = StringToken["ELLIPSIS"] -QMARK = StringToken["QMARK"] -LSQB = StringToken["LSQB"] -RSQB = StringToken["RSQB"] -INTERP_START = StringToken["INTERP_START"] -DBLQUOTE = StringToken["DBLQUOTE"] -ATTR_SPLAT = StringToken["ATTR_SPLAT"] -FULL_SPLAT = StringToken["FULL_SPLAT"] +# static value +EQ = StaticStringToken[("EQ", "=")] +COLON = StaticStringToken[("COLON", ":")] +LPAR = StaticStringToken[("LPAR", "(")] +RPAR = StaticStringToken[("RPAR", ")")] +LBRACE = StaticStringToken[("LBRACE", "{")] +RBRACE = StaticStringToken[("RBRACE", "}")] +DOT = StaticStringToken[("DOT", ".")] +COMMA = StaticStringToken[("COMMA", ",")] +ELLIPSIS = StaticStringToken[("ELLIPSIS", "...")] +QMARK = StaticStringToken[("QMARK", "?")] +LSQB = StaticStringToken[("LSQB", "[")] +RSQB = StaticStringToken[("RSQB", "]")] +INTERP_START = StaticStringToken[("INTERP_START", "${")] +DBLQUOTE = StaticStringToken[("DBLQUOTE", '"')] +ATTR_SPLAT = StaticStringToken[("ATTR_SPLAT", ".*")] +FULL_SPLAT = StaticStringToken[("FULL_SPLAT", "[*]")] class IntLiteral(LarkToken): diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index 41e970d6..a7d91605 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -1,6 +1,4 @@ # pylint: disable=missing-function-docstring,unused-argument -from typing import List, Union - from lark import Token, Tree, v_args, Transformer, Discard from lark.tree import Meta @@ -42,9 +40,12 @@ IntLitRule, IdentifierRule, BinaryOperatorRule, +) +from hcl2.rule_transformer.rules.strings import ( + InterpolationRule, + StringRule, StringPartRule, ) -from hcl2.rule_transformer.rules.strings import InterpolationRule, StringRule from hcl2.rule_transformer.rules.tokens import ( NAME, IntLiteral, diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index 8ffeab8b..404bdcdd 100644 --- a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -9,6 +9,7 @@ class SerializationOptions: with_meta: bool = False wrap_objects: bool = 
False
     wrap_tuples: bool = False
+    explicit_blocks: bool = True
 
 
 @dataclass

From d8ac92d8f41de654218280aeb26f2cf4a45879f7 Mon Sep 17 00:00:00 2001
From: Kamil Kozik
Date: Wed, 27 Aug 2025 11:35:56 +0200
Subject: [PATCH 08/11] add heredoc rules and deserialization; require heredoc
 openers to be on their own separate line in lark grammar; whitespace trimming
 based on current implementation in dict_transformer.py;

---
 hcl2/rule_transformer/deserializer.py  | 32 ++++++++---
 hcl2/rule_transformer/hcl2.lark        |  4 +-
 hcl2/rule_transformer/rules/strings.py | 73 ++++++++++++++++++++++++++
 hcl2/rule_transformer/rules/tokens.py  |  6 ++-
 hcl2/rule_transformer/transformer.py   | 12 ++++-
 hcl2/rule_transformer/utils.py         |  8 ++-
 6 files changed, 123 insertions(+), 12 deletions(-)

diff --git a/hcl2/rule_transformer/deserializer.py b/hcl2/rule_transformer/deserializer.py
index 7b834968..a17a9510 100644
--- a/hcl2/rule_transformer/deserializer.py
+++ b/hcl2/rule_transformer/deserializer.py
@@ -1,6 +1,6 @@
 import json
 from functools import lru_cache
-from typing import Any, TextIO, List
+from typing import Any, TextIO, List, Union
 
 from regex import regex
 
@@ -31,6 +31,8 @@
     StringRule,
     InterpolationRule,
     StringPartRule,
+    HeredocTemplateRule,
+    HeredocTrimTemplateRule,
 )
 from hcl2.rule_transformer.rules.tokens import (
     NAME,
@@ -47,9 +49,11 @@
     COMMA,
     DOT,
     LBRACE,
+    HEREDOC_TRIM_TEMPLATE,
+    HEREDOC_TEMPLATE,
 )
 from hcl2.rule_transformer.transformer import RuleTransformer
-from hcl2.rule_transformer.utils import DeserializationOptions
+from hcl2.rule_transformer.utils import DeserializationOptions, HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN
 
 
 class Deserializer:
@@ -99,7 +103,7 @@ def _deserialize_block_elements(self, value: dict) -> List[LarkRule]:
 
         return children
 
-    def _deserialize_text(self, value) -> LarkRule:
+    def _deserialize_text(self, value: Any) -> LarkRule:
         try:
             int_val = int(value)
             return IntLitRule([IntLiteral(int_val)])
@@ -114,6 +118,16 @@ def _deserialize_text(self, value) -> LarkRule:
 
         if isinstance(value, str):
             if value.startswith('"') and value.endswith('"'):
+                if not self.options.heredocs_to_strings and value.startswith('"<<-'):
+                    match = HEREDOC_TRIM_PATTERN.match(value[1:-1])
+                    if match:
+                        return self._deserialize_heredoc(value[1:-1], True)
+
+                if not self.options.heredocs_to_strings and value.startswith('"<<'):
+                    match = HEREDOC_PATTERN.match(value[1:-1])
+                    if match:
+                        return self._deserialize_heredoc(value[1:-1], False)
+
                 return self._deserialize_string(value)
 
             if self._is_expression(value):
@@ -131,11 +145,12 @@ def _deserialize_identifier(self, value: str) -> IdentifierRule:
 
     def _deserialize_string(self, value: str) -> StringRule:
         result = []
-
-        pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})")
-        parts = [part for part in pattern.split(value) if part != ""]
+        # split string into individual parts based on lark grammar
         # e.g.
'aaa$${bbb}ccc${"ddd-${eee}"}' -> ['aaa', '$${bbb}', 'ccc', '${"ddd-${eee}"}'] # 'aa-${"bb-${"cc-${"dd-${5 + 5}"}"}"}' -> ['aa-', '${"bb-${"cc-${"dd-${5 + 5}"}"}"}'] + pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})") + parts = [part for part in pattern.split(value) if part != ""] + for part in parts: if part == '"': @@ -166,6 +181,11 @@ def _deserialize_string_part(self, value: str) -> StringPartRule: return StringPartRule([STRING_CHARS(value)]) + def _deserialize_heredoc(self, value: str, trim: bool) -> Union[HeredocTemplateRule, HeredocTrimTemplateRule]: + if trim: + return HeredocTrimTemplateRule([HEREDOC_TRIM_TEMPLATE(value)]) + return HeredocTemplateRule([HEREDOC_TEMPLATE(value)]) + def _deserialize_expression(self, value: str) -> ExprTermRule: """Deserialize an expression string into an ExprTermRule.""" # instead of processing expression manually and trying to recognize what kind of expression it is, diff --git a/hcl2/rule_transformer/hcl2.lark b/hcl2/rule_transformer/hcl2.lark index 3f8d913e..24140ada 100644 --- a/hcl2/rule_transformer/hcl2.lark +++ b/hcl2/rule_transformer/hcl2.lark @@ -67,8 +67,8 @@ ELLIPSIS : "..." COLONS: "::" // Heredocs -HEREDOC_TEMPLATE : /<<(?P[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc)\n/ -HEREDOC_TEMPLATE_TRIM : /<<-(?P[a-zA-Z][a-zA-Z0-9._-]+)\n?(?:.|\n)*?\n\s*(?P=heredoc_trim)\n/ +HEREDOC_TEMPLATE : /<<(?P[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?\n\s*(?P=heredoc)\n/ +HEREDOC_TEMPLATE_TRIM : /<<-(?P[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?\n\s*(?P=heredoc_trim)\n/ // Ignore whitespace (but not newlines, as they're significant in HCL) %ignore /[ \t]+/ diff --git a/hcl2/rule_transformer/rules/strings.py b/hcl2/rule_transformer/rules/strings.py index 769ad5b9..4e28e976 100644 --- a/hcl2/rule_transformer/rules/strings.py +++ b/hcl2/rule_transformer/rules/strings.py @@ -1,3 +1,4 @@ +import sys from typing import Tuple, List, Any, Union from hcl2.rule_transformer.rules.abstract import LarkRule @@ -8,11 +9,15 @@ DBLQUOTE, STRING_CHARS, ESCAPED_INTERPOLATION, + HEREDOC_TEMPLATE, + HEREDOC_TRIM_TEMPLATE, ) from hcl2.rule_transformer.utils import ( SerializationOptions, SerializationContext, to_dollar_string, + HEREDOC_TRIM_PATTERN, + HEREDOC_PATTERN, ) @@ -71,3 +76,71 @@ def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: return '"' + "".join(part.serialize() for part in self.string_parts) + '"' + + +class HeredocTemplateRule(LarkRule): + + _children: Tuple[HEREDOC_TEMPLATE] + _trim_chars = "\n\t " + + + @staticmethod + def lark_name() -> str: + return "heredoc_template" + + @property + def heredoc(self): + return self.children[0] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + heredoc = self.heredoc.serialize(options, context) + + if not options.preserve_heredocs: + match = HEREDOC_PATTERN.match(heredoc) + if not match: + raise RuntimeError(f"Invalid Heredoc token: {heredoc}") + heredoc = match.group(2) + + result = heredoc.rstrip(self._trim_chars) + return f'"{result}"' + + +class HeredocTrimTemplateRule(HeredocTemplateRule): + + _children: Tuple[HEREDOC_TRIM_TEMPLATE] + + @staticmethod + def lark_name() -> str: + return "heredoc_trim_template" + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + # See https://github.com/hashicorp/hcl2/blob/master/hcl/hclsyntax/spec.md#template-expressions + # This is a special version of heredocs that are declared with "<<-" + # This will calculate the minimum number of 
leading spaces in each line of a heredoc + # and then remove that number of spaces from each line + + heredoc = self.heredoc.serialize(options, context) + + if not options.preserve_heredocs: + match = HEREDOC_TRIM_PATTERN.match(heredoc) + if not match: + raise RuntimeError(f"Invalid Heredoc token: {heredoc}") + heredoc = match.group(2) + + heredoc = heredoc.rstrip(self._trim_chars) + lines = heredoc.split("\n") + + # calculate the min number of leading spaces in each line + min_spaces = sys.maxsize + for line in lines: + leading_spaces = len(line) - len(line.lstrip(" ")) + min_spaces = min(min_spaces, leading_spaces) + + # trim off that number of leading spaces from each line + lines = [line[min_spaces:] for line in lines] + return '"' + "\n".join(lines) + '"' + \ No newline at end of file diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rule_transformer/rules/tokens.py index 59e524f3..5b1959f3 100644 --- a/hcl2/rule_transformer/rules/tokens.py +++ b/hcl2/rule_transformer/rules/tokens.py @@ -67,12 +67,14 @@ def serialize_conversion(self) -> Callable[[Any], str]: # explicitly define various kinds of string-based tokens for type hinting -# variable value +# variable values NAME = StringToken["NAME"] STRING_CHARS = StringToken["STRING_CHARS"] ESCAPED_INTERPOLATION = StringToken["ESCAPED_INTERPOLATION"] BINARY_OP = StringToken["BINARY_OP"] -# static value +HEREDOC_TEMPLATE = STRING_CHARS["HEREDOC_TEMPLATE"] +HEREDOC_TRIM_TEMPLATE = STRING_CHARS["HEREDOC_TRIM_TEMPLATE"] +# static values EQ = StaticStringToken[("EQ", "=")] COLON = StaticStringToken[("COLON", ":")] LPAR = StaticStringToken[("LPAR", "(")] diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index a7d91605..37ae445c 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -44,7 +44,9 @@ from hcl2.rule_transformer.rules.strings import ( InterpolationRule, StringRule, - StringPartRule, + StringPartRule, + HeredocTemplateRule, + HeredocTrimTemplateRule, ) from hcl2.rule_transformer.rules.tokens import ( NAME, @@ -127,6 +129,14 @@ def string_part(self, meta: Meta, args) -> StringPartRule: def interpolation(self, meta: Meta, args) -> InterpolationRule: return InterpolationRule(args, meta) + @v_args(meta=True) + def heredoc_template(self, meta: Meta, args) -> HeredocTemplateRule: + return HeredocTemplateRule(args, meta) + + @v_args(meta=True) + def heredoc_template_trim(self, meta: Meta, args) -> HeredocTrimTemplateRule: + return HeredocTrimTemplateRule(args, meta) + @v_args(meta=True) def expr_term(self, meta: Meta, args) -> ExprTermRule: return ExprTermRule(args, meta) diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index 404bdcdd..98370ca3 100644 --- a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -1,7 +1,12 @@ +import re from contextlib import contextmanager from dataclasses import dataclass, replace from typing import Generator +HEREDOC_PATTERN = re.compile(r"<<([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) +HEREDOC_TRIM_PATTERN = re.compile(r"<<-([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) + + @dataclass class SerializationOptions: @@ -10,11 +15,12 @@ class SerializationOptions: wrap_objects: bool = False wrap_tuples: bool = False explicit_blocks: bool = True + preserve_heredocs: bool = True @dataclass class DeserializationOptions: - pass + heredocs_to_strings: bool = False @dataclass From 5932662bfe5045c2e944f7c9e3fc55c94077c4c9 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Mon, 15 Sep 2025 
12:26:59 +0200 Subject: [PATCH 09/11] add `for` expressions rules --- .../rule_transformer/rules/for_expressions.py | 283 ++++++++++++++++++ hcl2/rule_transformer/rules/functions.py | 2 +- hcl2/rule_transformer/rules/tokens.py | 4 + hcl2/rule_transformer/transformer.py | 36 ++- 4 files changed, 320 insertions(+), 5 deletions(-) create mode 100644 hcl2/rule_transformer/rules/for_expressions.py diff --git a/hcl2/rule_transformer/rules/for_expressions.py b/hcl2/rule_transformer/rules/for_expressions.py new file mode 100644 index 00000000..18abe6c8 --- /dev/null +++ b/hcl2/rule_transformer/rules/for_expressions.py @@ -0,0 +1,283 @@ +from typing import Any, Tuple, Optional, List + +from lark.tree import Meta + +from hcl2.rule_transformer.rules.abstract import LarkRule, LarkElement +from hcl2.rule_transformer.rules.expressions import ExpressionRule +from hcl2.rule_transformer.rules.literal_rules import IdentifierRule +from hcl2.rule_transformer.rules.tokens import ( + LSQB, + RSQB, + LBRACE, + RBRACE, + FOR, + IN, + IF, + COMMA, + COLON, + ELLIPSIS, + FOR_OBJECT_ARROW, +) +from hcl2.rule_transformer.rules.whitespace import ( + NewLineOrCommentRule, + InlineCommentMixIn, +) +from hcl2.rule_transformer.utils import ( + SerializationOptions, + SerializationContext, + to_dollar_string, +) + + +class ForIntroRule(InlineCommentMixIn): + """Rule for the intro part of for expressions: 'for key, value in collection :'""" + + _children: Tuple[ + FOR, + Optional[NewLineOrCommentRule], + IdentifierRule, + Optional[COMMA], + Optional[IdentifierRule], + Optional[NewLineOrCommentRule], + IN, + Optional[NewLineOrCommentRule], + ExpressionRule, + Optional[NewLineOrCommentRule], + COLON, + Optional[NewLineOrCommentRule], + ] + + @staticmethod + def lark_name() -> str: + return "for_intro" + + def __init__(self, children, meta: Optional[Meta] = None): + # Insert null comments at positions where they might be missing + self._possibly_insert_null_second_identifier(children) + self._possibly_insert_null_comments(children, [1, 5, 7, 9, 11]) + super().__init__(children, meta) + + def _possibly_insert_null_second_identifier(self, children: List[LarkRule]): + second_identifier_present = ( + len([child for child in children if isinstance(child, IdentifierRule)]) == 2 + ) + if not second_identifier_present: + children.insert(3, None) + children.insert(4, None) + + @property + def first_iterator(self) -> IdentifierRule: + """Returns the first iterator""" + return self._children[2] + + @property + def second_iterator(self) -> Optional[IdentifierRule]: + """Returns the second iterator or None if not present""" + return self._children[4] + + @property + def iterable(self) -> ExpressionRule: + """Returns the collection expression being iterated over""" + return self._children[8] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> str: + result = "for " + + result += f"{self.first_iterator.serialize(options, context)}" + if self.second_iterator: + result += f", {self.second_iterator.serialize(options, context)}" + + result += f" in {self.iterable.serialize(options, context)} : " + + return result + + +class ForCondRule(InlineCommentMixIn): + """Rule for the optional condition in for expressions: 'if condition'""" + + _children: Tuple[ + IF, + Optional[NewLineOrCommentRule], + ExpressionRule, # condition expression + ] + + @staticmethod + def lark_name() -> str: + return "for_cond" + + def __init__(self, children, meta: Optional[Meta] = None): + 
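        # Normalize children to the fixed (IF, Optional[comment], expression)
+        # shape declared above so `condition_expr` can index positionally;
+        # e.g. for `if v != null` the parser emits no comment node, and None
+        # is inserted at index 1 as a placeholder.
+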
self._possibly_insert_null_comments(children, [1]) + super().__init__(children, meta) + + @property + def condition_expr(self) -> ExpressionRule: + """Returns the condition expression""" + return self._children[2] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> str: + return f"if {self.condition_expr.serialize(options, context)}" + + +class ForTupleExprRule(ExpressionRule): + """Rule for tuple/array for expressions: [for item in items : expression]""" + + _children: Tuple[ + LSQB, + Optional[NewLineOrCommentRule], + ForIntroRule, + Optional[NewLineOrCommentRule], + ExpressionRule, + Optional[NewLineOrCommentRule], + Optional[ForCondRule], + Optional[NewLineOrCommentRule], + RSQB, + ] + + @staticmethod + def lark_name() -> str: + return "for_tuple_expr" + + def __init__(self, children, meta: Optional[Meta] = None): + self._possibly_insert_null_comments(children, [1, 3, 5, 7]) + self._possibly_insert_null_condition(children) + super().__init__(children, meta) + + def _possibly_insert_null_condition(self, children: List[LarkElement]): + if not len([child for child in children if isinstance(child, ForCondRule)]): + children.insert(6, None) + + @property + def for_intro(self) -> ForIntroRule: + """Returns the for intro rule""" + return self._children[2] + + @property + def value_expr(self) -> ExpressionRule: + """Returns the value expression""" + return self._children[4] + + @property + def condition(self) -> Optional[ForCondRule]: + """Returns the optional condition rule""" + return self._children[6] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + + result = "[" + + with context.modify(inside_dollar_string=True): + result += self.for_intro.serialize(options, context) + result += self.value_expr.serialize(options, context) + + if self.condition is not None: + result += f" {self.condition.serialize(options, context)}" + + result += "]" + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result + + +class ForObjectExprRule(ExpressionRule): + """Rule for object for expressions: {for key, value in items : key => value}""" + + _children: Tuple[ + LBRACE, + Optional[NewLineOrCommentRule], + ForIntroRule, + Optional[NewLineOrCommentRule], + ExpressionRule, + FOR_OBJECT_ARROW, + Optional[NewLineOrCommentRule], + ExpressionRule, + Optional[NewLineOrCommentRule], + Optional[ELLIPSIS], + Optional[NewLineOrCommentRule], + Optional[ForCondRule], + Optional[NewLineOrCommentRule], + RBRACE, + ] + + @staticmethod + def lark_name() -> str: + return "for_object_expr" + + def __init__(self, children, meta: Optional[Meta] = None): + self._possibly_insert_null_comments(children, [1, 3, 6, 8, 10, 12]) + self._possibly_insert_null_optionals(children) + super().__init__(children, meta) + + def _possibly_insert_null_optionals(self, children: List[LarkElement]): + has_ellipsis = False + has_condition = False + + for child in children: + # if not has_ellipsis and isinstance(child, ELLIPSIS): + if ( + has_ellipsis is False + and child is not None + and child.lark_name() == ELLIPSIS.lark_name() + ): + has_ellipsis = True + if not has_condition and isinstance(child, ForCondRule): + has_condition = True + + if not has_ellipsis: + children.insert(9, None) + + if not has_condition: + children.insert(11, None) + + @property + def for_intro(self) -> ForIntroRule: + """Returns the for intro rule""" + return self._children[2] + + @property + def key_expr(self) -> ExpressionRule: + """Returns the 
key expression""" + return self._children[4] + + @property + def value_expr(self) -> ExpressionRule: + """Returns the value expression""" + return self._children[7] + + @property + def ellipsis(self) -> Optional[ELLIPSIS]: + """Returns the optional ellipsis token""" + return self._children[9] + + @property + def condition(self) -> Optional[ForCondRule]: + """Returns the optional condition rule""" + return self._children[11] + + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + result = "{" + with context.modify(inside_dollar_string=True): + result += self.for_intro.serialize(options, context) + result += f"{self.key_expr.serialize(options, context)} => " + + result += self.value_expr.serialize( + SerializationOptions(wrap_objects=True), context + ) + + if self.ellipsis is not None: + result += self.ellipsis.serialize(options, context) + + if self.condition is not None: + result += f" {self.condition.serialize(options, context)}" + + result += "}" + if not context.inside_dollar_string: + result = to_dollar_string(result) + return result diff --git a/hcl2/rule_transformer/rules/functions.py b/hcl2/rule_transformer/rules/functions.py index 54958514..b25fed62 100644 --- a/hcl2/rule_transformer/rules/functions.py +++ b/hcl2/rule_transformer/rules/functions.py @@ -40,7 +40,7 @@ def arguments(self) -> List[ExpressionRule]: return [child for child in self._children if isinstance(child, ExpressionRule)] def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: - result = ", ".join([argument.serialize(options, context) for argument in self.arguments]) + result = ", ".join([str(argument.serialize(options, context)) for argument in self.arguments]) if self.has_ellipsis: result += " ..." 
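        # A sketch of the round-trip (exact spacing depends on the child
+        # serializers): {for k, v in var.tags : k => v} serializes to
+        # "${{for k, v in var.tags : k => v}}" at the top level, and is left
+        # unwrapped when already inside a dollar string.
+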
return result diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rule_transformer/rules/tokens.py index 5b1959f3..67d53fcf 100644 --- a/hcl2/rule_transformer/rules/tokens.py +++ b/hcl2/rule_transformer/rules/tokens.py @@ -91,6 +91,10 @@ def serialize_conversion(self) -> Callable[[Any], str]: DBLQUOTE = StaticStringToken[("DBLQUOTE", '"')] ATTR_SPLAT = StaticStringToken[("ATTR_SPLAT", ".*")] FULL_SPLAT = StaticStringToken[("FULL_SPLAT", "[*]")] +FOR = StaticStringToken[("FOR", "for")] +IN = StaticStringToken[("IN", "in")] +IF = StaticStringToken[("IF", "if")] +FOR_OBJECT_ARROW = StaticStringToken[("FOR_OBJECT_ARROW", "=>")] class IntLiteral(LarkToken): diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index 37ae445c..1ab1dfda 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -23,6 +23,12 @@ ExprTermRule, ConditionalRule, ) +from hcl2.rule_transformer.rules.for_expressions import ( + ForTupleExprRule, + ForObjectExprRule, + ForIntroRule, + ForCondRule, +) from hcl2.rule_transformer.rules.functions import ArgumentsRule, FunctionCallRule from hcl2.rule_transformer.rules.indexing import ( IndexExprTermRule, @@ -40,12 +46,13 @@ IntLitRule, IdentifierRule, BinaryOperatorRule, + KeywordRule, ) from hcl2.rule_transformer.rules.strings import ( InterpolationRule, StringRule, - StringPartRule, - HeredocTemplateRule, + StringPartRule, + HeredocTemplateRule, HeredocTrimTemplateRule, ) from hcl2.rule_transformer.rules.tokens import ( @@ -72,6 +79,7 @@ def __init__(self, discard_new_line_or_comments: bool = False): self.discard_new_line_or_comments = discard_new_line_or_comments def __default_token__(self, token: Token) -> StringToken: + # TODO make this return StaticStringToken where applicable return StringToken[token.type](token.value) def FLOAT_LITERAL(self, token: Token) -> FloatLiteral: @@ -109,6 +117,10 @@ def new_line_or_comment(self, meta: Meta, args) -> NewLineOrCommentRule: def identifier(self, meta: Meta, args) -> IdentifierRule: return IdentifierRule(args, meta) + @v_args(meta=True) + def keyword(self, meta: Meta, args) -> KeywordRule: + return KeywordRule(args, meta) + @v_args(meta=True) def int_lit(self, meta: Meta, args) -> IntLitRule: return IntLitRule(args, meta) @@ -132,11 +144,11 @@ def interpolation(self, meta: Meta, args) -> InterpolationRule: @v_args(meta=True) def heredoc_template(self, meta: Meta, args) -> HeredocTemplateRule: return HeredocTemplateRule(args, meta) - + @v_args(meta=True) def heredoc_template_trim(self, meta: Meta, args) -> HeredocTrimTemplateRule: return HeredocTrimTemplateRule(args, meta) - + @v_args(meta=True) def expr_term(self, meta: Meta, args) -> ExprTermRule: return ExprTermRule(args, meta) @@ -236,3 +248,19 @@ def full_splat(self, meta: Meta, args) -> FullSplatRule: @v_args(meta=True) def full_splat_expr_term(self, meta: Meta, args) -> FullSplatExprTermRule: return FullSplatExprTermRule(args, meta) + + @v_args(meta=True) + def for_tuple_expr(self, meta: Meta, args) -> ForTupleExprRule: + return ForTupleExprRule(args, meta) + + @v_args(meta=True) + def for_object_expr(self, meta: Meta, args) -> ForObjectExprRule: + return ForObjectExprRule(args, meta) + + @v_args(meta=True) + def for_intro(self, meta: Meta, args) -> ForIntroRule: + return ForIntroRule(args, meta) + + @v_args(meta=True) + def for_cond(self, meta: Meta, args) -> ForCondRule: + return ForCondRule(args, meta) From 107fcb223f176793e04aa750f2c120cb38d00afa Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: 
Mon, 29 Sep 2025 13:10:35 +0200 Subject: [PATCH 10/11] add Lark AST -> HCL2 reconstructor and LarkTree formatter; various other fixes and changes: * preserve order of serialized attributes and blocks * make RuleTransformer.__default_token__ differentiate between StringToken and StaticStringToken * add separate ProviderFunctionCallRule class for more accurate reconstruction --- hcl2/rule_transformer/deserializer.py | 120 +++++--- hcl2/rule_transformer/formatter.py | 262 ++++++++++++++++++ hcl2/rule_transformer/reconstructor.py | 204 ++++++++++++++ hcl2/rule_transformer/rules/abstract.py | 5 +- hcl2/rule_transformer/rules/base.py | 48 ++-- hcl2/rule_transformer/rules/containers.py | 69 +++-- hcl2/rule_transformer/rules/expressions.py | 9 +- .../rule_transformer/rules/for_expressions.py | 92 +++--- hcl2/rule_transformer/rules/functions.py | 63 +++-- hcl2/rule_transformer/rules/indexing.py | 2 +- hcl2/rule_transformer/rules/tokens.py | 12 +- hcl2/rule_transformer/rules/whitespace.py | 9 +- hcl2/rule_transformer/transformer.py | 3 + hcl2/rule_transformer/utils.py | 6 - 14 files changed, 738 insertions(+), 166 deletions(-) create mode 100644 hcl2/rule_transformer/formatter.py create mode 100644 hcl2/rule_transformer/reconstructor.py diff --git a/hcl2/rule_transformer/deserializer.py b/hcl2/rule_transformer/deserializer.py index a17a9510..56e1ad44 100644 --- a/hcl2/rule_transformer/deserializer.py +++ b/hcl2/rule_transformer/deserializer.py @@ -1,6 +1,8 @@ import json +from abc import ABC, abstractmethod +from dataclasses import dataclass from functools import lru_cache -from typing import Any, TextIO, List, Union +from typing import Any, TextIO, List, Union, Optional from regex import regex @@ -31,7 +33,7 @@ StringRule, InterpolationRule, StringPartRule, - HeredocTemplateRule, + HeredocTemplateRule, HeredocTrimTemplateRule, ) from hcl2.rule_transformer.rules.tokens import ( @@ -51,14 +53,38 @@ LBRACE, HEREDOC_TRIM_TEMPLATE, HEREDOC_TEMPLATE, + COLON, ) +from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule from hcl2.rule_transformer.transformer import RuleTransformer -from hcl2.rule_transformer.utils import DeserializationOptions, HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN +from hcl2.rule_transformer.utils import HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN -class Deserializer: - def __init__(self, options=DeserializationOptions()): - self.options = options +@dataclass +class DeserializerOptions: + heredocs_to_strings: bool = False + indent_length: int = 2 + object_elements_colon: bool = False + object_elements_trailing_comma: bool = True + + +class LarkElementTreeDeserializer(ABC): + def __init__(self, options: DeserializerOptions = None): + self.options = options or DeserializerOptions() + + @abstractmethod + def loads(self, value: str) -> LarkElement: + raise NotImplementedError() + + def load(self, file: TextIO) -> LarkElement: + return self.loads(file.read()) + + +class BaseDeserializer(LarkElementTreeDeserializer): + def __init__(self, options=None): + super().__init__(options) + self._current_line = 1 + self._last_new_line: Optional[NewLineOrCommentRule] = None @property @lru_cache @@ -66,19 +92,23 @@ def _transformer(self) -> RuleTransformer: return RuleTransformer() def load_python(self, value: Any) -> LarkElement: - return StartRule([self._deserialize(value)]) + result = StartRule([self._deserialize(value)]) + return result def loads(self, value: str) -> LarkElement: return self.load_python(json.loads(value)) - def load(self, file: TextIO) -> LarkElement: - return 
self.loads(file.read())
 
     def _deserialize(self, value: Any) -> LarkElement:
         if isinstance(value, dict):
             if self._contains_block_marker(value):
-                elements = self._deserialize_block_elements(value)
-                return BodyRule(elements)
+
+                children = []
+
+                block_elements = self._deserialize_block_elements(value)
+                for element in block_elements:
+                    children.append(element)
+
+                return BodyRule(children)
 
             return self._deserialize_object(value)
 
@@ -89,14 +119,13 @@ def _deserialize_block_elements(self, value: dict) -> List[LarkRule]:
         children = []
-
         for key, value in value.items():
             if self._is_block(value):
                 # this value is a list of blocks, iterate over each block and deserialize them
                 for block in value:
                     children.append(self._deserialize_block(key, block))
 
-            else:
+            else:  # otherwise it's just an attribute
                 if key != IS_BLOCK:
                     children.append(self._deserialize_attribute(key, value))
 
@@ -106,28 +135,28 @@ def _deserialize_text(self, value: Any) -> LarkRule:
+        if isinstance(value, bool):
+            # booleans must be handled before int(), since int(True) == 1
+            # would otherwise turn them into integer literals
+            return self._deserialize_identifier(str(value).lower())
         try:
             int_val = int(value)
+            if "." in str(value):
+                return FloatLitRule([FloatLiteral(float(value))])
             return IntLitRule([IntLiteral(int_val)])
         except ValueError:
             pass
 
-        try:
-            float_val = float(value)
-            return FloatLitRule([FloatLiteral(float_val)])
-        except ValueError:
-            pass
-
         if isinstance(value, str):
             if value.startswith('"') and value.endswith('"'):
                 if not self.options.heredocs_to_strings and value.startswith('"<<-'):
                     match = HEREDOC_TRIM_PATTERN.match(value[1:-1])
                     if match:
                         return self._deserialize_heredoc(value[1:-1], True)
-
+
                 if not self.options.heredocs_to_strings and value.startswith('"<<'):
                     match = HEREDOC_PATTERN.match(value[1:-1])
                     if match:
                         return self._deserialize_heredoc(value[1:-1], False)
-
+
                 return self._deserialize_string(value)
 
             if self._is_expression(value):
@@ -151,7 +176,6 @@ def _deserialize_identifier(self, value: str) -> IdentifierRule:
 
     def _deserialize_string(self, value: str) -> StringRule:
         result = []
         # split string into individual parts based on lark grammar
         # e.g. 'aaa$${bbb}ccc${"ddd-${eee}"}' -> ['aaa', '$${bbb}', 'ccc', '${"ddd-${eee}"}']
         #      'aa-${"bb-${"cc-${"dd-${5 + 5}"}"}"}' -> ['aa-', '${"bb-${"cc-${"dd-${5 + 5}"}"}"}']
         pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})")
         parts = [part for part in pattern.split(value) if part != ""]
-
         for part in parts:
             if part == '"':
                 continue
@@ -181,10 +205,12 @@ def _deserialize_string_part(self, value: str) -> StringPartRule:
 
         return StringPartRule([STRING_CHARS(value)])
 
-    def _deserialize_heredoc(self, value: str, trim: bool) -> Union[HeredocTemplateRule, HeredocTrimTemplateRule]:
+    def _deserialize_heredoc(
+        self, value: str, trim: bool
+    ) -> Union[HeredocTemplateRule, HeredocTrimTemplateRule]:
         if trim:
             return HeredocTrimTemplateRule([HEREDOC_TRIM_TEMPLATE(value)])
-        return HeredocTemplateRule([HEREDOC_TEMPLATE(value)])
+        return HeredocTemplateRule([HEREDOC_TEMPLATE(value)])
 
     def _deserialize_expression(self, value: str) -> ExprTermRule:
         """Deserialize an expression string into an ExprTermRule."""
         # instead of processing expression manually and trying to recognize what kind of expression it is,
@@ -200,7 +226,9 @@ def _deserialize_expression(self, value: str) -> ExprTermRule:
         # transform parsed tree into LarkElement tree
         rules_tree = self._transformer.transform(parsed_tree)
         # extract expression from the tree
-        return rules_tree.body.children[0].expression
+        result = rules_tree.body.children[0].expression
+
+        return result
 
     def _deserialize_block(self, first_label: str, value: dict) -> BlockRule:
         """Deserialize a block by extracting labels and body"""
         labels = [first_label]
         body = value
@@ -220,14 +248,24 @@
                 break
 
         return BlockRule(
-            [*[self._deserialize(label) for label in labels], self._deserialize(body)]
+            [
+                *[self._deserialize(label) for label in labels],
+                LBRACE(),
+                self._deserialize(body),
+                RBRACE(),
+            ]
         )
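+
+    # A sketch of the intended shape (the nested dict below is illustrative;
+    # IS_BLOCK is the marker key imported from hcl2.const):
+    #   _deserialize_block("resource", {"aws_s3_bucket": {"bucket": {IS_BLOCK: True, "acl": "private"}}})
+    # peels off the labels ["resource", "aws_s3_bucket", "bucket"] and wraps
+    # the innermost dict body in LBRACE()/RBRACE().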
def _deserialize_attribute(self, name: str, value: Any) -> AttributeRule: + expr_term = self._deserialize(value) + + if not isinstance(expr_term, ExprTermRule): + expr_term = ExprTermRule([expr_term]) + children = [ self._deserialize_identifier(name), EQ(), - ExprTermRule([self._deserialize(value)]), + expr_term, ] return AttributeRule(children) @@ -247,11 +285,21 @@ def _deserialize_object(self, value: dict) -> ObjectRule: children = [] for key, value in value.items(): children.append(self._deserialize_object_elem(key, value)) + + if self.options.object_elements_trailing_comma: + children.append(COMMA()) + return ObjectRule([LBRACE(), *children, RBRACE()]) def _deserialize_object_elem(self, key: str, value: Any) -> ObjectElemRule: if self._is_expression(key): - key = ObjectElemKeyExpressionRule([self._deserialize_expression(key)]) + key = ObjectElemKeyExpressionRule( + [ + child + for child in self._deserialize_expression(key).children + if child is not None + ] + ) elif "." in key: parts = key.split(".") children = [] @@ -262,13 +310,13 @@ def _deserialize_object_elem(self, key: str, value: Any) -> ObjectElemRule: else: key = self._deserialize_text(key) - return ObjectElemRule( - [ - ObjectElemKeyRule([key]), - EQ(), - ExprTermRule([self._deserialize_text(value)]), - ] - ) + result = [ + ObjectElemKeyRule([key]), + COLON() if self.options.object_elements_colon else EQ(), + ExprTermRule([self._deserialize(value)]), + ] + + return ObjectElemRule(result) def _is_expression(self, value: str) -> bool: return value.startswith("${") and value.endswith("}") diff --git a/hcl2/rule_transformer/formatter.py b/hcl2/rule_transformer/formatter.py new file mode 100644 index 00000000..ad0247dc --- /dev/null +++ b/hcl2/rule_transformer/formatter.py @@ -0,0 +1,262 @@ +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import List + +from hcl2.rule_transformer.rules.abstract import LarkElement +from hcl2.rule_transformer.rules.base import ( + StartRule, + BlockRule, + AttributeRule, + BodyRule, +) +from hcl2.rule_transformer.rules.containers import ObjectRule, ObjectElemRule, TupleRule +from hcl2.rule_transformer.rules.expressions import ExprTermRule, ExpressionRule +from hcl2.rule_transformer.rules.for_expressions import ( + ForTupleExprRule, + ForObjectExprRule, +) +from hcl2.rule_transformer.rules.tokens import NL_OR_COMMENT, LBRACE, COLON, LSQB, COMMA +from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule + + +@dataclass +class FormatterOptions: + indent_length: int = 2 + open_empty_blocks: bool = True + open_empty_objects: bool = True + open_empty_tuples: bool = False + + vertically_align_attributes: bool = True + vertically_align_object_elements: bool = True + + +class LarkElementTreeFormatter(ABC): + def __init__(self, options: FormatterOptions = None): + self.options = options or FormatterOptions() + + @abstractmethod + def format_tree(self, tree: LarkElement): + raise NotImplementedError() + + +class BaseFormatter(LarkElementTreeFormatter): + def __init__(self, options: FormatterOptions = None): + super().__init__(options) + self._current_line = 1 + self._current_indent_level = 0 + + def format_tree(self, tree: LarkElement): + if isinstance(tree, StartRule): + self.format_start_rule(tree) + + def format_start_rule(self, rule: StartRule): + self.format_body_rule(rule.body, 0) + # for child in rule.body.children: + # if isinstance(child, BlockRule): + # self.format_block_rule(child, 1) + + def format_block_rule(self, rule: BlockRule, indent_level: 
int = 0): + if self.options.vertically_align_attributes: + self._vertically_align_attributes_in_body(rule.body) + + self.format_body_rule(rule.body, indent_level) + if len(rule.body.children) > 0: + rule.children.insert(-1, self._build_newline(indent_level - 1)) + elif self.options.open_empty_blocks: + rule.children.insert(-1, self._build_newline(indent_level - 1, 2)) + + def format_body_rule(self, rule: BodyRule, indent_level: int = 0): + + in_start = isinstance(rule.parent, StartRule) + + new_children = [] + if not in_start: + new_children.append(self._build_newline(indent_level)) + + for i, child in enumerate(rule.children): + new_children.append(child) + + if isinstance(child, AttributeRule): + self.format_attribute_rule(child, indent_level) + new_children.append(self._build_newline(indent_level)) + + if isinstance(child, BlockRule): + self.format_block_rule(child, indent_level + 1) + + if i > 0: + new_children.insert(-2, self._build_newline(indent_level)) + new_children.append(self._build_newline(indent_level, 2)) + + new_children.pop(-1) + rule._children = new_children + + def format_attribute_rule(self, rule: AttributeRule, indent_level: int = 0): + self.format_expression(rule.expression, indent_level + 1) + + def format_tuple_rule(self, rule: TupleRule, indent_level: int = 0): + if len(rule.elements) == 0: + if self.options.open_empty_tuples: + rule.children.insert(1, self._build_newline(indent_level - 1, 2)) + return + + new_children = [] + for child in rule.children: + new_children.append(child) + if isinstance(child, ExprTermRule): + self.format_expression(child, indent_level + 1) + + if isinstance(child, (COMMA, LSQB)): + new_children.append(self._build_newline(indent_level)) + + self._deindent_last_line() + rule._children = new_children + + def format_object_rule(self, rule: ObjectRule, indent_level: int = 0): + if len(rule.elements) == 0: + if self.options.open_empty_objects: + rule.children.insert(1, self._build_newline(indent_level - 1, 2)) + return + + new_children = [] + for i in range(len(rule.children)): + child = rule.children[i] + next_child = rule.children[i + 1] if i + 1 < len(rule.children) else None + new_children.append(child) + + if isinstance(child, LBRACE): + new_children.append(self._build_newline(indent_level)) + + if ( + next_child + and isinstance(next_child, ObjectElemRule) + and isinstance(child, (ObjectElemRule, COMMA)) + ): + new_children.append(self._build_newline(indent_level)) + + if isinstance(child, ObjectElemRule): + self.format_expression(child.expression, indent_level + 1) + + new_children.insert(-1, self._build_newline(indent_level)) + self._deindent_last_line() + + rule._children = new_children + + if self.options.vertically_align_object_elements: + self._vertically_align_object_elems(rule) + + def format_expression(self, rule: ExprTermRule, indent_level: int = 0): + if isinstance(rule.expression, ObjectRule): + self.format_object_rule(rule.expression, indent_level) + + elif isinstance(rule.expression, TupleRule): + self.format_tuple_rule(rule.expression, indent_level) + + elif isinstance(rule.expression, ForTupleExprRule): + self.format_fortupleexpr(rule.expression, indent_level) + + elif isinstance(rule.expression, ForObjectExprRule): + self.format_forobjectexpr(rule.expression, indent_level) + + elif isinstance(rule.expression, ExprTermRule): + self.format_expression(rule.expression) + + def format_fortupleexpr(self, expression: ForTupleExprRule, indent_level: int = 0): + for child in expression.children: + if isinstance(child, 
ExprTermRule):
+                self.format_expression(child, indent_level + 1)
+
+        indexes = [1, 3, 5, 7]
+        for index in indexes:
+            expression.children[index] = self._build_newline(indent_level)
+        self._deindent_last_line()
+        # expression.children[8] = self._build_newline(indent_level - 1)
+
+    def format_forobjectexpr(
+        self, expression: ForObjectExprRule, indent_level: int = 0
+    ):
+        for child in expression.children:
+            if isinstance(child, ExprTermRule):
+                self.format_expression(child, indent_level + 1)
+
+        indexes = [1, 3, 12]
+        for index in indexes:
+            expression.children[index] = self._build_newline(indent_level)
+
+        self._deindent_last_line()
+
+    def _vertically_align_attributes_in_body(self, body: BodyRule):
+        attributes_sequence: List[AttributeRule] = []
+
+        def _align_run(run: List[AttributeRule]):
+            max_length = max(
+                len(attribute.identifier.token.value) for attribute in run
+            )
+            for attribute in run:
+                name_length = len(attribute.identifier.token.value)
+                spaces_to_add = max_length - name_length
+                attribute.children[1].set_value(
+                    " " * spaces_to_add + attribute.children[1].value
+                )
+
+        for child in body.children:
+            if isinstance(child, AttributeRule):
+                attributes_sequence.append(child)
+
+            elif attributes_sequence:
+                _align_run(attributes_sequence)
+                attributes_sequence = []
+
+        # a trailing run of attributes at the end of the body must be aligned too
+        if attributes_sequence:
+            _align_run(attributes_sequence)
+
+    def _vertically_align_object_elems(self, rule: ObjectRule):
+        max_length = max(len(elem.key.serialize()) for elem in rule.elements)
+        for elem in rule.elements:
+            key_length = len(elem.key.serialize())
+            spaces_to_add = max_length - key_length
+
+            separator = elem.children[1]
+            if isinstance(separator, COLON):
+                spaces_to_add += 1
+
+            separator.set_value(" " * spaces_to_add + separator.value)
+
+    def _move_to_next_line(self, times: int = 1):
+        self._current_line += times
+
+    def _increase_indent_level(self, times: int = 1):
+        self._current_indent_level += times
+
+    def _decrease_indent_level(self, times: int = 1):
+        self._current_indent_level -= times
+        if self._current_indent_level < 0:
+            self._current_indent_level = 0
+
+    def _build_newline(
+        self, next_line_indent: int = 0, count: int = 1
+    ) -> NewLineOrCommentRule:
+        result = NewLineOrCommentRule(
+            [
+                NL_OR_COMMENT(
+                    ("\n" * count) + " " * self.options.indent_length * next_line_indent
+                )
+            ]
+        )
+        self._last_new_line = result
+        return result
+
+    def _deindent_last_line(self, times: int = 1):
+        token = self._last_new_line.token
+        for _ in range(times):
+            if token.value.endswith(" " * self.options.indent_length):
+                token.set_value(token.value[: -self.options.indent_length])
+
+    # def _build_meta(self, indent_level: int = 0, length: int = 0) -> Meta:
+    #     result = Meta()
+    #     result.empty = length == 0
+    #     result.line = self._current_line
+    #     result.column = indent_level * self.options.indent_length
+    #     # result.start_pos =
+    #     # result.end_line =
+    #     # result.end_column =
+    #     # result.end_pos =
+    #     # result.orig_expansion =
+    #     # result.match_tree =
+    #     return result
diff --git a/hcl2/rule_transformer/reconstructor.py b/hcl2/rule_transformer/reconstructor.py
new file mode 100644
index 00000000..7d316b2c
--- /dev/null
+++ b/hcl2/rule_transformer/reconstructor.py
@@ -0,0 +1,204 @@
+from typing import List, Union
+
+from lark import Tree, Token
+from hcl2.rule_transformer.rules import tokens
+from hcl2.rule_transformer.rules.base import BlockRule
+from hcl2.rule_transformer.rules.for_expressions import ForIntroRule
+from hcl2.rule_transformer.rules.literal_rules import IdentifierRule
+from hcl2.rule_transformer.rules.strings import StringRule
+from
hcl2.rule_transformer.rules.expressions import ExprTermRule, ConditionalRule + + +class HCLReconstructor: + """This class converts a Lark.Tree AST back into a string representing the underlying HCL code.""" + + def __init__(self): + self._reset_state() + + def _reset_state(self): + """State tracking for formatting decisions""" + self._last_was_space = True + self._current_indent = 0 + self._last_token_name = None + self._last_rule_name = None + self._in_parentheses = False + self._in_object = False + self._in_tuple = False + + def _should_add_space_before( + self, current_node: Union[Tree, Token], parent_rule_name: str = None + ) -> bool: + """Determine if we should add a space before the current token/rule.""" + + # Don't add space if we already have one + if self._last_was_space: + return False + + # Don't add space at the beginning + if self._last_token_name is None: + return False + + if isinstance(current_node, Token): + token_type = current_node.type + + # Space before '{' in blocks + if ( + token_type == tokens.LBRACE.lark_name() + and parent_rule_name == BlockRule.lark_name() + ): + return True + + # Space around Conditional Expression operators + if ( + parent_rule_name == ConditionalRule.lark_name() + and token_type in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] + or self._last_token_name + in [tokens.COLON.lark_name(), tokens.QMARK.lark_name()] + ): + return True + + # Space after + if ( + parent_rule_name == ForIntroRule.lark_name() + and token_type == tokens.COLON.lark_name() + ): + + return True + + # Space after commas in tuples and function arguments... + if self._last_token_name == tokens.COMMA.lark_name(): + # ... except for last comma + if token_type == tokens.RSQB.lark_name(): + return False + return True + + if token_type in [ + tokens.FOR.lark_name(), + tokens.IN.lark_name(), + tokens.IF.lark_name(), + tokens.ELLIPSIS.lark_name(), + ]: + return True + + if ( + self._last_token_name + in [ + tokens.FOR.lark_name(), + tokens.IN.lark_name(), + tokens.IF.lark_name(), + ] + and token_type != "NL_OR_COMMENT" + ): + return True + + # Space around for_object arrow + if tokens.FOR_OBJECT_ARROW.lark_name() in [ + token_type, + self._last_token_name, + ]: + return True + + # Space after ellipsis in function arguments + if self._last_token_name == tokens.ELLIPSIS.lark_name(): + return True + + if tokens.EQ.lark_name() in [token_type, self._last_token_name]: + return True + + # space around binary operators + if tokens.BINARY_OP.lark_name() in [token_type, self._last_token_name]: + return True + + elif isinstance(current_node, Tree): + rule_name = current_node.data + + if parent_rule_name == BlockRule.lark_name(): + # Add space between multiple string/identifier labels in blocks + if rule_name in [ + StringRule.lark_name(), + IdentifierRule.lark_name(), + ] and self._last_rule_name in [ + StringRule.lark_name(), + IdentifierRule.lark_name(), + ]: + return True + + return False + + def _reconstruct_tree(self, tree: Tree, parent_rule_name: str = None) -> List[str]: + """Recursively reconstruct a Tree node into HCL text fragments.""" + result = [] + rule_name = tree.data + + if rule_name == ExprTermRule.lark_name(): + # Check if parenthesized + if ( + len(tree.children) >= 3 + and isinstance(tree.children[0], Token) + and tree.children[0].type == tokens.LPAR.lark_name() + and isinstance(tree.children[-1], Token) + and tree.children[-1].type == tokens.RPAR.lark_name() + ): + self._in_parentheses = True + + for child in tree.children: + 
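                    # LPAR/RPAR are ordinary child tokens of this expr_term, so
+                    # the wrapping parentheses are re-emitted verbatim;
+                    # _in_parentheses just records that we are inside ( ... ).
+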
result.extend(self._reconstruct_node(child, rule_name)) + + self._in_parentheses = False + + else: + for child in tree.children: + result.extend(self._reconstruct_node(child, rule_name)) + + if self._should_add_space_before(tree, parent_rule_name): + result.insert(0, " ") + + # Update state tracking + self._last_rule_name = rule_name + if result: + self._last_was_space = result[-1].endswith(" ") or result[-1].endswith("\n") + + return result + + def _reconstruct_token(self, token: Token, parent_rule_name: str = None) -> str: + """Reconstruct a Token node into HCL text fragments.""" + result = str(token.value) + if self._should_add_space_before(token, parent_rule_name): + result = " " + result + + self._last_token_name = token.type + self._last_was_space = result[-1].endswith(" ") or result[-1].endswith("\n") + + return result + + def _reconstruct_node( + self, node: Union[Tree, Token], parent_rule_name: str = None + ) -> List[str]: + """Reconstruct any node (Tree or Token) into HCL text fragments.""" + if isinstance(node, Tree): + return self._reconstruct_tree(node, parent_rule_name) + elif isinstance(node, Token): + return [self._reconstruct_token(node, parent_rule_name)] + else: + # Fallback: convert to string + return [str(node)] + + def reconstruct(self, tree: Tree, postproc=None, insert_spaces=False) -> str: + """Convert a Lark.Tree AST back into a string representation of HCL.""" + # Reset state + self._reset_state() + + # Reconstruct the tree + fragments = self._reconstruct_node(tree) + + # Join fragments and apply post-processing + result = "".join(fragments) + + if postproc: + result = postproc(result) + + # Ensure file ends with newline + if result and not result.endswith("\n"): + result += "\n" + + return result diff --git a/hcl2/rule_transformer/rules/abstract.py b/hcl2/rule_transformer/rules/abstract.py index 33dcc9ca..e83fed2b 100644 --- a/hcl2/rule_transformer/rules/abstract.py +++ b/hcl2/rule_transformer/rules/abstract.py @@ -49,6 +49,9 @@ def serialize_conversion(self) -> Callable: def value(self): return self._value + def set_value(self, value: Any): + self._value = value + def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: @@ -96,7 +99,7 @@ def to_lark(self) -> Tree: def __init__(self, children: List[LarkElement], meta: Optional[Meta] = None): super().__init__() self._children = children - self._meta = meta + self._meta = meta or Meta() for index, child in enumerate(children): if child is not None: diff --git a/hcl2/rule_transformer/rules/base.py b/hcl2/rule_transformer/rules/base.py index 5c8468d4..c879b772 100644 --- a/hcl2/rule_transformer/rules/base.py +++ b/hcl2/rule_transformer/rules/base.py @@ -5,10 +5,10 @@ from hcl2.const import IS_BLOCK from hcl2.rule_transformer.rules.abstract import LarkRule, LarkToken -from hcl2.rule_transformer.rules.expressions import ExpressionRule +from hcl2.rule_transformer.rules.expressions import ExpressionRule, ExprTermRule from hcl2.rule_transformer.rules.literal_rules import IdentifierRule from hcl2.rule_transformer.rules.strings import StringRule -from hcl2.rule_transformer.rules.tokens import NAME, EQ +from hcl2.rule_transformer.rules.tokens import NAME, EQ, LBRACE, RBRACE from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext @@ -16,9 +16,9 @@ class AttributeRule(LarkRule): _children: Tuple[ - NAME, + IdentifierRule, EQ, - ExpressionRule, + ExprTermRule, ] @staticmethod @@ -26,11 +26,11 
@@ def lark_name() -> str: return "attribute" @property - def identifier(self) -> NAME: + def identifier(self) -> IdentifierRule: return self._children[0] @property - def expression(self) -> ExpressionRule: + def expression(self) -> ExprTermRule: return self._children[2] def serialize( @@ -56,40 +56,32 @@ def lark_name() -> str: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - blocks: List[BlockRule] = [] - attributes: List[AttributeRule] = [] + attribute_names = set() comments = [] inline_comments = [] + result = defaultdict(list) + for child in self._children: if isinstance(child, BlockRule): - blocks.append(child) + name = child.labels[0].serialize(options) + if name in attribute_names: + raise RuntimeError(f"Attribute {name} is already defined.") + result[name].append(child.serialize(options)) if isinstance(child, AttributeRule): - attributes.append(child) - # collect in-line comments from attribute assignments, expressions etc - inline_comments.extend(child.expression.inline_comments()) + attribute_names.add(child) + result.update(child.serialize(options)) + if options.with_comments: + # collect in-line comments from attribute assignments, expressions etc + inline_comments.extend(child.expression.inline_comments()) - if isinstance(child, NewLineOrCommentRule): + if isinstance(child, NewLineOrCommentRule) and options.with_comments: child_comments = child.to_list() if child_comments: comments.extend(child_comments) - result = {} - - for attribute in attributes: - result.update(attribute.serialize(options)) - - result_blocks = defaultdict(list) - for block in blocks: - name = block.labels[0].serialize(options) - if name in result.keys(): - raise RuntimeError(f"Attribute {name} is already defined.") - result_blocks[name].append(block.serialize(options)) - - result.update(**result_blocks) - if options.with_comments: if comments: result["__comments__"] = comments @@ -122,7 +114,9 @@ class BlockRule(LarkRule): _children: Tuple[ IdentifierRule, Optional[Union[IdentifierRule, StringRule]], + LBRACE, BodyRule, + RBRACE, ] def __init__(self, children, meta: Optional[Meta] = None): diff --git a/hcl2/rule_transformer/rules/containers.py b/hcl2/rule_transformer/rules/containers.py index 11ac0f5e..b82abc58 100644 --- a/hcl2/rule_transformer/rules/containers.py +++ b/hcl2/rule_transformer/rules/containers.py @@ -14,13 +14,22 @@ EQ, LBRACE, COMMA, - RBRACE, LSQB, RSQB, LPAR, RPAR, DOT, + RBRACE, + LSQB, + RSQB, + LPAR, + RPAR, + DOT, ) from hcl2.rule_transformer.rules.whitespace import ( NewLineOrCommentRule, InlineCommentMixIn, ) -from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext, to_dollar_string +from hcl2.rule_transformer.utils import ( + SerializationOptions, + SerializationContext, + to_dollar_string, +) class TupleRule(InlineCommentMixIn): @@ -33,7 +42,7 @@ class TupleRule(InlineCommentMixIn): Optional[NewLineOrCommentRule], COMMA, Optional[NewLineOrCommentRule], - ... + # ... 
], ExpressionRule, Optional[NewLineOrCommentRule], @@ -52,14 +61,18 @@ def elements(self) -> List[ExpressionRule]: child for child in self.children[1:-1] if isinstance(child, ExpressionRule) ] - def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: - if not options.wrap_tuples: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + if not options.wrap_tuples and not context.inside_dollar_string: return [element.serialize(options, context) for element in self.elements] with context.modify(inside_dollar_string=True): - result = f"[{", ".join( + result = "[" + result += ", ".join( str(element.serialize(options, context)) for element in self.elements - )}]" + ) + result += "]" if not context.inside_dollar_string: result = to_dollar_string(result) @@ -81,7 +94,9 @@ def lark_name() -> str: def value(self) -> key_T: return self._children[0] - def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return self.value.serialize(options, context) @@ -93,7 +108,6 @@ class ObjectElemKeyExpressionRule(LarkRule): RPAR, ] - @staticmethod def lark_name() -> str: return "object_elem_key_expression" @@ -102,7 +116,9 @@ def lark_name() -> str: def expression(self) -> ExpressionRule: return self._children[1] - def serialize(self, options=SerializationOptions(), context=SerializationContext()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: with context.modify(inside_dollar_string=True): result = f"({self.expression.serialize(options, context)})" if not context.inside_dollar_string: @@ -117,8 +133,7 @@ class ObjectElemKeyDotAccessor(LarkRule): Tuple[ IdentifierRule, DOT, - ... - ] + ], ] @staticmethod @@ -129,8 +144,12 @@ def lark_name() -> str: def identifiers(self) -> List[IdentifierRule]: return [child for child in self._children if isinstance(child, IdentifierRule)] - def serialize(self, options=SerializationOptions(), context=SerializationContext()) -> Any: - return ".".join(identifier.serialize(options, context) for identifier in self.identifiers) + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return ".".join( + identifier.serialize(options, context) for identifier in self.identifiers + ) class ObjectElemRule(LarkRule): @@ -153,9 +172,13 @@ def key(self) -> ObjectElemKeyRule: def expression(self): return self._children[2] - def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: return { - self.key.serialize(options, context): self.expression.serialize(options, context) + self.key.serialize(options, context): self.expression.serialize( + options, context + ) } @@ -169,7 +192,6 @@ class ObjectRule(InlineCommentMixIn): Optional[NewLineOrCommentRule], Optional[COMMA], Optional[NewLineOrCommentRule], - ... 
], RBRACE, ] @@ -184,8 +206,10 @@ def elements(self) -> List[ObjectElemRule]: child for child in self.children[1:-1] if isinstance(child, ObjectElemRule) ] - def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: - if not options.wrap_objects: + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + if not options.wrap_objects and not context.inside_dollar_string: result = {} for element in self.elements: result.update(element.serialize(options, context)) @@ -193,12 +217,13 @@ def serialize(self, options = SerializationOptions(), context = SerializationCon return result with context.modify(inside_dollar_string=True): - result = f"{{{", ".join( + result = "{" + result += ", ".join( f"{element.key.serialize(options, context)} = {element.expression.serialize(options,context)}" for element in self.elements - )}}}" + ) + result += "}" if not context.inside_dollar_string: result = to_dollar_string(result) - return result diff --git a/hcl2/rule_transformer/rules/expressions.py b/hcl2/rule_transformer/rules/expressions.py index d89f3b3c..0e0c9be8 100644 --- a/hcl2/rule_transformer/rules/expressions.py +++ b/hcl2/rule_transformer/rules/expressions.py @@ -16,7 +16,6 @@ from hcl2.rule_transformer.utils import ( wrap_into_parentheses, to_dollar_string, - unwrap_dollar_string, SerializationOptions, SerializationContext, ) @@ -58,7 +57,7 @@ def __init__(self, children, meta: Optional[Meta] = None): self._parentheses = True else: children = [None, *children, None] - self._possibly_insert_null_comments(children, [1, 3]) + self._insert_optionals(children, [1, 3]) super().__init__(children, meta) @property @@ -100,7 +99,7 @@ def lark_name() -> str: return "conditional" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [2, 4, 6]) + self._insert_optionals(children, [2, 4, 6]) super().__init__(children, meta) @property @@ -118,7 +117,7 @@ def if_false(self) -> ExpressionRule: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - with context.modify(inside_dollar_string=False): + with context.modify(inside_dollar_string=True): result = ( f"{self.condition.serialize(options, context)} " f"? 
{self.if_true.serialize(options, context)} " @@ -144,7 +143,7 @@ def lark_name() -> str: return "binary_term" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [1]) + self._insert_optionals(children, [1]) super().__init__(children, meta) @property diff --git a/hcl2/rule_transformer/rules/for_expressions.py b/hcl2/rule_transformer/rules/for_expressions.py index 18abe6c8..3a89aba3 100644 --- a/hcl2/rule_transformer/rules/for_expressions.py +++ b/hcl2/rule_transformer/rules/for_expressions.py @@ -52,18 +52,23 @@ def lark_name() -> str: return "for_intro" def __init__(self, children, meta: Optional[Meta] = None): - # Insert null comments at positions where they might be missing - self._possibly_insert_null_second_identifier(children) - self._possibly_insert_null_comments(children, [1, 5, 7, 9, 11]) + + self._insert_optionals(children) super().__init__(children, meta) - def _possibly_insert_null_second_identifier(self, children: List[LarkRule]): - second_identifier_present = ( - len([child for child in children if isinstance(child, IdentifierRule)]) == 2 - ) - if not second_identifier_present: - children.insert(3, None) - children.insert(4, None) + def _insert_optionals(self, children: List, indexes: List[int] = None): + identifiers = [child for child in children if isinstance(child, IdentifierRule)] + second_identifier = identifiers[1] if len(identifiers) == 2 else None + + indexes = [1, 5, 7, 9, 11] + if second_identifier is None: + indexes.extend([3, 4]) + + super()._insert_optionals(children, sorted(indexes)) + + if second_identifier is not None: + children[3] = COMMA() + children[4] = second_identifier @property def first_iterator(self) -> IdentifierRule: @@ -90,7 +95,6 @@ def serialize( result += f", {self.second_iterator.serialize(options, context)}" result += f" in {self.iterable.serialize(options, context)} : " - return result @@ -108,7 +112,7 @@ def lark_name() -> str: return "for_cond" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [1]) + self._insert_optionals(children, [1]) super().__init__(children, meta) @property @@ -142,13 +146,25 @@ def lark_name() -> str: return "for_tuple_expr" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [1, 3, 5, 7]) - self._possibly_insert_null_condition(children) + self._insert_optionals(children) super().__init__(children, meta) - def _possibly_insert_null_condition(self, children: List[LarkElement]): - if not len([child for child in children if isinstance(child, ForCondRule)]): - children.insert(6, None) + def _insert_optionals(self, children: List, indexes: List[int] = None): + condition = None + + for child in children: + if isinstance(child, ForCondRule): + condition = child + break + + indexes = [1, 3, 5, 7] + + if condition is None: + indexes.append(6) + + super()._insert_optionals(children, sorted(indexes)) + + children[6] = condition @property def for_intro(self) -> ForIntroRule: @@ -209,30 +225,30 @@ def lark_name() -> str: return "for_object_expr" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [1, 3, 6, 8, 10, 12]) - self._possibly_insert_null_optionals(children) + self._insert_optionals(children) super().__init__(children, meta) - def _possibly_insert_null_optionals(self, children: List[LarkElement]): - has_ellipsis = False - has_condition = False + def _insert_optionals(self, children: List, indexes: 
List[int] = None): + ellipsis_ = None + condition = None for child in children: - # if not has_ellipsis and isinstance(child, ELLIPSIS): - if ( - has_ellipsis is False - and child is not None - and child.lark_name() == ELLIPSIS.lark_name() - ): - has_ellipsis = True - if not has_condition and isinstance(child, ForCondRule): - has_condition = True - - if not has_ellipsis: - children.insert(9, None) - - if not has_condition: - children.insert(11, None) + if ellipsis_ is None and isinstance(child, ELLIPSIS): + ellipsis_ = child + if condition is None and isinstance(child, ForCondRule): + condition = child + + indexes = [1, 3, 6, 8, 10, 12] + + if ellipsis_ is None: + indexes.append(9) + if condition is None: + indexes.append(11) + + super()._insert_optionals(children, sorted(indexes)) + + children[9] = ellipsis_ + children[11] = condition @property def for_intro(self) -> ForIntroRule: @@ -262,6 +278,7 @@ def condition(self) -> Optional[ForCondRule]: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: + result = "{" with context.modify(inside_dollar_string=True): result += self.for_intro.serialize(options, context) @@ -270,7 +287,6 @@ def serialize( result += self.value_expr.serialize( SerializationOptions(wrap_objects=True), context ) - if self.ellipsis is not None: result += self.ellipsis.serialize(options, context) diff --git a/hcl2/rule_transformer/rules/functions.py b/hcl2/rule_transformer/rules/functions.py index b25fed62..9e52a47b 100644 --- a/hcl2/rule_transformer/rules/functions.py +++ b/hcl2/rule_transformer/rules/functions.py @@ -4,8 +4,15 @@ from hcl2.rule_transformer.rules.expressions import ExpressionRule from hcl2.rule_transformer.rules.literal_rules import IdentifierRule from hcl2.rule_transformer.rules.tokens import COMMA, ELLIPSIS, StringToken, LPAR, RPAR -from hcl2.rule_transformer.rules.whitespace import InlineCommentMixIn, NewLineOrCommentRule -from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext, to_dollar_string +from hcl2.rule_transformer.rules.whitespace import ( + InlineCommentMixIn, + NewLineOrCommentRule, +) +from hcl2.rule_transformer.utils import ( + SerializationOptions, + SerializationContext, + to_dollar_string, +) class ArgumentsRule(InlineCommentMixIn): @@ -17,7 +24,7 @@ class ArgumentsRule(InlineCommentMixIn): COMMA, Optional[NewLineOrCommentRule], ExpressionRule, - ... + # ... ], Optional[Union[COMMA, ELLIPSIS]], Optional[NewLineOrCommentRule], @@ -39,8 +46,12 @@ def has_ellipsis(self) -> bool: def arguments(self) -> List[ExpressionRule]: return [child for child in self._children if isinstance(child, ExpressionRule)] - def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: - result = ", ".join([str(argument.serialize(options, context)) for argument in self.arguments]) + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + result = ", ".join( + [str(argument.serialize(options, context)) for argument in self.arguments] + ) if self.has_ellipsis: result += " ..." 
return result @@ -75,30 +86,32 @@ def arguments(self) -> Optional[ArgumentsRule]: if isinstance(child, ArgumentsRule): return child - - def serialize(self, options = SerializationOptions(), context = SerializationContext()) -> Any: - result = ( - f"{"::".join(identifier.serialize(options, context) for identifier in self.identifiers)}" - f"({self.arguments.serialize(options, context) if self.arguments else ""})" + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + result = f"{'::'.join(identifier.serialize(options, context) for identifier in self.identifiers)}" + result += ( + f"({self.arguments.serialize(options, context) if self.arguments else ''})" ) + if not context.inside_dollar_string: result = to_dollar_string(result) return result -# class ProviderFunctionCallRule(FunctionCallRule): -# _children: Tuple[ -# IdentifierRule, -# IdentifierRule, -# IdentifierRule, -# LPAR, -# Optional[NewLineOrCommentRule], -# Optional[ArgumentsRule], -# Optional[NewLineOrCommentRule], -# RPAR, -# ] -# -# @staticmethod -# def lark_name() -> str: -# return "provider_function_call" +class ProviderFunctionCallRule(FunctionCallRule): + _children: Tuple[ + IdentifierRule, + IdentifierRule, + IdentifierRule, + LPAR, + Optional[NewLineOrCommentRule], + Optional[ArgumentsRule], + Optional[NewLineOrCommentRule], + RPAR, + ] + + @staticmethod + def lark_name() -> str: + return "provider_function_call" diff --git a/hcl2/rule_transformer/rules/indexing.py b/hcl2/rule_transformer/rules/indexing.py index 7a9b53a5..20decf00 100644 --- a/hcl2/rule_transformer/rules/indexing.py +++ b/hcl2/rule_transformer/rules/indexing.py @@ -67,7 +67,7 @@ def serialize( return f"[{self.index_expression.serialize(options)}]" def __init__(self, children, meta: Optional[Meta] = None): - self._possibly_insert_null_comments(children, [1, 3]) + self._insert_optionals(children, [1, 3]) super().__init__(children, meta) diff --git a/hcl2/rule_transformer/rules/tokens.py b/hcl2/rule_transformer/rules/tokens.py index 67d53fcf..ba948d3e 100644 --- a/hcl2/rule_transformer/rules/tokens.py +++ b/hcl2/rule_transformer/rules/tokens.py @@ -37,6 +37,9 @@ def serialize_conversion(self) -> Callable[[Any], str]: class StaticStringToken(LarkToken): + + classes_by_value = {} + @classmethod @lru_cache(maxsize=None) def __build_subclass( @@ -44,7 +47,7 @@ def __build_subclass( ) -> Type["StringToken"]: """Create a subclass with a constant `lark_name`.""" - return type( # type: ignore + result = type( # type: ignore f"{name}_TOKEN", (cls,), { @@ -53,6 +56,8 @@ def __build_subclass( "_default_value": default_value, }, ) + cls.classes_by_value[default_value] = result + return result def __class_getitem__(cls, value: Tuple[str, str]) -> Type["StringToken"]: name, default_value = value @@ -72,8 +77,9 @@ def serialize_conversion(self) -> Callable[[Any], str]: STRING_CHARS = StringToken["STRING_CHARS"] ESCAPED_INTERPOLATION = StringToken["ESCAPED_INTERPOLATION"] BINARY_OP = StringToken["BINARY_OP"] -HEREDOC_TEMPLATE = STRING_CHARS["HEREDOC_TEMPLATE"] -HEREDOC_TRIM_TEMPLATE = STRING_CHARS["HEREDOC_TRIM_TEMPLATE"] +HEREDOC_TEMPLATE = StringToken["HEREDOC_TEMPLATE"] +HEREDOC_TRIM_TEMPLATE = StringToken["HEREDOC_TRIM_TEMPLATE"] +NL_OR_COMMENT = StringToken["NL_OR_COMMENT"] # static values EQ = StaticStringToken[("EQ", "=")] COLON = StaticStringToken[("COLON", ":")] diff --git a/hcl2/rule_transformer/rules/whitespace.py b/hcl2/rule_transformer/rules/whitespace.py index fa24355c..62069b78 100644 --- 
a/hcl2/rule_transformer/rules/whitespace.py +++ b/hcl2/rule_transformer/rules/whitespace.py @@ -3,7 +3,7 @@ from hcl2.rule_transformer.rules.abstract import LarkToken, LarkRule from hcl2.rule_transformer.rules.literal_rules import TokenRule -from hcl2.rule_transformer.utils import SerializationOptions +from hcl2.rule_transformer.utils import SerializationOptions, SerializationContext class NewLineOrCommentRule(TokenRule): @@ -15,6 +15,11 @@ def lark_name() -> str: def from_string(cls, string: str) -> "NewLineOrCommentRule": return cls([LarkToken("NL_OR_COMMENT", string)]) + def serialize( + self, options=SerializationOptions(), context=SerializationContext() + ) -> Any: + return self.token.serialize() + def to_list( self, options: SerializationOptions = SerializationOptions() ) -> Optional[List[str]]: @@ -43,7 +48,7 @@ def to_list( class InlineCommentMixIn(LarkRule, ABC): - def _possibly_insert_null_comments(self, children: List, indexes: List[int] = None): + def _insert_optionals(self, children: List, indexes: List[int] = None): for index in indexes: try: child = children[index] diff --git a/hcl2/rule_transformer/transformer.py b/hcl2/rule_transformer/transformer.py index 1ab1dfda..931eab8e 100644 --- a/hcl2/rule_transformer/transformer.py +++ b/hcl2/rule_transformer/transformer.py @@ -60,6 +60,7 @@ IntLiteral, FloatLiteral, StringToken, + StaticStringToken, ) from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule @@ -80,6 +81,8 @@ def __init__(self, discard_new_line_or_comments: bool = False): def __default_token__(self, token: Token) -> StringToken: # TODO make this return StaticStringToken where applicable + if token.value in StaticStringToken.classes_by_value.keys(): + return StaticStringToken.classes_by_value[token.value]() return StringToken[token.type](token.value) def FLOAT_LITERAL(self, token: Token) -> FloatLiteral: diff --git a/hcl2/rule_transformer/utils.py b/hcl2/rule_transformer/utils.py index 98370ca3..8f1d7352 100644 --- a/hcl2/rule_transformer/utils.py +++ b/hcl2/rule_transformer/utils.py @@ -7,7 +7,6 @@ HEREDOC_TRIM_PATTERN = re.compile(r"<<-([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S) - @dataclass class SerializationOptions: with_comments: bool = True @@ -18,11 +17,6 @@ class SerializationOptions: preserve_heredocs: bool = True -@dataclass -class DeserializationOptions: - heredocs_to_strings: bool = False - - @dataclass class SerializationContext: inside_dollar_string: bool = False From 5ccfa657f28f152ea338c03d36508e365046c6f7 Mon Sep 17 00:00:00 2001 From: Kamil Kozik Date: Fri, 12 Dec 2025 14:09:37 +0100 Subject: [PATCH 11/11] * HCLReconstructor._reconstruct_token - handle 0 length tokens * FunctionCallRule.serialize - properly serialize into dollar string * remove unused import --- hcl2/rule_transformer/reconstructor.py | 3 ++- hcl2/rule_transformer/rules/containers.py | 1 - hcl2/rule_transformer/rules/functions.py | 7 +++---- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/hcl2/rule_transformer/reconstructor.py b/hcl2/rule_transformer/reconstructor.py index 7d316b2c..6aa8a4a4 100644 --- a/hcl2/rule_transformer/reconstructor.py +++ b/hcl2/rule_transformer/reconstructor.py @@ -167,7 +167,8 @@ def _reconstruct_token(self, token: Token, parent_rule_name: str = None) -> str: result = " " + result self._last_token_name = token.type - self._last_was_space = result[-1].endswith(" ") or result[-1].endswith("\n") + if len(token) != 0: + self._last_was_space = result[-1].endswith(" ") or result[-1].endswith("\n") return result diff --git 
a/hcl2/rule_transformer/rules/containers.py b/hcl2/rule_transformer/rules/containers.py index b82abc58..a2f53436 100644 --- a/hcl2/rule_transformer/rules/containers.py +++ b/hcl2/rule_transformer/rules/containers.py @@ -1,4 +1,3 @@ -import json from typing import Tuple, List, Optional, Union, Any from hcl2.rule_transformer.rules.abstract import LarkRule diff --git a/hcl2/rule_transformer/rules/functions.py b/hcl2/rule_transformer/rules/functions.py index 9e52a47b..92cc8b11 100644 --- a/hcl2/rule_transformer/rules/functions.py +++ b/hcl2/rule_transformer/rules/functions.py @@ -89,10 +89,9 @@ def arguments(self) -> Optional[ArgumentsRule]: def serialize( self, options=SerializationOptions(), context=SerializationContext() ) -> Any: - result = f"{'::'.join(identifier.serialize(options, context) for identifier in self.identifiers)}" - result += ( - f"({self.arguments.serialize(options, context) if self.arguments else ''})" - ) + with context.modify(inside_dollar_string=True): + result = f"{'::'.join(identifier.serialize(options, context) for identifier in self.identifiers)}" + result += f"({self.arguments.serialize(options, context) if self.arguments else ''})" if not context.inside_dollar_string: result = to_dollar_string(result)