diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3fe61e8..8c2f05b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -3,14 +3,22 @@ name: CI on: push: branches: - - main + - main pull_request: jobs: - tests: - name: Python ${{ matrix.python-version }} - runs-on: ubuntu-24.04 + pre-commit: + name: Run pre-commit hooks + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@main + - uses: actions/setup-python@main + - run: python -m pip install --upgrade pre-commit + - run: pre-commit run -a + tests: + name: Run tests using Python ${{ matrix.python-version }} + runs-on: ubuntu-latest strategy: matrix: python-version: @@ -19,18 +27,10 @@ jobs: - '3.11' - '3.12' - '3.13' - steps: - - uses: actions/checkout@v2 - - - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - name: Install dependencies - run: | - python -m pip install --upgrade pip setuptools wheel - python -m pip install --upgrade nox - - - name: Run tox targets for ${{ matrix.python-version }} - run: nox --session tests-${{ matrix.python-version }} + - uses: actions/checkout@main + - uses: actions/setup-python@main + with: + python-version: ${{ matrix.python-version }} + - run: python -m pip install --upgrade nox + - run: nox --session tests-${{ matrix.python-version }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8b0fc01..17708cc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,13 +1,18 @@ repos: -- repo: https://github.com/pycqa/isort - rev: 5.12.0 - hooks: - - id: isort -- repo: https://github.com/psf/black - rev: 23.1.0 - hooks: - - id: black -- repo: https://github.com/pycqa/flake8 - rev: 6.0.0 - hooks: - - id: flake8 + - repo: https://github.com/pycqa/isort + rev: 6.0.1 + hooks: + - id: isort + - repo: https://github.com/psf/black + rev: 25.1.0 + hooks: + - id: black + - repo: https://github.com/pycqa/flake8 + rev: 7.3.0 + hooks: + - id: flake8 + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.17.1 + hooks: + - id: mypy + exclude: ^noxfile\.py$ diff --git a/djhtml/__main__.py b/djhtml/__main__.py index ffb693f..2842ac0 100644 --- a/djhtml/__main__.py +++ b/djhtml/__main__.py @@ -15,13 +15,17 @@ """ +from __future__ import annotations + import sys +from collections.abc import Iterator from pathlib import Path -from . import modes, options +from . import modes +from .options import options -def main(): +def main() -> None: changed_files = 0 unchanged_files = 0 problematic_files = 0 @@ -54,7 +58,7 @@ def main(): source = input_file.read() except Exception as e: problematic_files += 1 - _error(e) + _error(str(e)) continue # Guess tabwidth @@ -70,8 +74,9 @@ def main(): guess = probabilities.index(max(probabilities)) # Indent input file + extra_blocks = dict(options.extra_block or ()) try: - result = Mode(source, extra_blocks=options.extra_block).indent( + result = Mode(source, extra_blocks=extra_blocks).indent( options.tabwidth or guess or 4 ) except modes.MaxLineLengthExceeded: @@ -103,7 +108,7 @@ def main(): except Exception as e: changed_files -= 1 problematic_files += 1 - _error(e) + _error(str(e)) continue _info(f"reindented {output_file.name}") elif changed and filename != "-": @@ -134,7 +139,7 @@ def main(): sys.exit(0) -def _generate_filenames(paths, suffixes): +def _generate_filenames(paths: list[str], suffixes: list[str]) -> Iterator[str]: for filename in paths: if filename == "-": yield filename @@ -143,18 +148,20 @@ def _generate_filenames(paths, suffixes): if path.is_dir(): yield from _generate_filenames_from_directory(path, suffixes) else: - yield path + yield str(path) -def _generate_filenames_from_directory(directory, suffixes): +def _generate_filenames_from_directory( + directory: Path, suffixes: list[str] +) -> Iterator[str]: for path in directory.iterdir(): if path.is_file() and path.suffix in suffixes: - yield path + yield str(path) elif path.is_dir(): yield from _generate_filenames_from_directory(path, suffixes) -def _verify_changed(source, result): +def _verify_changed(source: str, result: str) -> bool: output_lines = result.split("\n") changed = False for line_nr, line in enumerate(source.split("\n")): @@ -165,7 +172,7 @@ def _verify_changed(source, result): return changed -def _get_depth(line): +def _get_depth(line: str) -> int: count = 0 for char in line: if char == " ": @@ -177,11 +184,11 @@ def _get_depth(line): return count -def _info(msg): +def _info(msg: str) -> None: print(msg, file=sys.stderr) -def _error(msg): +def _error(msg: str) -> None: _info(f"Error: {msg}") diff --git a/djhtml/lines.py b/djhtml/lines.py index 03f8153..53c0da1 100644 --- a/djhtml/lines.py +++ b/djhtml/lines.py @@ -1,10 +1,24 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from .tokens import Token + + class Line: """ A single output line not including the final newline. """ - def __init__(self, tokens=None, level=0, offset=0, ignore=False): + def __init__( + self, + tokens: list[Token.BaseToken] | None = None, + level: int = 0, + offset: int = 0, + ignore: bool = False, + ) -> None: """ Lines are currently never instantiated with arguments, but that doesn't mean they can't. @@ -15,7 +29,7 @@ def __init__(self, tokens=None, level=0, offset=0, ignore=False): self.offset = offset self.ignore = ignore - def append(self, token): + def append(self, token: Token.BaseToken) -> None: """ Append token to line. @@ -23,7 +37,7 @@ def append(self, token): self.tokens.append(token) @property - def text(self): + def text(self) -> str: """ The text of this line including the original leading/trailing spaces. @@ -32,7 +46,7 @@ def text(self): return "".join([token.text for token in self.tokens]) @property - def indents(self): + def indents(self) -> bool: """ Whether this line has more opening than closing tokens. @@ -41,7 +55,7 @@ def indents(self): [token for token in self.tokens if token.dedents] ) - def indent(self, tabwidth): + def indent(self, tabwidth: int) -> str: """ The final, indented text of this line. @@ -52,7 +66,7 @@ def indent(self, tabwidth): return " " * (tabwidth * self.level + self.offset) + text return "" - def __len__(self): + def __len__(self) -> int: """ The length of the line (so far), excluding the whitespace at the beginning. Be careful calling len() because it might @@ -62,7 +76,7 @@ def __len__(self): """ return len(self.text.lstrip()) - def __repr__(self): + def __repr__(self) -> str: kwargs = "" for attr in ["level", "offset", "ignore"]: if value := getattr(self, attr): diff --git a/djhtml/modes.py b/djhtml/modes.py index 5f6238e..c3ea11a 100644 --- a/djhtml/modes.py +++ b/djhtml/modes.py @@ -1,18 +1,42 @@ +from __future__ import annotations + import re +from abc import ABC, abstractmethod +from typing import ClassVar, Sequence, TypedDict from .lines import Line from .tokens import Token -class BaseMode: +class OffsetDict(TypedDict): + relative: int + absolute: int + + +class BaseMode(ABC): """ Base class for the different modes. """ + RAW_TOKENS: ClassVar[Sequence[str]] + COMMENT_TAGS: ClassVar[Sequence[str]] MAX_LINE_LENGTH = 10_000 - def __init__(self, source=None, return_mode=None, extra_blocks=None): + offsets: OffsetDict + previous_offsets: list[OffsetDict] + + @abstractmethod + def create_token( + self, raw_token: str, src: str, line: Line + ) -> tuple[Token.BaseToken, BaseMode]: ... + + def __init__( + self, + source: str = "", + return_mode: BaseMode | None = None, + extra_blocks: dict[str, str] | None = None, + ) -> None: """ Instantiate with source text before calling indent(), or with the return_mode when invoked from within another mode. @@ -24,13 +48,13 @@ def __init__(self, source=None, return_mode=None, extra_blocks=None): self.source = source self.return_mode = return_mode or self self.token_re = compile_re(self.RAW_TOKENS) - self.extra_blocks = dict(extra_blocks or []) + self.extra_blocks = extra_blocks or {} # To keep track of the current and previous offsets. - self.offsets = dict(relative=0, absolute=0) + self.offsets = OffsetDict(relative=0, absolute=0) self.previous_offsets = [] - def indent(self, tabwidth): + def indent(self, tabwidth: int) -> str: """ Return the indented text as a single string. @@ -39,7 +63,7 @@ def indent(self, tabwidth): self.parse() return "\n".join([line.indent(tabwidth) for line in self.lines]) - def tokenize(self): + def tokenize(self) -> None: """ Split the source text into tokens and place them on lines. @@ -91,7 +115,7 @@ def tokenize(self): # Set the new source to the old tail for the next iteration. src = tail - def parse(self): + def parse(self) -> None: """ You found the top-secret indenting algorithm! @@ -101,9 +125,9 @@ def parse(self): thereby accomodates different languages used interchangeably. """ - stack = [] + stack: list[Token.BaseToken] = [] - def mode_in_stack(mode): + def mode_in_stack(mode: type[BaseMode]) -> bool: """ Helper function to see if a token from a specific mode is in the stack. @@ -172,7 +196,7 @@ def mode_in_stack(mode): if token.text.strip(): first_token = False - def debug(self): + def debug(self) -> str: self.tokenize() self.parse() return "\n".join([repr(line) for line in self.lines]) @@ -212,10 +236,13 @@ class DjTXT(BaseMode): } OPENING_TAG = r"{%[-+]? *[#/]?(\w+).*?[-+]?%}" - def create_token(self, raw_token, src, line): + def create_token( + self, raw_token: str, src: str, line: Line + ) -> tuple[Token.BaseToken, BaseMode]: mode = self if tag := re.match(self.OPENING_TAG, raw_token): + token: Token.BaseToken name = tag.group(1) if name in self.COMMENT_TAGS: token, mode = Token.Open(raw_token, mode=DjTXT, ignore=True), Comment( @@ -230,10 +257,10 @@ def create_token(self, raw_token, src, line): else: token = Token.Text(raw_token, mode=DjTXT, **self.offsets) elif raw_token == "{#": - token, mode = Token.Open(raw_token, mode=DjTXT, ignore=True), Comment( - "{# fmt:on #}", mode=DjTXT, return_mode=self - ) if src.startswith(" fmt:off #}") else Comment( - "#}", mode=DjTXT, return_mode=self + token, mode = Token.Open(raw_token, mode=DjTXT, ignore=True), ( + Comment("{# fmt:on #}", mode=DjTXT, return_mode=self) + if src.startswith(" fmt:off #}") + else Comment("#}", mode=DjTXT, return_mode=self) ) else: @@ -241,15 +268,15 @@ def create_token(self, raw_token, src, line): return token, mode - def _has_closing_token(self, name, raw_token, src): + def _has_closing_token(self, name: str, raw_token: str, src: str) -> bool: endtag = self.extra_blocks.get(name) if endtag: - return re.search(f"{{%[-+]? *{endtag}(?: .*?|)%}}", src) + return bool(re.search(f"{{%[-+]? *{endtag}(?: .*?|)%}}", src)) if not re.search(f"{{%[-+]? *(end_?|/){name}(?: .*?|)%}}", src): return False if regex := self.AMBIGUOUS_BLOCK_TAGS.get(name): if regex[0]: - return re.search(regex[0], raw_token) + return bool(re.search(regex[0], raw_token)) if regex[1]: return not re.search(regex[1], raw_token) return True @@ -287,16 +314,18 @@ class DjHTML(DjTXT): "wbr", ] - def create_token(self, raw_token, src, line): - mode = self + def create_token( + self, raw_token: str, src: str, line: Line + ) -> tuple[Token.BaseToken, "BaseMode"]: + mode: BaseMode = self if raw_token == "<": if match := re.match(r"([\w\-\.:]+)(\s*)", src): tagname = match[1] following_spaces = match[2] absolute = True - token = Token.Text(raw_token, mode=DjHTML) - offsets = dict( + token: Token.BaseToken = Token.Text(raw_token, mode=DjHTML) + offsets = OffsetDict( relative=-1 if line.indents else 0, absolute=len(line) + len(tagname) + 2, ) @@ -304,7 +333,7 @@ def create_token(self, raw_token, src, line): # Use "relative" multi-line indendation instead absolute = False token.indents = True - offsets = dict(relative=0, absolute=0) + offsets = OffsetDict(relative=0, absolute=0) mode = InsideHTMLTag(tagname, line, self, absolute, offsets) else: token = Token.Text(raw_token, mode=DjHTML) @@ -345,13 +374,15 @@ class DjCSS(DjTXT): r"", ] - def create_token(self, raw_token, src, line): - mode = self + def create_token( + self, raw_token: str, src: str, line: Line + ) -> tuple[Token.BaseToken, "BaseMode"]: + mode: BaseMode = self if raw_token in "{(": self.previous_offsets.append(self.offsets.copy()) - self.offsets = dict(relative=0, absolute=0) - token = Token.Open(raw_token, mode=DjCSS) + self.offsets = OffsetDict(relative=0, absolute=0) + token: Token.BaseToken = Token.Open(raw_token, mode=DjCSS) elif raw_token in "})": if self.previous_offsets: self.offsets = self.previous_offsets.pop() @@ -404,16 +435,23 @@ class DjJS(DjTXT): r"", ] - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + def __init__( + self, + source: str = "", + return_mode: BaseMode | None = None, + extra_blocks: dict[str, str] | None = None, + ) -> None: + super().__init__(source, return_mode, extra_blocks) self.haskell = False self.haskell_re = re.compile(r"^ *, ([$\w-]+ *=|[$\w-]+;?)") self.variable_re = re.compile(r"^ *([$\w-]+ *=|[$\w-]+;?)") self.previous_line_ended_with_comma = False self.extra_blocks = {} - def create_token(self, raw_token, src, line): - mode = self + def create_token( + self, raw_token: str, src: str, line: Line + ) -> tuple[Token.BaseToken, "BaseMode"]: + mode: BaseMode = self persist_relative_offset = False # Reset absolute offset in almost all cases @@ -430,8 +468,8 @@ def create_token(self, raw_token, src, line): # Opening and closing tokens if raw_token in "{[(": self.previous_offsets.append(self.offsets.copy()) - self.offsets = dict(relative=0, absolute=0) - token = Token.Open(raw_token, mode=DjJS) + self.offsets = OffsetDict(relative=0, absolute=0) + token: Token.BaseToken = Token.Open(raw_token, mode=DjJS) elif raw_token in ")]}": if self.previous_offsets: self.offsets = self.previous_offsets.pop() @@ -513,14 +551,18 @@ class Comment(DjTXT): """ - def __init__(self, endtag, *, mode, return_mode): + def __init__( + self, endtag: str, *, mode: type[BaseMode], return_mode: BaseMode + ) -> None: self.endtag = endtag self.mode = mode self.return_mode = return_mode self.token_re = compile_re([r"\n", endtag]) self.extra_blocks = {} - def create_token(self, raw_token, src, line): + def create_token( + self, raw_token: str, src: str, line: Line + ) -> tuple[Token.BaseToken, "BaseMode"]: if re.match(self.endtag, raw_token): return Token.Close(raw_token, mode=self.mode, ignore=True), self.return_mode return Token.Text(raw_token, mode=Comment, ignore=True), self @@ -534,7 +576,16 @@ class InsideHTMLTag(DjTXT): RAW_TOKENS = DjTXT.RAW_TOKENS + [r"/?>", r"[^ ='\">/\n]+=", r'"', r"'"] - def __init__(self, tagname, line, return_mode, absolute, offsets): + inside_attr: str | bool + + def __init__( + self, + tagname: str, + line: Line, + return_mode: BaseMode, + absolute: int, + offsets: OffsetDict, + ) -> None: self.tagname = tagname self.return_mode = return_mode self.absolute = absolute @@ -544,8 +595,10 @@ def __init__(self, tagname, line, return_mode, absolute, offsets): self.additional_offset = -len(tagname) - 1 if absolute else 0 self.extra_blocks = {} - def create_token(self, raw_token, src, line): - mode = self + def create_token( + self, raw_token: str, src: str, line: Line + ) -> tuple[Token.BaseToken, "BaseMode"]: + mode: BaseMode = self if not line: self.additional_offset = 0 @@ -556,14 +609,16 @@ def create_token(self, raw_token, src, line): if raw_token in ['"', "'"]: if self.inside_attr: - token = Token.Text(raw_token, mode=InsideHTMLTag, **self.offsets) + token: Token.BaseToken = Token.Text( + raw_token, mode=InsideHTMLTag, **self.offsets + ) if self.inside_attr == raw_token: self.inside_attr = False token.absolute = self.offsets["absolute"] - 1 self.offsets["absolute"] = self.previous_offset else: self.inside_attr = raw_token - self.previous_offset = self.offsets["absolute"] + self.previous_offset: int = self.offsets["absolute"] self.offsets["absolute"] += self.additional_offset token = Token.Text(raw_token, mode=InsideHTMLTag, **self.offsets) elif not self.inside_attr and raw_token == "/>": @@ -598,5 +653,5 @@ class MaxLineLengthExceeded(Exception): pass -def compile_re(raw_tokens): +def compile_re(raw_tokens: Sequence[str]) -> re.Pattern[str]: return re.compile("(" + "|".join(raw_tokens) + ")") diff --git a/djhtml/options.py b/djhtml/options.py index 831f9b0..2223e71 100644 --- a/djhtml/options.py +++ b/djhtml/options.py @@ -66,17 +66,15 @@ type=lambda x: tuple(x.split(",")), ) -# Parse arguments and assign attributes to self -self = sys.modules[__name__] -args = parser.parse_args(namespace=self) +options = parser.parse_args() -if show_version: +if options.show_version: print(version("djhtml")) sys.exit() -elif show_help or not input_filenames: +elif options.show_help or not options.input_filenames: parser.print_help() sys.exit() -elif in_place: +elif options.in_place: sys.exit( """ You have called DjHTML with the -i or --in-place argument which diff --git a/djhtml/tokens.py b/djhtml/tokens.py index 831fe7a..f9fb638 100644 --- a/djhtml/tokens.py +++ b/djhtml/tokens.py @@ -1,18 +1,31 @@ +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from .modes import BaseMode + + class Token: """ Container class for token types. """ - class _Base: + class BaseToken: indents = False dedents = False ignore = False is_double = False def __init__( - self, text, *, mode, level=0, relative=0, absolute=0, ignore=False - ): + self, + text: str, + *, + mode: type["BaseMode"], + level: int = 0, + relative: int = 0, + absolute: int = 0, + ignore: bool = False, + ) -> None: """ Tokens must have a text and a mode class. The level represents the line level of opening tokens and is set @@ -28,30 +41,30 @@ def __init__( self.absolute = absolute self.ignore = ignore - def __repr__(self): + def __repr__(self) -> str: kwargs = f", mode={self.mode.__name__}" for attr in ["level", "relative", "absolute", "ignore"]: if value := getattr(self, attr): kwargs += f", {attr}={value!r}" return f"{self.__class__.__name__}({self.text!r}{kwargs})" - class Text(_Base): + class Text(BaseToken): pass - class Open(_Base): + class Open(BaseToken): indents = True - class OpenDouble(_Base): + class OpenDouble(BaseToken): indents = True is_double = True - class Close(_Base): + class Close(BaseToken): dedents = True - class CloseDouble(_Base): + class CloseDouble(BaseToken): dedents = True is_double = True - class CloseAndOpen(_Base): + class CloseAndOpen(BaseToken): indents = True dedents = True diff --git a/pyproject.toml b/pyproject.toml index fed528d..56c172e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,6 @@ [build-system] requires = ["setuptools"] build-backend = "setuptools.build_meta" + +[tool.mypy] +strict = true diff --git a/tests/generate_tokens.py b/tests/generate_tokens.py index ceb6959..cf87c04 100755 --- a/tests/generate_tokens.py +++ b/tests/generate_tokens.py @@ -13,4 +13,6 @@ if filename.suffix == ".html": with open(DIR / filename) as html: with open(DIR / (filename.stem + ".tokens"), "w") as f: - f.write(DjHTML(html.read(), extra_blocks=[("weird_tag", "endweird")]).debug()) + f.write( + DjHTML(html.read(), extra_blocks={"weird_tag": "endweird"}).debug() + ) diff --git a/tests/test_suite.py b/tests/test_suite.py index ce4fa73..cab0eba 100644 --- a/tests/test_suite.py +++ b/tests/test_suite.py @@ -8,7 +8,7 @@ class TestSuite(unittest.TestCase): maxDiff = None DIR = Path(__file__).parent / "suite" - def test_available_files(self): + def test_available_files(self) -> None: """ Loop over all the files in the suite directory and compare the expected output to the actual output. @@ -19,7 +19,7 @@ def test_available_files(self): with self.subTest(filename): self._test_file(filename.stem) - def _test_file(self, basename): + def _test_file(self, basename: str) -> None: with open(self.DIR / (basename + ".html")) as f: expected_output = f.read() @@ -28,18 +28,18 @@ def _test_file(self, basename): # Indent the expected output to 0 (no indentation) unindented = DjHTML( - expected_output, extra_blocks=[("weird_tag", "endweird")] + expected_output, extra_blocks={"weird_tag": "endweird"} ).indent(0) self.assertNotEqual(unindented, expected_output) # Re-indent the unindented output to 4 actual_output = DjHTML( - unindented, extra_blocks=[("weird_tag", "endweird")] + unindented, extra_blocks={"weird_tag": "endweird"} ).indent(4) self.assertEqual(expected_output, actual_output) # Compare the tokenization actual_tokens = DjHTML( - actual_output, extra_blocks=[("weird_tag", "endweird")] + actual_output, extra_blocks={"weird_tag": "endweird"} ).debug() self.assertEqual(expected_tokens, actual_tokens)