From 3419b62bbbc857f0180796833d9a479acae38902 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Tue, 30 Sep 2025 15:35:20 +0900 Subject: [PATCH 01/48] =?UTF-8?q?=E3=83=95=E3=82=A1=E3=82=A4=E3=83=AB?= =?UTF-8?q?=E6=A4=9C=E7=B4=A2=E9=83=A8=E5=88=86=E3=82=92=E4=BD=9C=E6=88=90?= =?UTF-8?q?=E4=B8=AD=20refs=20#1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .flake8 | 5 ++ .gitignore | 216 +++++++++++++++++++++++++++++++++++++++++++++ Pipfile | 18 ++++ Pipfile.lock | 172 ++++++++++++++++++++++++++++++++++++ pyproject.toml | 11 +++ src/app.py | 27 ++++++ tests/doc/link.md | 4 + tests/doc/link2.md | 5 ++ tests/test_main.py | 18 ++++ 9 files changed, 476 insertions(+) create mode 100644 .flake8 create mode 100644 .gitignore create mode 100644 Pipfile create mode 100644 Pipfile.lock create mode 100644 pyproject.toml create mode 100644 src/app.py create mode 100644 tests/doc/link.md create mode 100644 tests/doc/link2.md create mode 100644 tests/test_main.py diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..7a29d36 --- /dev/null +++ b/.flake8 @@ -0,0 +1,5 @@ +[flake8] +exclude = .git,__pycache__,.pytest_cache,docs/source/conf.py,old,build,dist,tests,tmp,.aws-sam,.venv +max-complexity = 15 +max-line-length = 120 +show-source = True diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..64d49ae --- /dev/null +++ b/.gitignore @@ -0,0 +1,216 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[codz] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py.cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +# Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +# poetry.lock +# poetry.toml + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. +# https://pdm-project.org/en/latest/usage/project/#working-with-version-control +# pdm.lock +# pdm.toml +.pdm-python +.pdm-build/ + +# pixi +# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. +# pixi.lock +# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one +# in the .venv directory. It is recommended not to include this directory in version control. +.pixi + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# Redis +*.rdb +*.aof +*.pid + +# RabbitMQ +mnesia/ +rabbitmq/ +rabbitmq-data/ + +# ActiveMQ +activemq-data/ + +# SageMath parsed files +*.sage.py + +# Environments +.env +.envrc +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +# .idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. However, if you prefer, +# you could uncomment the following to ignore the entire vscode folder +# .vscode/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Marimo +marimo/_static/ +marimo/_lsp/ +__marimo__/ + +# Streamlit +.streamlit/secrets.toml \ No newline at end of file diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..c4dc663 --- /dev/null +++ b/Pipfile @@ -0,0 +1,18 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] + +[dev-packages] +pytest = "*" +flake8 = "*" +black = "*" + +[requires] +python_version = "3.13" + +[scripts] +lint = "flake8 -v" +format = "black ./src/ -v" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..5595ae3 --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,172 @@ +{ + "_meta": { + "hash": { + "sha256": "28dd85f5c63895b14ba3dab207386e088b6084b5e429466ba9339cdeeacf7571" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.13" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": {}, + "develop": { + "black": { + "hashes": [ + "sha256:0172a012f725b792c358d57fe7b6b6e8e67375dd157f64fa7a3097b3ed3e2175", + "sha256:0474bca9a0dd1b51791fcc507a4e02078a1c63f6d4e4ae5544b9848c7adfb619", + "sha256:154b06d618233fe468236ba1f0e40823d4eb08b26f5e9261526fde34916b9140", + "sha256:1b9dc70c21ef8b43248f1d86aedd2aaf75ae110b958a7909ad8463c4aa0880b0", + "sha256:2ab0ce111ef026790e9b13bd216fa7bc48edd934ffc4cbf78808b235793cbc92", + "sha256:3bec74ee60f8dfef564b573a96b8930f7b6a538e846123d5ad77ba14a8d7a64f", + "sha256:456386fe87bad41b806d53c062e2974615825c7a52159cde7ccaeb0695fa28fa", + "sha256:474b34c1342cdc157d307b56c4c65bce916480c4a8f6551fdc6bf9b486a7c4ae", + "sha256:77e7060a00c5ec4b3367c55f39cf9b06e68965a4f2e61cecacd6d0d9b7ec945a", + "sha256:846d58e3ce7879ec1ffe816bb9df6d006cd9590515ed5d17db14e17666b2b357", + "sha256:8e46eecf65a095fa62e53245ae2795c90bdecabd53b50c448d0a8bcd0d2e74c4", + "sha256:9101ee58ddc2442199a25cb648d46ba22cd580b00ca4b44234a324e3ec7a0f7e", + "sha256:a16b14a44c1af60a210d8da28e108e13e75a284bf21a9afa6b4571f96ab8bb9d", + "sha256:aaf319612536d502fdd0e88ce52d8f1352b2c0a955cc2798f79eeca9d3af0608", + "sha256:b756fc75871cb1bcac5499552d771822fd9db5a2bb8db2a7247936ca48f39831", + "sha256:c0372a93e16b3954208417bfe448e09b0de5cc721d521866cd9e0acac3c04a1f", + "sha256:ce41ed2614b706fd55fd0b4a6909d06b5bab344ffbfadc6ef34ae50adba3d4f7", + "sha256:d119957b37cc641596063cd7db2656c5be3752ac17877017b2ffcdb9dfc4d2b1", + "sha256:e3c1f4cd5e93842774d9ee4ef6cd8d17790e65f44f7cdbaab5f2cf8ccf22a823", + "sha256:e593466de7b998374ea2585a471ba90553283fb9beefcfa430d84a2651ed5933", + "sha256:ef69351df3c84485a8beb6f7b8f9721e2009e20ef80a8d619e2d1788b7816d47", + "sha256:f96b6726d690c96c60ba682955199f8c39abc1ae0c3a494a9c62c0184049a713" + ], + "index": "pypi", + "markers": "python_version >= '3.9'", + "version": "==25.9.0" + }, + "click": { + "hashes": [ + "sha256:9b9f285302c6e3064f4330c05f05b81945b2a39544279343e6e7c5f27a9baddc", + "sha256:e7b8232224eba16f4ebe410c25ced9f7875cb5f3263ffc93cc3e8da705e229c4" + ], + "markers": "python_version >= '3.10'", + "version": "==8.3.0" + }, + "colorama": { + "hashes": [ + "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", + "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'", + "version": "==0.4.6" + }, + "flake8": { + "hashes": [ + "sha256:b9696257b9ce8beb888cdbe31cf885c90d31928fe202be0889a7cdafad32f01e", + "sha256:fe044858146b9fc69b551a4b490d69cf960fcb78ad1edcb84e7fbb1b4a8e3872" + ], + "index": "pypi", + "markers": "python_version >= '3.9'", + "version": "==7.3.0" + }, + "iniconfig": { + "hashes": [ + "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", + "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760" + ], + "markers": "python_version >= '3.8'", + "version": "==2.1.0" + }, + "mccabe": { + "hashes": [ + "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325", + "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e" + ], + "markers": "python_version >= '3.6'", + "version": "==0.7.0" + }, + "mypy-extensions": { + "hashes": [ + "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", + "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558" + ], + "markers": "python_version >= '3.8'", + "version": "==1.1.0" + }, + "packaging": { + "hashes": [ + "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", + "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f" + ], + "markers": "python_version >= '3.8'", + "version": "==25.0" + }, + "pathspec": { + "hashes": [ + "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", + "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712" + ], + "markers": "python_version >= '3.8'", + "version": "==0.12.1" + }, + "platformdirs": { + "hashes": [ + "sha256:abd01743f24e5287cd7a5db3752faf1a2d65353f38ec26d98e25a6db65958c85", + "sha256:ca753cf4d81dc309bc67b0ea38fd15dc97bc30ce419a7f58d13eb3bf14c4febf" + ], + "markers": "python_version >= '3.9'", + "version": "==4.4.0" + }, + "pluggy": { + "hashes": [ + "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", + "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746" + ], + "markers": "python_version >= '3.9'", + "version": "==1.6.0" + }, + "pycodestyle": { + "hashes": [ + "sha256:c4b5b517d278089ff9d0abdec919cd97262a3367449ea1c8b49b91529167b783", + "sha256:dd6bf7cb4ee77f8e016f9c8e74a35ddd9f67e1d5fd4184d86c3b98e07099f42d" + ], + "markers": "python_version >= '3.9'", + "version": "==2.14.0" + }, + "pyflakes": { + "hashes": [ + "sha256:b24f96fafb7d2ab0ec5075b7350b3d2d2218eab42003821c06344973d3ea2f58", + "sha256:f742a7dbd0d9cb9ea41e9a24a918996e8170c799fa528688d40dd582c8265f4f" + ], + "markers": "python_version >= '3.9'", + "version": "==3.4.0" + }, + "pygments": { + "hashes": [ + "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", + "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b" + ], + "markers": "python_version >= '3.8'", + "version": "==2.19.2" + }, + "pytest": { + "hashes": [ + "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", + "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79" + ], + "index": "pypi", + "markers": "python_version >= '3.9'", + "version": "==8.4.2" + }, + "pytokens": { + "hashes": [ + "sha256:c9a4bfa0be1d26aebce03e6884ba454e842f186a59ea43a6d3b25af58223c044", + "sha256:db7b72284e480e69fb085d9f251f66b3d2df8b7166059261258ff35f50fb711b" + ], + "markers": "python_version >= '3.8'", + "version": "==0.1.10" + } + } +} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..6ee0b04 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,11 @@ +[tool.black] +exclude = ''' +/( + tmp + | .aws-sam +)/ +''' + +[tool.pytest.ini_options] +pythonpath = "src" +testpaths = ["tests"] diff --git a/src/app.py b/src/app.py new file mode 100644 index 0000000..3536f3f --- /dev/null +++ b/src/app.py @@ -0,0 +1,27 @@ +import argparse +from pathlib import Path + + +def lookup_file(path: str, filter="*.md"): + p = Path(path) + files = [str(item) for item in p.rglob(filter)] + return files + + +def create_parser(): + parser = argparse.ArgumentParser() + parser.add_argument("src") + return parser + + +def main(args=None): + parser = create_parser() + parsed_args = parser.parse_args(args) + files = lookup_file(parsed_args.src) + print(files) + print(parsed_args.src) + return files + + +if __name__ == "__main__": + main() diff --git a/tests/doc/link.md b/tests/doc/link.md new file mode 100644 index 0000000..44ff416 --- /dev/null +++ b/tests/doc/link.md @@ -0,0 +1,4 @@ +# TestDocument +## Require + +[example.com](https://example.com) \ No newline at end of file diff --git a/tests/doc/link2.md b/tests/doc/link2.md new file mode 100644 index 0000000..6a07638 --- /dev/null +++ b/tests/doc/link2.md @@ -0,0 +1,5 @@ +# TestDocument2 +## Require + +[example.com](https://example.com) +[example.jp](https://example.jp) \ No newline at end of file diff --git a/tests/test_main.py b/tests/test_main.py new file mode 100644 index 0000000..a9c9109 --- /dev/null +++ b/tests/test_main.py @@ -0,0 +1,18 @@ +import app +import pytest +from pprint import pprint as pp + + +@pytest.mark.parametrize( + ["path"], [pytest.param(".\\tests\\doc\\"), pytest.param("tests/doc/")] +) +def test_check(path: str): + files = app.lookup_file(path) + + pp(files) + + assert len(files) == 2 + + +def test_main(): + res = app.main(["tests/doc/"]) From d0aa7792cc9ba6f21060f03190f311c43839baad Mon Sep 17 00:00:00 2001 From: DogFortune Date: Tue, 30 Sep 2025 17:16:23 +0900 Subject: [PATCH 02/48] =?UTF-8?q?=E3=83=95=E3=82=A1=E3=82=A4=E3=83=AB?= =?UTF-8?q?=E3=81=8B=E3=82=89=E3=83=AA=E3=83=B3=E3=82=AF=E3=82=92=E6=8A=BD?= =?UTF-8?q?=E5=87=BA=E3=81=99=E3=82=8B=E3=81=A8=E3=81=93=E3=82=8D=E3=81=BE?= =?UTF-8?q?=E3=81=A7=E4=BD=9C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/app.py | 18 ++++++++++++++++-- tests/test_main.py | 9 +++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/app.py b/src/app.py index 3536f3f..941f45f 100644 --- a/src/app.py +++ b/src/app.py @@ -1,8 +1,24 @@ import argparse from pathlib import Path +import re + + +def extract_link(file_path: str): + links = [] + # 指定したファイルからリンクを抽出します。重複はこの時点で除外しますが、ファイルをまたいだリンクの重複チェックはしない。 + # 欲しいのはファイル名と行数とリンク + + with open(file_path, "r") as f: + lines = f.readlines() + for i, line in enumerate(lines): + print(f"{i+1}: {line}") + if "http" in line: + links.append({"line": i + 1, "link": line}) + return links def lookup_file(path: str, filter="*.md"): + # 指定したディレクトリから検査対象のファイルを抽出します。デフォルトはmdです。 p = Path(path) files = [str(item) for item in p.rglob(filter)] return files @@ -18,8 +34,6 @@ def main(args=None): parser = create_parser() parsed_args = parser.parse_args(args) files = lookup_file(parsed_args.src) - print(files) - print(parsed_args.src) return files diff --git a/tests/test_main.py b/tests/test_main.py index a9c9109..bdcd386 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -3,6 +3,15 @@ from pprint import pprint as pp +def test_extract_link(): + file_list = app.lookup_file("tests/doc/") + links = app.extract_link(file_list[0]) + assert len(links) == 1 + + links = app.extract_link(file_list[1]) + assert len(links) == 2 + + @pytest.mark.parametrize( ["path"], [pytest.param(".\\tests\\doc\\"), pytest.param("tests/doc/")] ) From b2b3b23881abe6f01761d2d705bd887f97ea3bab Mon Sep 17 00:00:00 2001 From: DogFortune Date: Wed, 1 Oct 2025 11:18:21 +0900 Subject: [PATCH 03/48] =?UTF-8?q?=E3=83=AA=E3=83=B3=E3=82=AF=E3=83=81?= =?UTF-8?q?=E3=82=A7=E3=83=83=E3=82=AF=E6=A9=9F=E6=A7=8B=E3=82=92=E5=AE=9F?= =?UTF-8?q?=E8=A3=85=20refs=20#1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docker-compose.yml | 6 ++++++ src/app.py | 15 ++++++++++++--- tests/test_main.py | 22 +++++++++++++++++++--- 3 files changed, 37 insertions(+), 6 deletions(-) create mode 100644 docker-compose.yml diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..8fd1943 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,6 @@ +services: + app: + image: kennethreitz/httpbin:latest + restart: always + ports: + - 8000:80 \ No newline at end of file diff --git a/src/app.py b/src/app.py index 941f45f..3c766c3 100644 --- a/src/app.py +++ b/src/app.py @@ -1,17 +1,26 @@ import argparse from pathlib import Path -import re +from urllib.request import urlopen +from urllib.error import HTTPError, URLError + + +def check_link(url: str): + try: + res = urlopen(url, timeout=5) + return {"result": True, "code": res.code, "url": res.url} + except HTTPError as e: + return {"result": False, "code": e.code, "url": e.url} + except URLError as e: + return {"result": False, "code": e.code, "url": e.url} def extract_link(file_path: str): links = [] # 指定したファイルからリンクを抽出します。重複はこの時点で除外しますが、ファイルをまたいだリンクの重複チェックはしない。 # 欲しいのはファイル名と行数とリンク - with open(file_path, "r") as f: lines = f.readlines() for i, line in enumerate(lines): - print(f"{i+1}: {line}") if "http" in line: links.append({"line": i + 1, "link": line}) return links diff --git a/tests/test_main.py b/tests/test_main.py index bdcd386..62797c0 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -3,7 +3,26 @@ from pprint import pprint as pp +@pytest.mark.parametrize( + ["url", "expected_result", "expected_status_code"], + [ + pytest.param("http://127.0.0.1:8000/status/200", True, 200), + pytest.param("http://127.0.0.1:8000/status/404", False, 404), + pytest.param("http://127.0.0.1:8000/status/500", False, 500), + ], +) +def test_check_link(url: str, expected_result: bool, expected_status_code: int): + res = app.check_link(url) + + assert type(res) is dict + assert res["result"] == expected_result + assert res["code"] == expected_status_code + + def test_extract_link(): + # ファイルからリンクを抽出するテスト。主にリンクの数、重複したリンクがない事を保証します。 + + # TODO: 重複チェックはまだできていない。 file_list = app.lookup_file("tests/doc/") links = app.extract_link(file_list[0]) assert len(links) == 1 @@ -17,9 +36,6 @@ def test_extract_link(): ) def test_check(path: str): files = app.lookup_file(path) - - pp(files) - assert len(files) == 2 From c7b4e61a65f12db0a9628a3c0f1f79350232c463 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Wed, 1 Oct 2025 11:50:04 +0900 Subject: [PATCH 04/48] =?UTF-8?q?=E8=A1=A8=E7=A4=BA=E3=82=AA=E3=83=97?= =?UTF-8?q?=E3=82=B7=E3=83=A7=E3=83=B3=E3=81=AE=E8=A8=AD=E5=AE=9A=20refs?= =?UTF-8?q?=20#1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/app.py | 3 +++ tests/test_main.py | 3 +-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/app.py b/src/app.py index 3c766c3..7bb8e5a 100644 --- a/src/app.py +++ b/src/app.py @@ -36,6 +36,9 @@ def lookup_file(path: str, filter="*.md"): def create_parser(): parser = argparse.ArgumentParser() parser.add_argument("src") + group = parser.add_mutually_exclusive_group() + group.add_argument("--verbose", action="store_true", help="Increase verbosity") + group.add_argument("--quiet", action="store_true", help="Decrease verbosity") return parser diff --git a/tests/test_main.py b/tests/test_main.py index 62797c0..dcd2d52 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -20,8 +20,7 @@ def test_check_link(url: str, expected_result: bool, expected_status_code: int): def test_extract_link(): - # ファイルからリンクを抽出するテスト。主にリンクの数、重複したリンクがない事を保証します。 - + # ファイルからリンクを抽出するテスト。リンクの数、重複したリンクがない事をテストします。 # TODO: 重複チェックはまだできていない。 file_list = app.lookup_file("tests/doc/") links = app.extract_link(file_list[0]) From fee2b6efa467e188b4b69361204032e240743a40 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Wed, 1 Oct 2025 12:47:06 +0900 Subject: [PATCH 05/48] =?UTF-8?q?url=E3=81=AE=E3=83=86=E3=82=B9=E3=83=88?= =?UTF-8?q?=E3=81=8C=E6=8A=9C=E3=81=91=E3=81=A6=E3=81=84=E3=81=9F=E3=81=AE?= =?UTF-8?q?=E3=81=A7=E8=BF=BD=E5=8A=A0=20refs=20#1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_main.py b/tests/test_main.py index dcd2d52..78d0ace 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -17,6 +17,7 @@ def test_check_link(url: str, expected_result: bool, expected_status_code: int): assert type(res) is dict assert res["result"] == expected_result assert res["code"] == expected_status_code + assert res["url"] == url def test_extract_link(): From 5b9767a968c3ca81de64159afda4672f9e3d2991 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Wed, 1 Oct 2025 13:00:05 +0900 Subject: [PATCH 06/48] add github actions --- .github/workflows/lint-test-format.yml | 77 ++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 .github/workflows/lint-test-format.yml diff --git a/.github/workflows/lint-test-format.yml b/.github/workflows/lint-test-format.yml new file mode 100644 index 0000000..064de96 --- /dev/null +++ b/.github/workflows/lint-test-format.yml @@ -0,0 +1,77 @@ +name: test-lint-format + +on: + workflow_dispatch: + push: + branches: + - main + pull_request: + branches: + - main +permissions: + checks: write + pull-requests: write +jobs: + lint: + runs-on: ubuntu-24.04 + timeout-minutes: 5 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.13" + cache: "pipenv" + - name: install pipenv + run: | + python -m pip install --upgrade pip + python -m pip install pipenv + - name: install packages + run: pipenv sync --dev + - name: run lint + run: pipenv run lint + format: + runs-on: ubuntu-24.04 + timeout-minutes: 5 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.13" + cache: "pipenv" + - name: install pipenv + run: | + python -m pip install --upgrade pip + python -m pip install pipenv + - name: install packages + run: pipenv sync --dev + - name: run format + run: pipenv run format + test: + runs-on: ubuntu-24.04 + timeout-minutes: 5 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.13" + cache: "pipenv" + - name: install pipenv + run: | + python -m pip install --upgrade pip + python -m pip install pipenv + - name: install packages + run: pipenv sync --dev + - name: Cache Docker images. + uses: ScribeMD/docker-cache@0.5.0 + with: + key: ${{ hashFiles('docker-compose.yml') }} + - name: start docker compose + run: docker compose up -d + - name: run test + run: pipenv run pytest -q --junit-xml pytest.xml + continue-on-error: true + - name: Publish Test Report + uses: mikepenz/action-junit-report@v5 + if: success() || failure() + with: + report_paths: "pytest.xml" From 712ee19842fca84f4af5197998845f3b7587b865 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Wed, 1 Oct 2025 13:07:05 +0900 Subject: [PATCH 07/48] fix --- pyproject.toml | 8 -------- tests/test_main.py | 5 +---- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6ee0b04..2fe857e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,11 +1,3 @@ -[tool.black] -exclude = ''' -/( - tmp - | .aws-sam -)/ -''' - [tool.pytest.ini_options] pythonpath = "src" testpaths = ["tests"] diff --git a/tests/test_main.py b/tests/test_main.py index 78d0ace..ccfb593 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,6 +1,5 @@ import app import pytest -from pprint import pprint as pp @pytest.mark.parametrize( @@ -31,9 +30,7 @@ def test_extract_link(): assert len(links) == 2 -@pytest.mark.parametrize( - ["path"], [pytest.param(".\\tests\\doc\\"), pytest.param("tests/doc/")] -) +@pytest.mark.parametrize(["path"], [pytest.param("tests/doc/")]) def test_check(path: str): files = app.lookup_file(path) assert len(files) == 2 From 86bb3fcfa54df12ba5c6fb54e38be15562c462d0 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Wed, 1 Oct 2025 13:20:08 +0900 Subject: [PATCH 08/48] =?UTF-8?q?Docker=E3=82=A4=E3=83=A1=E3=83=BC?= =?UTF-8?q?=E3=82=B8=E3=82=AD=E3=83=A3=E3=83=83=E3=82=B7=E3=83=A5=E3=81=AE?= =?UTF-8?q?Action=E3=82=92=E5=A4=89=E6=9B=B4=20refs=20#1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit キャッシュ問題がまだマージされていない --- .github/workflows/lint-test-format.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint-test-format.yml b/.github/workflows/lint-test-format.yml index 064de96..66f50ce 100644 --- a/.github/workflows/lint-test-format.yml +++ b/.github/workflows/lint-test-format.yml @@ -62,7 +62,7 @@ jobs: - name: install packages run: pipenv sync --dev - name: Cache Docker images. - uses: ScribeMD/docker-cache@0.5.0 + uses: AndreKurait/docker-cache@0.6.0 with: key: ${{ hashFiles('docker-compose.yml') }} - name: start docker compose From 86bec68d0b3eb8baa07d72827b8462c37d5ee3ef Mon Sep 17 00:00:00 2001 From: DogFortune Date: Wed, 1 Oct 2025 16:13:39 +0900 Subject: [PATCH 09/48] =?UTF-8?q?=E3=83=95=E3=82=A1=E3=82=A4=E3=83=AB?= =?UTF-8?q?=E3=83=AA=E3=82=B9=E3=83=88=E3=81=8B=E3=82=89=E3=83=AA=E3=83=B3?= =?UTF-8?q?=E3=82=AF=E3=82=92=E6=8A=BD=E5=87=BA=E3=81=99=E3=82=8B=E9=83=A8?= =?UTF-8?q?=E5=88=86=E3=82=92=E6=94=B9=E4=BF=AE=E4=B8=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 検索したファイルリストを引数として渡すようにし、ファイルをまたいだ重複チェックもまとめてやる方針で実装中 重複チェックはまだ --- src/app.py | 16 +++++++++------- tests/doc/link2.md | 2 ++ tests/test_main.py | 33 ++++++++++++++++++++++----------- 3 files changed, 33 insertions(+), 18 deletions(-) diff --git a/src/app.py b/src/app.py index 7bb8e5a..c70931a 100644 --- a/src/app.py +++ b/src/app.py @@ -14,15 +14,17 @@ def check_link(url: str): return {"result": False, "code": e.code, "url": e.url} -def extract_link(file_path: str): +def extract_link(files: list): links = [] - # 指定したファイルからリンクを抽出します。重複はこの時点で除外しますが、ファイルをまたいだリンクの重複チェックはしない。 # 欲しいのはファイル名と行数とリンク - with open(file_path, "r") as f: - lines = f.readlines() - for i, line in enumerate(lines): - if "http" in line: - links.append({"line": i + 1, "link": line}) + for file_path in files: + with open(file_path, "r") as f: + lines = f.readlines() + item = {"filePath": file_path, "data": []} + for i, line in enumerate(lines): + if "http" in line: + item["data"].append({"line": i + 1, "link": line}) + links.append(item) return links diff --git a/tests/doc/link2.md b/tests/doc/link2.md index 6a07638..fac6cd7 100644 --- a/tests/doc/link2.md +++ b/tests/doc/link2.md @@ -2,4 +2,6 @@ ## Require [example.com](https://example.com) +[example.com](https://example.com) +[example.com](http://example.com) [example.jp](https://example.jp) \ No newline at end of file diff --git a/tests/test_main.py b/tests/test_main.py index ccfb593..6e331ed 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,5 +1,27 @@ import app import pytest +from pprint import pprint as pp + + +class TestExtractLink: + def test_extract_link(self): + # ファイルからリンクを抽出するテスト。対象のドキュメントすべてのリンクを抽出する。 + # 重複リンクにはフラグをつける。 + files = app.lookup_file("tests/doc/") + links = app.extract_link(files) + + assert len(links) == 2 + + item = links[0] + + assert "filePath" in item + assert "data" in item + assert type(item["data"]) is list + + data = item["data"][0] + + assert "line" in data + assert "link" in data @pytest.mark.parametrize( @@ -19,17 +41,6 @@ def test_check_link(url: str, expected_result: bool, expected_status_code: int): assert res["url"] == url -def test_extract_link(): - # ファイルからリンクを抽出するテスト。リンクの数、重複したリンクがない事をテストします。 - # TODO: 重複チェックはまだできていない。 - file_list = app.lookup_file("tests/doc/") - links = app.extract_link(file_list[0]) - assert len(links) == 1 - - links = app.extract_link(file_list[1]) - assert len(links) == 2 - - @pytest.mark.parametrize(["path"], [pytest.param("tests/doc/")]) def test_check(path: str): files = app.lookup_file(path) From 301db457bdbad58eb10e9343ba3fa54c2e1027d1 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Wed, 1 Oct 2025 17:18:08 +0900 Subject: [PATCH 10/48] =?UTF-8?q?=E3=83=AA=E3=83=B3=E3=82=AF=E3=81=AE?= =?UTF-8?q?=E6=8A=BD=E5=87=BA=E3=81=A8=E9=87=8D=E8=A4=87=E3=83=81=E3=82=A7?= =?UTF-8?q?=E3=83=83=E3=82=AF=E3=82=92=E5=90=8C=E6=99=82=E3=81=AB=E5=AE=9F?= =?UTF-8?q?=E6=96=BD=20refs=20#1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/app.py | 20 ++++++++++++++++---- tests/doc/{link.md => doc1.md} | 0 tests/doc/{link2.md => doc2.md} | 0 tests/test_main.py | 19 +++++++++---------- 4 files changed, 25 insertions(+), 14 deletions(-) rename tests/doc/{link.md => doc1.md} (100%) rename tests/doc/{link2.md => doc2.md} (100%) diff --git a/src/app.py b/src/app.py index c70931a..c19411a 100644 --- a/src/app.py +++ b/src/app.py @@ -15,16 +15,28 @@ def check_link(url: str): def extract_link(files: list): - links = [] + links = {} + seen_urls = set() # 欲しいのはファイル名と行数とリンク for file_path in files: with open(file_path, "r") as f: lines = f.readlines() - item = {"filePath": file_path, "data": []} + links[f"{file_path}"] = [] for i, line in enumerate(lines): if "http" in line: - item["data"].append({"line": i + 1, "link": line}) - links.append(item) + url = ( + line.split("](")[1].rstrip(")\n") + if "](" in line + else line.strip() + ) + if url in seen_urls: + duplicate = True + else: + duplicate = False + seen_urls.add(url) + links[f"{file_path}"].append( + {"line": i + 1, "link": line, "duplicate": duplicate} + ) return links diff --git a/tests/doc/link.md b/tests/doc/doc1.md similarity index 100% rename from tests/doc/link.md rename to tests/doc/doc1.md diff --git a/tests/doc/link2.md b/tests/doc/doc2.md similarity index 100% rename from tests/doc/link2.md rename to tests/doc/doc2.md diff --git a/tests/test_main.py b/tests/test_main.py index 6e331ed..9a19248 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -6,22 +6,21 @@ class TestExtractLink: def test_extract_link(self): # ファイルからリンクを抽出するテスト。対象のドキュメントすべてのリンクを抽出する。 - # 重複リンクにはフラグをつける。 + # 重複リンクにはフラグをつける。2つ目移行はFalseになるのでTrueのものだけリンクチェックすればOK files = app.lookup_file("tests/doc/") links = app.extract_link(files) assert len(links) == 2 - item = links[0] + doc1_result = [ + item for key, value in links.items() if "doc1.md" in key for item in value + ] + doc2_result = [ + item for key, value in links.items() if "doc2.md" in key for item in value + ] - assert "filePath" in item - assert "data" in item - assert type(item["data"]) is list - - data = item["data"][0] - - assert "line" in data - assert "link" in data + assert len(doc1_result) == 1 + assert len(doc2_result) == 4 @pytest.mark.parametrize( From c7f36835d4d2d255e3a87024074c8ff09286ed38 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Wed, 1 Oct 2025 17:48:07 +0900 Subject: [PATCH 11/48] =?UTF-8?q?=E4=BE=8B=E5=A4=96=E5=91=A8=E3=82=8A?= =?UTF-8?q?=E3=81=AE=E6=94=B9=E4=BF=AE=20refs=20#1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 主にメッセージ周りの改修 --- src/app.py | 6 ++++-- tests/test_main.py | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/app.py b/src/app.py index c19411a..d63d54f 100644 --- a/src/app.py +++ b/src/app.py @@ -9,9 +9,11 @@ def check_link(url: str): res = urlopen(url, timeout=5) return {"result": True, "code": res.code, "url": res.url} except HTTPError as e: - return {"result": False, "code": e.code, "url": e.url} + # アクセスできて400や500系が来た時はこっち + return {"result": False, "code": e.code, "url": url, "reason": e.reason} except URLError as e: - return {"result": False, "code": e.code, "url": e.url} + # そもそもアクセスすらできなかった場合はこっち + return {"result": False, "url": url, "reason": e.reason} def extract_link(files: list): diff --git a/tests/test_main.py b/tests/test_main.py index 9a19248..48b3764 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -36,8 +36,10 @@ def test_check_link(url: str, expected_result: bool, expected_status_code: int): assert type(res) is dict assert res["result"] == expected_result - assert res["code"] == expected_status_code - assert res["url"] == url + if "code" in res: + assert res["code"] == expected_status_code + if "url" in res: + assert res["url"] == url @pytest.mark.parametrize(["path"], [pytest.param("tests/doc/")]) From fc55de79af69aaf7b74d9f6d876f03b4e4d21b96 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Wed, 1 Oct 2025 17:55:27 +0900 Subject: [PATCH 12/48] =?UTF-8?q?URLError=E3=81=AE=E3=82=B1=E3=83=BC?= =?UTF-8?q?=E3=82=B9=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ステータスコードは取れないのでNoneとし、reasonを追加 --- src/app.py | 2 +- tests/test_main.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/app.py b/src/app.py index d63d54f..b9b0c23 100644 --- a/src/app.py +++ b/src/app.py @@ -13,7 +13,7 @@ def check_link(url: str): return {"result": False, "code": e.code, "url": url, "reason": e.reason} except URLError as e: # そもそもアクセスすらできなかった場合はこっち - return {"result": False, "url": url, "reason": e.reason} + return {"result": False, "code": None, "url": url, "reason": e.reason} def extract_link(files: list): diff --git a/tests/test_main.py b/tests/test_main.py index 48b3764..5794c15 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -29,6 +29,7 @@ def test_extract_link(self): pytest.param("http://127.0.0.1:8000/status/200", True, 200), pytest.param("http://127.0.0.1:8000/status/404", False, 404), pytest.param("http://127.0.0.1:8000/status/500", False, 500), + pytest.param("http://127.0.0.1:800", False, None), ], ) def test_check_link(url: str, expected_result: bool, expected_status_code: int): @@ -36,10 +37,8 @@ def test_check_link(url: str, expected_result: bool, expected_status_code: int): assert type(res) is dict assert res["result"] == expected_result - if "code" in res: - assert res["code"] == expected_status_code - if "url" in res: - assert res["url"] == url + assert res["code"] == expected_status_code + assert res["url"] == url @pytest.mark.parametrize(["path"], [pytest.param("tests/doc/")]) From fe28d58a2a69652f9a9c274784fb7cee584005af Mon Sep 17 00:00:00 2001 From: DogFortune Date: Thu, 2 Oct 2025 17:54:03 +0900 Subject: [PATCH 13/48] =?UTF-8?q?=E8=A1=8C=E3=81=AE=E8=AA=AD=E3=81=BF?= =?UTF-8?q?=E8=BE=BC=E3=81=BF=E6=96=B9=E5=BC=8F=E3=81=AE=E6=94=B9=E4=BF=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 改行コードが含まれない方式でreadするように修正 それに伴い改修コードを除去する処理も削除 --- src/app.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/app.py b/src/app.py index b9b0c23..8c14c55 100644 --- a/src/app.py +++ b/src/app.py @@ -17,20 +17,18 @@ def check_link(url: str): def extract_link(files: list): + # 各ファイルからリンクを抽出します。 + # 重複しているリンクはフラグがTrueになります。 + # チェックすべきなのはこのフラフが links = {} seen_urls = set() - # 欲しいのはファイル名と行数とリンク for file_path in files: with open(file_path, "r") as f: - lines = f.readlines() + lines = f.read().splitlines() links[f"{file_path}"] = [] for i, line in enumerate(lines): if "http" in line: - url = ( - line.split("](")[1].rstrip(")\n") - if "](" in line - else line.strip() - ) + url = line.split("](")[1].rstrip(")") if url in seen_urls: duplicate = True else: From 202c423cc065dc148441b9d3f289b4ad00acc541 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Thu, 2 Oct 2025 17:57:58 +0900 Subject: [PATCH 14/48] =?UTF-8?q?=E6=AD=A3=E3=81=97=E3=81=8F=E9=87=8D?= =?UTF-8?q?=E8=A4=87=E5=88=A4=E5=AE=9A=E3=81=8C=E3=81=A7=E3=81=8D=E3=81=A6?= =?UTF-8?q?=E3=81=84=E3=82=8B=E3=81=8B=E3=81=AE=E3=83=86=E3=82=B9=E3=83=88?= =?UTF-8?q?=E8=BF=BD=E5=8A=A0=20refs=20#1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/app.py | 2 +- tests/test_main.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/app.py b/src/app.py index 8c14c55..56e5bdd 100644 --- a/src/app.py +++ b/src/app.py @@ -35,7 +35,7 @@ def extract_link(files: list): duplicate = False seen_urls.add(url) links[f"{file_path}"].append( - {"line": i + 1, "link": line, "duplicate": duplicate} + {"line": i + 1, "link": url, "duplicate": duplicate} ) return links diff --git a/tests/test_main.py b/tests/test_main.py index 5794c15..b8022e3 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -22,6 +22,12 @@ def test_extract_link(self): assert len(doc1_result) == 1 assert len(doc2_result) == 4 + # ちゃんと重複判定の数が正しいか、重複と見なしたリンクは想定しているものか + duplicated_link_list = [item for item in doc2_result if item["duplicate"]] + assert len(duplicated_link_list) == 2 + assert duplicated_link_list[0]["link"] == duplicated_link_list[1]["link"] + assert duplicated_link_list[0]["link"] == "https://example.com" + @pytest.mark.parametrize( ["url", "expected_result", "expected_status_code"], @@ -33,6 +39,9 @@ def test_extract_link(self): ], ) def test_check_link(url: str, expected_result: bool, expected_status_code: int): + # アクセスチェックした時に想定しているリクエストが返ってくる事。 + # 200系だけTrueで、それ以外はFalseで返ってくる事。 + # URLErrorが発生した(レスポンスが無く、そもそも接続できなかった)場合はFalseでステータスコードがNoneとなる事。 res = app.check_link(url) assert type(res) is dict From cf846ade1be584625804279c1f53b41b6277f7d2 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Fri, 3 Oct 2025 10:23:53 +0900 Subject: [PATCH 15/48] =?UTF-8?q?=E3=83=AA=E3=83=B3=E3=82=AF=E3=83=AA?= =?UTF-8?q?=E3=82=B9=E3=83=88=E3=82=92=E4=BD=BF=E3=81=A3=E3=81=A6=E3=81=BE?= =?UTF-8?q?=E3=81=A8=E3=82=81=E3=81=A6=E3=83=AA=E3=83=B3=E3=82=AF=E3=82=92?= =?UTF-8?q?=E3=83=81=E3=82=A7=E3=83=83=E3=82=AF=E3=81=99=E3=82=8B=E9=83=A8?= =?UTF-8?q?=E5=88=86=E3=82=92=E5=AE=9F=E8=A3=85=20refs=20#1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/app.py | 29 +++++++++++++++++++++++++---- tests/test_main.py | 21 +++++++++++++++++---- 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/src/app.py b/src/app.py index 56e5bdd..d3557a4 100644 --- a/src/app.py +++ b/src/app.py @@ -4,7 +4,7 @@ from urllib.error import HTTPError, URLError -def check_link(url: str): +def request(url: str): try: res = urlopen(url, timeout=5) return {"result": True, "code": res.code, "url": res.url} @@ -16,10 +16,30 @@ def check_link(url: str): return {"result": False, "code": None, "url": url, "reason": e.reason} -def extract_link(files: list): +def check_links(links: dict) -> list: + # リンクをチェックします。 + # チェックすべきなのはFalseのものだけ。 + results = [] + for file_path, link_items in links.items(): + for item in link_items: + if not item["duplicate"]: + res = request(item["link"]) + data = { + "file": file_path, + "line": item["line"], + "link": item["link"], + "result": res["result"], + "code": res["code"], + } + if "reason" in res: + data["reason"] = res["reason"] + results.append(data) + return results + + +def extract_link(files: list) -> dict: # 各ファイルからリンクを抽出します。 # 重複しているリンクはフラグがTrueになります。 - # チェックすべきなのはこのフラフが links = {} seen_urls = set() for file_path in files: @@ -60,7 +80,8 @@ def main(args=None): parser = create_parser() parsed_args = parser.parse_args(args) files = lookup_file(parsed_args.src) - return files + links = extract_link(files) + result = check_links(links) if __name__ == "__main__": diff --git a/tests/test_main.py b/tests/test_main.py index b8022e3..5dc3751 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -3,10 +3,19 @@ from pprint import pprint as pp +class TestCheckLins: + def test_check_links(self): + files = app.lookup_file("tests/doc/") + links = app.extract_link(files) + result = app.check_links(links) + + pp(result) + + class TestExtractLink: def test_extract_link(self): # ファイルからリンクを抽出するテスト。対象のドキュメントすべてのリンクを抽出する。 - # 重複リンクにはフラグをつける。2つ目移行はFalseになるのでTrueのものだけリンクチェックすればOK + # 重複リンクにはフラグをつける。2つ目以降はFalseになるのでTrueのものだけリンクチェックすればOK files = app.lookup_file("tests/doc/") links = app.extract_link(files) @@ -38,16 +47,20 @@ def test_extract_link(self): pytest.param("http://127.0.0.1:800", False, None), ], ) -def test_check_link(url: str, expected_result: bool, expected_status_code: int): +def test_request(url: str, expected_result: bool, expected_status_code: int): # アクセスチェックした時に想定しているリクエストが返ってくる事。 # 200系だけTrueで、それ以外はFalseで返ってくる事。 # URLErrorが発生した(レスポンスが無く、そもそも接続できなかった)場合はFalseでステータスコードがNoneとなる事。 - res = app.check_link(url) + res = app.request(url) + + pp(res) assert type(res) is dict assert res["result"] == expected_result assert res["code"] == expected_status_code assert res["url"] == url + if not res["result"]: + assert "reason" in res @pytest.mark.parametrize(["path"], [pytest.param("tests/doc/")]) @@ -57,4 +70,4 @@ def test_check(path: str): def test_main(): - res = app.main(["tests/doc/"]) + app.main(["tests/doc/"]) From 4066281fa242b71be61e06df745a34162c0866c1 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Fri, 3 Oct 2025 14:24:28 +0900 Subject: [PATCH 16/48] link -> url refs #1 --- src/app.py | 6 +++--- tests/test_main.py | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/app.py b/src/app.py index d3557a4..6835a8e 100644 --- a/src/app.py +++ b/src/app.py @@ -23,11 +23,11 @@ def check_links(links: dict) -> list: for file_path, link_items in links.items(): for item in link_items: if not item["duplicate"]: - res = request(item["link"]) + res = request(item["url"]) data = { "file": file_path, "line": item["line"], - "link": item["link"], + "url": item["url"], "result": res["result"], "code": res["code"], } @@ -55,7 +55,7 @@ def extract_link(files: list) -> dict: duplicate = False seen_urls.add(url) links[f"{file_path}"].append( - {"line": i + 1, "link": url, "duplicate": duplicate} + {"line": i + 1, "url": url, "duplicate": duplicate} ) return links diff --git a/tests/test_main.py b/tests/test_main.py index 5dc3751..a4490f7 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -15,6 +15,8 @@ def test_check_links(self): class TestExtractLink: def test_extract_link(self): # ファイルからリンクを抽出するテスト。対象のドキュメントすべてのリンクを抽出する。 + # データ構造としてはdictのKeyにファイルのパス、Valueにリンクに関する情報が入っている。 + # これは1ファイルの中に大量のリンクがあった時、すべてがフラットなリストだとファイル名を1つ1つ持つ事になるのでデータ量が増えてしまう。ファイル名は値として重複しやすいので、Keyという形で1つにまとめたのが理由。 # 重複リンクにはフラグをつける。2つ目以降はFalseになるのでTrueのものだけリンクチェックすればOK files = app.lookup_file("tests/doc/") links = app.extract_link(files) @@ -34,8 +36,8 @@ def test_extract_link(self): # ちゃんと重複判定の数が正しいか、重複と見なしたリンクは想定しているものか duplicated_link_list = [item for item in doc2_result if item["duplicate"]] assert len(duplicated_link_list) == 2 - assert duplicated_link_list[0]["link"] == duplicated_link_list[1]["link"] - assert duplicated_link_list[0]["link"] == "https://example.com" + assert duplicated_link_list[0]["url"] == duplicated_link_list[1]["url"] + assert duplicated_link_list[0]["url"] == "https://example.com" @pytest.mark.parametrize( From 648be817e85146b10d0a900be95548a9616c0b23 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Fri, 3 Oct 2025 14:27:24 +0900 Subject: [PATCH 17/48] =?UTF-8?q?=E7=B5=90=E6=9E=9C=E3=82=92bool=E3=81=8B?= =?UTF-8?q?=E3=82=89StrEnum=E3=81=AB=E5=A4=89=E6=9B=B4=20refs=20#1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit レポート表示しやすいように。かつ入力しやすいように --- src/app.py | 7 ++++--- src/enums.py | 6 ++++++ tests/test_main.py | 10 +++++----- 3 files changed, 15 insertions(+), 8 deletions(-) create mode 100644 src/enums.py diff --git a/src/app.py b/src/app.py index 6835a8e..b8a251b 100644 --- a/src/app.py +++ b/src/app.py @@ -2,18 +2,19 @@ from pathlib import Path from urllib.request import urlopen from urllib.error import HTTPError, URLError +from enums import Result def request(url: str): try: res = urlopen(url, timeout=5) - return {"result": True, "code": res.code, "url": res.url} + return {"result": Result.OK, "code": res.code, "url": res.url} except HTTPError as e: # アクセスできて400や500系が来た時はこっち - return {"result": False, "code": e.code, "url": url, "reason": e.reason} + return {"result": Result.NG, "code": e.code, "url": url, "reason": e.reason} except URLError as e: # そもそもアクセスすらできなかった場合はこっち - return {"result": False, "code": None, "url": url, "reason": e.reason} + return {"result": Result.NG, "code": None, "url": url, "reason": e.reason} def check_links(links: dict) -> list: diff --git a/src/enums.py b/src/enums.py new file mode 100644 index 0000000..2a24171 --- /dev/null +++ b/src/enums.py @@ -0,0 +1,6 @@ +from enum import StrEnum + + +class Result(StrEnum): + OK = "OK" + NG = "NG" diff --git a/tests/test_main.py b/tests/test_main.py index a4490f7..cd15c0f 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -43,13 +43,13 @@ def test_extract_link(self): @pytest.mark.parametrize( ["url", "expected_result", "expected_status_code"], [ - pytest.param("http://127.0.0.1:8000/status/200", True, 200), - pytest.param("http://127.0.0.1:8000/status/404", False, 404), - pytest.param("http://127.0.0.1:8000/status/500", False, 500), - pytest.param("http://127.0.0.1:800", False, None), + pytest.param("http://127.0.0.1:8000/status/200", "OK", 200), + pytest.param("http://127.0.0.1:8000/status/404", "NG", 404), + pytest.param("http://127.0.0.1:8000/status/500", "NG", 500), + pytest.param("http://127.0.0.1:800", "NG", None), ], ) -def test_request(url: str, expected_result: bool, expected_status_code: int): +def test_request(url: str, expected_result: str, expected_status_code: int): # アクセスチェックした時に想定しているリクエストが返ってくる事。 # 200系だけTrueで、それ以外はFalseで返ってくる事。 # URLErrorが発生した(レスポンスが無く、そもそも接続できなかった)場合はFalseでステータスコードがNoneとなる事。 From e0575680e200c6d4195bc16f9611858b415eface Mon Sep 17 00:00:00 2001 From: DogFortune Date: Fri, 10 Oct 2025 13:30:05 +0900 Subject: [PATCH 18/48] =?UTF-8?q?=E3=83=AA=E3=83=B3=E3=82=AF=E3=83=81?= =?UTF-8?q?=E3=82=A7=E3=83=83=E3=82=AF=E7=B5=90=E6=9E=9C=E3=81=AE=E5=86=85?= =?UTF-8?q?=E5=AE=B9=E7=A2=BA=E8=AA=8D=E3=83=86=E3=82=B9=E3=83=88=E3=81=AE?= =?UTF-8?q?=E5=AE=9F=E8=A3=85=20refs=20#1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_main.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/tests/test_main.py b/tests/test_main.py index cd15c0f..6101610 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -9,7 +9,22 @@ def test_check_links(self): links = app.extract_link(files) result = app.check_links(links) - pp(result) + # 重複しているリンクは結果に含まれていない事(ドキュメントに記載されているリンクの数 - 重複しているリンクの数になっている事) + assert len(result) == 3 + + # 形式チェック + for item in result: + assert "file" in item and item["file"] is not None + assert "line" in item and item["line"] is not None + assert "url" in item and item["url"] is not None + assert "result" in item and item["result"] is not None + assert "code" in item + + if item["result"].upper() == "OK": + assert item["code"] is not None + else: + assert item["code"] is None + assert "reason" in item and item["reason"] is not None class TestExtractLink: @@ -55,8 +70,6 @@ def test_request(url: str, expected_result: str, expected_status_code: int): # URLErrorが発生した(レスポンスが無く、そもそも接続できなかった)場合はFalseでステータスコードがNoneとなる事。 res = app.request(url) - pp(res) - assert type(res) is dict assert res["result"] == expected_result assert res["code"] == expected_status_code From df193bd953a8857a1a0cfce473dc4dbc7eeaefaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=2E=C3=98?= Date: Sat, 11 Oct 2025 18:58:29 +0900 Subject: [PATCH 19/48] Create LICENSE --- LICENSE | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..00b5018 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 DogFortune + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. From 8727dd35a407139f587031742dc91cf6cec728d6 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Sun, 12 Oct 2025 16:14:22 +0900 Subject: [PATCH 20/48] =?UTF-8?q?=E3=82=B3=E3=83=BC=E3=83=89=E7=A7=BB?= =?UTF-8?q?=E5=8B=95=20refs=20#1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 解析処理を別モジュールに移動 --- README_jp.md | 1 + src/{app.py => analyze.py} | 36 +++------------- src/main.py | 23 ++++++++++ tests/test_analyze.py | 81 +++++++++++++++++++++++++++++++++++ tests/test_main.py | 87 +------------------------------------- 5 files changed, 114 insertions(+), 114 deletions(-) rename src/{app.py => analyze.py} (79%) create mode 100644 src/main.py create mode 100644 tests/test_analyze.py diff --git a/README_jp.md b/README_jp.md index b01e445..b96791d 100644 --- a/README_jp.md +++ b/README_jp.md @@ -1 +1,2 @@ # LinkWatch + diff --git a/src/app.py b/src/analyze.py similarity index 79% rename from src/app.py rename to src/analyze.py index b8a251b..848df04 100644 --- a/src/app.py +++ b/src/analyze.py @@ -1,4 +1,3 @@ -import argparse from pathlib import Path from urllib.request import urlopen from urllib.error import HTTPError, URLError @@ -38,6 +37,13 @@ def check_links(links: dict) -> list: return results +def search(path: str, filter="*.md"): + # 指定したディレクトリから検査対象のファイルを抽出します。デフォルトはmdです。 + p = Path(path) + files = [str(item) for item in p.rglob(filter)] + return files + + def extract_link(files: list) -> dict: # 各ファイルからリンクを抽出します。 # 重複しているリンクはフラグがTrueになります。 @@ -59,31 +65,3 @@ def extract_link(files: list) -> dict: {"line": i + 1, "url": url, "duplicate": duplicate} ) return links - - -def lookup_file(path: str, filter="*.md"): - # 指定したディレクトリから検査対象のファイルを抽出します。デフォルトはmdです。 - p = Path(path) - files = [str(item) for item in p.rglob(filter)] - return files - - -def create_parser(): - parser = argparse.ArgumentParser() - parser.add_argument("src") - group = parser.add_mutually_exclusive_group() - group.add_argument("--verbose", action="store_true", help="Increase verbosity") - group.add_argument("--quiet", action="store_true", help="Decrease verbosity") - return parser - - -def main(args=None): - parser = create_parser() - parsed_args = parser.parse_args(args) - files = lookup_file(parsed_args.src) - links = extract_link(files) - result = check_links(links) - - -if __name__ == "__main__": - main() diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..4d2101f --- /dev/null +++ b/src/main.py @@ -0,0 +1,23 @@ +import analyze +import argparse + + +def create_parser(): + parser = argparse.ArgumentParser() + parser.add_argument("src") + group = parser.add_mutually_exclusive_group() + group.add_argument("--verbose", action="store_true", help="Increase verbosity") + group.add_argument("--quiet", action="store_true", help="Decrease verbosity") + return parser + + +def main(args=None): + parser = create_parser() + parsed_args = parser.parse_args(args) + files = analyze.search(parsed_args.src) + links = analyze.extract_link(files) + result = analyze.check_links(links) + + +if __name__ == "__main__": + main() diff --git a/tests/test_analyze.py b/tests/test_analyze.py new file mode 100644 index 0000000..bfe8ceb --- /dev/null +++ b/tests/test_analyze.py @@ -0,0 +1,81 @@ +import pytest +import analyze + + +@pytest.mark.parametrize( + ["url", "expected_result", "expected_status_code"], + [ + pytest.param("http://127.0.0.1:8000/status/200", "OK", 200), + pytest.param("http://127.0.0.1:8000/status/404", "NG", 404), + pytest.param("http://127.0.0.1:8000/status/500", "NG", 500), + pytest.param("http://127.0.0.1:800", "NG", None), + ], +) +def test_request(url: str, expected_result: str, expected_status_code: int): + # アクセスチェックした時に想定しているリクエストが返ってくる事。 + # 200系だけTrueで、それ以外はFalseで返ってくる事。 + # URLErrorが発生した(レスポンスが無く、そもそも接続できなかった)場合はFalseでステータスコードがNoneとなる事。 + res = analyze.request(url) + + assert type(res) is dict + assert res["result"] == expected_result + assert res["code"] == expected_status_code + assert res["url"] == url + if not res["result"]: + assert "reason" in res + + +def test_check_links(): + files = analyze.search("tests/doc/") + links = analyze.extract_link(files) + result = analyze.check_links(links) + + # 重複しているリンクは結果に含まれていない事(ドキュメントに記載されているリンクの数 - 重複しているリンクの数になっている事) + assert len(result) == 3 + + # 形式チェック + for item in result: + assert "file" in item and item["file"] is not None + assert "line" in item and item["line"] is not None + assert "url" in item and item["url"] is not None + assert "result" in item and item["result"] is not None + assert "code" in item + + if item["result"].upper() == "OK": + assert item["code"] is not None + else: + assert item["code"] is None + assert "reason" in item and item["reason"] is not None + + +@pytest.mark.parametrize(["path"], [pytest.param("tests/doc/")]) +def test_search(path: str): + files = analyze.search(path) + assert len(files) == 2 + + +def test_extract_link(): + # ファイルからリンクを抽出するテスト。対象のドキュメントすべてのリンクを抽出する。 + # データ構造としてはdictのKeyにファイルのパス、Valueにリンクに関する情報が入っている。 + # これは1ファイルの中に大量のリンクがあった時、すべてがフラットなリストだとファイル名を1つ1つ持つ事になるのでデータ量が増えてしまう。ファイル名は値として重複しやすいので、Keyという形で1つにまとめたのが理由。 + # 重複リンクにはフラグをつける。2つ目以降はFalseになるのでTrueのものだけリンクチェックすればOK + files = analyze.search("tests/doc/") + links = analyze.extract_link(files) + + assert len(links) == 2 + + doc1_result = [ + item for key, value in links.items() if "doc1.md" in key for item in value + ] + doc2_result = [ + item for key, value in links.items() if "doc2.md" in key for item in value + ] + + assert len(doc1_result) == 1 + assert len(doc2_result) == 4 + + # ちゃんと重複判定の数が正しいか、重複と見なしたリンクは想定しているものか + duplicated_link_list = [item for item in doc2_result if item["duplicate"]] + assert len(duplicated_link_list) == 2 + assert duplicated_link_list[0]["url"] == duplicated_link_list[1]["url"] + assert duplicated_link_list[0]["url"] == "https://example.com" diff --git a/tests/test_main.py b/tests/test_main.py index 6101610..65b033e 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,88 +1,5 @@ -import app -import pytest -from pprint import pprint as pp - - -class TestCheckLins: - def test_check_links(self): - files = app.lookup_file("tests/doc/") - links = app.extract_link(files) - result = app.check_links(links) - - # 重複しているリンクは結果に含まれていない事(ドキュメントに記載されているリンクの数 - 重複しているリンクの数になっている事) - assert len(result) == 3 - - # 形式チェック - for item in result: - assert "file" in item and item["file"] is not None - assert "line" in item and item["line"] is not None - assert "url" in item and item["url"] is not None - assert "result" in item and item["result"] is not None - assert "code" in item - - if item["result"].upper() == "OK": - assert item["code"] is not None - else: - assert item["code"] is None - assert "reason" in item and item["reason"] is not None - - -class TestExtractLink: - def test_extract_link(self): - # ファイルからリンクを抽出するテスト。対象のドキュメントすべてのリンクを抽出する。 - # データ構造としてはdictのKeyにファイルのパス、Valueにリンクに関する情報が入っている。 - # これは1ファイルの中に大量のリンクがあった時、すべてがフラットなリストだとファイル名を1つ1つ持つ事になるのでデータ量が増えてしまう。ファイル名は値として重複しやすいので、Keyという形で1つにまとめたのが理由。 - # 重複リンクにはフラグをつける。2つ目以降はFalseになるのでTrueのものだけリンクチェックすればOK - files = app.lookup_file("tests/doc/") - links = app.extract_link(files) - - assert len(links) == 2 - - doc1_result = [ - item for key, value in links.items() if "doc1.md" in key for item in value - ] - doc2_result = [ - item for key, value in links.items() if "doc2.md" in key for item in value - ] - - assert len(doc1_result) == 1 - assert len(doc2_result) == 4 - - # ちゃんと重複判定の数が正しいか、重複と見なしたリンクは想定しているものか - duplicated_link_list = [item for item in doc2_result if item["duplicate"]] - assert len(duplicated_link_list) == 2 - assert duplicated_link_list[0]["url"] == duplicated_link_list[1]["url"] - assert duplicated_link_list[0]["url"] == "https://example.com" - - -@pytest.mark.parametrize( - ["url", "expected_result", "expected_status_code"], - [ - pytest.param("http://127.0.0.1:8000/status/200", "OK", 200), - pytest.param("http://127.0.0.1:8000/status/404", "NG", 404), - pytest.param("http://127.0.0.1:8000/status/500", "NG", 500), - pytest.param("http://127.0.0.1:800", "NG", None), - ], -) -def test_request(url: str, expected_result: str, expected_status_code: int): - # アクセスチェックした時に想定しているリクエストが返ってくる事。 - # 200系だけTrueで、それ以外はFalseで返ってくる事。 - # URLErrorが発生した(レスポンスが無く、そもそも接続できなかった)場合はFalseでステータスコードがNoneとなる事。 - res = app.request(url) - - assert type(res) is dict - assert res["result"] == expected_result - assert res["code"] == expected_status_code - assert res["url"] == url - if not res["result"]: - assert "reason" in res - - -@pytest.mark.parametrize(["path"], [pytest.param("tests/doc/")]) -def test_check(path: str): - files = app.lookup_file(path) - assert len(files) == 2 +import main def test_main(): - app.main(["tests/doc/"]) + main.main(["tests/doc/"]) From 6dbcaf595f7a0d7203744e725f61b63a171ec811 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Sun, 12 Oct 2025 16:48:38 +0900 Subject: [PATCH 21/48] =?UTF-8?q?=E3=83=87=E3=83=BC=E3=82=BF=E3=82=AF?= =?UTF-8?q?=E3=83=A9=E3=82=B9=E7=A7=BB=E8=A1=8C=E4=B8=AD=20refs=20#1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ややこしくなってきたため --- src/analyze.py | 65 ++++++++++++++++++++++++++++++------------- tests/test_analyze.py | 38 ++++++++++++------------- 2 files changed, 64 insertions(+), 39 deletions(-) diff --git a/src/analyze.py b/src/analyze.py index 848df04..6d6210f 100644 --- a/src/analyze.py +++ b/src/analyze.py @@ -2,37 +2,63 @@ from urllib.request import urlopen from urllib.error import HTTPError, URLError from enums import Result +import dataclasses -def request(url: str): +@dataclasses.dataclass +class AnalyzeResult: + """ドキュメントの解析結果""" + + file: str + line: int + url: str + result: str + code: int + reason: str + + +@dataclasses.dataclass +class AnalyzeResponse: + """リンクにアクセスした結果""" + + result: Result + code: str | None + url: str + reason: str | None + + +@dataclasses.dataclass +class LinkInfo: + """ドキュメントから抽出したリンク情報""" + + line: int + url: str + duplicate: bool + + +def request(url: str) -> AnalyzeResponse: try: res = urlopen(url, timeout=5) - return {"result": Result.OK, "code": res.code, "url": res.url} + return AnalyzeResponse(Result.OK, res.code, res.url, None) except HTTPError as e: # アクセスできて400や500系が来た時はこっち - return {"result": Result.NG, "code": e.code, "url": url, "reason": e.reason} + return AnalyzeResponse(Result.NG, e.code, url, e.reason) except URLError as e: # そもそもアクセスすらできなかった場合はこっち - return {"result": Result.NG, "code": None, "url": url, "reason": e.reason} + return AnalyzeResponse(Result.NG, None, url, e.reason) -def check_links(links: dict) -> list: +def check_links(links: dict[str, LinkInfo]) -> list[AnalyzeResult]: # リンクをチェックします。 # チェックすべきなのはFalseのものだけ。 results = [] for file_path, link_items in links.items(): for item in link_items: - if not item["duplicate"]: - res = request(item["url"]) - data = { - "file": file_path, - "line": item["line"], - "url": item["url"], - "result": res["result"], - "code": res["code"], - } - if "reason" in res: - data["reason"] = res["reason"] + if not item.duplicate: + res = request(item.url) + data = AnalyzeResult( + file_path, item.line, item.url, res.result, res.code, res.reason + ) results.append(data) return results @@ -44,7 +70,7 @@ def search(path: str, filter="*.md"): return files -def extract_link(files: list) -> dict: +def extract_link(files: list) -> dict[str, LinkInfo]: # 各ファイルからリンクを抽出します。 # 重複しているリンクはフラグがTrueになります。 links = {} @@ -61,7 +87,6 @@ def extract_link(files: list) -> dict: else: duplicate = False seen_urls.add(url) - links[f"{file_path}"].append( - {"line": i + 1, "url": url, "duplicate": duplicate} - ) + data = LinkInfo(i + 1, url, duplicate) + links[f"{file_path}"].append(data) return links diff --git a/tests/test_analyze.py b/tests/test_analyze.py index bfe8ceb..0524d2c 100644 --- a/tests/test_analyze.py +++ b/tests/test_analyze.py @@ -17,12 +17,12 @@ def test_request(url: str, expected_result: str, expected_status_code: int): # URLErrorが発生した(レスポンスが無く、そもそも接続できなかった)場合はFalseでステータスコードがNoneとなる事。 res = analyze.request(url) - assert type(res) is dict - assert res["result"] == expected_result - assert res["code"] == expected_status_code - assert res["url"] == url - if not res["result"]: - assert "reason" in res + assert type(res) is analyze.AnalyzeResponse + assert res.result == expected_result + assert res.code == expected_status_code + assert res.url == url + if res.result.upper() == "NG": + assert res.reason is not None def test_check_links(): @@ -35,17 +35,17 @@ def test_check_links(): # 形式チェック for item in result: - assert "file" in item and item["file"] is not None - assert "line" in item and item["line"] is not None - assert "url" in item and item["url"] is not None - assert "result" in item and item["result"] is not None - assert "code" in item - - if item["result"].upper() == "OK": - assert item["code"] is not None + assert item.file is not None + assert item.line is not None + assert item.url is not None + assert item.result is not None + + if item.result.upper() == "OK": + assert item.code is not None + assert item.reason is None else: - assert item["code"] is None - assert "reason" in item and item["reason"] is not None + assert item.code is None + assert item.reason is not None @pytest.mark.parametrize(["path"], [pytest.param("tests/doc/")]) @@ -75,7 +75,7 @@ def test_extract_link(): assert len(doc2_result) == 4 # ちゃんと重複判定の数が正しいか、重複と見なしたリンクは想定しているものか - duplicated_link_list = [item for item in doc2_result if item["duplicate"]] + duplicated_link_list = [item for item in doc2_result if item.duplicate] assert len(duplicated_link_list) == 2 - assert duplicated_link_list[0]["url"] == duplicated_link_list[1]["url"] - assert duplicated_link_list[0]["url"] == "https://example.com" + assert duplicated_link_list[0].url == duplicated_link_list[1].url + assert duplicated_link_list[0].url == "https://example.com" From 671192c782dffb47c8b3147877d708c298da8dd3 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Tue, 14 Oct 2025 11:13:30 +0900 Subject: [PATCH 22/48] =?UTF-8?q?=E3=82=AB=E3=82=B9=E3=82=BF=E3=83=A0?= =?UTF-8?q?=E3=82=A2=E3=82=AF=E3=82=B7=E3=83=A7=E3=83=B3=E7=94=A8=E3=83=95?= =?UTF-8?q?=E3=82=A1=E3=82=A4=E3=83=AB=E4=BD=9C=E6=88=90=E4=B8=AD=20refs?= =?UTF-8?q?=20#3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .dockerignore | 3 +++ Dockerfile | 6 ++++++ action.yml | 11 +++++++++++ 3 files changed, 20 insertions(+) create mode 100644 .dockerignore create mode 100644 Dockerfile create mode 100644 action.yml diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..483c5fc --- /dev/null +++ b/.dockerignore @@ -0,0 +1,3 @@ +venv +.pytest_cache +*.pyc \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..84877b6 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,6 @@ +FROM gcr.io/distroless/python3-debian12:latest + +WORKDIR /app +COPY src/ . + +CMD [ "app.py" ] \ No newline at end of file diff --git a/action.yml b/action.yml new file mode 100644 index 0000000..b9df9f6 --- /dev/null +++ b/action.yml @@ -0,0 +1,11 @@ +name: LinkWatch +description: "Check whether you can correctly access the URL listed in the document file." +author: DogFortune + +inputs: + output-option: + description: output format option + required: false + +runs: + using: \ No newline at end of file From 7c992cd749af649133eb3a2ff296ff8285287334 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Tue, 14 Oct 2025 20:44:54 +0900 Subject: [PATCH 23/48] =?UTF-8?q?=E3=83=AC=E3=83=9D=E3=83=BC=E3=83=88?= =?UTF-8?q?=E7=94=A8=E3=82=AF=E3=83=A9=E3=82=B9=E3=82=92=E4=BD=BF=E3=81=A3?= =?UTF-8?q?=E3=81=A6=E7=B5=90=E6=9E=9C=E3=82=92=E5=87=BA=E3=81=99=E6=96=B9?= =?UTF-8?q?=E9=87=9D=E3=81=AB=20refs=20#1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/analyze.py | 17 +++-------------- src/main.py | 3 ++- src/report.py | 15 +++++++++++++++ tests/test_analyze.py | 8 +++++--- 4 files changed, 25 insertions(+), 18 deletions(-) create mode 100644 src/report.py diff --git a/src/analyze.py b/src/analyze.py index 6d6210f..d5706a7 100644 --- a/src/analyze.py +++ b/src/analyze.py @@ -2,21 +2,10 @@ from urllib.request import urlopen from urllib.error import HTTPError, URLError from enums import Result +from report import ReportData import dataclasses -@dataclasses.dataclass -class AnalyzeResult: - """ドキュメントの解析結果""" - - file: str - line: int - url: str - result: str - code: int - reason: str - - @dataclasses.dataclass class AnalyzeResponse: """リンクにアクセスした結果""" @@ -48,7 +37,7 @@ def request(url: str) -> AnalyzeResponse: return AnalyzeResponse(Result.NG, None, url, e.reason) -def check_links(links: dict[str, LinkInfo]) -> list[AnalyzeResult]: +def check_links(links: dict[str, LinkInfo]) -> list[ReportData]: # リンクをチェックします。 # チェックすべきなのはFalseのものだけ。 results = [] @@ -56,7 +45,7 @@ def check_links(links: dict[str, LinkInfo]) -> list[AnalyzeResult]: for item in link_items: if not item.duplicate: res = request(item.url) - data = AnalyzeResult( + data = ReportData( file_path, item.line, item.url, res.result, res.code, res.reason ) results.append(data) diff --git a/src/main.py b/src/main.py index 4d2101f..3fdaf2a 100644 --- a/src/main.py +++ b/src/main.py @@ -14,9 +14,10 @@ def create_parser(): def main(args=None): parser = create_parser() parsed_args = parser.parse_args(args) + files = analyze.search(parsed_args.src) links = analyze.extract_link(files) - result = analyze.check_links(links) + report_data_list = analyze.check_links(links) if __name__ == "__main__": diff --git a/src/report.py b/src/report.py new file mode 100644 index 0000000..d93cb62 --- /dev/null +++ b/src/report.py @@ -0,0 +1,15 @@ +import dataclasses + + +@dataclasses.dataclass +class ReportData: + file: str + line: int + url: str + result: str + code: int + reason: str + + +def console(data: ReportData): + line = "" diff --git a/tests/test_analyze.py b/tests/test_analyze.py index 0524d2c..3e154ed 100644 --- a/tests/test_analyze.py +++ b/tests/test_analyze.py @@ -1,5 +1,6 @@ import pytest import analyze +from report import ReportData @pytest.mark.parametrize( @@ -28,13 +29,14 @@ def test_request(url: str, expected_result: str, expected_status_code: int): def test_check_links(): files = analyze.search("tests/doc/") links = analyze.extract_link(files) - result = analyze.check_links(links) + results_report_data = analyze.check_links(links) # 重複しているリンクは結果に含まれていない事(ドキュメントに記載されているリンクの数 - 重複しているリンクの数になっている事) - assert len(result) == 3 + assert len(results_report_data) == 3 # 形式チェック - for item in result: + for item in results_report_data: + assert type(item) is ReportData assert item.file is not None assert item.line is not None assert item.url is not None From 2e959612f9fbcac9b2d0f96309636092c0a488f1 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Wed, 15 Oct 2025 09:58:47 +0900 Subject: [PATCH 24/48] =?UTF-8?q?=E3=83=91=E3=83=A9=E3=83=A1=E3=83=BC?= =?UTF-8?q?=E3=82=BF=E3=83=BC=E3=82=92=E7=92=B0=E5=A2=83=E5=A4=89=E6=95=B0?= =?UTF-8?q?=E3=81=A7=E3=82=82=E5=8F=97=E3=81=91=E5=8F=96=E3=82=8C=E3=82=8B?= =?UTF-8?q?=E3=82=88=E3=81=86=E3=81=AB=20refs=20#3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- action.yml | 10 +++++++++- src/app.py | 13 ++++++++----- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/action.yml b/action.yml index b9df9f6..405b8ce 100644 --- a/action.yml +++ b/action.yml @@ -6,6 +6,14 @@ inputs: output-option: description: output format option required: false + src: + description: check directory + required: true + default: "." runs: - using: \ No newline at end of file + using: docker + image: Dockerfile + env: + SRC_DIR: ${{ inputs.src }} + OUTOUT_FORMAT: ${{ inputs.output-option }} \ No newline at end of file diff --git a/src/app.py b/src/app.py index b8a251b..261747e 100644 --- a/src/app.py +++ b/src/app.py @@ -1,3 +1,4 @@ +import os import argparse from pathlib import Path from urllib.request import urlopen @@ -70,20 +71,22 @@ def lookup_file(path: str, filter="*.md"): def create_parser(): parser = argparse.ArgumentParser() - parser.add_argument("src") - group = parser.add_mutually_exclusive_group() - group.add_argument("--verbose", action="store_true", help="Increase verbosity") - group.add_argument("--quiet", action="store_true", help="Decrease verbosity") + parser.add_argument("src", default=os.environ.get("SRC_DIR", ".")) + parser.add_argument("--format", default=os.environ.get("OUTPUT_FORMAT", "CONSOLE")) return parser def main(args=None): parser = create_parser() parsed_args = parser.parse_args(args) - files = lookup_file(parsed_args.src) + src = parsed_args.src + + files = lookup_file(src) links = extract_link(files) result = check_links(links) + print(result) + if __name__ == "__main__": main() From 1003f056013f9995a3a6017a074ab2a9d0d82565 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Wed, 15 Oct 2025 10:20:04 +0900 Subject: [PATCH 25/48] =?UTF-8?q?=E3=83=95=E3=82=A9=E3=83=BC=E3=83=9E?= =?UTF-8?q?=E3=83=83=E3=83=88=E5=BD=A2=E5=BC=8F=E3=83=86=E3=82=B9=E3=83=88?= =?UTF-8?q?=E4=BD=9C=E6=88=90=E4=B8=AD=20refs=201?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main.py | 11 ++++++++++- tests/test_main.py | 19 +++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/main.py b/src/main.py index 7502b67..c2b98fb 100644 --- a/src/main.py +++ b/src/main.py @@ -3,6 +3,10 @@ import argparse +def __format__setting(format: str): + raise NotImplementedError + + def create_parser(): parser = argparse.ArgumentParser() parser.add_argument("src", default=os.environ.get("SRC_DIR", ".")) @@ -14,7 +18,12 @@ def main(args=None): parser = create_parser() parsed_args = parser.parse_args(args) - files = analyze.search(parsed_args.src) + print(parsed_args) + + format = __format__setting(parsed_args.format) + src = parsed_args.src + + files = analyze.search(src) links = analyze.extract_link(files) report_data_list = analyze.check_links(links) diff --git a/tests/test_main.py b/tests/test_main.py index 65b033e..9252fec 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,5 +1,24 @@ import main +import pytest def test_main(): main.main(["tests/doc/"]) + + +@pytest.mark.parametrize( + ["format"], + [ + pytest.param("CONSOLE"), + pytest.param("console"), + pytest.param("JSON"), + pytest.param("json"), + ], +) +def test_format_args(format: str): + """フォーマットテスト + + :param format: _description_ + :type format: str + """ + main.main(["--format", format, "tests/doc/"]) From ac9a58bc7110011f57d34658ea70b6d719885c00 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Wed, 15 Oct 2025 10:52:04 +0900 Subject: [PATCH 26/48] =?UTF-8?q?=E5=87=BA=E5=8A=9B=E3=83=95=E3=82=A9?= =?UTF-8?q?=E3=83=BC=E3=83=9E=E3=83=83=E3=83=88=E3=81=AE=E3=83=86=E3=82=B9?= =?UTF-8?q?=E3=83=88=E4=BD=9C=E6=88=90=20refs=20#1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/enums.py | 6 +++++- src/main.py | 9 ++++++++- tests/test_main.py | 35 ++++++++++++++++++++++++++++++++--- 3 files changed, 45 insertions(+), 5 deletions(-) diff --git a/src/enums.py b/src/enums.py index 2a24171..5a53929 100644 --- a/src/enums.py +++ b/src/enums.py @@ -1,4 +1,8 @@ -from enum import StrEnum +from enum import StrEnum, Enum, auto + + +class OutputType(Enum): + Console = auto() class Result(StrEnum): diff --git a/src/main.py b/src/main.py index c2b98fb..2bc634c 100644 --- a/src/main.py +++ b/src/main.py @@ -1,10 +1,17 @@ import os import analyze import argparse +from enums import OutputType def __format__setting(format: str): - raise NotImplementedError + match format.upper(): + case "CONSOLE": + return OutputType.Console + case "JSON" | "YAML": + raise NotImplementedError + case _: + raise ValueError def create_parser(): diff --git a/tests/test_main.py b/tests/test_main.py index 9252fec..bd7dd0a 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -11,14 +11,43 @@ def test_main(): [ pytest.param("CONSOLE"), pytest.param("console"), - pytest.param("JSON"), - pytest.param("json"), ], ) def test_format_args(format: str): - """フォーマットテスト + """正常系:フォーマット込みで行って最後まで完了する事。 :param format: _description_ :type format: str """ main.main(["--format", format, "tests/doc/"]) + + +@pytest.mark.parametrize( + ["format"], + [ + pytest.param("JSON"), + pytest.param("json"), + pytest.param("YAML"), + pytest.param("yaml"), + ], +) +def test_raise_NotImplemented_format_args(format: str): + """異常系:未対応フォーマットを指定した場合、未実装を表す例外発生。 + :param format: _description_ + :type format: str + """ + with pytest.raises(NotImplementedError): + main.main(["--format", format, "tests/doc/"]) + + +@pytest.mark.parametrize( + ["format"], + [pytest.param("consol"), pytest.param("sample")], +) +def test_raise_ValueError_format_args(format: str): + """異常系:適切ではない値が来た場合、例外発生。 + :param format: _description_ + :type format: str + """ + with pytest.raises(ValueError): + main.main(["--format", format, "tests/doc/"]) From 5f4793ffbc60ad4ce7563fe8b7f273780a64f30a Mon Sep 17 00:00:00 2001 From: DogFortune Date: Wed, 15 Oct 2025 11:04:23 +0900 Subject: [PATCH 27/48] =?UTF-8?q?=E3=83=86=E3=82=B9=E3=83=88=E6=95=B4?= =?UTF-8?q?=E7=90=86=20refs=20#1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 正常系と異常系をクラスで分離 環境変数で指定した際の異常系を追加 --- src/main.py | 2 - tests/test_main.py | 108 ++++++++++++++++++++++++++------------------- 2 files changed, 62 insertions(+), 48 deletions(-) diff --git a/src/main.py b/src/main.py index 2bc634c..9b64009 100644 --- a/src/main.py +++ b/src/main.py @@ -25,8 +25,6 @@ def main(args=None): parser = create_parser() parsed_args = parser.parse_args(args) - print(parsed_args) - format = __format__setting(parsed_args.format) src = parsed_args.src diff --git a/tests/test_main.py b/tests/test_main.py index bd7dd0a..6b80636 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,53 +1,69 @@ import main import pytest +import os -def test_main(): - main.main(["tests/doc/"]) - - -@pytest.mark.parametrize( - ["format"], - [ - pytest.param("CONSOLE"), - pytest.param("console"), - ], -) -def test_format_args(format: str): - """正常系:フォーマット込みで行って最後まで完了する事。 - - :param format: _description_ - :type format: str - """ - main.main(["--format", format, "tests/doc/"]) - - -@pytest.mark.parametrize( - ["format"], - [ - pytest.param("JSON"), - pytest.param("json"), - pytest.param("YAML"), - pytest.param("yaml"), - ], -) -def test_raise_NotImplemented_format_args(format: str): - """異常系:未対応フォーマットを指定した場合、未実装を表す例外発生。 - :param format: _description_ - :type format: str - """ - with pytest.raises(NotImplementedError): - main.main(["--format", format, "tests/doc/"]) +class TestValid: + """正常系""" + + def test_main(self): + """環境変数も引数も指定しない場合、コンソールモードで動作する事""" + main.main(["tests/doc/"]) + @pytest.mark.parametrize( + ["format"], + [ + pytest.param("CONSOLE"), + pytest.param("console"), + ], + ) + def test_format_args(self, format: str): + """フォーマット込みで行って最後まで完了する事。 -@pytest.mark.parametrize( - ["format"], - [pytest.param("consol"), pytest.param("sample")], -) -def test_raise_ValueError_format_args(format: str): - """異常系:適切ではない値が来た場合、例外発生。 - :param format: _description_ - :type format: str - """ - with pytest.raises(ValueError): + :param format: _description_ + :type format: str + """ main.main(["--format", format, "tests/doc/"]) + + +class TestInValid: + """異常系""" + + @pytest.mark.parametrize( + ["format"], + [ + pytest.param("JSON"), + pytest.param("json"), + pytest.param("YAML"), + pytest.param("yaml"), + ], + ) + def test_raise_NotImplemented_format_args(self, format: str): + """未対応フォーマットを指定した場合、未実装を表す例外発生。 + :param format: _description_ + :type format: str + """ + with pytest.raises(NotImplementedError): + main.main(["--format", format, "tests/doc/"]) + + @pytest.mark.parametrize( + ["format"], + [pytest.param("consol"), pytest.param("sample")], + ) + def test_raise_ValueError_format_args(self, format: str): + """適切ではない値が来た場合、例外発生。 + :param format: _description_ + :type format: str + """ + with pytest.raises(ValueError): + main.main(["--format", format, "tests/doc/"]) + + def test_raise_format_args_use_environment(self): + """環境変数でフォーマット指定をした時に適切ではない値が入っていた場合、例外発生。 + + :param format: _description_ + :type format: str + """ + os.environ["OUTPUT_FORMAT"] = "consol" + with pytest.raises(ValueError): + main.main(["tests/doc/"]) From b8b4ae604312b4eb308a2d89414bf5b024eff8d1 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Wed, 15 Oct 2025 13:31:46 +0900 Subject: [PATCH 28/48] =?UTF-8?q?=E3=83=A2=E3=82=B8=E3=83=A5=E3=83=BC?= =?UTF-8?q?=E3=83=AB=E3=81=AE=E5=90=8D=E5=89=8D=E5=A4=89=E6=9B=B4=20refs?= =?UTF-8?q?=20#1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/{analyze.py => analyzer.py} | 2 +- src/main.py | 9 +++++---- src/{report.py => reporter.py} | 5 +++++ tests/test_analyze.py | 20 ++++++++++---------- 4 files changed, 21 insertions(+), 15 deletions(-) rename src/{analyze.py => analyzer.py} (98%) rename src/{report.py => reporter.py} (68%) diff --git a/src/analyze.py b/src/analyzer.py similarity index 98% rename from src/analyze.py rename to src/analyzer.py index d5706a7..f8da3d4 100644 --- a/src/analyze.py +++ b/src/analyzer.py @@ -2,7 +2,7 @@ from urllib.request import urlopen from urllib.error import HTTPError, URLError from enums import Result -from report import ReportData +from reporter import ReportData import dataclasses diff --git a/src/main.py b/src/main.py index 9b64009..22c4854 100644 --- a/src/main.py +++ b/src/main.py @@ -1,5 +1,6 @@ import os -import analyze +import analyzer +import reporter import argparse from enums import OutputType @@ -28,9 +29,9 @@ def main(args=None): format = __format__setting(parsed_args.format) src = parsed_args.src - files = analyze.search(src) - links = analyze.extract_link(files) - report_data_list = analyze.check_links(links) + files = analyzer.search(src) + links = analyzer.extract_link(files) + report_data_list = analyzer.check_links(links) if __name__ == "__main__": diff --git a/src/report.py b/src/reporter.py similarity index 68% rename from src/report.py rename to src/reporter.py index d93cb62..e5c3c3c 100644 --- a/src/report.py +++ b/src/reporter.py @@ -1,4 +1,5 @@ import dataclasses +from enums import OutputType @dataclasses.dataclass @@ -11,5 +12,9 @@ class ReportData: reason: str +def generate(data: ReportData, type: OutputType): + pass + + def console(data: ReportData): line = "" diff --git a/tests/test_analyze.py b/tests/test_analyze.py index 3e154ed..af1cd34 100644 --- a/tests/test_analyze.py +++ b/tests/test_analyze.py @@ -1,6 +1,6 @@ import pytest -import analyze -from report import ReportData +import analyzer +from reporter import ReportData @pytest.mark.parametrize( @@ -16,9 +16,9 @@ def test_request(url: str, expected_result: str, expected_status_code: int): # アクセスチェックした時に想定しているリクエストが返ってくる事。 # 200系だけTrueで、それ以外はFalseで返ってくる事。 # URLErrorが発生した(レスポンスが無く、そもそも接続できなかった)場合はFalseでステータスコードがNoneとなる事。 - res = analyze.request(url) + res = analyzer.request(url) - assert type(res) is analyze.AnalyzeResponse + assert type(res) is analyzer.AnalyzeResponse assert res.result == expected_result assert res.code == expected_status_code assert res.url == url @@ -27,9 +27,9 @@ def test_request(url: str, expected_result: str, expected_status_code: int): def test_check_links(): - files = analyze.search("tests/doc/") - links = analyze.extract_link(files) - results_report_data = analyze.check_links(links) + files = analyzer.search("tests/doc/") + links = analyzer.extract_link(files) + results_report_data = analyzer.check_links(links) # 重複しているリンクは結果に含まれていない事(ドキュメントに記載されているリンクの数 - 重複しているリンクの数になっている事) assert len(results_report_data) == 3 @@ -52,7 +52,7 @@ def test_check_links(): @pytest.mark.parametrize(["path"], [pytest.param("tests/doc/")]) def test_search(path: str): - files = analyze.search(path) + files = analyzer.search(path) assert len(files) == 2 @@ -61,8 +61,8 @@ def test_extract_link(): # データ構造としてはdictのKeyにファイルのパス、Valueにリンクに関する情報が入っている。 # これは1ファイルの中に大量のリンクがあった時、すべてがフラットなリストだとファイル名を1つ1つ持つ事になるのでデータ量が増えてしまう。ファイル名は値として重複しやすいので、Keyという形で1つにまとめたのが理由。 # 重複リンクにはフラグをつける。2つ目以降はFalseになるのでTrueのものだけリンクチェックすればOK - files = analyze.search("tests/doc/") - links = analyze.extract_link(files) + files = analyzer.search("tests/doc/") + links = analyzer.extract_link(files) assert len(links) == 2 From 748d8d40c3997d8c602cd3da41359cdfc277cdb5 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Wed, 15 Oct 2025 13:54:43 +0900 Subject: [PATCH 29/48] =?UTF-8?q?=E7=B5=90=E6=9E=9C=E5=87=BA=E5=8A=9B?= =?UTF-8?q?=E3=81=BE=E3=81=A7=E8=A8=98=E8=BF=B0=20refs=20#1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit テストはまだなのでRED状態 --- src/main.py | 9 +++++++++ src/reporter.py | 6 +----- tests/{test_analyze.py => test_analyzer.py} | 0 tests/test_reporter.py | 9 +++++++++ 4 files changed, 19 insertions(+), 5 deletions(-) rename tests/{test_analyze.py => test_analyzer.py} (100%) create mode 100644 tests/test_reporter.py diff --git a/src/main.py b/src/main.py index 22c4854..95fd433 100644 --- a/src/main.py +++ b/src/main.py @@ -3,6 +3,14 @@ import reporter import argparse from enums import OutputType +from reporter import ReportData + + +def __output(data: list[ReportData], format: OutputType): + match format: + case OutputType.Console: + line = reporter.console(data) + print(line) def __format__setting(format: str): @@ -32,6 +40,7 @@ def main(args=None): files = analyzer.search(src) links = analyzer.extract_link(files) report_data_list = analyzer.check_links(links) + __output(report_data_list, format) if __name__ == "__main__": diff --git a/src/reporter.py b/src/reporter.py index e5c3c3c..97c856d 100644 --- a/src/reporter.py +++ b/src/reporter.py @@ -12,9 +12,5 @@ class ReportData: reason: str -def generate(data: ReportData, type: OutputType): - pass - - -def console(data: ReportData): +def console(data: list[ReportData]) -> str: line = "" diff --git a/tests/test_analyze.py b/tests/test_analyzer.py similarity index 100% rename from tests/test_analyze.py rename to tests/test_analyzer.py diff --git a/tests/test_reporter.py b/tests/test_reporter.py new file mode 100644 index 0000000..f3f73b0 --- /dev/null +++ b/tests/test_reporter.py @@ -0,0 +1,9 @@ +import reporter + + +class TestValid: + """正常系""" + + def test_console(self): + """コンソール出力テスト。文字列が想定している形である事""" + raise NotImplementedError From fa346e847e42fe4486bbb4926f2a02c27c50a152 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Sat, 1 Nov 2025 17:09:11 +0900 Subject: [PATCH 30/48] =?UTF-8?q?=E5=87=BA=E5=8A=9B=E3=83=86=E3=82=B9?= =?UTF-8?q?=E3=83=88=E5=AE=9F=E8=A3=85=E4=B8=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main.py | 3 ++- src/reporter.py | 13 ++++++++++--- tests/test_reporter.py | 10 +++++++++- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/main.py b/src/main.py index 95fd433..e716a18 100644 --- a/src/main.py +++ b/src/main.py @@ -9,7 +9,8 @@ def __output(data: list[ReportData], format: OutputType): match format: case OutputType.Console: - line = reporter.console(data) + report = reporter.Console(data) + line = report.generate() print(line) diff --git a/src/reporter.py b/src/reporter.py index 97c856d..55876fd 100644 --- a/src/reporter.py +++ b/src/reporter.py @@ -1,5 +1,5 @@ import dataclasses -from enums import OutputType +from pprint import pprint as pp @dataclasses.dataclass @@ -12,5 +12,12 @@ class ReportData: reason: str -def console(data: list[ReportData]) -> str: - line = "" +class Console: + def __init__(self, data: list[ReportData]): + self.data_list = data + + def generate(self) -> str: + pp(self.data_list) + line = "" + + return line diff --git a/tests/test_reporter.py b/tests/test_reporter.py index f3f73b0..0166e26 100644 --- a/tests/test_reporter.py +++ b/tests/test_reporter.py @@ -1,4 +1,6 @@ import reporter +import analyzer +from pprint import pprint as pp class TestValid: @@ -6,4 +8,10 @@ class TestValid: def test_console(self): """コンソール出力テスト。文字列が想定している形である事""" - raise NotImplementedError + files = analyzer.search("tests/doc/") + + links = analyzer.extract_link(files) + results_report_data = analyzer.check_links(links) + report = reporter.Console(results_report_data) + output_line = report.generate() + assert output_line is not None From a564f5dc94cbd0737fb158ba2f21eb5435c9eef6 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Mon, 10 Nov 2025 14:34:36 +0900 Subject: [PATCH 31/48] =?UTF-8?q?=E3=83=A2=E3=83=83=E3=82=AF=E3=82=B5?= =?UTF-8?q?=E3=83=BC=E3=83=90=E3=83=BC=E8=B5=B7=E5=8B=95=E3=83=81=E3=82=A7?= =?UTF-8?q?=E3=83=83=E3=82=AF=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/conftest.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 tests/conftest.py diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..71ff3d8 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,24 @@ +import pytest +from urllib.request import Request, urlopen +from urllib.error import HTTPError, URLError + + +@pytest.fixture(scope="session", autouse=True) +def check_mock_server(): + """テスト実行前にモックサーバーの起動を確認""" + mock_server_url = "http://localhost:8000/get" + + try: + req = Request(mock_server_url) + with urlopen(req) as res: + if res.getcode() == 200: + return + except (HTTPError, URLError): + pass + + pytest.exit( + "\n\n❌ エラー: モックサーバーが起動していません\n" + "以下のコマンドでモックサーバーを起動してください:\n" + " docker-compose up -d\n", + returncode=1, + ) From d6aa01c6894b046914f0ac4d50f55abeee0045b4 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Tue, 11 Nov 2025 17:22:38 +0900 Subject: [PATCH 32/48] =?UTF-8?q?=E5=90=8D=E5=89=8D=E3=82=92main=E3=81=8B?= =?UTF-8?q?=E3=82=89app=E3=81=AB=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainが重なっていたのとオプションで特定のメソッドだけ実行したい時に想定外のメソッドまで呼ばれていたから明確に テストの順序が依存している部分があったので改善しつつ、今後依存を見つけられるようにプラグインを入れて毎回ランダムでテストが実行されるようにした 依存は環境変数の設定 --- Pipfile | 1 + Pipfile.lock | 25 +++++++++++++++++-------- src/{main.py => app.py} | 0 tests/{test_main.py => test_app.py} | 27 ++++++++++++++++----------- 4 files changed, 34 insertions(+), 19 deletions(-) rename src/{main.py => app.py} (100%) rename tests/{test_main.py => test_app.py} (71%) diff --git a/Pipfile b/Pipfile index c4dc663..25ad93a 100644 --- a/Pipfile +++ b/Pipfile @@ -9,6 +9,7 @@ name = "pypi" pytest = "*" flake8 = "*" black = "*" +pytest-randomly = "*" [requires] python_version = "3.13" diff --git a/Pipfile.lock b/Pipfile.lock index 5595ae3..77264fb 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "28dd85f5c63895b14ba3dab207386e088b6084b5e429466ba9339cdeeacf7571" + "sha256": "205c9da775a56185b15b98b207406a87485419ccb7f873a51f03cdf8aac2b351" }, "pipfile-spec": 6, "requires": { @@ -73,11 +73,11 @@ }, "iniconfig": { "hashes": [ - "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", - "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760" + "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", + "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12" ], - "markers": "python_version >= '3.8'", - "version": "==2.1.0" + "markers": "python_version >= '3.10'", + "version": "==2.3.0" }, "mccabe": { "hashes": [ @@ -153,12 +153,21 @@ }, "pytest": { "hashes": [ - "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", - "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79" + "sha256:8f44522eafe4137b0f35c9ce3072931a788a21ee40a2ed279e817d3cc16ed21e", + "sha256:e5ccdf10b0bac554970ee88fc1a4ad0ee5d221f8ef22321f9b7e4584e19d7f96" + ], + "index": "pypi", + "markers": "python_version >= '3.10'", + "version": "==9.0.0" + }, + "pytest-randomly": { + "hashes": [ + "sha256:174e57bb12ac2c26f3578188490bd333f0e80620c3f47340158a86eca0593cd8", + "sha256:e0dfad2fd4f35e07beff1e47c17fbafcf98f9bf4531fd369d9260e2f858bfcb7" ], "index": "pypi", "markers": "python_version >= '3.9'", - "version": "==8.4.2" + "version": "==4.0.1" }, "pytokens": { "hashes": [ diff --git a/src/main.py b/src/app.py similarity index 100% rename from src/main.py rename to src/app.py diff --git a/tests/test_main.py b/tests/test_app.py similarity index 71% rename from tests/test_main.py rename to tests/test_app.py index 6b80636..bd19f14 100644 --- a/tests/test_main.py +++ b/tests/test_app.py @@ -1,14 +1,14 @@ -import main +import app import pytest -import os +from unittest.mock import patch class TestValid: """正常系""" - def test_main(self): + def test_main_with_minimal_arguments(self): """環境変数も引数も指定しない場合、コンソールモードで動作する事""" - main.main(["tests/doc/"]) + app.main(["tests/doc/"]) @pytest.mark.parametrize( ["format"], @@ -17,16 +17,21 @@ def test_main(self): pytest.param("console"), ], ) - def test_format_args(self, format: str): - """フォーマット込みで行って最後まで完了する事。 + def test_main_with_valid_command_line_arguments(self, format: str): + """フォーマット込みで行う一貫テスト。 :param format: _description_ :type format: str """ - main.main(["--format", format, "tests/doc/"]) + app.main(["--format", format, "tests/doc/"]) class TestInValid: + @pytest.fixture + def setup_environ(self): + with patch.dict("os.environ", {"OUTPUT_FORMAT": "consol"}): + yield + """異常系""" @pytest.mark.parametrize( @@ -44,7 +49,7 @@ def test_raise_NotImplemented_format_args(self, format: str): :type format: str """ with pytest.raises(NotImplementedError): - main.main(["--format", format, "tests/doc/"]) + app.main(["--format", format, "tests/doc/"]) @pytest.mark.parametrize( ["format"], @@ -56,14 +61,14 @@ def test_raise_ValueError_format_args(self, format: str): :type format: str """ with pytest.raises(ValueError): - main.main(["--format", format, "tests/doc/"]) + app.main(["--format", format, "tests/doc/"]) + @pytest.mark.usefixtures("setup_environ") def test_raise_format_args_use_environment(self): """環境変数でフォーマット指定をした時に適切ではない値が入っていた場合、例外発生。 :param format: _description_ :type format: str """ - os.environ["OUTPUT_FORMAT"] = "consol" with pytest.raises(ValueError): - main.main(["tests/doc/"]) + app.main(["tests/doc/"]) From 8b7bab632417117dc631921a59c9f3ec3d79ea5f Mon Sep 17 00:00:00 2001 From: DogFortune Date: Tue, 11 Nov 2025 18:02:20 +0900 Subject: [PATCH 33/48] =?UTF-8?q?=E3=83=AA=E3=83=B3=E3=82=AF=E3=81=AE?= =?UTF-8?q?=E6=8A=BD=E5=87=BA=E5=87=A6=E7=90=86=E3=81=AE=E6=94=B9=E5=96=84?= =?UTF-8?q?=E4=B8=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 単純な文字列検索だとパターン的に足りないので改修中 --- .gitmodules | 3 +++ src/analyzer.py | 8 +++++++- submodules/awesome | 1 + tests/doc/doc2.md | 5 ++++- tests/test_awesome.py | 6 ++++++ 5 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 .gitmodules create mode 160000 submodules/awesome create mode 100644 tests/test_awesome.py diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..d401afa --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "submodules/awesome"] + path = submodules/awesome + url = git@github.com:sindresorhus/awesome.git diff --git a/src/analyzer.py b/src/analyzer.py index f8da3d4..3a7932a 100644 --- a/src/analyzer.py +++ b/src/analyzer.py @@ -4,6 +4,10 @@ from enums import Result from reporter import ReportData import dataclasses +import re + +URL_PATTERN = r"""((?:[a-zA-Z]{1,10}://|//)[^"'/]{1,}.[a-zA-Z]{2,}[^"']{0,})""" +URL_RE = re.compile(URL_PATTERN) @dataclasses.dataclass @@ -69,7 +73,9 @@ def extract_link(files: list) -> dict[str, LinkInfo]: lines = f.read().splitlines() links[f"{file_path}"] = [] for i, line in enumerate(lines): - if "http" in line: + result = URL_RE.search(line) + if result: + print(result.group()) url = line.split("](")[1].rstrip(")") if url in seen_urls: duplicate = True diff --git a/submodules/awesome b/submodules/awesome new file mode 160000 index 0000000..a52fe1c --- /dev/null +++ b/submodules/awesome @@ -0,0 +1 @@ +Subproject commit a52fe1ca90889ca67fe78a5366b41afe6ed2d079 diff --git a/tests/doc/doc2.md b/tests/doc/doc2.md index fac6cd7..a1e2a28 100644 --- a/tests/doc/doc2.md +++ b/tests/doc/doc2.md @@ -4,4 +4,7 @@ [example.com](https://example.com) [example.com](https://example.com) [example.com](http://example.com) -[example.jp](https://example.jp) \ No newline at end of file +[example.jp](https://example.jp) + +## InValid +available at [https://contributor-covenant.org/version/1/4][version] \ No newline at end of file diff --git a/tests/test_awesome.py b/tests/test_awesome.py new file mode 100644 index 0000000..b4fe8f7 --- /dev/null +++ b/tests/test_awesome.py @@ -0,0 +1,6 @@ +import app + + +def test_awesome(): + """awesomeを使ったほぼ実環境に近いパターン""" + app.main(["submodules/awesome/"]) From e23b6341737366f4620ff5b8856f477efcd155aa Mon Sep 17 00:00:00 2001 From: DogFortune Date: Wed, 12 Nov 2025 09:16:50 +0900 Subject: [PATCH 34/48] =?UTF-8?q?URL=E6=8A=BD=E5=87=BA=E6=96=B9=E6=B3=95?= =?UTF-8?q?=E3=82=92=E6=94=B9=E4=BF=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 正規表現による抽出方式に変更 --- src/analyzer.py | 5 ++--- tests/doc/doc2.md | 10 ---------- tests/{doc => sample_doc}/doc1.md | 2 -- tests/sample_doc/doc2.md | 6 ++++++ tests/syntax/url_syntax.md | 3 +++ tests/test_analyzer.py | 6 +++--- tests/test_app.py | 10 +++++----- tests/test_reporter.py | 2 +- 8 files changed, 20 insertions(+), 24 deletions(-) delete mode 100644 tests/doc/doc2.md rename tests/{doc => sample_doc}/doc1.md (80%) create mode 100644 tests/sample_doc/doc2.md create mode 100644 tests/syntax/url_syntax.md diff --git a/src/analyzer.py b/src/analyzer.py index 3a7932a..9066619 100644 --- a/src/analyzer.py +++ b/src/analyzer.py @@ -6,7 +6,7 @@ import dataclasses import re -URL_PATTERN = r"""((?:[a-zA-Z]{1,10}://|//)[^"'/]{1,}.[a-zA-Z]{2,}[^"']{0,})""" +URL_PATTERN = r'https?://[^\s\)\]>"]+' URL_RE = re.compile(URL_PATTERN) @@ -75,8 +75,7 @@ def extract_link(files: list) -> dict[str, LinkInfo]: for i, line in enumerate(lines): result = URL_RE.search(line) if result: - print(result.group()) - url = line.split("](")[1].rstrip(")") + url = result.group() if url in seen_urls: duplicate = True else: diff --git a/tests/doc/doc2.md b/tests/doc/doc2.md deleted file mode 100644 index a1e2a28..0000000 --- a/tests/doc/doc2.md +++ /dev/null @@ -1,10 +0,0 @@ -# TestDocument2 -## Require - -[example.com](https://example.com) -[example.com](https://example.com) -[example.com](http://example.com) -[example.jp](https://example.jp) - -## InValid -available at [https://contributor-covenant.org/version/1/4][version] \ No newline at end of file diff --git a/tests/doc/doc1.md b/tests/sample_doc/doc1.md similarity index 80% rename from tests/doc/doc1.md rename to tests/sample_doc/doc1.md index 44ff416..e9958a1 100644 --- a/tests/doc/doc1.md +++ b/tests/sample_doc/doc1.md @@ -1,4 +1,2 @@ # TestDocument -## Require - [example.com](https://example.com) \ No newline at end of file diff --git a/tests/sample_doc/doc2.md b/tests/sample_doc/doc2.md new file mode 100644 index 0000000..a15e051 --- /dev/null +++ b/tests/sample_doc/doc2.md @@ -0,0 +1,6 @@ +# TestDocument2 +## Duplicate +[example.com](https://example.com) +[example.com](https://example.com) +[example.com](http://example.com) +[example.jp](https://example.jp) \ No newline at end of file diff --git a/tests/syntax/url_syntax.md b/tests/syntax/url_syntax.md new file mode 100644 index 0000000..e4c6620 --- /dev/null +++ b/tests/syntax/url_syntax.md @@ -0,0 +1,3 @@ +# syntax list +## Syntax List for Links +available at [https://contributor-covenant.org/version/1/4][version] \ No newline at end of file diff --git a/tests/test_analyzer.py b/tests/test_analyzer.py index af1cd34..67e590f 100644 --- a/tests/test_analyzer.py +++ b/tests/test_analyzer.py @@ -27,7 +27,7 @@ def test_request(url: str, expected_result: str, expected_status_code: int): def test_check_links(): - files = analyzer.search("tests/doc/") + files = analyzer.search("tests/sample_doc/") links = analyzer.extract_link(files) results_report_data = analyzer.check_links(links) @@ -50,7 +50,7 @@ def test_check_links(): assert item.reason is not None -@pytest.mark.parametrize(["path"], [pytest.param("tests/doc/")]) +@pytest.mark.parametrize(["path"], [pytest.param("tests/sample_doc/")]) def test_search(path: str): files = analyzer.search(path) assert len(files) == 2 @@ -61,7 +61,7 @@ def test_extract_link(): # データ構造としてはdictのKeyにファイルのパス、Valueにリンクに関する情報が入っている。 # これは1ファイルの中に大量のリンクがあった時、すべてがフラットなリストだとファイル名を1つ1つ持つ事になるのでデータ量が増えてしまう。ファイル名は値として重複しやすいので、Keyという形で1つにまとめたのが理由。 # 重複リンクにはフラグをつける。2つ目以降はFalseになるのでTrueのものだけリンクチェックすればOK - files = analyzer.search("tests/doc/") + files = analyzer.search("tests/sample_doc/") links = analyzer.extract_link(files) assert len(links) == 2 diff --git a/tests/test_app.py b/tests/test_app.py index bd19f14..99f05da 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -8,7 +8,7 @@ class TestValid: def test_main_with_minimal_arguments(self): """環境変数も引数も指定しない場合、コンソールモードで動作する事""" - app.main(["tests/doc/"]) + app.main(["tests/sample_doc/"]) @pytest.mark.parametrize( ["format"], @@ -23,7 +23,7 @@ def test_main_with_valid_command_line_arguments(self, format: str): :param format: _description_ :type format: str """ - app.main(["--format", format, "tests/doc/"]) + app.main(["--format", format, "tests/sample_doc/"]) class TestInValid: @@ -49,7 +49,7 @@ def test_raise_NotImplemented_format_args(self, format: str): :type format: str """ with pytest.raises(NotImplementedError): - app.main(["--format", format, "tests/doc/"]) + app.main(["--format", format, "tests/sample_doc/"]) @pytest.mark.parametrize( ["format"], @@ -61,7 +61,7 @@ def test_raise_ValueError_format_args(self, format: str): :type format: str """ with pytest.raises(ValueError): - app.main(["--format", format, "tests/doc/"]) + app.main(["--format", format, "tests/sample_doc/"]) @pytest.mark.usefixtures("setup_environ") def test_raise_format_args_use_environment(self): @@ -71,4 +71,4 @@ def test_raise_format_args_use_environment(self): :type format: str """ with pytest.raises(ValueError): - app.main(["tests/doc/"]) + app.main(["tests/sample_doc/"]) diff --git a/tests/test_reporter.py b/tests/test_reporter.py index 0166e26..50a54e6 100644 --- a/tests/test_reporter.py +++ b/tests/test_reporter.py @@ -8,7 +8,7 @@ class TestValid: def test_console(self): """コンソール出力テスト。文字列が想定している形である事""" - files = analyzer.search("tests/doc/") + files = analyzer.search("tests/sample_doc/") links = analyzer.extract_link(files) results_report_data = analyzer.check_links(links) From 6e982038221396a89076d94f0bdf70de7cb51ad0 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Wed, 12 Nov 2025 10:04:09 +0900 Subject: [PATCH 35/48] =?UTF-8?q?tqdm=E3=81=AB=E3=82=88=E3=82=8B=E9=80=B2?= =?UTF-8?q?=E6=8D=97=E8=A1=A8=E8=A8=98=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Pipfile | 1 + Pipfile.lock | 22 ++++++++++++++++++++-- src/analyzer.py | 32 ++++++++++++++++++++++---------- 3 files changed, 43 insertions(+), 12 deletions(-) diff --git a/Pipfile b/Pipfile index 25ad93a..b85b861 100644 --- a/Pipfile +++ b/Pipfile @@ -4,6 +4,7 @@ verify_ssl = true name = "pypi" [packages] +tqdm = "*" [dev-packages] pytest = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 77264fb..dcaacb3 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "205c9da775a56185b15b98b207406a87485419ccb7f873a51f03cdf8aac2b351" + "sha256": "815c7ae523202ad56147c32f3c8d9ecba4fa43e112e7c674bc93b8d345b009fc" }, "pipfile-spec": 6, "requires": { @@ -15,7 +15,25 @@ } ] }, - "default": {}, + "default": { + "colorama": { + "hashes": [ + "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", + "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'", + "version": "==0.4.6" + }, + "tqdm": { + "hashes": [ + "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", + "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2" + ], + "index": "pypi", + "markers": "python_version >= '3.7'", + "version": "==4.67.1" + } + }, "develop": { "black": { "hashes": [ diff --git a/src/analyzer.py b/src/analyzer.py index 9066619..fddafdf 100644 --- a/src/analyzer.py +++ b/src/analyzer.py @@ -5,6 +5,7 @@ from reporter import ReportData import dataclasses import re +from tqdm import tqdm URL_PATTERN = r'https?://[^\s\)\]>"]+' URL_RE = re.compile(URL_PATTERN) @@ -31,7 +32,7 @@ class LinkInfo: def request(url: str) -> AnalyzeResponse: try: - res = urlopen(url, timeout=5) + res = urlopen(url, timeout=3) return AnalyzeResponse(Result.OK, res.code, res.url, None) except HTTPError as e: # アクセスできて400や500系が来た時はこっち @@ -45,14 +46,25 @@ def check_links(links: dict[str, LinkInfo]) -> list[ReportData]: # リンクをチェックします。 # チェックすべきなのはFalseのものだけ。 results = [] - for file_path, link_items in links.items(): - for item in link_items: - if not item.duplicate: - res = request(item.url) - data = ReportData( - file_path, item.line, item.url, res.result, res.code, res.reason - ) - results.append(data) + with tqdm(links.items()) as links_prog: + for file_path, link_items in links_prog: + links_prog.set_description(file_path) + with tqdm(link_items) as link_items_prog: + for item in link_items_prog: + if not item.duplicate: + res = request(item.url) + data = ReportData( + file_path, + item.line, + item.url, + res.result, + res.code, + res.reason, + ) + link_items_prog.set_description( + f"url: {item.url} result: {res.result}" + ) + results.append(data) return results @@ -69,7 +81,7 @@ def extract_link(files: list) -> dict[str, LinkInfo]: links = {} seen_urls = set() for file_path in files: - with open(file_path, "r") as f: + with open(file_path, "r", encoding="utf-8") as f: lines = f.read().splitlines() links[f"{file_path}"] = [] for i, line in enumerate(lines): From 3162d8ccae97376edae54c02f42505c4fdbdc776 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Wed, 12 Nov 2025 10:38:15 +0900 Subject: [PATCH 36/48] =?UTF-8?q?=E3=83=AC=E3=83=9D=E3=83=BC=E3=82=BF?= =?UTF-8?q?=E3=83=BC=E6=94=B9=E4=BF=AE=E4=B8=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JSON追加と使い方が2行になっているので使いづらい --- src/analyzer.py | 28 ++++++++++++---------------- src/reporter.py | 17 +++++++++++++---- tests/test_awesome.py | 2 ++ tests/test_reporter.py | 8 +++++++- 4 files changed, 34 insertions(+), 21 deletions(-) diff --git a/src/analyzer.py b/src/analyzer.py index fddafdf..62f95e3 100644 --- a/src/analyzer.py +++ b/src/analyzer.py @@ -49,22 +49,18 @@ def check_links(links: dict[str, LinkInfo]) -> list[ReportData]: with tqdm(links.items()) as links_prog: for file_path, link_items in links_prog: links_prog.set_description(file_path) - with tqdm(link_items) as link_items_prog: - for item in link_items_prog: - if not item.duplicate: - res = request(item.url) - data = ReportData( - file_path, - item.line, - item.url, - res.result, - res.code, - res.reason, - ) - link_items_prog.set_description( - f"url: {item.url} result: {res.result}" - ) - results.append(data) + for item in tqdm(link_items): + if not item.duplicate: + res = request(item.url) + data = ReportData( + file_path, + item.line, + item.url, + res.result, + res.code, + res.reason, + ) + results.append(data) return results diff --git a/src/reporter.py b/src/reporter.py index 55876fd..2c4a9c2 100644 --- a/src/reporter.py +++ b/src/reporter.py @@ -1,5 +1,6 @@ import dataclasses -from pprint import pprint as pp +from pprint import pformat +import json @dataclasses.dataclass @@ -17,7 +18,15 @@ def __init__(self, data: list[ReportData]): self.data_list = data def generate(self) -> str: - pp(self.data_list) - line = "" - + line = pformat(self.data_list) return line + + +class Json: + def __init__(self, data: list[ReportData], output_path: str): + self.data_list = data + self.path = output_path + + def generate(self): + with open(self.path, "w", encoding="utf-8") as f: + json.dump(self.data_list, f, indent=4) diff --git a/tests/test_awesome.py b/tests/test_awesome.py index b4fe8f7..a71d2c5 100644 --- a/tests/test_awesome.py +++ b/tests/test_awesome.py @@ -1,6 +1,8 @@ import app +import pytest +@pytest.mark.skip() def test_awesome(): """awesomeを使ったほぼ実環境に近いパターン""" app.main(["submodules/awesome/"]) diff --git a/tests/test_reporter.py b/tests/test_reporter.py index 50a54e6..4c51b63 100644 --- a/tests/test_reporter.py +++ b/tests/test_reporter.py @@ -9,9 +9,15 @@ class TestValid: def test_console(self): """コンソール出力テスト。文字列が想定している形である事""" files = analyzer.search("tests/sample_doc/") - links = analyzer.extract_link(files) results_report_data = analyzer.check_links(links) + report = reporter.Console(results_report_data) output_line = report.generate() + assert output_line is not None + + def test_json(self): + files = analyzer.search("tests/sample_doc/") + links = analyzer.extract_link(files) + results_report_data = analyzer.check_links(links) From 6d981d5084bd02d73453630fc9c22c3cfc5e68e8 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Wed, 12 Nov 2025 13:33:45 +0900 Subject: [PATCH 37/48] =?UTF-8?q?=E3=83=AC=E3=83=9D=E3=83=BC=E3=82=BF?= =?UTF-8?q?=E3=83=BC=E6=94=B9=E4=BF=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit クラスではなく関数でシンプルに呼び出せるように --- src/app.py | 3 +-- src/reporter.py | 22 +++++++--------------- tests/test_reporter.py | 12 ++++++++++-- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/app.py b/src/app.py index e716a18..95fd433 100644 --- a/src/app.py +++ b/src/app.py @@ -9,8 +9,7 @@ def __output(data: list[ReportData], format: OutputType): match format: case OutputType.Console: - report = reporter.Console(data) - line = report.generate() + line = reporter.console(data) print(line) diff --git a/src/reporter.py b/src/reporter.py index 2c4a9c2..365900c 100644 --- a/src/reporter.py +++ b/src/reporter.py @@ -13,20 +13,12 @@ class ReportData: reason: str -class Console: - def __init__(self, data: list[ReportData]): - self.data_list = data +def console(data: list[ReportData]): + # TODO: 出力形式は仮でpformatを設定中。 + line = pformat(data) + return line - def generate(self) -> str: - line = pformat(self.data_list) - return line - -class Json: - def __init__(self, data: list[ReportData], output_path: str): - self.data_list = data - self.path = output_path - - def generate(self): - with open(self.path, "w", encoding="utf-8") as f: - json.dump(self.data_list, f, indent=4) +def json_dump(data: list[ReportData], output_path: str): + with open(output_path, "w", encoding="utf-8") as f: + json.dump(data, f, indent=4) diff --git a/tests/test_reporter.py b/tests/test_reporter.py index 4c51b63..ddb46f8 100644 --- a/tests/test_reporter.py +++ b/tests/test_reporter.py @@ -1,6 +1,9 @@ import reporter import analyzer from pprint import pprint as pp +from tempfile import TemporaryDirectory +from pathlib import Path +import os class TestValid: @@ -12,8 +15,7 @@ def test_console(self): links = analyzer.extract_link(files) results_report_data = analyzer.check_links(links) - report = reporter.Console(results_report_data) - output_line = report.generate() + output_line = reporter.console(results_report_data) assert output_line is not None @@ -21,3 +23,9 @@ def test_json(self): files = analyzer.search("tests/sample_doc/") links = analyzer.extract_link(files) results_report_data = analyzer.check_links(links) + with TemporaryDirectory() as dir: + output_path = Path(dir, "result.json") + + reporter.json_dump(results_report_data, output_path) + + assert os.path.isfile(output_path) is True From 40f8d812dacd9db4b85e6b9a9ba2b9e5ba7f192d Mon Sep 17 00:00:00 2001 From: DogFortune Date: Wed, 12 Nov 2025 14:44:55 +0900 Subject: [PATCH 38/48] =?UTF-8?q?=E8=87=AA=E4=BD=9C=E3=82=AF=E3=83=A9?= =?UTF-8?q?=E3=82=B9=E3=82=92=E5=90=AB=E3=82=93=E3=81=A0JSON=E3=81=AE?= =?UTF-8?q?=E6=9B=B8=E3=81=8D=E5=87=BA=E3=81=97=E6=A9=9F=E8=83=BD=E5=AE=9F?= =?UTF-8?q?=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit リストになっているのでトップにキーを加えたい --- Pipfile | 1 + Pipfile.lock | 50 +++++++++++++++++++++++++++++++++++++++++- src/reporter.py | 11 ++++++---- tests/test_reporter.py | 4 ++-- 4 files changed, 59 insertions(+), 7 deletions(-) diff --git a/Pipfile b/Pipfile index b85b861..ba1021a 100644 --- a/Pipfile +++ b/Pipfile @@ -5,6 +5,7 @@ name = "pypi" [packages] tqdm = "*" +dataclasses-json = "*" [dev-packages] pytest = "*" diff --git a/Pipfile.lock b/Pipfile.lock index dcaacb3..45bb5fc 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "815c7ae523202ad56147c32f3c8d9ecba4fa43e112e7c674bc93b8d345b009fc" + "sha256": "a2d0cee61c4fa721f0c28a5774070546e974a9bc28d68b59a8e52bff137e588c" }, "pipfile-spec": 6, "requires": { @@ -24,6 +24,39 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'", "version": "==0.4.6" }, + "dataclasses-json": { + "hashes": [ + "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a", + "sha256:b6b3e528266ea45b9535223bc53ca645f5208833c29229e847b3f26a1cc55fc0" + ], + "index": "pypi", + "markers": "python_version >= '3.7' and python_version < '4.0'", + "version": "==0.6.7" + }, + "marshmallow": { + "hashes": [ + "sha256:3350409f20a70a7e4e11a27661187b77cdcaeb20abca41c1454fe33636bea09c", + "sha256:e6d8affb6cb61d39d26402096dc0aee12d5a26d490a121f118d2e81dc0719dc6" + ], + "markers": "python_version >= '3.9'", + "version": "==3.26.1" + }, + "mypy-extensions": { + "hashes": [ + "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", + "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558" + ], + "markers": "python_version >= '3.8'", + "version": "==1.1.0" + }, + "packaging": { + "hashes": [ + "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", + "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f" + ], + "markers": "python_version >= '3.8'", + "version": "==25.0" + }, "tqdm": { "hashes": [ "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", @@ -32,6 +65,21 @@ "index": "pypi", "markers": "python_version >= '3.7'", "version": "==4.67.1" + }, + "typing-extensions": { + "hashes": [ + "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", + "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548" + ], + "markers": "python_version >= '3.9'", + "version": "==4.15.0" + }, + "typing-inspect": { + "hashes": [ + "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f", + "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78" + ], + "version": "==0.9.0" } }, "develop": { diff --git a/src/reporter.py b/src/reporter.py index 365900c..1c3c4eb 100644 --- a/src/reporter.py +++ b/src/reporter.py @@ -1,9 +1,11 @@ -import dataclasses +from dataclasses import dataclass +from dataclasses_json import dataclass_json from pprint import pformat import json -@dataclasses.dataclass +@dataclass_json +@dataclass class ReportData: file: str line: int @@ -19,6 +21,7 @@ def console(data: list[ReportData]): return line -def json_dump(data: list[ReportData], output_path: str): +def dump_json(data: list[ReportData], output_path: str): + json_str = ReportData.schema().dumps(data, many=True, indent=4) with open(output_path, "w", encoding="utf-8") as f: - json.dump(data, f, indent=4) + f.write(json_str) diff --git a/tests/test_reporter.py b/tests/test_reporter.py index ddb46f8..7c03bfe 100644 --- a/tests/test_reporter.py +++ b/tests/test_reporter.py @@ -24,8 +24,8 @@ def test_json(self): links = analyzer.extract_link(files) results_report_data = analyzer.check_links(links) with TemporaryDirectory() as dir: - output_path = Path(dir, "result.json") + output_path = Path("tests", "result.json") - reporter.json_dump(results_report_data, output_path) + reporter.dump_json(results_report_data, output_path) assert os.path.isfile(output_path) is True From 5b93641b4bdbf15ce32545b5ff17c30f23542af1 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Wed, 12 Nov 2025 15:03:54 +0900 Subject: [PATCH 39/48] =?UTF-8?q?=E3=82=AA=E3=83=96=E3=82=B8=E3=82=A7?= =?UTF-8?q?=E3=82=AF=E3=83=88=E3=81=A8=E3=81=97=E3=81=A6JSON=E3=82=92?= =?UTF-8?q?=E5=87=BA=E5=8A=9B=E3=81=99=E3=82=8B=E3=81=9F=E3=82=81=E3=81=AE?= =?UTF-8?q?=E3=83=87=E3=83=BC=E3=82=BF=E3=82=AF=E3=83=A9=E3=82=B9=E3=81=A8?= =?UTF-8?q?=E3=82=AB=E3=82=B9=E3=82=BF=E3=83=A0=E3=82=A8=E3=83=B3=E3=82=B3?= =?UTF-8?q?=E3=83=BC=E3=83=80=E3=83=BC=E3=81=AE=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit JSONで出力できることを確認 --- src/reporter.py | 20 +++++++++++++++++++- tests/test_reporter.py | 2 +- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/reporter.py b/src/reporter.py index 1c3c4eb..f9e8e71 100644 --- a/src/reporter.py +++ b/src/reporter.py @@ -1,6 +1,7 @@ from dataclasses import dataclass from dataclasses_json import dataclass_json from pprint import pformat +from typing import List import json @@ -15,6 +16,20 @@ class ReportData: reason: str +@dataclass_json +@dataclass +class ReportCollection: + Reports: List[ReportData] + + +class CustomEncoder(json.JSONEncoder): + def default(self, obj): + # 例外オブジェクトを文字列に変換 + if isinstance(obj, Exception): + return str(obj) + return super().default(obj) + + def console(data: list[ReportData]): # TODO: 出力形式は仮でpformatを設定中。 line = pformat(data) @@ -22,6 +37,9 @@ def console(data: list[ReportData]): def dump_json(data: list[ReportData], output_path: str): - json_str = ReportData.schema().dumps(data, many=True, indent=4) + collection = ReportCollection(Reports=data) + json_str = json.dumps( + collection.to_dict(), indent=4, ensure_ascii=False, cls=CustomEncoder + ) with open(output_path, "w", encoding="utf-8") as f: f.write(json_str) diff --git a/tests/test_reporter.py b/tests/test_reporter.py index 7c03bfe..dd5287a 100644 --- a/tests/test_reporter.py +++ b/tests/test_reporter.py @@ -24,7 +24,7 @@ def test_json(self): links = analyzer.extract_link(files) results_report_data = analyzer.check_links(links) with TemporaryDirectory() as dir: - output_path = Path("tests", "result.json") + output_path = Path(dir, "result.json") reporter.dump_json(results_report_data, output_path) From c3023bf1680e7607b3a16a4599fb6b484d5cb059 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Wed, 12 Nov 2025 17:55:13 +0900 Subject: [PATCH 40/48] =?UTF-8?q?=E3=83=95=E3=82=A9=E3=83=BC=E3=83=9E?= =?UTF-8?q?=E3=83=83=E3=83=88=E6=8C=87=E5=AE=9A=E3=81=A7=E3=81=AF=E3=81=AA?= =?UTF-8?q?=E3=81=8Fjson=E3=82=92=E5=87=BA=E5=8A=9B=E3=81=99=E3=82=8B?= =?UTF-8?q?=E3=81=8B=E5=90=A6=E3=81=8B=E3=81=A7=E6=8C=87=E5=AE=9A=E3=81=99?= =?UTF-8?q?=E3=82=8B=E6=96=B9=E5=BC=8F=E3=81=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit まだJSONは保存できないのでRED --- src/app.py | 17 ++++++------- src/enums.py | 1 + tests/test_app.py | 61 +++++++---------------------------------------- 3 files changed, 17 insertions(+), 62 deletions(-) diff --git a/src/app.py b/src/app.py index 95fd433..3c1e852 100644 --- a/src/app.py +++ b/src/app.py @@ -13,20 +13,17 @@ def __output(data: list[ReportData], format: OutputType): print(line) -def __format__setting(format: str): - match format.upper(): - case "CONSOLE": - return OutputType.Console - case "JSON" | "YAML": - raise NotImplementedError - case _: - raise ValueError +def __format__setting(args): + if args.report_json: + return OutputType.Json + else: + return OutputType.Console def create_parser(): parser = argparse.ArgumentParser() parser.add_argument("src", default=os.environ.get("SRC_DIR", ".")) - parser.add_argument("--format", default=os.environ.get("OUTPUT_FORMAT", "CONSOLE")) + parser.add_argument("--report-json", type=str, help="output report file") return parser @@ -34,7 +31,7 @@ def main(args=None): parser = create_parser() parsed_args = parser.parse_args(args) - format = __format__setting(parsed_args.format) + format = __format__setting(parsed_args) src = parsed_args.src files = analyzer.search(src) diff --git a/src/enums.py b/src/enums.py index 5a53929..d752ede 100644 --- a/src/enums.py +++ b/src/enums.py @@ -3,6 +3,7 @@ class OutputType(Enum): Console = auto() + Json = auto() class Result(StrEnum): diff --git a/tests/test_app.py b/tests/test_app.py index 99f05da..0c9cafb 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -1,4 +1,7 @@ import app +import os +from tempfile import TemporaryDirectory +from pathlib import Path import pytest from unittest.mock import patch @@ -10,20 +13,13 @@ def test_main_with_minimal_arguments(self): """環境変数も引数も指定しない場合、コンソールモードで動作する事""" app.main(["tests/sample_doc/"]) - @pytest.mark.parametrize( - ["format"], - [ - pytest.param("CONSOLE"), - pytest.param("console"), - ], - ) - def test_main_with_valid_command_line_arguments(self, format: str): - """フォーマット込みで行う一貫テスト。 + def test_main_with_output_json(self): + """JSONファイルが出力されている事""" + with TemporaryDirectory() as dir: + output_path = Path(dir, "result.json") + app.main(["tests/sample_doc/", "--report-json", str(output_path)]) - :param format: _description_ - :type format: str - """ - app.main(["--format", format, "tests/sample_doc/"]) + assert os.path.isfile(output_path) is True class TestInValid: @@ -33,42 +29,3 @@ def setup_environ(self): yield """異常系""" - - @pytest.mark.parametrize( - ["format"], - [ - pytest.param("JSON"), - pytest.param("json"), - pytest.param("YAML"), - pytest.param("yaml"), - ], - ) - def test_raise_NotImplemented_format_args(self, format: str): - """未対応フォーマットを指定した場合、未実装を表す例外発生。 - :param format: _description_ - :type format: str - """ - with pytest.raises(NotImplementedError): - app.main(["--format", format, "tests/sample_doc/"]) - - @pytest.mark.parametrize( - ["format"], - [pytest.param("consol"), pytest.param("sample")], - ) - def test_raise_ValueError_format_args(self, format: str): - """適切ではない値が来た場合、例外発生。 - :param format: _description_ - :type format: str - """ - with pytest.raises(ValueError): - app.main(["--format", format, "tests/sample_doc/"]) - - @pytest.mark.usefixtures("setup_environ") - def test_raise_format_args_use_environment(self): - """環境変数でフォーマット指定をした時に適切ではない値が入っていた場合、例外発生。 - - :param format: _description_ - :type format: str - """ - with pytest.raises(ValueError): - app.main(["tests/sample_doc/"]) From f89b6b3bbeec4f60fc907cdd71132df27b237d4d Mon Sep 17 00:00:00 2001 From: DogFortune Date: Thu, 13 Nov 2025 10:29:55 +0900 Subject: [PATCH 41/48] =?UTF-8?q?help=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/app.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/app.py b/src/app.py index 3c1e852..f1b384f 100644 --- a/src/app.py +++ b/src/app.py @@ -23,7 +23,9 @@ def __format__setting(args): def create_parser(): parser = argparse.ArgumentParser() parser.add_argument("src", default=os.environ.get("SRC_DIR", ".")) - parser.add_argument("--report-json", type=str, help="output report file") + parser.add_argument( + "--report-json", type=str, help="Create json report file at given path" + ) return parser From f4f45f3f9319faf169ce4b8b5f8e444f52ade5ad Mon Sep 17 00:00:00 2001 From: DogFortune Date: Thu, 13 Nov 2025 11:14:20 +0900 Subject: [PATCH 42/48] =?UTF-8?q?JSON=E5=87=BA=E5=8A=9B=E6=A9=9F=E8=83=BD?= =?UTF-8?q?=E3=81=AE=E5=AE=9F=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/app.py | 7 +++++-- src/reporter.py | 3 +++ tests/test_reporter.py | 41 ++++++++++++++++++++++++++++++----------- 3 files changed, 38 insertions(+), 13 deletions(-) diff --git a/src/app.py b/src/app.py index f1b384f..de457b5 100644 --- a/src/app.py +++ b/src/app.py @@ -6,11 +6,14 @@ from reporter import ReportData -def __output(data: list[ReportData], format: OutputType): +def __output(data: list[ReportData], format: OutputType, args): match format: case OutputType.Console: line = reporter.console(data) print(line) + case OutputType.Json: + output_path = args.report_json + reporter.dump_json(data, output_path) def __format__setting(args): @@ -39,7 +42,7 @@ def main(args=None): files = analyzer.search(src) links = analyzer.extract_link(files) report_data_list = analyzer.check_links(links) - __output(report_data_list, format) + __output(report_data_list, format, parsed_args) if __name__ == "__main__": diff --git a/src/reporter.py b/src/reporter.py index f9e8e71..03dea75 100644 --- a/src/reporter.py +++ b/src/reporter.py @@ -3,6 +3,7 @@ from pprint import pformat from typing import List import json +import os @dataclass_json @@ -37,6 +38,8 @@ def console(data: list[ReportData]): def dump_json(data: list[ReportData], output_path: str): + if os.path.splitext(output_path)[-1].lower() != ".json": + raise ValueError collection = ReportCollection(Reports=data) json_str = json.dumps( collection.to_dict(), indent=4, ensure_ascii=False, cls=CustomEncoder diff --git a/tests/test_reporter.py b/tests/test_reporter.py index dd5287a..aa68259 100644 --- a/tests/test_reporter.py +++ b/tests/test_reporter.py @@ -1,3 +1,4 @@ +import pytest import reporter import analyzer from pprint import pprint as pp @@ -6,26 +7,44 @@ import os +@pytest.fixture(scope="function") +def setup_report_data(): + """レポート確認用データ作成 + + :yield: _description_ + :rtype: _type_ + """ + files = analyzer.search("tests/sample_doc/") + links = analyzer.extract_link(files) + results_report_data = analyzer.check_links(links) + return results_report_data + + class TestValid: """正常系""" - def test_console(self): + def test_console(self, setup_report_data): """コンソール出力テスト。文字列が想定している形である事""" - files = analyzer.search("tests/sample_doc/") - links = analyzer.extract_link(files) - results_report_data = analyzer.check_links(links) - - output_line = reporter.console(results_report_data) + output_line = reporter.console(setup_report_data) assert output_line is not None - def test_json(self): - files = analyzer.search("tests/sample_doc/") - links = analyzer.extract_link(files) - results_report_data = analyzer.check_links(links) + def test_json(self, setup_report_data): with TemporaryDirectory() as dir: output_path = Path(dir, "result.json") - reporter.dump_json(results_report_data, output_path) + reporter.dump_json(setup_report_data, output_path) assert os.path.isfile(output_path) is True + + +class TestInvalid: + """異常系""" + + def test_raises_exception_for_non_json_extension(self, setup_report_data): + """拡張子がjson以外だった場合、例外発生""" + with TemporaryDirectory() as dir: + output_path = Path(dir, "result.jso") + + with pytest.raises(ValueError): + reporter.dump_json(setup_report_data, output_path) From 08da21115d06b3f41eacd78ebd4a29a2f95e2c33 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Thu, 13 Nov 2025 11:36:14 +0900 Subject: [PATCH 43/48] =?UTF-8?q?JSON=E5=87=BA=E5=8A=9B=E5=8B=95=E4=BD=9C?= =?UTF-8?q?=E7=A2=BA=E8=AA=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_awesome.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_awesome.py b/tests/test_awesome.py index a71d2c5..1b340ec 100644 --- a/tests/test_awesome.py +++ b/tests/test_awesome.py @@ -5,4 +5,4 @@ @pytest.mark.skip() def test_awesome(): """awesomeを使ったほぼ実環境に近いパターン""" - app.main(["submodules/awesome/"]) + app.main(["submodules/awesome/", "--report-json", "tests/awesone.json"]) From 789a3aed52261c6aac70556ad4b14dab7e04ce8d Mon Sep 17 00:00:00 2001 From: DogFortune Date: Thu, 13 Nov 2025 11:44:17 +0900 Subject: [PATCH 44/48] =?UTF-8?q?=E3=83=91=E3=83=83=E3=82=B1=E3=83=BC?= =?UTF-8?q?=E3=82=B8=E3=81=A8=E3=81=97=E3=81=A6=E5=AE=9F=E8=A1=8C=E3=81=A7?= =?UTF-8?q?=E3=81=8D=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB=E8=AA=BF=E6=95=B4?= =?UTF-8?q?=E4=B8=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/__main__.py | 4 ++++ src/app.py | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) create mode 100644 src/__main__.py diff --git a/src/__main__.py b/src/__main__.py new file mode 100644 index 0000000..37c2a2d --- /dev/null +++ b/src/__main__.py @@ -0,0 +1,4 @@ +from .app import main + +if __name__ == "__main__": + main() diff --git a/src/app.py b/src/app.py index de457b5..256b8a8 100644 --- a/src/app.py +++ b/src/app.py @@ -43,7 +43,3 @@ def main(args=None): links = analyzer.extract_link(files) report_data_list = analyzer.check_links(links) __output(report_data_list, format, parsed_args) - - -if __name__ == "__main__": - main() From cd1c7938c1046a001f92e0af2769762c970c6799 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Thu, 13 Nov 2025 11:44:43 +0900 Subject: [PATCH 45/48] =?UTF-8?q?GitHub=20Actions=E3=81=AB=E3=82=88?= =?UTF-8?q?=E3=82=8B=E5=AE=9F=E8=A1=8C=E3=81=AF=E4=B8=80=E6=97=A6=E5=BB=83?= =?UTF-8?q?=E6=AD=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- action.yml | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 action.yml diff --git a/action.yml b/action.yml deleted file mode 100644 index 405b8ce..0000000 --- a/action.yml +++ /dev/null @@ -1,19 +0,0 @@ -name: LinkWatch -description: "Check whether you can correctly access the URL listed in the document file." -author: DogFortune - -inputs: - output-option: - description: output format option - required: false - src: - description: check directory - required: true - default: "." - -runs: - using: docker - image: Dockerfile - env: - SRC_DIR: ${{ inputs.src }} - OUTOUT_FORMAT: ${{ inputs.output-option }} \ No newline at end of file From 7b160d488d36719ec80af20274004097e6aa9b6c Mon Sep 17 00:00:00 2001 From: DogFortune Date: Thu, 13 Nov 2025 13:50:29 +0900 Subject: [PATCH 46/48] update pyproject.toml --- pyproject.toml | 13 +++++++++++++ src/app.py | 4 ++++ tests/test_app.py | 11 ----------- tests/test_reporter.py | 1 - 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2fe857e..75ea34b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,16 @@ +[project] +name = "linkwatch" +authors = [{ name = "DogFortune" }] +description = "Perform connectivity checks on URLs listed in the Markdown" +license-files = ["LICENSE"] +readme = "README.md" +requires-python = ">=3.10" +version = "0.0.1" +dependencies = ["tqdm", "dataclasses-json"] + +[project.scripts] +linkwatch = "src.app:main" + [tool.pytest.ini_options] pythonpath = "src" testpaths = ["tests"] diff --git a/src/app.py b/src/app.py index 256b8a8..de457b5 100644 --- a/src/app.py +++ b/src/app.py @@ -43,3 +43,7 @@ def main(args=None): links = analyzer.extract_link(files) report_data_list = analyzer.check_links(links) __output(report_data_list, format, parsed_args) + + +if __name__ == "__main__": + main() diff --git a/tests/test_app.py b/tests/test_app.py index 0c9cafb..2420a98 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -2,8 +2,6 @@ import os from tempfile import TemporaryDirectory from pathlib import Path -import pytest -from unittest.mock import patch class TestValid: @@ -20,12 +18,3 @@ def test_main_with_output_json(self): app.main(["tests/sample_doc/", "--report-json", str(output_path)]) assert os.path.isfile(output_path) is True - - -class TestInValid: - @pytest.fixture - def setup_environ(self): - with patch.dict("os.environ", {"OUTPUT_FORMAT": "consol"}): - yield - - """異常系""" diff --git a/tests/test_reporter.py b/tests/test_reporter.py index aa68259..0b2d51a 100644 --- a/tests/test_reporter.py +++ b/tests/test_reporter.py @@ -1,7 +1,6 @@ import pytest import reporter import analyzer -from pprint import pprint as pp from tempfile import TemporaryDirectory from pathlib import Path import os From 5047e5c448fcb3d6e0481dbcf10c62c2be5706d9 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Thu, 13 Nov 2025 13:59:52 +0900 Subject: [PATCH 47/48] =?UTF-8?q?=E3=83=AA=E3=83=95=E3=82=A1=E3=82=AF?= =?UTF-8?q?=E3=82=BF=E4=B8=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/analyzer.py | 59 ++++++++++++++++++++++++++++++------------ src/app.py | 20 ++++++++++++-- tests/test_analyzer.py | 4 +-- tests/test_reporter.py | 2 +- 4 files changed, 63 insertions(+), 22 deletions(-) diff --git a/src/analyzer.py b/src/analyzer.py index 62f95e3..f88d974 100644 --- a/src/analyzer.py +++ b/src/analyzer.py @@ -3,7 +3,7 @@ from urllib.error import HTTPError, URLError from enums import Result from reporter import ReportData -import dataclasses +from dataclasses import dataclass import re from tqdm import tqdm @@ -11,7 +11,7 @@ URL_RE = re.compile(URL_PATTERN) -@dataclasses.dataclass +@dataclass class AnalyzeResponse: """リンクにアクセスした結果""" @@ -21,9 +21,9 @@ class AnalyzeResponse: reason: str | None -@dataclasses.dataclass -class LinkInfo: - """ドキュメントから抽出したリンク情報""" +@dataclass +class URLInfo: + """ドキュメントから抽出したURL情報""" line: int url: str @@ -31,6 +31,13 @@ class LinkInfo: def request(url: str) -> AnalyzeResponse: + """疎通確認処理 + + :param url: 確認対象URL + :type url: str + :return: 結果 + :rtype: AnalyzeResponse + """ try: res = urlopen(url, timeout=3) return AnalyzeResponse(Result.OK, res.code, res.url, None) @@ -42,9 +49,14 @@ def request(url: str) -> AnalyzeResponse: return AnalyzeResponse(Result.NG, None, url, e.reason) -def check_links(links: dict[str, LinkInfo]) -> list[ReportData]: - # リンクをチェックします。 - # チェックすべきなのはFalseのものだけ。 +def check_links(links: dict[str, URLInfo]) -> list[ReportData]: + """URLの疎通確認を行います。確認を行うのは重複していないものだけです。 + + :param links: URLリスト + :type links: dict[str, URLInfo] + :return: 確認結果 + :rtype: list[ReportData] + """ results = [] with tqdm(links.items()) as links_prog: for file_path, link_items in links_prog: @@ -64,18 +76,31 @@ def check_links(links: dict[str, LinkInfo]) -> list[ReportData]: return results -def search(path: str, filter="*.md"): - # 指定したディレクトリから検査対象のファイルを抽出します。デフォルトはmdです。 +def search(path: str, filter="*.md") -> list: + """指定したディレクトリからMarkdownドキュメントを抽出します。 + + :param path: 検索対象 + :type path: str + :param filter: _description_, defaults to "*.md" + :type filter: str, optional + :return: ファイルパスのリスト + :rtype: list + """ p = Path(path) files = [str(item) for item in p.rglob(filter)] return files -def extract_link(files: list) -> dict[str, LinkInfo]: - # 各ファイルからリンクを抽出します。 - # 重複しているリンクはフラグがTrueになります。 +def extract_url(files: list) -> dict[str, URLInfo]: + """ファイルからURLを抽出します。 + + :param files: _description_ + :type files: list + :return: _description_ + :rtype: dict[str, LinkInfo] + """ links = {} - seen_urls = set() + duplicated_urls = set() for file_path in files: with open(file_path, "r", encoding="utf-8") as f: lines = f.read().splitlines() @@ -84,11 +109,11 @@ def extract_link(files: list) -> dict[str, LinkInfo]: result = URL_RE.search(line) if result: url = result.group() - if url in seen_urls: + if url in duplicated_urls: duplicate = True else: duplicate = False - seen_urls.add(url) - data = LinkInfo(i + 1, url, duplicate) + duplicated_urls.add(url) + data = URLInfo(i + 1, url, duplicate) links[f"{file_path}"].append(data) return links diff --git a/src/app.py b/src/app.py index de457b5..e5f1390 100644 --- a/src/app.py +++ b/src/app.py @@ -7,6 +7,15 @@ def __output(data: list[ReportData], format: OutputType, args): + """出力 + + :param data: 確認結果リスト + :type data: list[ReportData] + :param format: 出力形式 + :type format: OutputType + :param args: 結果 + :type args: + """ match format: case OutputType.Console: line = reporter.console(data) @@ -16,7 +25,14 @@ def __output(data: list[ReportData], format: OutputType, args): reporter.dump_json(data, output_path) -def __format__setting(args): +def __format__setting(args) -> OutputType: + """結果の出力形式の設定 + + :param args: Arguments + :type args: _type_ + :return: 出力形式 + :rtype: OutputType + """ if args.report_json: return OutputType.Json else: @@ -40,7 +56,7 @@ def main(args=None): src = parsed_args.src files = analyzer.search(src) - links = analyzer.extract_link(files) + links = analyzer.extract_url(files) report_data_list = analyzer.check_links(links) __output(report_data_list, format, parsed_args) diff --git a/tests/test_analyzer.py b/tests/test_analyzer.py index 67e590f..cfbd1ad 100644 --- a/tests/test_analyzer.py +++ b/tests/test_analyzer.py @@ -28,7 +28,7 @@ def test_request(url: str, expected_result: str, expected_status_code: int): def test_check_links(): files = analyzer.search("tests/sample_doc/") - links = analyzer.extract_link(files) + links = analyzer.extract_url(files) results_report_data = analyzer.check_links(links) # 重複しているリンクは結果に含まれていない事(ドキュメントに記載されているリンクの数 - 重複しているリンクの数になっている事) @@ -62,7 +62,7 @@ def test_extract_link(): # これは1ファイルの中に大量のリンクがあった時、すべてがフラットなリストだとファイル名を1つ1つ持つ事になるのでデータ量が増えてしまう。ファイル名は値として重複しやすいので、Keyという形で1つにまとめたのが理由。 # 重複リンクにはフラグをつける。2つ目以降はFalseになるのでTrueのものだけリンクチェックすればOK files = analyzer.search("tests/sample_doc/") - links = analyzer.extract_link(files) + links = analyzer.extract_url(files) assert len(links) == 2 diff --git a/tests/test_reporter.py b/tests/test_reporter.py index 0b2d51a..dd7502c 100644 --- a/tests/test_reporter.py +++ b/tests/test_reporter.py @@ -14,7 +14,7 @@ def setup_report_data(): :rtype: _type_ """ files = analyzer.search("tests/sample_doc/") - links = analyzer.extract_link(files) + links = analyzer.extract_url(files) results_report_data = analyzer.check_links(links) return results_report_data From 129fbbbc6f2e590e8f211f6faa6418609a390672 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Thu, 13 Nov 2025 14:07:21 +0900 Subject: [PATCH 48/48] Delete Submodule --- .gitmodules | 3 --- Dockerfile | 6 ------ submodules/awesome | 1 - tests/test_awesome.py | 8 -------- 4 files changed, 18 deletions(-) delete mode 100644 Dockerfile delete mode 160000 submodules/awesome delete mode 100644 tests/test_awesome.py diff --git a/.gitmodules b/.gitmodules index d401afa..e69de29 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +0,0 @@ -[submodule "submodules/awesome"] - path = submodules/awesome - url = git@github.com:sindresorhus/awesome.git diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 84877b6..0000000 --- a/Dockerfile +++ /dev/null @@ -1,6 +0,0 @@ -FROM gcr.io/distroless/python3-debian12:latest - -WORKDIR /app -COPY src/ . - -CMD [ "app.py" ] \ No newline at end of file diff --git a/submodules/awesome b/submodules/awesome deleted file mode 160000 index a52fe1c..0000000 --- a/submodules/awesome +++ /dev/null @@ -1 +0,0 @@ -Subproject commit a52fe1ca90889ca67fe78a5366b41afe6ed2d079 diff --git a/tests/test_awesome.py b/tests/test_awesome.py deleted file mode 100644 index 1b340ec..0000000 --- a/tests/test_awesome.py +++ /dev/null @@ -1,8 +0,0 @@ -import app -import pytest - - -@pytest.mark.skip() -def test_awesome(): - """awesomeを使ったほぼ実環境に近いパターン""" - app.main(["submodules/awesome/", "--report-json", "tests/awesone.json"])