diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..483c5fc --- /dev/null +++ b/.dockerignore @@ -0,0 +1,3 @@ +venv +.pytest_cache +*.pyc \ No newline at end of file diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..7a29d36 --- /dev/null +++ b/.flake8 @@ -0,0 +1,5 @@ +[flake8] +exclude = .git,__pycache__,.pytest_cache,docs/source/conf.py,old,build,dist,tests,tmp,.aws-sam,.venv +max-complexity = 15 +max-line-length = 120 +show-source = True diff --git a/.github/workflows/lint-test-format.yml b/.github/workflows/lint-test-format.yml new file mode 100644 index 0000000..66f50ce --- /dev/null +++ b/.github/workflows/lint-test-format.yml @@ -0,0 +1,77 @@ +name: test-lint-format + +on: + workflow_dispatch: + push: + branches: + - main + pull_request: + branches: + - main +permissions: + checks: write + pull-requests: write +jobs: + lint: + runs-on: ubuntu-24.04 + timeout-minutes: 5 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.13" + cache: "pipenv" + - name: install pipenv + run: | + python -m pip install --upgrade pip + python -m pip install pipenv + - name: install packages + run: pipenv sync --dev + - name: run lint + run: pipenv run lint + format: + runs-on: ubuntu-24.04 + timeout-minutes: 5 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.13" + cache: "pipenv" + - name: install pipenv + run: | + python -m pip install --upgrade pip + python -m pip install pipenv + - name: install packages + run: pipenv sync --dev + - name: run format + run: pipenv run format + test: + runs-on: ubuntu-24.04 + timeout-minutes: 5 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.13" + cache: "pipenv" + - name: install pipenv + run: | + python -m pip install --upgrade pip + python -m pip install pipenv + - name: install packages + run: pipenv sync --dev + - name: Cache Docker images. 
+ uses: AndreKurait/docker-cache@0.6.0 + with: + key: ${{ hashFiles('docker-compose.yml') }} + - name: start docker compose + run: docker compose up -d + - name: run test + run: pipenv run pytest -q --junit-xml pytest.xml + continue-on-error: true + - name: Publish Test Report + uses: mikepenz/action-junit-report@v5 + if: success() || failure() + with: + report_paths: "pytest.xml" diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..64d49ae --- /dev/null +++ b/.gitignore @@ -0,0 +1,216 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[codz] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py.cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +# Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +# poetry.lock +# poetry.toml + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. +# https://pdm-project.org/en/latest/usage/project/#working-with-version-control +# pdm.lock +# pdm.toml +.pdm-python +.pdm-build/ + +# pixi +# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. +# pixi.lock +# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one +# in the .venv directory. It is recommended not to include this directory in version control. +.pixi + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# Redis +*.rdb +*.aof +*.pid + +# RabbitMQ +mnesia/ +rabbitmq/ +rabbitmq-data/ + +# ActiveMQ +activemq-data/ + +# SageMath parsed files +*.sage.py + +# Environments +.env +.envrc +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +# .idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. 
However, if you prefer, +# you could uncomment the following to ignore the entire vscode folder +# .vscode/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Marimo +marimo/_static/ +marimo/_lsp/ +__marimo__/ + +# Streamlit +.streamlit/secrets.toml \ No newline at end of file diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..e69de29 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..00b5018 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 DogFortune + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..ba1021a --- /dev/null +++ b/Pipfile @@ -0,0 +1,21 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +tqdm = "*" +dataclasses-json = "*" + +[dev-packages] +pytest = "*" +flake8 = "*" +black = "*" +pytest-randomly = "*" + +[requires] +python_version = "3.13" + +[scripts] +lint = "flake8 -v" +format = "black ./src/ -v" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..45bb5fc --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,247 @@ +{ + "_meta": { + "hash": { + "sha256": "a2d0cee61c4fa721f0c28a5774070546e974a9bc28d68b59a8e52bff137e588c" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.13" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "colorama": { + "hashes": [ + "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", + "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'", + "version": "==0.4.6" + }, + "dataclasses-json": { + "hashes": [ + "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a", + "sha256:b6b3e528266ea45b9535223bc53ca645f5208833c29229e847b3f26a1cc55fc0" + ], + "index": "pypi", + "markers": "python_version >= '3.7' and python_version < '4.0'", + "version": "==0.6.7" + }, + "marshmallow": { + "hashes": [ + "sha256:3350409f20a70a7e4e11a27661187b77cdcaeb20abca41c1454fe33636bea09c", + "sha256:e6d8affb6cb61d39d26402096dc0aee12d5a26d490a121f118d2e81dc0719dc6" + ], + "markers": "python_version >= '3.9'", + "version": "==3.26.1" + }, + "mypy-extensions": { + "hashes": [ + "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", + "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558" + ], + "markers": "python_version >= '3.8'", + 
"version": "==1.1.0" + }, + "packaging": { + "hashes": [ + "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", + "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f" + ], + "markers": "python_version >= '3.8'", + "version": "==25.0" + }, + "tqdm": { + "hashes": [ + "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", + "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2" + ], + "index": "pypi", + "markers": "python_version >= '3.7'", + "version": "==4.67.1" + }, + "typing-extensions": { + "hashes": [ + "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", + "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548" + ], + "markers": "python_version >= '3.9'", + "version": "==4.15.0" + }, + "typing-inspect": { + "hashes": [ + "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f", + "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78" + ], + "version": "==0.9.0" + } + }, + "develop": { + "black": { + "hashes": [ + "sha256:0172a012f725b792c358d57fe7b6b6e8e67375dd157f64fa7a3097b3ed3e2175", + "sha256:0474bca9a0dd1b51791fcc507a4e02078a1c63f6d4e4ae5544b9848c7adfb619", + "sha256:154b06d618233fe468236ba1f0e40823d4eb08b26f5e9261526fde34916b9140", + "sha256:1b9dc70c21ef8b43248f1d86aedd2aaf75ae110b958a7909ad8463c4aa0880b0", + "sha256:2ab0ce111ef026790e9b13bd216fa7bc48edd934ffc4cbf78808b235793cbc92", + "sha256:3bec74ee60f8dfef564b573a96b8930f7b6a538e846123d5ad77ba14a8d7a64f", + "sha256:456386fe87bad41b806d53c062e2974615825c7a52159cde7ccaeb0695fa28fa", + "sha256:474b34c1342cdc157d307b56c4c65bce916480c4a8f6551fdc6bf9b486a7c4ae", + "sha256:77e7060a00c5ec4b3367c55f39cf9b06e68965a4f2e61cecacd6d0d9b7ec945a", + "sha256:846d58e3ce7879ec1ffe816bb9df6d006cd9590515ed5d17db14e17666b2b357", + "sha256:8e46eecf65a095fa62e53245ae2795c90bdecabd53b50c448d0a8bcd0d2e74c4", + 
"sha256:9101ee58ddc2442199a25cb648d46ba22cd580b00ca4b44234a324e3ec7a0f7e", + "sha256:a16b14a44c1af60a210d8da28e108e13e75a284bf21a9afa6b4571f96ab8bb9d", + "sha256:aaf319612536d502fdd0e88ce52d8f1352b2c0a955cc2798f79eeca9d3af0608", + "sha256:b756fc75871cb1bcac5499552d771822fd9db5a2bb8db2a7247936ca48f39831", + "sha256:c0372a93e16b3954208417bfe448e09b0de5cc721d521866cd9e0acac3c04a1f", + "sha256:ce41ed2614b706fd55fd0b4a6909d06b5bab344ffbfadc6ef34ae50adba3d4f7", + "sha256:d119957b37cc641596063cd7db2656c5be3752ac17877017b2ffcdb9dfc4d2b1", + "sha256:e3c1f4cd5e93842774d9ee4ef6cd8d17790e65f44f7cdbaab5f2cf8ccf22a823", + "sha256:e593466de7b998374ea2585a471ba90553283fb9beefcfa430d84a2651ed5933", + "sha256:ef69351df3c84485a8beb6f7b8f9721e2009e20ef80a8d619e2d1788b7816d47", + "sha256:f96b6726d690c96c60ba682955199f8c39abc1ae0c3a494a9c62c0184049a713" + ], + "index": "pypi", + "markers": "python_version >= '3.9'", + "version": "==25.9.0" + }, + "click": { + "hashes": [ + "sha256:9b9f285302c6e3064f4330c05f05b81945b2a39544279343e6e7c5f27a9baddc", + "sha256:e7b8232224eba16f4ebe410c25ced9f7875cb5f3263ffc93cc3e8da705e229c4" + ], + "markers": "python_version >= '3.10'", + "version": "==8.3.0" + }, + "colorama": { + "hashes": [ + "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", + "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'", + "version": "==0.4.6" + }, + "flake8": { + "hashes": [ + "sha256:b9696257b9ce8beb888cdbe31cf885c90d31928fe202be0889a7cdafad32f01e", + "sha256:fe044858146b9fc69b551a4b490d69cf960fcb78ad1edcb84e7fbb1b4a8e3872" + ], + "index": "pypi", + "markers": "python_version >= '3.9'", + "version": "==7.3.0" + }, + "iniconfig": { + "hashes": [ + "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", + "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12" + ], + "markers": "python_version 
>= '3.10'", + "version": "==2.3.0" + }, + "mccabe": { + "hashes": [ + "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325", + "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e" + ], + "markers": "python_version >= '3.6'", + "version": "==0.7.0" + }, + "mypy-extensions": { + "hashes": [ + "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", + "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558" + ], + "markers": "python_version >= '3.8'", + "version": "==1.1.0" + }, + "packaging": { + "hashes": [ + "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", + "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f" + ], + "markers": "python_version >= '3.8'", + "version": "==25.0" + }, + "pathspec": { + "hashes": [ + "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", + "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712" + ], + "markers": "python_version >= '3.8'", + "version": "==0.12.1" + }, + "platformdirs": { + "hashes": [ + "sha256:abd01743f24e5287cd7a5db3752faf1a2d65353f38ec26d98e25a6db65958c85", + "sha256:ca753cf4d81dc309bc67b0ea38fd15dc97bc30ce419a7f58d13eb3bf14c4febf" + ], + "markers": "python_version >= '3.9'", + "version": "==4.4.0" + }, + "pluggy": { + "hashes": [ + "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", + "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746" + ], + "markers": "python_version >= '3.9'", + "version": "==1.6.0" + }, + "pycodestyle": { + "hashes": [ + "sha256:c4b5b517d278089ff9d0abdec919cd97262a3367449ea1c8b49b91529167b783", + "sha256:dd6bf7cb4ee77f8e016f9c8e74a35ddd9f67e1d5fd4184d86c3b98e07099f42d" + ], + "markers": "python_version >= '3.9'", + "version": "==2.14.0" + }, + "pyflakes": { + "hashes": [ + "sha256:b24f96fafb7d2ab0ec5075b7350b3d2d2218eab42003821c06344973d3ea2f58", + 
"sha256:f742a7dbd0d9cb9ea41e9a24a918996e8170c799fa528688d40dd582c8265f4f" + ], + "markers": "python_version >= '3.9'", + "version": "==3.4.0" + }, + "pygments": { + "hashes": [ + "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", + "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b" + ], + "markers": "python_version >= '3.8'", + "version": "==2.19.2" + }, + "pytest": { + "hashes": [ + "sha256:8f44522eafe4137b0f35c9ce3072931a788a21ee40a2ed279e817d3cc16ed21e", + "sha256:e5ccdf10b0bac554970ee88fc1a4ad0ee5d221f8ef22321f9b7e4584e19d7f96" + ], + "index": "pypi", + "markers": "python_version >= '3.10'", + "version": "==9.0.0" + }, + "pytest-randomly": { + "hashes": [ + "sha256:174e57bb12ac2c26f3578188490bd333f0e80620c3f47340158a86eca0593cd8", + "sha256:e0dfad2fd4f35e07beff1e47c17fbafcf98f9bf4531fd369d9260e2f858bfcb7" + ], + "index": "pypi", + "markers": "python_version >= '3.9'", + "version": "==4.0.1" + }, + "pytokens": { + "hashes": [ + "sha256:c9a4bfa0be1d26aebce03e6884ba454e842f186a59ea43a6d3b25af58223c044", + "sha256:db7b72284e480e69fb085d9f251f66b3d2df8b7166059261258ff35f50fb711b" + ], + "markers": "python_version >= '3.8'", + "version": "==0.1.10" + } + } +} diff --git a/README_jp.md b/README_jp.md index b01e445..b96791d 100644 --- a/README_jp.md +++ b/README_jp.md @@ -1 +1,2 @@ # LinkWatch + diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..8fd1943 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,6 @@ +services: + app: + image: kennethreitz/httpbin:latest + restart: always + ports: + - 8000:80 \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..75ea34b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,16 @@ +[project] +name = "linkwatch" +authors = [{ name = "DogFortune" }] +description = "Perform connectivity checks on URLs listed in the Markdown" +license-files = ["LICENSE"] +readme = "README.md" +requires-python = ">=3.10" 
from pathlib import Path
from urllib.request import urlopen
from urllib.error import HTTPError, URLError
from enums import Result
from reporter import ReportData
from dataclasses import dataclass
import re
from tqdm import tqdm

URL_PATTERN = r'https?://[^\s\)\]>"]+'
URL_RE = re.compile(URL_PATTERN)


@dataclass
class AnalyzeResponse:
    """Outcome of probing a single link."""

    result: Result
    # HTTP status code; None when the connection itself failed (URLError).
    # NOTE: this is an int (res.code / HTTPError.code), not a str.
    code: int | None
    url: str
    # Failure reason reported by urllib; None on success.
    reason: str | None


@dataclass
class URLInfo:
    """A URL extracted from a document."""

    line: int  # 1-based line number within the file
    url: str
    duplicate: bool  # True when the same URL was already seen earlier


def request(url: str) -> AnalyzeResponse:
    """Check connectivity of *url*.

    :param url: URL to probe
    :type url: str
    :return: probe outcome (status code, final URL, failure reason)
    :rtype: AnalyzeResponse
    """
    try:
        # Close the response even on success so the socket is not leaked.
        with urlopen(url, timeout=3) as res:
            return AnalyzeResponse(Result.OK, res.code, res.url, None)
    except HTTPError as e:
        # The server answered, but with a 4xx/5xx status.
        return AnalyzeResponse(Result.NG, e.code, url, e.reason)
    except URLError as e:
        # Could not reach the server at all (DNS failure, refused, timeout).
        return AnalyzeResponse(Result.NG, None, url, e.reason)


def check_links(links: dict[str, list[URLInfo]]) -> list[ReportData]:
    """Probe every URL that is not flagged as a duplicate.

    :param links: mapping of file path -> URLs found in that file
    :type links: dict[str, list[URLInfo]]
    :return: one ReportData per URL actually probed
    :rtype: list[ReportData]
    """
    results = []
    with tqdm(links.items()) as links_prog:
        for file_path, link_items in links_prog:
            links_prog.set_description(file_path)
            for item in tqdm(link_items):
                if not item.duplicate:
                    res = request(item.url)
                    data = ReportData(
                        file_path,
                        item.line,
                        item.url,
                        res.result,
                        res.code,
                        res.reason,
                    )
                    results.append(data)
    return results


def search(path: str, filter="*.md") -> list[str]:
    """Recursively collect Markdown documents under *path*.

    :param path: directory to search
    :type path: str
    :param filter: glob pattern of files to pick up, defaults to "*.md"
    :type filter: str, optional
    :return: list of matching file paths
    :rtype: list[str]
    """
    p = Path(path)
    files = [str(item) for item in p.rglob(filter)]
    return files


def extract_url(files: list[str]) -> dict[str, list[URLInfo]]:
    """Extract URLs from the given files.

    A URL seen more than once across *all* files is flagged as duplicate
    from its second occurrence on, so callers only probe it once.

    NOTE(review): URL_RE.search captures only the first URL on each line;
    switch to finditer if multiple links per line must be supported —
    confirm intent.

    :param files: file paths to scan
    :type files: list[str]
    :return: mapping of file path -> URLs found in that file
    :rtype: dict[str, list[URLInfo]]
    """
    links = {}
    seen_urls = set()
    for file_path in files:
        with open(file_path, "r", encoding="utf-8") as f:
            lines = f.read().splitlines()
        links[file_path] = []
        for i, line in enumerate(lines):
            result = URL_RE.search(line)
            if result:
                url = result.group()
                duplicate = url in seen_urls
                seen_urls.add(url)
                data = URLInfo(i + 1, url, duplicate)
                links[file_path].append(data)
    return links
+ else: + return OutputType.Console + + +def create_parser(): + parser = argparse.ArgumentParser() + parser.add_argument("src", default=os.environ.get("SRC_DIR", ".")) + parser.add_argument( + "--report-json", type=str, help="Create json report file at given path" + ) + return parser + + +def main(args=None): + parser = create_parser() + parsed_args = parser.parse_args(args) + + format = __format__setting(parsed_args) + src = parsed_args.src + + files = analyzer.search(src) + links = analyzer.extract_url(files) + report_data_list = analyzer.check_links(links) + __output(report_data_list, format, parsed_args) + + +if __name__ == "__main__": + main() diff --git a/src/enums.py b/src/enums.py new file mode 100644 index 0000000..d752ede --- /dev/null +++ b/src/enums.py @@ -0,0 +1,11 @@ +from enum import StrEnum, Enum, auto + + +class OutputType(Enum): + Console = auto() + Json = auto() + + +class Result(StrEnum): + OK = "OK" + NG = "NG" diff --git a/src/reporter.py b/src/reporter.py new file mode 100644 index 0000000..03dea75 --- /dev/null +++ b/src/reporter.py @@ -0,0 +1,48 @@ +from dataclasses import dataclass +from dataclasses_json import dataclass_json +from pprint import pformat +from typing import List +import json +import os + + +@dataclass_json +@dataclass +class ReportData: + file: str + line: int + url: str + result: str + code: int + reason: str + + +@dataclass_json +@dataclass +class ReportCollection: + Reports: List[ReportData] + + +class CustomEncoder(json.JSONEncoder): + def default(self, obj): + # 例外オブジェクトを文字列に変換 + if isinstance(obj, Exception): + return str(obj) + return super().default(obj) + + +def console(data: list[ReportData]): + # TODO: 出力形式は仮でpformatを設定中。 + line = pformat(data) + return line + + +def dump_json(data: list[ReportData], output_path: str): + if os.path.splitext(output_path)[-1].lower() != ".json": + raise ValueError + collection = ReportCollection(Reports=data) + json_str = json.dumps( + collection.to_dict(), indent=4, 
import pytest
from urllib.request import Request, urlopen
from urllib.error import HTTPError, URLError


@pytest.fixture(scope="session", autouse=True)
def check_mock_server():
    """Verify the mock HTTP server is running before the test session starts.

    Sends one GET to the httpbin container from docker-compose.yml and
    aborts the whole session if it is unreachable, so integration tests
    fail fast with a clear instruction instead of many timeouts.
    """
    mock_server_url = "http://localhost:8000/get"

    try:
        req = Request(mock_server_url)
        with urlopen(req) as res:
            if res.getcode() == 200:
                return
    except (HTTPError, URLError):
        # Fall through to pytest.exit below with the startup instructions.
        pass

    # "docker compose" (v2 subcommand) matches the CI workflow's invocation;
    # the legacy standalone "docker-compose" binary is deprecated.
    pytest.exit(
        "\n\n❌ エラー: モックサーバーが起動していません\n"
        "以下のコマンドでモックサーバーを起動してください:\n"
        "  docker compose up -d\n",
        returncode=1,
    )
def test_check_links():
    """Duplicate links must be excluded from the probe results.

    The sample docs contain 5 links total, 2 of which are flagged as
    duplicates, so exactly 3 unique URLs are probed.
    """
    sample_files = analyzer.search("tests/sample_doc/")
    extracted = analyzer.extract_url(sample_files)
    report = analyzer.check_links(extracted)

    assert len(report) == 3

    # Shape check: every entry is a fully-populated ReportData.
    for entry in report:
        assert type(entry) is ReportData
        assert entry.file is not None
        assert entry.line is not None
        assert entry.url is not None
        assert entry.result is not None

        if entry.result.upper() == "OK":
            assert entry.code is not None
            assert entry.reason is None
        else:
            assert entry.code is None
            assert entry.reason is not None


@pytest.mark.parametrize(["path"], [pytest.param("tests/sample_doc/")])
def test_search(path: str):
    """search() finds every Markdown file under the sample directory."""
    assert len(analyzer.search(path)) == 2
import pytest
import reporter
import analyzer
from tempfile import TemporaryDirectory
from pathlib import Path
import os


@pytest.fixture(scope="function")
def setup_report_data():
    """Build link-check report data from the sample documents."""
    sample_files = analyzer.search("tests/sample_doc/")
    extracted = analyzer.extract_url(sample_files)
    return analyzer.check_links(extracted)


class TestValid:
    """Happy-path cases."""

    def test_console(self, setup_report_data):
        """Console rendering produces a non-None string."""
        rendered = reporter.console(setup_report_data)
        assert rendered is not None

    def test_json(self, setup_report_data):
        """A JSON report file is created at the requested path."""
        with TemporaryDirectory() as tmp:
            target = Path(tmp, "result.json")

            reporter.dump_json(setup_report_data, target)

            assert os.path.isfile(target) is True


class TestInvalid:
    """Error cases."""

    def test_raises_exception_for_non_json_extension(self, setup_report_data):
        """A path without a .json extension must raise ValueError."""
        with TemporaryDirectory() as tmp:
            bad_target = Path(tmp, "result.jso")

            with pytest.raises(ValueError):
                reporter.dump_json(setup_report_data, bad_target)