diff --git a/src/linkstat/analyzer.py b/src/linkstat/analyzer.py index 1c9738b..5b0a15a 100644 --- a/src/linkstat/analyzer.py +++ b/src/linkstat/analyzer.py @@ -1,12 +1,12 @@ from pathlib import Path -from urllib.request import urlopen +from urllib.request import urlopen, Request from urllib.error import HTTPError, URLError from linkstat.enums import Result from linkstat.reporter import ReportData from dataclasses import dataclass import re -URL_PATTERN = r'https?://[^\s\)\]>"]+' +URL_PATTERN = r'https?://[^\s\)\]>"<]+' URL_RE = re.compile(URL_PATTERN) @@ -15,7 +15,7 @@ class AnalyzeResponse: """リンクにアクセスした結果""" result: Result - code: str | None + code: int | None url: str reason: str | None @@ -37,8 +37,12 @@ def request(url: str) -> AnalyzeResponse: :return: 結果 :rtype: AnalyzeResponse """ + # User-Agentヘッダーを追加 + headers = {"User-Agent": "Mozilla"} + req = Request(url, headers=headers) + try: - with urlopen(url, timeout=3) as res: + with urlopen(req, timeout=5) as res: return AnalyzeResponse(Result.OK, res.code, res.url, None) except HTTPError as e: # アクセスできて400や500系が来た時はこっち @@ -50,7 +54,7 @@ def request(url: str) -> AnalyzeResponse: return AnalyzeResponse(Result.NG, None, url, "Timeout") -def check_links(links: dict[str, URLInfo]) -> list[ReportData]: +def check_links(links: dict[str, list[URLInfo]]) -> list[ReportData]: """URLの疎通確認を行います。確認を行うのは重複していないものだけ。 :param links: URLリスト @@ -93,7 +97,7 @@ def search(path: str, filter="*.md") -> list: return files -def extract_url(files: list) -> dict[str, URLInfo]: +def extract_url(files: list) -> dict[str, list[URLInfo]]: """ファイルからURLを抽出します。重複しているリンクも含まれます。 :param files: _description_ diff --git a/src/linkstat/reporter.py b/src/linkstat/reporter.py index 5ef9e8f..b4d5c93 100644 --- a/src/linkstat/reporter.py +++ b/src/linkstat/reporter.py @@ -14,7 +14,7 @@ class ReportData: line: int url: str result: Result - code: int + code: int | None reason: str | None diff --git a/tests/syntax/two_url_syntax.md b/tests/syntax/two_url_syntax.md new file mode 100644 index 0000000..cf77d0c --- /dev/null +++ b/tests/syntax/two_url_syntax.md @@ -0,0 +1 @@ +[example.com](https://example.com) [example.jp](https://example.jp) \ No newline at end of file diff --git a/tests/syntax/url_syntax.md b/tests/syntax/url_syntax.md index e4c6620..375698b 100644 --- a/tests/syntax/url_syntax.md +++ b/tests/syntax/url_syntax.md @@ -1,3 +1,13 @@ # syntax list ## Syntax List for Links -available at [https://contributor-covenant.org/version/1/4][version] \ No newline at end of file +available at [https://contributor-covenant.org/version/1/4][version] +- [ ] The heading title of your list should be in [title case](https://capitalizemytitle.com/) format: `# Awesome Name of List`. +- [ ] Has an appropriate license. + - **We strongly recommend the [CC0 license](https://creativecommons.org/publicdomain/zero/1.0/), but any [Creative Commons license](https://creativecommons.org/choose/) will work.** + - Tip: You can quickly add it to your repo by going to this URL: `https://github.com///community/license/new?branch=main&template=cc0-1.0` (replace `` and `` accordingly). + - A code license like MIT, BSD, Apache, GPL, etc, is not acceptable. Neither are WTFPL and [Unlicense](https://unlicense.org). + - Place a file named `license` or `LICENSE` in the repo root with the license text. + - **Do not** add the license name, text, or a `Licence` section to the readme. GitHub already shows the license name and link to the full text at the top of the repo. + - To verify that you've read all the guidelines, please comment on your pull request with just the word `unicorn`. +- [ ] Does not use [hard-wrapping](https://stackoverflow.com/questions/319925/difference-between-hard-wrap-and-soft-wrap). +- [F#](https://github.com/fsprojects/awesome-fsharp#readme) - A .NET-based language with focus on functional programming. \ No newline at end of file diff --git a/tests/test_analyzer.py b/tests/test_analyzer.py index 8aacd4a..d6abc23 100644 --- a/tests/test_analyzer.py +++ b/tests/test_analyzer.py @@ -70,6 +70,28 @@ def test_check_links(path: str, report_data_count: int): assert item.reason is not None +def test_all_url_syntax_document_pass(): + """さまざまなURLパターンを記載したドキュメントのテスト。すべてOKになる事""" + files = analyzer.search("tests/syntax/url_syntax.md") + links = analyzer.extract_url(files) + + results_report_data = analyzer.check_links(links) + + assert len(results_report_data) == 8 + for item in results_report_data: + assert item.result == "OK" + + +def test_two_line(): + """1行に2つURLがあるパターンの場合、2つとも補足できている事""" + files = analyzer.search("tests/syntax/two_url_syntax.md") + links = analyzer.extract_url(files) + + results_report_data = analyzer.check_links(links) + + assert len(results_report_data) == 2 + + @pytest.mark.parametrize(["path"], [pytest.param("tests/sample_doc/")]) def test_search(path: str): files = analyzer.search(path) diff --git a/tests/test_app.py b/tests/test_app.py index fdfd547..14f4a93 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -13,9 +13,6 @@ def test_main_with_minimal_arguments(self): """環境変数も引数も指定しない一気通貫のテスト""" app.main(["tests/sample_doc/"]) - # def test_awesome(self): - # app.main(["tmp/awesome-main"]) - @pytest.mark.usefixtures("use_mock_server") def test_main_with_output_json(self): """JSONファイルが出力されている事"""