From 05972cda12bb3a0aef8b2b881a537ac5702b0cfd Mon Sep 17 00:00:00 2001 From: DogFortune Date: Thu, 25 Dec 2025 15:53:28 +0900 Subject: [PATCH 1/4] =?UTF-8?q?=E5=95=8F=E9=A1=8C=E3=81=A8=E3=81=AA?= =?UTF-8?q?=E3=81=A3=E3=81=A6=E3=81=84=E3=81=9FURL=E3=82=92=E8=BF=BD?= =?UTF-8?q?=E5=8A=A0=E3=81=97=E3=81=A6=E3=83=86=E3=82=B9=E3=83=88=E8=BF=BD?= =?UTF-8?q?=E5=8A=A0=20RED=E7=A2=BA=E8=AA=8D=20refs=20#18?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/syntax/url_syntax.md | 3 ++- tests/test_analyzer.py | 11 +++++++++++ tests/test_app.py | 3 --- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/tests/syntax/url_syntax.md b/tests/syntax/url_syntax.md index e4c6620..7890e2c 100644 --- a/tests/syntax/url_syntax.md +++ b/tests/syntax/url_syntax.md @@ -1,3 +1,4 @@ # syntax list ## Syntax List for Links -available at [https://contributor-covenant.org/version/1/4][version] \ No newline at end of file +available at [https://contributor-covenant.org/version/1/4][version] +- [ ] The heading title of your list should be in [title case](https://capitalizemytitle.com/) format: `# Awesome Name of List`. \ No newline at end of file diff --git a/tests/test_analyzer.py b/tests/test_analyzer.py index 8aacd4a..b87de8f 100644 --- a/tests/test_analyzer.py +++ b/tests/test_analyzer.py @@ -70,6 +70,17 @@ def test_check_links(path: str, report_data_count: int): assert item.reason is not None +def test_all_url_syntax_document_pass(): + """さまざまなURLパターンを記載したドキュメントのテスト。すべてOKになる事""" + files = analyzer.search("tests/syntax/") + links = analyzer.extract_url(files) + + results_report_data = analyzer.check_links(links) + + for item in results_report_data: + assert item.result == "OK" + + @pytest.mark.parametrize(["path"], [pytest.param("tests/sample_doc/")]) def test_search(path: str): files = analyzer.search(path) diff --git a/tests/test_app.py b/tests/test_app.py index fdfd547..14f4a93 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -13,9 +13,6 @@ def test_main_with_minimal_arguments(self): """環境変数も引数も指定しない一気通貫のテスト""" app.main(["tests/sample_doc/"]) - # def test_awesome(self): - # app.main(["tmp/awesome-main"]) - @pytest.mark.usefixtures("use_mock_server") def test_main_with_output_json(self): """JSONファイルが出力されている事""" From 2d9e9b1c6a74e8fa0e00826ad79df51cf1e222a4 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Thu, 25 Dec 2025 18:13:29 +0900 Subject: [PATCH 2/4] =?UTF-8?q?=E3=83=98=E3=83=83=E3=83=80=E3=83=BC?= =?UTF-8?q?=E3=82=92=E8=BF=BD=E5=8A=A0=E3=81=97=E3=81=9F=E3=81=93=E3=81=A8?= =?UTF-8?q?=E3=81=A7=E6=94=B9=E5=96=84=20refs=20#18?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 固定値になっているのでrequestsの方式にした方が良さそう --- src/linkstat/analyzer.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/linkstat/analyzer.py b/src/linkstat/analyzer.py index 1c9738b..85e30b0 100644 --- a/src/linkstat/analyzer.py +++ b/src/linkstat/analyzer.py @@ -1,5 +1,5 @@ from pathlib import Path -from urllib.request import urlopen +from urllib.request import urlopen, Request from urllib.error import HTTPError, URLError from linkstat.enums import Result from linkstat.reporter import ReportData @@ -37,8 +37,14 @@ def request(url: str) -> AnalyzeResponse: :return: 結果 :rtype: AnalyzeResponse """ + # User-Agentヘッダーを追加 + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" + } + req = Request(url, headers=headers) + try: - with urlopen(url, timeout=3) as res: + with urlopen(req, timeout=5) as res: return AnalyzeResponse(Result.OK, res.code, res.url, None) except HTTPError as e: # アクセスできて400や500系が来た時はこっち From baf4b0ab63e7852308d6bdf20060946cde2ab067 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Thu, 25 Dec 2025 18:37:37 +0900 Subject: [PATCH 3/4] =?UTF-8?q?=E3=83=98=E3=83=83=E3=83=80=E3=83=BC?= =?UTF-8?q?=E3=82=92=E7=9C=81=E7=95=A5=20refs=20#18?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit これでもできた --- src/linkstat/analyzer.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/linkstat/analyzer.py b/src/linkstat/analyzer.py index 85e30b0..cb5c796 100644 --- a/src/linkstat/analyzer.py +++ b/src/linkstat/analyzer.py @@ -38,9 +38,7 @@ def request(url: str) -> AnalyzeResponse: :rtype: AnalyzeResponse """ # User-Agentヘッダーを追加 - headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" - } + headers = {"User-Agent": "Mozilla"} req = Request(url, headers=headers) try: From 054f09aa5becf4a840d80e96e9b81c8dba6fef72 Mon Sep 17 00:00:00 2001 From: DogFortune Date: Sun, 28 Dec 2025 22:07:59 +0900 Subject: [PATCH 4/4] =?UTF-8?q?=E6=AD=A3=E8=A6=8F=E8=A1=A8=E7=8F=BE?= =?UTF-8?q?=E3=81=AE=E3=83=91=E3=82=BF=E3=83=BC=E3=83=B3=E4=BF=AE=E6=AD=A3?= =?UTF-8?q?=20refs=20#18?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1行に2つURLがあるパターン抜けが判明 --- src/linkstat/analyzer.py | 8 ++++---- src/linkstat/reporter.py | 2 +- tests/syntax/two_url_syntax.md | 1 + tests/syntax/url_syntax.md | 11 ++++++++++- tests/test_analyzer.py | 13 ++++++++++++- 5 files changed, 28 insertions(+), 7 deletions(-) create mode 100644 tests/syntax/two_url_syntax.md diff --git a/src/linkstat/analyzer.py b/src/linkstat/analyzer.py index cb5c796..5b0a15a 100644 --- a/src/linkstat/analyzer.py +++ b/src/linkstat/analyzer.py @@ -6,7 +6,7 @@ from dataclasses import dataclass import re -URL_PATTERN = r'https?://[^\s\)\]>"]+' +URL_PATTERN = r'https?://[^\s\)\]>"<]+' URL_RE = re.compile(URL_PATTERN) @@ -15,7 +15,7 @@ class AnalyzeResponse: """リンクにアクセスした結果""" result: Result - code: str | None + code: int | None url: str reason: str | None @@ -54,7 +54,7 @@ def request(url: str) -> AnalyzeResponse: return AnalyzeResponse(Result.NG, None, url, "Timeout") -def check_links(links: dict[str, URLInfo]) -> list[ReportData]: +def check_links(links: dict[str, list[URLInfo]]) -> list[ReportData]: """URLの疎通確認を行います。確認を行うのは重複していないものだけ。 :param links: URLリスト @@ -97,7 +97,7 @@ def search(path: str, filter="*.md") -> list: return files -def extract_url(files: list) -> dict[str, URLInfo]: +def extract_url(files: list) -> dict[str, list[URLInfo]]: """ファイルからURLを抽出します。重複しているリンクも含まれます。 :param files: _description_ diff --git a/src/linkstat/reporter.py b/src/linkstat/reporter.py index 5ef9e8f..b4d5c93 100644 --- a/src/linkstat/reporter.py +++ b/src/linkstat/reporter.py @@ -14,7 +14,7 @@ class ReportData: line: int url: str result: Result - code: int + code: int | None reason: str | None diff --git a/tests/syntax/two_url_syntax.md b/tests/syntax/two_url_syntax.md new file mode 100644 index 0000000..cf77d0c --- /dev/null +++ b/tests/syntax/two_url_syntax.md @@ -0,0 +1 @@ +[example.com](https://example.com) [example.jp](https://example.jp) \ No newline at end of file diff --git a/tests/syntax/url_syntax.md b/tests/syntax/url_syntax.md index 7890e2c..375698b 100644 --- a/tests/syntax/url_syntax.md +++ b/tests/syntax/url_syntax.md @@ -1,4 +1,13 @@ # syntax list ## Syntax List for Links available at [https://contributor-covenant.org/version/1/4][version] -- [ ] The heading title of your list should be in [title case](https://capitalizemytitle.com/) format: `# Awesome Name of List`. \ No newline at end of file +- [ ] The heading title of your list should be in [title case](https://capitalizemytitle.com/) format: `# Awesome Name of List`. +- [ ] Has an appropriate license. + - **We strongly recommend the [CC0 license](https://creativecommons.org/publicdomain/zero/1.0/), but any [Creative Commons license](https://creativecommons.org/choose/) will work.** + - Tip: You can quickly add it to your repo by going to this URL: `https://github.com///community/license/new?branch=main&template=cc0-1.0` (replace `` and `` accordingly). + - A code license like MIT, BSD, Apache, GPL, etc, is not acceptable. Neither are WTFPL and [Unlicense](https://unlicense.org). + - Place a file named `license` or `LICENSE` in the repo root with the license text. + - **Do not** add the license name, text, or a `Licence` section to the readme. GitHub already shows the license name and link to the full text at the top of the repo. + - To verify that you've read all the guidelines, please comment on your pull request with just the word `unicorn`. +- [ ] Does not use [hard-wrapping](https://stackoverflow.com/questions/319925/difference-between-hard-wrap-and-soft-wrap). +- [F#](https://github.com/fsprojects/awesome-fsharp#readme) - A .NET-based language with focus on functional programming. \ No newline at end of file diff --git a/tests/test_analyzer.py b/tests/test_analyzer.py index b87de8f..d6abc23 100644 --- a/tests/test_analyzer.py +++ b/tests/test_analyzer.py @@ -72,15 +72,26 @@ def test_check_links(path: str, report_data_count: int): def test_all_url_syntax_document_pass(): """さまざまなURLパターンを記載したドキュメントのテスト。すべてOKになる事""" - files = analyzer.search("tests/syntax/") + files = analyzer.search("tests/syntax/url_syntax.md") links = analyzer.extract_url(files) results_report_data = analyzer.check_links(links) + assert len(results_report_data) == 8 for item in results_report_data: assert item.result == "OK" +def test_two_line(): + """1行に2つURLがあるパターンの場合、2つとも補足できている事""" + files = analyzer.search("tests/syntax/two_url_syntax.md") + links = analyzer.extract_url(files) + + results_report_data = analyzer.check_links(links) + + assert len(results_report_data) == 2 + + @pytest.mark.parametrize(["path"], [pytest.param("tests/sample_doc/")]) def test_search(path: str): files = analyzer.search(path)