From 0314a3cd4d8dc83b97949cf0d6759db011dc5a22 Mon Sep 17 00:00:00 2001 From: Kazuhiro Oka Date: Thu, 4 Dec 2025 22:31:49 +0900 Subject: [PATCH 1/4] add: bs4 --- Pipfile | 1 + Pipfile.lock | 37 +++++++++++++++++++++++++++---------- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/Pipfile b/Pipfile index cd3ef9a..96ae23a 100644 --- a/Pipfile +++ b/Pipfile @@ -9,6 +9,7 @@ google-auth-httplib2 = "~=0.2.1" google-auth-oauthlib = ">=1.2.3,<2.0.0" pyyaml = ">=6.0.3,<7.0.0" openai = ">=2.8.0,<3.0.0" +beautifulsoup4 = ">=4.14.3,<5.0.0" [dev-packages] diff --git a/Pipfile.lock b/Pipfile.lock index c1aa932..b93ca40 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "dc959634abb143247462fd06333b2ce993154d67779eaed377e82192707bc928" + "sha256": "39c05e03aaa7e0bc6c4781f29abc6e63a9ec30cba9922c04959e5a3fb1f982af" }, "pipfile-spec": 6, "requires": { @@ -27,11 +27,20 @@ }, "anyio": { "hashes": [ - "sha256:0287e96f4d26d4149305414d4e3bc32f0dcd0862365a4bddea19d7a1ec38c4fc", - "sha256:82a8d0b81e318cc5ce71a5f1f8b5c4e63619620b63141ef8c995fa0db95a57c4" + "sha256:73c693b567b0c55130c104d0b43a9baf3aa6a31fc6110116509f27bf75e21ec0", + "sha256:dad2376a628f98eeca4881fc56cd06affd18f659b17a747d3ff0307ced94b1bb" ], "markers": "python_version >= '3.9'", - "version": "==4.11.0" + "version": "==4.12.0" + }, + "beautifulsoup4": { + "hashes": [ + "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb", + "sha256:6292b1c5186d356bba669ef9f7f051757099565ad9ada5dd630bd9de5fa7fb86" + ], + "index": "pypi", + "markers": "python_full_version >= '3.7.0'", + "version": "==4.14.3" }, "cachetools": { "hashes": [ @@ -385,12 +394,12 @@ }, "openai": { "hashes": [ - "sha256:4851908f6d6fcacbd47ba659c5ac084f7725b752b6bfa1e948b6fbfc111a6bad", - "sha256:ba975e347f6add2fe13529ccb94d54a578280e960765e5224c34b08d7e029ddf" + "sha256:c6c3b5a04994734386e8dad3c00a393f56d3b68a27cd2e8acae91a59e4122463", + "sha256:cb1b79eef6e809f6da326a7ef6038719e35aa944c42d081807bfa1be8060f15f" ], "index": "pypi", "markers": "python_version >= '3.9'", - "version": "==2.8.0" + "version": "==2.8.1" }, "proto-plus": { "hashes": [ @@ -434,11 +443,11 @@ }, "pydantic": { "hashes": [ - "sha256:0f8cb9555000a4b5b617f66bfd2566264c4984b27589d3b845685983e8ea85ac", - "sha256:92d3d202a745d46f9be6df459ac5a064fdaa3c1c4cd8adcfa332ccf3c05f871e" + "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", + "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d" ], "markers": "python_version >= '3.9'", - "version": "==2.12.4" + "version": "==2.12.5" }, "pydantic-core": { "hashes": [ @@ -687,6 +696,14 @@ "markers": "python_version >= '3.7'", "version": "==1.3.1" }, + "soupsieve": { + "hashes": [ + "sha256:0cc76456a30e20f5d7f2e14a98a4ae2ee4e5abdc7c5ea0aafe795f344bc7984c", + "sha256:e2dd4a40a628cb5f28f6d4b0db8800b8f581b65bb380b97de22ba5ca8d72572f" + ], + "markers": "python_version >= '3.9'", + "version": "==2.8" + }, "tqdm": { "hashes": [ "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", From b55906e1f0c7f3e69488928129ed5e1af99d4c2b Mon Sep 17 00:00:00 2001 From: Kazuhiro Oka Date: Thu, 4 Dec 2025 22:35:22 +0900 Subject: [PATCH 2/4] add: lxml --- Pipfile | 1 + Pipfile.lock | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 149 insertions(+), 1 deletion(-) diff --git a/Pipfile b/Pipfile index 96ae23a..ac65599 100644 --- a/Pipfile +++ b/Pipfile @@ -10,6 +10,7 @@ google-auth-oauthlib = ">=1.2.3,<2.0.0" pyyaml = ">=6.0.3,<7.0.0" openai = ">=2.8.0,<3.0.0" beautifulsoup4 = ">=4.14.3,<5.0.0" +lxml = ">=6.0.2,<7.0.0" [dev-packages] diff --git a/Pipfile.lock b/Pipfile.lock index b93ca40..297b1c2 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "39c05e03aaa7e0bc6c4781f29abc6e63a9ec30cba9922c04959e5a3fb1f982af" + "sha256": "c44878df56d1e4bc9885dedde605ece5e47f85f878319c95af1e7657414e15e3" }, "pipfile-spec": 6, "requires": { @@ -384,6 +384,153 @@ "markers": "python_version >= '3.9'", "version": "==0.12.0" }, + "lxml": { + "hashes": [ + "sha256:058027e261afed589eddcfe530fcc6f3402d7fd7e89bfd0532df82ebc1563dba", + "sha256:063eccf89df5b24e361b123e257e437f9e9878f425ee9aae3144c77faf6da6d8", + "sha256:064fdadaf7a21af3ed1dcaa106b854077fbeada827c18f72aec9346847cd65d0", + "sha256:08b9d5e803c2e4725ae9e8559ee880e5328ed61aa0935244e0515d7d9dbec0aa", + "sha256:0a3c150a95fbe5ac91de323aa756219ef9cf7fde5a3f00e2281e30f33fa5fa4f", + "sha256:0aa7070978f893954008ab73bb9e3c24a7c56c054e00566a21b553dc18105fca", + "sha256:13dcecc9946dca97b11b7c40d29fba63b55ab4170d3c0cf8c0c164343b9bfdcf", + "sha256:13e35cbc684aadf05d8711a5d1b5857c92e5e580efa9a0d2be197199c8def607", + "sha256:17f68764f35fd78d7c4cc4ef209a184c38b65440378013d24b8aecd327c3e0c8", + "sha256:1941354d92699fb5ffe6ed7b32f9649e43c2feb4b97205f75866f7d21aa91452", + "sha256:1c06035eafa8404b5cf475bb37a9f6088b0aca288d4ccc9d69389750d5543700", + "sha256:1db01e5cf14345628e0cbe71067204db658e2fb8e51e7f33631f5f4735fefd8d", + "sha256:1e786a464c191ca43b133906c6903a7e4d56bef376b75d97ccbb8ec5cf1f0a4b", + "sha256:1ea99340b3c729beea786f78c38f60f4795622f36e305d9c9be402201efdc3b7", + "sha256:200069a593c5e40b8f6fc0d84d86d970ba43138c3e68619ffa234bc9bb806a4d", + "sha256:2047d8234fe735ab77802ce5f2297e410ff40f5238aec569ad7c8e163d7b19a6", + "sha256:21c73b476d3cfe836be731225ec3421fa2f048d84f6df6a8e70433dff1376d5a", + "sha256:24a8e756c982c001ca8d59e87c80c4d9dcd4d9b44a4cbeb8d9be4482c514d41d", + "sha256:252a22982dca42f6155125ac76d3432e548a7625d56f5a273ee78a5057216eca", + "sha256:2593c77efde7bfea7f6389f1ab249b15ed4aa5bc5cb5131faa3b843c429fbedb", + "sha256:25fcc59afc57d527cfc78a58f40ab4c9b8fd096a9a3f964d2781ffb6eb33f4ed", + "sha256:2613e67de13d619fd283d58bda40bff0ee07739f624ffee8b13b631abf33083d", + "sha256:27220da5be049e936c3aca06f174e8827ca6445a4353a1995584311487fc4e3e", + "sha256:2c8458c2cdd29589a8367c09c8f030f1d202be673f0ca224ec18590b3b9fb694", + "sha256:2ca59e7e13e5981175b8b3e4ab84d7da57993eeff53c07764dcebda0d0e64ecd", + "sha256:2cbcbf6d6e924c28f04a43f3b6f6e272312a090f269eff68a2982e13e5d57659", + "sha256:2ed6c667fcbb8c19c6791bbf40b7268ef8ddf5a96940ba9404b9f9a304832f6c", + "sha256:358d9adae670b63e95bc59747c72f4dc97c9ec58881d4627fe0120da0f90d314", + "sha256:370cd78d5855cfbffd57c422851f7d3864e6ae72d0da615fca4dad8c45d375a5", + "sha256:3ae2ce7d6fedfb3414a2b6c5e20b249c4c607f72cb8d2bb7cc9c6ec7c6f4e849", + "sha256:3b1675e096e17c6fe9c0e8c81434f5736c0739ff9ac6123c87c2d452f48fc938", + "sha256:3e3cb08855967a20f553ff32d147e14329b3ae70ced6edc2f282b94afbc74b2a", + "sha256:3efe1b21c7801ffa29a1112fab3b0f643628c30472d507f39544fd48e9549e34", + "sha256:3fee0851639d06276e6b387f1c190eb9d7f06f7f53514e966b26bae46481ec90", + "sha256:4077b7c79f31755df33b795dc12119cb557a0106bfdab0d2c2d97bd3cf3dffa6", + "sha256:414aaa94e974e23a3e92e7ca5b97d10c0cf37b6481f50911032c69eeb3991bba", + "sha256:4197fb2534ee05fd3e7afaab5d8bfd6c2e186f65ea7f9cd6a82809c887bd1285", + "sha256:442de7530296ef5e188373a1ea5789a46ce90c4847e597856570439621d9c553", + "sha256:4468e3b83e10e0317a89a33d28f7aeba1caa4d1a6fd457d115dd4ffe90c5931d", + "sha256:452b899faa64f1805943ec1c0c9ebeaece01a1af83e130b69cdefeda180bb42c", + "sha256:45f93e6f75123f88d7f0cfd90f2d05f441b808562bf0bc01070a00f53f5028b5", + "sha256:48461bd21625458dd01e14e2c38dd0aea69addc3c4f960c30d9f59d7f93be601", + "sha256:4ddb1049fa0579d0cbd00503ad8c58b9ab34d1254c77bc6a5576d96ec7853dba", + "sha256:5179c60288204e6ddde3f774a93350177e08876eaf3ab78aa3a3649d43eb7d37", + "sha256:57a86e1ebb4020a38d295c04fc79603c7899e0df71588043eb218722dabc087f", + "sha256:5921d924aa5468c939d95c9814fa9f9b5935a6ff4e679e26aaf2951f74043512", + "sha256:59c45e125140b2c4b33920d21d83681940ca29f0b83f8629ea1a2196dc8cfe6a", + "sha256:5aa0fc67ae19d7a64c3fe725dc9a1bb11f80e01f78289d05c6f62545affec438", + "sha256:5d444858b9f07cefff6455b983aea9a67f7462ba1f6cbe4a21e8bf6791bf2153", + "sha256:60fa43be34f78bebb27812ed90f1925ec99560b0fa1decdb7d12b84d857d31e9", + "sha256:6162a86d86893d63084faaf4ff937b3daea233e3682fb4474db07395794fa80d", + "sha256:61cb10eeb95570153e0c0e554f58df92ecf5109f75eacad4a95baa709e26c3d6", + "sha256:65ac4a01aba353cfa6d5725b95d7aed6356ddc0a3cd734de00124d285b04b64f", + "sha256:65ea18d710fd14e0186c2f973dc60bb52039a275f82d3c44a0e42b43440ea534", + "sha256:6605c604e6daa9e0d7f0a2137bdc47a2e93b59c60a65466353e37f8272f47c46", + "sha256:66328dabea70b5ba7e53d94aa774b733cf66686535f3bc9250a7aab53a91caaf", + "sha256:6c8963287d7a4c5c9a432ff487c52e9c5618667179c18a204bdedb27310f022f", + "sha256:6cdaefac66e8b8f30e37a9b4768a391e1f8a16a7526d5bc77a7928408ef68e93", + "sha256:6da5185951d72e6f5352166e3da7b0dc27aa70bd1090b0eb3f7f7212b53f1bb8", + "sha256:6ddff43f702905a4e32bc24f3f2e2edfe0f8fde3277d481bffb709a4cced7a1f", + "sha256:6ec0e3f745021bfed19c456647f0298d60a24c9ff86d9d051f52b509663feeb1", + "sha256:6f91fd2b2ea15a6800c8e24418c0775a1694eefc011392da73bc6cef2623b322", + "sha256:700efd30c0fa1a3581d80a748157397559396090a51d306ea59a70020223d16f", + "sha256:71695772df6acea9f3c0e59e44ba8ac50c4f125217e84aab21074a1a55e7e5c9", + "sha256:72c87e5ee4e58a8354fb9c7c84cbf95a1c8236c127a5d1b7683f04bed8361e1f", + "sha256:7d2de809c2ee3b888b59f995625385f74629707c9355e0ff856445cdcae682b7", + "sha256:80dadc234ebc532e09be1975ff538d154a7fa61ea5031c03d25178855544728f", + "sha256:817ef43a0c0b4a77bd166dc9a09a555394105ff3374777ad41f453526e37f9cb", + "sha256:846ae9a12d54e368933b9759052d6206a9e8b250291109c48e350c1f1f49d916", + "sha256:875c6b5ab39ad5291588aed6925fac99d0097af0dd62f33c7b43736043d4a2ec", + "sha256:8799481bbdd212470d17513a54d568f44416db01250f49449647b5ab5b5dccb9", + "sha256:8ac6e5811ae2870953390452e3476694196f98d447573234592d30488147404d", + "sha256:8f8d0cbd0674ee89863a523e6994ac25fd5be9c8486acfc3e5ccea679bad2679", + "sha256:901e3b4219fa04ef766885fb40fa516a71662a4c61b80c94d25336b4934b71c0", + "sha256:90a345bbeaf9d0587a3aaffb7006aa39ccb6ff0e96a57286c0cb2fd1520ea192", + "sha256:9261bb77c2dab42f3ecd9103951aeca2c40277701eb7e912c545c1b16e0e4917", + "sha256:945da35a48d193d27c188037a05fec5492937f66fb1958c24fc761fb9d40d43c", + "sha256:957448ac63a42e2e49531b9d6c0fa449a1970dbc32467aaad46f11545be9af1d", + "sha256:967aab75434de148ec80597b75062d8123cadf2943fb4281f385141e18b21338", + "sha256:98a5e1660dc7de2200b00d53fa00bcd3c35a3608c305d45a7bbcaf29fa16e83d", + "sha256:995e783eb0374c120f528f807443ad5a83a656a8624c467ea73781fc5f8a8304", + "sha256:9b33d21594afab46f37ae58dfadd06636f154923c4e8a4d754b0127554eb2e77", + "sha256:a4bf42d2e4cf52c28cc1812d62426b9503cdb0c87a6de81442626aa7d69707ba", + "sha256:a59f5448ba2ceccd06995c95ea59a7674a10de0810f2ce90c9006f3cbc044456", + "sha256:a656ca105115f6b766bba324f23a67914d9c728dafec57638e2b92a9dcd76c62", + "sha256:a6b5b39cc7e2998f968f05309e666103b53e2edd01df8dc51b90d734c0825444", + "sha256:a7c5d5e5f1081955358533be077166ee97ed2571d6a66bdba6ec2f609a715d1a", + "sha256:a8bef9b9825fa8bc816a6e641bb67219489229ebc648be422af695f6e7a4fa7f", + "sha256:a8ffaeec5dfea5881d4c9d8913a32d10cfe3923495386106e4a24d45300ef79c", + "sha256:abd44571493973bad4598a3be7e1d807ed45aa2adaf7ab92ab7c62609569b17d", + "sha256:ac02dc29fd397608f8eb15ac1610ae2f2f0154b03f631e6d724d9e2ad4ee2c84", + "sha256:af85529ae8d2a453feee4c780d9406a5e3b17cee0dd75c18bd31adcd584debc3", + "sha256:b0c732aa23de8f8aec23f4b580d1e52905ef468afb4abeafd3fec77042abb6fe", + "sha256:b2142a376b40b6736dfc214fd2902409e9e3857eff554fed2d3c60f097e62a62", + "sha256:b22a07cbb82fea98f8a2fd814f3d1811ff9ed76d0fc6abc84eb21527596e7cc8", + "sha256:b2c3da8d93cf5db60e8858c17684c47d01fee6405e554fb55018dd85fc23b178", + "sha256:b2c7fdaa4d7c3d886a42534adec7cfac73860b89b4e5298752f60aa5984641a0", + "sha256:b30d46379644fbfc3ab81f8f82ae4de55179414651f110a1514f0b1f8f6cb2d7", + "sha256:b42f4d86b451c2f9d06ffb4f8bbc776e04df3ba070b9fe2657804b1b40277c48", + "sha256:b738f7e648735714bbb82bdfd030203360cfeab7f6e8a34772b3c8c8b820568c", + "sha256:b7fc49c37f1786284b12af63152fe1d0990722497e2d5817acfe7a877522f9a9", + "sha256:b8f18914faec94132e5b91e69d76a5c1d7b0c73e2489ea8929c4aaa10b76bbf7", + "sha256:bb2f6ca0ae2d983ded09357b84af659c954722bbf04dea98030064996d156048", + "sha256:bb4c1847b303835d89d785a18801a883436cdfd5dc3d62947f9c49e24f0f5a2c", + "sha256:bc456d04db0515ce3320d714a1eac7a97774ff0849e7718b492d957da4631dd4", + "sha256:bc532422ff26b304cfb62b328826bd995c96154ffd2bac4544f37dbb95ecaa8f", + "sha256:be3aaa60da67e6153eb15715cc2e19091af5dc75faef8b8a585aea372507384b", + "sha256:c33e66d44fe60e72397b487ee92e01da0d09ba2d66df8eae42d77b6d06e5eba0", + "sha256:c371aa98126a0d4c739ca93ceffa0fd7a5d732e3ac66a46e74339acd4d334564", + "sha256:c54d83a2188a10ebdba573f16bd97135d06c9ef60c3dc495315c7a28c80a263f", + "sha256:c7d13103045de1bdd6fe5d61802565f1a3537d70cd3abf596aa0af62761921ee", + "sha256:cb233f9c95f83707dae461b12b720c1af9c28c2d19208e1be03387222151daf5", + "sha256:cd79f3367bd74b317dda655dc8fcfa304d9eb6e4fb06b7168c5cf27f96e0cd62", + "sha256:cdcbed9ad19da81c480dfd6dd161886db6096083c9938ead313d94b30aadf272", + "sha256:d100fcc8930d697c6561156c6810ab4a508fb264c8b6779e6e61e2ed5e7558f9", + "sha256:d4aec24d6b72ee457ec665344a29acb2d35937d5192faebe429ea02633151aad", + "sha256:d6690ec5ec1cce0385cb20896b16be35247ac8c2046e493d03232f1c2414d321", + "sha256:d759cdd7f3e055d6bc8d9bec3ad905227b2e4c785dc16c372eb5b5e83123f48a", + "sha256:da08e7bb297b04e893d91087df19638dc7a6bb858a954b0cc2b9f5053c922312", + "sha256:dacf3c64ef3f7440e3167aa4b49aa9e0fb99e0aa4f9ff03795640bf94531bcb0", + "sha256:daf42de090d59db025af61ce6bdb2521f0f102ea0e6ea310f13c17610a97da4c", + "sha256:dc051506c30b609238d79eda75ee9cab3e520570ec8219844a72a46020901e37", + "sha256:de496365750cc472b4e7902a485d3f152ecf57bd3ba03ddd5578ed8ceb4c5964", + "sha256:dfb874cfa53340009af6bdd7e54ebc0d21012a60a4e65d927c2e477112e63484", + "sha256:e19e0643cc936a22e837f79d01a550678da8377d7d801a14487c10c34ee49c7e", + "sha256:e237b807d68a61fc3b1e845407e27e5eb8ef69bc93fe8505337c1acb4ee300b6", + "sha256:e5867f2651016a3afd8dd2c8238baa66f1e2802f44bc17e236f547ace6647078", + "sha256:e748d4cf8fef2526bb2a589a417eba0c8674e29ffcb570ce2ceca44f1e567bf6", + "sha256:e77dd455b9a16bbd2a5036a63ddbd479c19572af81b624e79ef422f929eef388", + "sha256:e8113639f3296706fbac34a30813929e29247718e88173ad849f57ca59754924", + "sha256:e8cd2415f372e7e5a789d743d133ae474290a90b9023197fd78f32e2dc6873e2", + "sha256:eb2a12d704f180a902d7fa778c6d71f36ceb7b0d317f34cdc76a5d05aa1dd1df", + "sha256:ef9266d2aa545d7374938fb5c484531ef5a2ec7f2d573e62f8ce722c735685fd", + "sha256:f2a50c3c1d11cad0ebebbac357a97b26aa79d2bcaf46f256551152aa85d3a4d1", + "sha256:f2e3b1a6bb38de0bc713edd4d612969dd250ca8b724be8d460001a387507021c", + "sha256:f952dacaa552f3bb8834908dddd500ba7d508e6ea6eb8c52eb2d28f48ca06a31", + "sha256:fa25afbadead523f7001caf0c2382afd272c315a033a7b06336da2637d92d6ed", + "sha256:fb8dae0b6b8b7f9e96c26fdd8121522ce5de9bb5538010870bd538683d30e9a2", + "sha256:fbc74f42c3525ac4ffa4b89cbdd00057b6196bcefe8bce794abd42d33a018092", + "sha256:fe659f6b5d10fb5a17f00a50eb903eb277a71ee35df4615db573c069bcf967ac" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==6.0.2" + }, "oauthlib": { "hashes": [ "sha256:0f0f8aa759826a193cf66c12ea1af1637f87b9b4622d46e866952bb022e538c9", From ab63d0098494b90e837e741cbd2236ffeabb0070 Mon Sep 17 00:00:00 2001 From: Kazuhiro Oka Date: Thu, 4 Dec 2025 22:50:03 +0900 Subject: [PATCH 3/4] add: cchardet --- Pipfile | 1 + Pipfile.lock | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/Pipfile b/Pipfile index ac65599..aa16faf 100644 --- a/Pipfile +++ b/Pipfile @@ -11,6 +11,7 @@ pyyaml = ">=6.0.3,<7.0.0" openai = ">=2.8.0,<3.0.0" beautifulsoup4 = ">=4.14.3,<5.0.0" lxml = ">=6.0.2,<7.0.0" +cchardet = ">=2.1.7,<3.0.0" [dev-packages] diff --git a/Pipfile.lock b/Pipfile.lock index 297b1c2..9f1bf79 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "c44878df56d1e4bc9885dedde605ece5e47f85f878319c95af1e7657414e15e3" + "sha256": "bd1c0fc06070a7cda833b756d1bf9aa2a63ab133694aab3b9a92767fd75b2e1a" }, "pipfile-spec": 6, "requires": { @@ -50,6 +50,41 @@ "markers": "python_version >= '3.9'", "version": "==6.2.2" }, + "cchardet": { + "hashes": [ + "sha256:0b859069bbb9d27c78a2c9eb997e6f4b738db2d7039a03f8792b4058d61d1109", + "sha256:228d2533987c450f39acf7548f474dd6814c446e9d6bd228e8f1d9a2d210f10b", + "sha256:2309ff8fc652b0fc3c0cff5dbb172530c7abb92fe9ba2417c9c0bcf688463c1c", + "sha256:24974b3e40fee9e7557bb352be625c39ec6f50bc2053f44a3d1191db70b51675", + "sha256:273699c4e5cd75377776501b72a7b291a988c6eec259c29505094553ee505597", + "sha256:27a9ba87c9f99e0618e1d3081189b1217a7d110e5c5597b0b7b7c3fedd1c340a", + "sha256:302aa443ae2526755d412c9631136bdcd1374acd08e34f527447f06f3c2ddb98", + "sha256:45456c59ec349b29628a3c6bfb86d818ec3a6fbb7eb72de4ff3bd4713681c0e3", + "sha256:48ba829badef61441e08805cfa474ccd2774be2ff44b34898f5854168c596d4d", + "sha256:50ad671e8d6c886496db62c3bd68b8d55060688c655873aa4ce25ca6105409a1", + "sha256:54341e7e1ba9dc0add4c9d23b48d3a94e2733065c13920e85895f944596f6150", + "sha256:54d0b26fd0cd4099f08fb9c167600f3e83619abefeaa68ad823cc8ac1f7bcc0c", + "sha256:5a25f9577e9bebe1a085eec2d6fdd72b7a9dd680811bba652ea6090fb2ff472f", + "sha256:6b6397d8a32b976a333bdae060febd39ad5479817fabf489e5596a588ad05133", + "sha256:70eeae8aaf61192e9b247cf28969faef00578becd2602526ecd8ae7600d25e0e", + "sha256:80e6faae75ecb9be04a7b258dc4750d459529debb6b8dee024745b7b5a949a34", + "sha256:90086e5645f8a1801350f4cc6cb5d5bf12d3fa943811bb08667744ec1ecc9ccd", + "sha256:a39526c1c526843965cec589a6f6b7c2ab07e3e56dc09a7f77a2be6a6afa4636", + "sha256:b154effa12886e9c18555dfc41a110f601f08d69a71809c8d908be4b1ab7314f", + "sha256:b59ddc615883835e03c26f81d5fc3671fab2d32035c87f50862de0da7d7db535", + "sha256:bd7f262f41fd9caf5a5f09207a55861a67af6ad5c66612043ed0f81c58cdf376", + "sha256:c428b6336545053c2589f6caf24ea32276c6664cb86db817e03a94c60afa0eaf", + "sha256:c6f70139aaf47ffb94d89db603af849b82efdf756f187cdd3e566e30976c519f", + "sha256:c96aee9ebd1147400e608a3eff97c44f49811f8904e5a43069d55603ac4d8c97", + "sha256:ec3eb5a9c475208cf52423524dcaf713c394393e18902e861f983c38eeb77f18", + "sha256:eee4f5403dc3a37a1ca9ab87db32b48dc7e190ef84601068f45397144427cc5e", + "sha256:f16517f3697569822c6d09671217fdeab61dfebc7acb5068634d6b0728b86c0b", + "sha256:f86e0566cb61dc4397297696a4a1b30f6391b50bc52b4f073507a48466b6255a", + "sha256:fdac1e4366d0579fff056d1280b8dc6348be964fda8ebb627c0269e097ab37fa" + ], + "index": "pypi", + "version": "==2.1.7" + }, "certifi": { "hashes": [ "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b", From 64e883a862824dc219cde09ce92a85edff1fdc7f Mon Sep 17 00:00:00 2001 From: Kazuhiro Oka Date: Thu, 4 Dec 2025 22:50:32 +0900 Subject: [PATCH 4/4] =?UTF-8?q?add:=20bs4=E3=81=AB=E3=82=88=E3=82=8Bhtml?= =?UTF-8?q?=20parse?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../type/classified_email_data.py | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/email_priority_classifier/type/classified_email_data.py b/email_priority_classifier/type/classified_email_data.py index c3cba6c..58e5230 100644 --- a/email_priority_classifier/type/classified_email_data.py +++ b/email_priority_classifier/type/classified_email_data.py @@ -2,6 +2,8 @@ import json import re +from bs4 import BeautifulSoup + _LABEL_REPLACE_DATA = { "INBOX": "Inbox", "CATEGORY_PERSONAL": "Category: Personal", # 他のタブに分類されない個人的な会話やメール。 @@ -68,15 +70,17 @@ def get_data(self) -> str: if len(text_parts) == 0: return "parts could not found." - for text_part in text_parts: + for text_part in sorted(text_parts, key=lambda part: part["mimeType"], reverse=True): mimetype = text_part["mimeType"] - if mimetype == "text/plain" or mimetype == "text/html": - # 全体のmimetypeがtext/plainかtext/htmlの場合、そのpartを返す - body_data = text_part.get("body", {}).get("data") - if body_data is None: - return "body data could not found." - data = self._decode_body(text_part["body"]["data"], text_part.get("headers", [])) - return data + body_data = text_part.get("body", {}).get("data") + if body_data is None: + continue + + match mimetype: + case "text/plain": + return self._decode_body(text_part["body"]["data"], text_part.get("headers", [])) + case "text/html": + return self._decode_body(BeautifulSoup(text_part["body"]["data"], 'lxml').get_text(), text_part.get("headers", [])) return json.dumps(text_parts[0], ensure_ascii=False)