From b114f5f4c2a3f80ecf4d5e97cb3a89f955881589 Mon Sep 17 00:00:00 2001 From: Tenzin Topjor <37167893+jungtop@users.noreply.github.com> Date: Mon, 23 Jan 2023 22:36:26 +0530 Subject: [PATCH 1/3] Update pechas.csv --- data/Norbu-Ketaka/pechas.csv | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/data/Norbu-Ketaka/pechas.csv b/data/Norbu-Ketaka/pechas.csv index 53e99f6c..11725b6e 100644 --- a/data/Norbu-Ketaka/pechas.csv +++ b/data/Norbu-Ketaka/pechas.csv @@ -69,8 +69,7 @@ I11910030,ཀློང་ཆེན་སྙིང་ཐིག་གི་ནང IA52587D4,གསུང་འབུམ། བསོད་ནམས་ལྷུན་གྲུབ།,W4PD2079 IA0236A40,མཛེས་རྩལ་དཔེ་རིས་སྦྲང་མའི་ཤོག་རླབས།,W3CN5303 IAF9DAF3B,རྡོ་རྗེ་རྣལ་འབྱོར་མའི་དཀྱིལ་འཁོར་སྒྲུབ་མཆོད་འགྲོ་ཀུན་མཁའ་སྤྱོད་བགྲོད་པའི་མྱུར་ལམ།,W1KG17576 -I5528147D,,W1K2118 I8656639A,འདུལ་བ་མདོའི་དགོངས་པ་གསལ་བར་བྱེད་པ།,W1KG3397 ID1B20A1C,བསྐང་བ་རྩ་གསུམ་དགྱེས་པའི་མཆོད་སྤྲིན།,W3CN4050 I1283733E,གྲུབ་མཐའ་རྩ་བའི་ཚིག་ཊིཀ,W8LS17297 -I5D3FD391,བླ་མ་ཡང་ཏིག་ཡིད་བཞིན་ནོར་བུ།,W3CN50 \ No newline at end of file +I5D3FD391,བླ་མ་ཡང་ཏིག་ཡིད་བཞིན་ནོར་བུ།,W3CN50 From be34e010ab5db3a36cbf16d3dda34c09a434ca83 Mon Sep 17 00:00:00 2001 From: Tashi Tsering <43548581+ta4tsering@users.noreply.github.com> Date: Fri, 27 Jan 2023 16:36:45 +0530 Subject: [PATCH 2/3] Delete api_key.json --- api_key.json | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 api_key.json diff --git a/api_key.json b/api_key.json deleted file mode 100644 index 12de0dd8..00000000 --- a/api_key.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "type": "service_account", - "project_id": "buda-nexus-pedurma", - "private_key_id": "a39a19693dfd4aecf26f0b8a6c1d881254ff3fde", - "private_key": "-----BEGIN PRIVATE 
KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQCHaXkflNfRwlU0\nl3gtpgo+09o/ALFJG0zksrfNYTXJSbtsgeCBst1G/9+/q7NmMTC+veDovKiVPxFy\n8Qnac8brioimyyqXqobXdSTPDv1xUSvCdsAQ3F4OaSCntzMk0YNJF8zKhrt4O7Rm\nJSH1Al4NiQCdr1nywbgtFR6bb9AlLDFoYLqhYp9nOdNE5/XWKokB9Zelkql0fA0K\nd4SD6DLr1kiuIg+Bun/K/ZibWtVaPXu5NRP2o+T5TRSi5Bw2hlKK4UqTuxgn86h/\n+TRC1DKYyH6b4wUr3og6NyK5C4Oyu1QzoeEIM+XYMNodkM8Hi5oWYKn0Yq7ECyCn\nuCpOD0pdAgMBAAECggEABaTyGUynPVVUH2PT7NUvtqQ2g+tHuIQkjaDcc/liGFCU\nOSblwIo/2nqGbVMfIHqa7Ux1YEYi4cu18rxU3I/4Ye+NmCQ+U4GWass1fFMD4fIP\n+KRiB6Hj4sZjh9IlVdrM1eJh1SxOCqo0cBCKrI+Lc5V7GteJEIseHfT0KGb9/lqc\ndA4kjFn3i+T9PXekG1DJox5oAja+X5GEhmf7GHQSOyW6Lo79F5tJCSEi5hCNvi6I\ntUF5A2kAy30inUYsYyXc3qZIobBfFvNewdCWUM8UkumD7q5BwMPE3yaW9qoKkQy0\nq3BpWG+T3YeEhZDt49HbLroAtiBv08xg9YpONga+UQKBgQC6R/td3j5/w0Wc4YXa\nJLrxUz9QL+zTNV+YslvR97+4zs/+HLl7zJQLxXMu/qjA30EQSXF18qXUkttj1rmh\n1LVXpzzvzCSUtD8ANmmQTiVKhhvSk5WBc8NZeb+O5+miXPUlWAL64vwtNvpWCrjF\nOBlivnMZ/zKL0OiPDE3qWUDcDQKBgQC6F5ny8AczB09D6Q1ihUicxnDTdIvkyiTz\nPQxOEfDjVReY6JehT3u38rAzGWiOkO+LY5CEv16jq65WKYwWmD1zcxoaEtDM2IVi\nGkAu5RD87LxiKxlwmYfCc2RVB2pAce7JeHEF+aX+M5R1TlUsJ/tRXUyRy+TFtPXv\n9ca4iJiDkQKBgC0A4OiPnbfOHE5HacgG4acVvpEH0wY/ucWxKV+zo/zo8fTpSaJw\nDUA+xpIO8gQOq7Bcne2p3ohZpmkbYiOHuPvhKmaggiHu3d2WR7CWQIodljEseI0a\nw5IlSLMkshwIqy0Hc0jbd6MQyi/pTTRQVkm1yyb3sWV35gdRKK26PGItAoGAQi4g\njtg0Bz7DPsiD/RSfdTnRaDIbijsllueQF7ejroVVhgi53CFTQ2DortOLbqA2wKCh\nBcS1GIn2zyepbWOo/wThNIKahS8/iFsDlmnad8iNWnAvdO706/9zoBWqAM9D95yA\nvXpCQTwAwRMhkTmK1C3VDEixzGoFyNfkWz4g1zECgYBlMGoJGvxDTAqGR5NuC1HW\ni3A3l1D0t29CtSZpbTBANaB3KP6tYmWvwnGZxR5edNQHKNs/kDbY/JZOML+UrHF0\nKZpQrsIhmVr+eXdjR9KlIcTdIwlj6zp/hvxp9iHc3TgCiWb/t7Pnqu4ayjBB2tzO\nCtSmIf/eyBbOTIXtHSXCeA==\n-----END PRIVATE KEY-----\n", - "client_email": "buda-nexus@buda-nexus-pedurma.iam.gserviceaccount.com", - "client_id": "116294822522355268336", - "auth_uri": "https://accounts.google.com/o/oauth2/auth", - "token_uri": "https://oauth2.googleapis.com/token", - "auth_provider_x509_cert_url": 
"https://www.googleapis.com/oauth2/v1/certs", - "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/buda-nexus%40buda-nexus-pedurma.iam.gserviceaccount.com" -} \ No newline at end of file From 7e9dda45e0dfd93e2f14c35dbc57f756caea17c0 Mon Sep 17 00:00:00 2001 From: Jampa Dhondup Date: Mon, 6 Feb 2023 16:28:17 +0530 Subject: [PATCH 3/3] fix : download git repo issue #15 --- .gitignore | 1 + script/catalog.py | 48 ++++++++++++++++++++++++++++++++++++++++++ script/catalog_info.py | 15 +++++++++++++ 3 files changed, 64 insertions(+) create mode 100644 script/catalog.py create mode 100644 script/catalog_info.py diff --git a/.gitignore b/.gitignore index 7f0738ad..f89de663 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .vscode/ **/.ipynb_checkpoints +.env/ diff --git a/script/catalog.py b/script/catalog.py new file mode 100644 index 00000000..52b8ccb5 --- /dev/null +++ b/script/catalog.py @@ -0,0 +1,48 @@ +from github import Github +import requests +import os +import csv +import re + + + +def download_csv(repo): + """Download a specific file from the GitHub repository + + Args: + repo (repo): A repository object used to access the content of a file + + Returns: + catalog_csv: The content of the file as an HTTP response.
+ """ + file = repo.get_contents("data/catalog.csv", ref="master") + catalog_csv = requests.get(file.download_url, stream=True) + return catalog_csv + + +def get_repos_in_catalog(catalog): + repos_in_catalog = set() + + data = catalog.content.decode('utf-8') + pechas_list = data.split("\n") + pechas = list(csv.reader(pechas_list, delimiter=",")) + for pecha in pechas[1:-3]: + pecha_id = re.search("\[.+\]", pecha[0])[0][1:-1] + if (pecha_id in repos_in_catalog): + print(f"Pecha is already in catalog {pecha_id}") + else: + repos_in_catalog.add(pecha_id) + return repos_in_catalog + + +def get_existing_pecha(repo): + pass + +if __name__ == "__main__": + token = os.environ.get('GitHubToken') + g = Github(token) + repo = g.get_repo("OpenPecha-Data/catalog") + catalog_csv = download_csv(repo) + repos_in_catalog_set = get_repos_in_catalog(catalog_csv) + existing_pecha_set = get_existing_pecha(repo) + # save(catalog_csv,"catalog.txt") diff --git a/script/catalog_info.py b/script/catalog_info.py new file mode 100644 index 00000000..a3579f95 --- /dev/null +++ b/script/catalog_info.py @@ -0,0 +1,15 @@ +from dataclasses import dataclass, field + +@dataclass(frozen=True) +class catalog_info: + pecha_id : str + title : str + volume : any + author : str + source_id : int + creation_date : str + legacy_id : int + + + + \ No newline at end of file