OpenPecha-Data · Zakongjampa · Jan 23, 2023 · Jan 27, 2023 · Feb 6, 2023
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,3 @@
 .vscode/
 **/.ipynb_checkpoints
+.env/
diff --git a/api_key.json b/api_key.json
diff --git a/data/Norbu-Ketaka/pechas.csv b/data/Norbu-Ketaka/pechas.csv
@@ -69,8 +69,7 @@ I11910030,ཀློང་ཆེན་སྙིང་ཐིག་གི་ནང
 IA52587D4,གསུང་འབུམ། བསོད་ནམས་ལྷུན་གྲུབ།,W4PD2079
 IA0236A40,མཛེས་རྩལ་དཔེ་རིས་སྦྲང་མའི་ཤོག་རླབས།,W3CN5303
 IAF9DAF3B,རྡོ་རྗེ་རྣལ་འབྱོར་མའི་དཀྱིལ་འཁོར་སྒྲུབ་མཆོད་འགྲོ་ཀུན་མཁའ་སྤྱོད་བགྲོད་པའི་མྱུར་ལམ།,W1KG17576
-I5528147D,,W1K2118
 I8656639A,འདུལ་བ་མདོའི་དགོངས་པ་གསལ་བར་བྱེད་པ།,W1KG3397
 ID1B20A1C,བསྐང་བ་རྩ་གསུམ་དགྱེས་པའི་མཆོད་སྤྲིན།,W3CN4050
 I1283733E,གྲུབ་མཐའ་རྩ་བའི་ཚིག་ཊིཀ,W8LS17297
-I5D3FD391,བླ་མ་ཡང་ཏིག་ཡིད་བཞིན་ནོར་བུ།,W3CN50
+I5D3FD391,བླ་མ་ཡང་ཏིག་ཡིད་བཞིན་ནོར་བུ།,W3CN50
diff --git a/script/catalog.py b/script/catalog.py
@@ -0,0 +1,48 @@
+from github import Github
+import requests
+import os
+import csv
+import re
+
+
+
+def download_csv(repo):
+    """Fetch data/catalog.csv from the master branch of a GitHub repository.
+
+    Args:
+        repo: Repository object; must provide get_contents(path, ref=...).
+
+    Returns:
+        catalog_csv: The requests response for the file's download_url.
+    """
+    file = repo.get_contents("data/catalog.csv", ref="master")
+    catalog_csv = requests.get(file.download_url, stream=True)
+    return catalog_csv
+
+
+def get_repos_in_catalog(catalog):  # catalog: response-like object exposing `.content` bytes
+    repos_in_catalog = set()  # unique pecha ids collected so far; this set is the return value
+    # Decode the body as UTF-8, split it on newlines and parse the lines as comma-separated rows.
+    data = catalog.content.decode('utf-8')
+    pechas_list = data.split("\n")
+    pechas = list(csv.reader(pechas_list, delimiter=","))
+    for pecha in pechas[1:-3]:  # skips the first row and the last three rows; NOTE(review): reason not shown here - confirm
+        pecha_id = re.search("\[.+\]", pecha[0])[0][1:-1]  # text between "[" and "]" in column 0; NOTE(review): non-raw pattern, and a missing match raises TypeError
+        if (pecha_id in repos_in_catalog):
+            print(f"Pecha is already in catalog {pecha_id}")  # duplicates are only reported, not stored again
+        else:
+            repos_in_catalog.add(pecha_id)
+    return repos_in_catalog
+
+
+def get_existing_pecha(repo):  # TODO: unimplemented stub; `repo` is unused for now
+    pass  # no body yet, so every call returns None
+
+if __name__ == "__main__":  # script entry point: runs only when the file is executed directly
+    token = os.environ.get('GitHubToken')  # None when the GitHubToken environment variable is unset
+    g = Github(token)
+    repo = g.get_repo("OpenPecha-Data/catalog")
+    catalog_csv = download_csv(repo)  # HTTP response holding data/catalog.csv
+    repos_in_catalog_set = get_repos_in_catalog(catalog_csv)  # set of pecha ids parsed from the catalog
+    existing_pecha_set = get_existing_pecha(repo)  # currently None: get_existing_pecha is still a stub
+    # save(catalog_csv,"catalog.txt")
diff --git a/script/catalog_info.py b/script/catalog_info.py
@@ -0,0 +1,15 @@
+from dataclasses import dataclass, field
+
+@dataclass(frozen=True)  # frozen: assigning to a field after construction raises FrozenInstanceError
+class catalog_info:  # presumably one catalog entry per pecha - TODO confirm against the catalog schema
+    pecha_id : str
+    title : str 
+    volume : any  # NOTE(review): `any` is the builtin function, not typing.Any - confirm intended type
+    author : str 
+    source_id : int 
+    creation_date : str  # NOTE(review): stored as a string; the date format is not shown here
+    legacy_id : int
+
+
+
+