Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
.vscode/
**/.ipynb_checkpoints
.env/
12 changes: 0 additions & 12 deletions api_key.json

This file was deleted.

3 changes: 1 addition & 2 deletions data/Norbu-Ketaka/pechas.csv
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,7 @@ I11910030,ཀློང་ཆེན་སྙིང་ཐིག་གི་ནང
IA52587D4,གསུང་འབུམ། བསོད་ནམས་ལྷུན་གྲུབ།,W4PD2079
IA0236A40,མཛེས་རྩལ་དཔེ་རིས་སྦྲང་མའི་ཤོག་རླབས།,W3CN5303
IAF9DAF3B,རྡོ་རྗེ་རྣལ་འབྱོར་མའི་དཀྱིལ་འཁོར་སྒྲུབ་མཆོད་འགྲོ་ཀུན་མཁའ་སྤྱོད་བགྲོད་པའི་མྱུར་ལམ།,W1KG17576
I5528147D,,W1K2118
I8656639A,འདུལ་བ་མདོའི་དགོངས་པ་གསལ་བར་བྱེད་པ།,W1KG3397
ID1B20A1C,བསྐང་བ་རྩ་གསུམ་དགྱེས་པའི་མཆོད་སྤྲིན།,W3CN4050
I1283733E,གྲུབ་མཐའ་རྩ་བའི་ཚིག་ཊིཀ,W8LS17297
I5D3FD391,བླ་མ་ཡང་ཏིག་ཡིད་བཞིན་ནོར་བུ།,W3CN50
I5D3FD391,བླ་མ་ཡང་ཏིག་ཡིད་བཞིན་ནོར་བུ།,W3CN50
48 changes: 48 additions & 0 deletions script/catalog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from github import Github
import requests
import os
import csv
import re



def download_csv(repo, timeout=30):
    """Download ``data/catalog.csv`` from the ``master`` branch of *repo*.

    Args:
        repo: Repository object exposing ``get_contents(path, ref=...)``.
            The entry it returns must provide a ``download_url`` attribute.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``.

    Returns:
        The HTTP response returned by ``requests.get`` for the file's
        download URL.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within *timeout*.
    """
    catalog_file = repo.get_contents("data/catalog.csv", ref="master")
    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.get(
        catalog_file.download_url, stream=True, timeout=timeout
    )
    # Fail here rather than letting an error page be parsed as catalog rows.
    response.raise_for_status()
    return response


def get_repos_in_catalog(catalog):
    """Collect the bracketed ids found in the first column of the catalog.

    The first row (header) and the last three rows are not inspected. For
    every other row, the text between ``[`` and the last ``]`` of the first
    column is taken as the id. Blank rows and rows without a bracketed id
    are skipped. An id seen more than once is reported on stdout and kept
    only once.

    Args:
        catalog: Object whose ``content`` attribute holds the UTF-8 encoded
            CSV text as bytes (e.g. the response from ``download_csv``).

    Returns:
        set: The distinct ids found in the catalog.
    """
    # Raw string: "\[" in a normal string literal is an invalid escape.
    id_pattern = re.compile(r"\[(.+)\]")
    repos_in_catalog = set()

    lines = catalog.content.decode("utf-8").split("\n")
    rows = list(csv.reader(lines, delimiter=","))
    # NOTE(review): the trailing three rows are dropped exactly as before;
    # presumably they are not catalog entries - confirm against the file.
    for row in rows[1:-3]:
        if not row:
            # Blank line in the CSV: there is no first column to inspect.
            continue
        match = id_pattern.search(row[0])
        if match is None:
            # No "[id]" in this row; skip it instead of crashing on None.
            continue
        pecha_id = match.group(1)
        if pecha_id in repos_in_catalog:
            print(f"Pecha is already in catalog {pecha_id}")
        else:
            repos_in_catalog.add(pecha_id)
    return repos_in_catalog


def get_existing_pecha(repo):
    """Placeholder that is not implemented yet.

    Args:
        repo: Currently unused.

    Returns:
        None: Always, until the function is implemented.
    """
    return None

if __name__ == "__main__":
    # The access token is read from the "GitHubToken" environment variable;
    # it is None when the variable is not set.
    token = os.getenv("GitHubToken")
    g = Github(token)
    repo = g.get_repo("OpenPecha-Data/catalog")

    # Fetch the catalog file and collect the ids it lists.
    catalog_csv = download_csv(repo)
    repos_in_catalog_set = get_repos_in_catalog(catalog_csv)

    # get_existing_pecha is still a stub, so this is None for now.
    existing_pecha_set = get_existing_pecha(repo)
    # NOTE(review): a call save(catalog_csv, "catalog.txt") was left commented
    # out here; no save() function is visible in this file.
15 changes: 15 additions & 0 deletions script/catalog_info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from dataclasses import dataclass, field

@dataclass(frozen=True)
class catalog_info:
    """Immutable record describing one catalog entry.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    pecha_id: str
    title: str
    # The original annotation was the builtin function ``any``, which is not
    # a type. ``object`` states "any value" without needing a new import.
    volume: object
    author: str
    source_id: int
    creation_date: str
    legacy_id: int