Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
ba25546
refactor: begin
tristan-f-r Jul 30, 2025
c80c561
updated documentation
AMINOexe Jul 30, 2025
a38177b
chore: remove other readme
tristan-f-r Jul 30, 2025
e29e737
fix: use correct file names
tristan-f-r Jul 30, 2025
70500b3
fix: more file path changes
tristan-f-r Jul 30, 2025
d1d3cf7
fix: typo in prize 060
tristan-f-r Jul 30, 2025
d72db21
docs: correct exceptions to copied code
tristan-f-r Jul 31, 2025
e07d6d4
docs: be clearer about paper
tristan-f-r Aug 8, 2025
97d3cfa
remove redundant docs cmt
tristan-f-r Aug 8, 2025
494510b
Merge branch 'main' into hiv-clean
tristan-f-r Jan 22, 2026
23cfcfa
refactor: move hiv to new fetching system
tristan-f-r Jan 22, 2026
0108037
feat: better name mapping
tristan-f-r Jan 23, 2026
2592dbd
chore: drop databases, clean hiv
tristan-f-r Jan 23, 2026
5959e71
docs: cleanup, add Bio
tristan-f-r Jan 23, 2026
c8fa928
chore: clean rest
tristan-f-r Jan 23, 2026
2ee9cd1
docs: more concrete
tristan-f-r Jan 23, 2026
e268654
fix: correct prize file naming
tristan-f-r Jan 23, 2026
667ddf5
fix: correct raw location for depmap
tristan-f-r Jan 23, 2026
36e607d
ci: cache cache
tristan-f-r Jan 23, 2026
9603eac
Merge branch 'main' into hiv-clean
tristan-f-r Jan 23, 2026
b3c43f1
drop kegg orthology
tristan-f-r Jan 23, 2026
b815b47
ci: bump cache, set id
tristan-f-r Jan 23, 2026
45aa4ec
chore: drop bio, bioservices
tristan-f-r Jan 23, 2026
463ccd2
mv to old
tristan-f-r Jan 23, 2026
fdd56ab
add nl, irefindex
tristan-f-r Jan 23, 2026
9363e9b
ci: cache when artifacts made
tristan-f-r Jan 23, 2026
c49d247
fix: use correct dataset paths
tristan-f-r Jan 23, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,20 @@ jobs:
- name: Log conda environment
shell: bash --login {0}
run: conda list
- name: Fetch Artifact Cache
id: fetch-cache
uses: actions/cache/restore@v4
with:
path: cache/artifacts
key: cache-artifacts
- name: Process raw data through Snakemake
run: sh run_snakemake.sh
- name: Cache Artifact Cache
id: cache-cache
uses: actions/cache/save@v4
with:
path: cache/artifacts
key: cache-artifacts
- name: Run Snakemake workflow for DMMMs
shell: bash --login {0}
run: snakemake --cores 4 --configfile configs/dmmm.yaml --show-failed-logs -s spras/Snakefile
Expand Down
7 changes: 6 additions & 1 deletion cache/README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# cache

Handles artifact fetching and cache. This folder has a `Snakefile` which only contains a single function used for producing fetching rules.
Handles artifact fetching and cache. This folder has:

- `Snakefile` which only contains a function used for producing fetching rules.
- `directory.py`, the actual location of file URLs and their cached counterparts.
- `cli.py`, a utility for manually fetching specific URLs from `directory.py`.
- `util.py`, an internal file for use by the other files above.
29 changes: 29 additions & 0 deletions cache/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""
Downloads the online variants of cache items.

This may be expanded in the future, so only depend on this file as a debugging utility.

For example, `python cache/cli.py KEGG/ko03250.xml ko03250.xml` allows running the KEGG query
for ko03250.xml, which cannot normally be accessed automatically in the browser.
"""

import argparse
from cache.directory import get_cache_item

def parse_args():
    """Parse the CLI arguments for the cache-fetching utility.

    Returns:
        argparse.Namespace with ``path`` (a slash-separated key into the
        cache directory, e.g. ``KEGG/ko03250.xml``) and ``output`` (the
        local file path to write the download to).
    """
    parser = argparse.ArgumentParser(
        prog='Cache',
        description='CLI utility for directory.py')
    # help= strings make `python cache/cli.py --help` self-documenting.
    parser.add_argument('path',
                        help='slash-separated cache path, e.g. KEGG/ko03250.xml')
    parser.add_argument('output',
                        help='local file path to write the downloaded item to')

    return parser.parse_args()

def main():
    """Entry point: resolve the requested cache item and download it."""
    arguments = parse_args()
    # Paths into the cache directory are slash-separated, e.g. "KEGG/ko03250.xml".
    segments = arguments.path.split("/")
    item = get_cache_item(segments)
    item.download_online(arguments.output)

if __name__ == "__main__":
    main()
35 changes: 32 additions & 3 deletions cache/directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Union
from os import PathLike
from tempfile import NamedTemporaryFile
from typing import Optional
import urllib.request
import filecmp
import urllib.parse
Expand All @@ -24,18 +25,34 @@ def fetch_biomart_url(xml: str) -> str:

@dataclass
class CacheItem:
"""Class for differentriating between offline and online items in a cache."""
"""
Class for differentiating between offline and online items in a cache.

NOTE: If cached is "", we assume that online is a Google Drive URL (for cases where there is no
remaining online data source.)
"""

name: str
"""The display name of the artifact, used for human-printing."""
cached: str
online: str
online_headers: Optional[list[tuple[str, str]]] = None

@classmethod
def cache_only(cls, name: str, cached: str) -> "CacheItem":
    """Construct a CacheItem that exists only in the cache.

    By the class convention, an empty ``cached`` field marks ``online`` as
    holding the Google Drive cache URL, so the cache URL is stored there.
    """
    return cls(name=name, cached="", online=cached)

def download_online(self, output: str | PathLike):
# https://stackoverflow.com/a/45313194/7589775: this is to add optional headers to requests.
# We remove the opener at the end by re-installing the default opener.
opener = urllib.request.build_opener()
if self.online_headers:
opener.addheaders = self.online_headers
urllib.request.install_opener(opener)
urllib.request.urlretrieve(self.online, output)
urllib.request.install_opener(urllib.request.build_opener())

def download(self, output: str | PathLike):
print(f"Fetching {self.name}...")
print(f"Downloading {self.online}...")
Expand All @@ -46,7 +63,7 @@ def download(self, output: str | PathLike):
gdown.download(self.online, str(output))
return

urllib.request.urlretrieve(self.online, output)
self.download_online(output)

with NamedTemporaryFile() as cached_file:
print(f"Downloading cache {self.cached}...")
Expand Down Expand Up @@ -75,13 +92,16 @@ def download(self, output: str | PathLike):
},
"UniProt": {
# We use FTP when possible, but we delegate to the UniProt REST API in cases that would save significant bandwidth.
# See https://ftp.uniprot.org/pub/databases/uniprot/current_release/README for the FTP README.
"9606": {
# We prefer manually curated genes.
# We prefer manually curated, or SwissProt, genes. This URL selects these genes using the REST API.
"SwissProt_9606.tsv": CacheItem(
name="UniProt 9606 SwissProt genes",
cached="https://drive.google.com/uc?id=1h2Cl-60qcKse-djcsqlRXm_n60mVY7lk",
online="https://rest.uniprot.org/uniprotkb/stream?fields=accession%2Cid%2Cprotein_name%2Cgene_names&format=tsv&query=%28*%29+AND+%28reviewed%3Atrue%29+AND+%28model_organism%3A9606%29",
),
# idmapping FTP files. See the associated README:
# https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/README
"HUMAN_9606_idmapping_selected.tab.gz": CacheItem(
name="UniProt 9606 ID external database mapping",
cached="https://drive.google.com/uc?id=1Oysa5COq31H771rVeyrs-6KFhE3VJqoX",
Expand Down Expand Up @@ -152,6 +172,15 @@ def download(self, output: str | PathLike):
online="https://depmap.org/portal/download/api/download?file_name=downloads-by-canonical-id%2Fpublic-25q2-c5ef.104%2FOmicsCNGeneWGS.csv&dl_name=OmicsCNGeneWGS.csv&bucket=depmap-external-downloads",
),
},
"iRefIndex": {
# This can also be obtained from the SPRAS repo
# (https://github.com/Reed-CompBio/spras/blob/b5d7a2499afa8eab14c60ce0f99fa7e8a23a2c64/input/phosphosite-irefindex13.0-uniprot.txt).
# iRefIndex has been down for quite some time, so this is only from the cache.
"phosphosite-irefindex13.0-uniprot.txt": CacheItem.cache_only(
name="iRefIndex v13.0 UniProt interactome",
cached="https://drive.google.com/uc?id=1fQ8Z3FjEwUseEtsExO723zj7mAAtdomo"
)
},
"OsmoticStress": {
"yeast_pcsf_network.sif": CacheItem.cache_only(
# In the paper https://doi.org/10.1016/j.celrep.2018.08.085
Expand Down
6 changes: 3 additions & 3 deletions configs/dmmm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,12 @@ datasets:
# HIV: https://github.com/Reed-CompBio/spras-benchmarking/blob/0293ae4dc0be59502fac06b42cfd9796a4b4413e/hiv-benchmarking/spras-config/config.yaml
- label: dmmmhiv_060
node_files: ["processed_prize_060.txt"]
edge_files: ["../../../databases/irefindex/phosphosite-irefindex13.0-uniprot.txt"]
edge_files: ["../raw/phosphosite-irefindex13.0-uniprot.txt"]
other_files: []
data_dir: "datasets/hiv/processed"
- label: dmmmhiv_05
node_files: ["processed_prize_05.txt"]
edge_files: ["../../../databases/irefindex/phosphosite-irefindex13.0-uniprot.txt"]
edge_files: ["../raw/phosphosite-irefindex13.0-uniprot.txt"]
other_files: []
data_dir: "datasets/hiv/processed"
# Yeast: https://github.com/tristan-f-r/spras-benchmarking/blob/9477d85871024a5e3a4b0b8b9be7e78c0d0ee961/yeast-osmotic-stress/config.yaml
Expand All @@ -74,7 +74,7 @@ datasets:
other_files: []
- label: dmmmdepmap_cellline_fadu
data_dir: datasets/depmap
edge_files: ["../../databases/irefindex/phosphosite-irefindex13.0-uniprot.txt"]
edge_files: ["raw/phosphosite-irefindex13.0-uniprot.txt"]
node_files: ["processed/FADU_cell_line_prizes_input_nonzero.txt"]
other_files: []
gold_standards:
Expand Down
1 change: 0 additions & 1 deletion databases/.gitignore

This file was deleted.

1 change: 0 additions & 1 deletion databases/irefindex/README.md

This file was deleted.

Loading
Loading