krishnanlab · ChristopherMancuso · Sep 9, 2025 · Sep 12, 2025 · Sep 17, 2025 · Sep 17, 2025
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -1,17 +1,19 @@
 name: Tests
 
 on:
-  - workflow_dispatch
-  - push
-  - pull_request
+  workflow_dispatch:
+  pull_request:
+  push:
+    branches:
+      - main
 
 jobs:
   test:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
         os: [ubuntu-latest, windows-latest]
-        python-version: ['3.9', '3.10', '3.11']
+        python-version: ['3.10', '3.11', '3.12', '3.13']
 
     steps:
     - uses: actions/checkout@v2

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -7,19 +7,19 @@ repos:
         exclude: ^docs/
 
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.5.0
+    rev: v6.0.0
     hooks:
     -   id: trailing-whitespace
     -   id: end-of-file-fixer
         exclude: ^docs/
 
 -   repo: https://github.com/asottile/add-trailing-comma
-    rev: v3.1.0
+    rev: v3.2.0
     hooks:
     -   id: add-trailing-comma
 
 -   repo: https://github.com/asottile/pyupgrade
-    rev: v3.15.0
+    rev: v3.20.0
     hooks:
     -   id: pyupgrade
 

diff --git a/README.md b/README.md
@@ -31,54 +31,15 @@ pip install geneplexus
 
 See `example/example_run.py` for example usage of the API.
 
+For other examples see [Package Documentation](https://pygeneplexus.readthedocs.io).
+
 ### Command-line interface
 
 ```bash
 geneplexus --input_file example/input_genes.txt --output_dir example_result
 ```
 
-Full CLI options (check out with ``geneplexus --help``)
-
-```txt
-Run the GenePlexus pipline on a input gene list.
-
-options:
-  -h, --help            show this help message and exit
-  -i , --input_file     Input gene list (.txt) file. (default: None)
-  -d , --gene_list_delimiter
-                        Delimiter used in the gene list. Use 'newline' if the genes are separated
-                        by new line, and use 'tab' if the genes are seperate by tabs. Other
-                        generic separator are also supported, e.g. ', '. (default: newline)
-  -dd , --data_dir      Directory in which the data are stored, if set to None, then use the
-                        default data directory ~/.data/geneplexus (default: None)
-  -n , --network        Network to use. The choices are: {BioGRID, STRING, IMP} (default: STRING)
-  -f , --feature        Types of feature to use. The choices are: {SixSpeciesN2V} (default:
-                        SixSpeciesN2V)
-  -s1 , --sp_trn        Species of training data The choices are: {Human, Mouse, Fly, Worm,
-                        Zebrafish, Yeast} (default: Human)
-  -s2 , --sp_res        Species of results data The choices are: {Human, Mouse, Fly, Worm,
-                        Zebrafish, Yeast} (default: Mouse)
-  -g1 , --gsc_trn       Geneset collection used to generate negatives. The choices are: {GO,
-                        Monarch, Mondo, Combined} (default: GO)
-  -g2 , --gsc_res       Geneset collection used for model similarities. The choices are: {GO,
-                        Monarch, Mondo, Combined} (default: GO)
-  -s , --small_edgelist_num_nodes
-                        Number of nodes in the small edgelist. (default: 50)
-  -od , --output_dir    Output directory with respect to the repo root directory. (default:
-                        result/)
-  -l , --log_level      Logging level. The choices are: {CRITICAL, ERROR, WARNING, INFO, DEBUG}
-                        (default: INFO)
-  -ad, --auto_download_off
-                        Turns off autodownloader which is on by default. (default: False)
-  -q, --quiet           Suppress log messages (same as setting log_level to CRITICAL). (default:
-                        False)
-  -z, --zip-output      If set, then compress the output directory into a Zip file. (default:
-                        False)
-  --clear-data          Clear data directory and exit. (default: False)
-  --overwrite           Overwrite existing result directory if set. (default: False)
-  --skip-mdl-sim        Skip model similarity computation (default: False)
-  --skip-sm-edgelist    Skip making small edgelist. (default: False)
-```
+Run ``geneplexus --help`` to see full CLI options.
 
 # Dev
 

diff --git a/docs/figures/mainfigure.png b/docs/figures/mainfigure.png
diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -1,7 +1,7 @@
-sphinx==6.2.1  # sphinx-rtd-theme 1.1.1 requires sphinx<6 and >=1.6
-sphinx_rtd_theme==1.2.2
-sphinxcontrib-napoleon==0.7
-sphinx-autodoc-typehints==1.23.0
-sphinx-copybutton==0.5.2
+sphinx>=6.2.1  # needs sphinx-rtd-theme>=1.2 (sphinx-rtd-theme 1.1.1 requires sphinx<6 and >=1.6)
+sphinx_rtd_theme>=1.2.2
+sphinxcontrib-napoleon>=0.7
+sphinx-autodoc-typehints>=1.23.0
+sphinx-copybutton>=0.5.2
 pillow>=6.2.0
 numpy
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -86,3 +86,12 @@
 # so a file named "default.css" will overwrite the builtin "default.css".
 # html_static_path = ['_static']
 html_static_path = []
+
+# Hide @property members from the autodoc output.
+def skip_all_properties(app, what, name, obj, skip, options):
+    # Skip anything that's a @property
+    if isinstance(obj, property):
+        return True
+    return skip
+def setup(app):
+    app.connect("autodoc-skip-member", skip_all_properties)
diff --git a/docs/source/geneplexus/geneplexus.rst b/docs/source/geneplexus/geneplexus.rst
@@ -4,11 +4,12 @@ geneplexus.geneplexus
 .. autosummary::
    load_genes
    load_negatives
+   cluster_input
    fit
    predict
    make_sim_dfs
    make_small_edgelist
+   save_class
 .. autoclass:: geneplexus.GenePlexus
    :members:
-   :private-members: _load_genes, _convert_to_entrez, _get_pos_and_neg_genes
-   :undoc-members:
+   :undoc-members:
diff --git a/docs/source/notes/api.rst b/docs/source/notes/api.rst
@@ -18,8 +18,7 @@ Manual download
 .. code-block:: python
 
    from geneplexus.download import download_select_data
-   download_select_data("my_data", species = ["Human", "Mouse"]) # download just Human nd Mouse data
-   download_select_data("my_data")  # download all data at once
+   download_select_data(file_loc=None, species=["Human", "Mouse"]) # download just Human and Mouse data
 
 See :meth:`geneplexus.download.download_select_data` for more information
 
@@ -33,16 +32,16 @@ download necessary data at initialization of the :class:`GenePlexus` object.
 
    from geneplexus import GenePlexus
    gp = GenePlexus(net_type="STRING", features="SixSpeciesN2V",
-                   sp_trn = "Human", sp_res = "Human",
-                   auto_download=True)
+                   sp_trn="Human", sp_res="Human",
+                   file_loc=None, auto_download=True)
 
 .. note::
 
    The default data location is ``~/.data/geneplexus/``. You can change this by
    setting the ``file_loc`` argument of :class:`GenePlexus`.
 
-Run the PyGenePlexus pipeline
------------------------------
+Loading an input gene set
+-------------------------
 
 First, specify the input genes (can have mixed gene ID types, i.e. have any combination of Entrez
 IDs, Gene Symbols, or Ensembl IDs).
@@ -58,22 +57,71 @@ Alternatively, read the gene list from file
    import geneplexus
    input_genes = geneplexus.util.read_gene_list("my_gene_list.txt")
 
+Example running PyGenePlexus pipeline
+-------------------------------------
+
 Next, run the pipline using the :class:`GenePlexus` object.
 
 .. code-block:: python
 
-   # Instantiate GenePlexus class with default parameters
-   gp = geneplexus.GenePlexus()
-
-   # Load input genes and set up positives/negatives for training
-   gp.load_genes(input_genes)
-
-   # Train logistic regression model and get genome-wide gene predictions
-   mdl_weights, df_probs, avgps = gp.fit_and_predict()
-
-   # Optionally, compute model similarity to models pretrained on GO and DisGeNet gene sets
-   df_sim, weights_dict = gp.make_sim_dfs()
-
-   # Optionally, extract the subgraph induced by the top (50 by default) predicted genes
-   df_edge, isolated_genes, df_edge_sym, isolated_genes_sym = gp.make_small_edgelist()
-
+	import geneplexus
+	import json
+	import os.path as osp
+
+	# if you downloaded the back-end data, we can get a gene set from there to use as input
+	# find a set that is large enough to cluster
+	# replace the path if you didn't use the default file_loc
+	fp_base = osp.expanduser("~")
+	fp_full = osp.join(fp_base,
+	                   ".data",
+	                   "geneplexus",
+	                   "PreTrainedWeights__Human__Mondo__STRING__SixSpeciesN2V.json",
+	)
+	with open (fp_full, "r") as f:
+	    disease_gene_sets = json.load(f)
+	for aterm in disease_gene_sets:
+	    num_genes = len(disease_gene_sets[aterm]["PosGenes"])
+	    if num_genes > 100:
+	        input_genes = disease_gene_sets[aterm]["PosGenes"]
+	        print(f"Disease chosen is {disease_gene_sets[aterm]['Name']}")
+	        break
+
+	# initialize GenePlexus
+	gp = geneplexus.GenePlexus(file_loc = None,
+	                           net_type = "STRING",
+	                           sp_trn = "Human",
+	                           gsc_trn = "Combined",
+	                           sp_res = ["Human", "Mouse"],
+	                           gsc_res = ["Combined", "Combined"],
+	                           input_genes = input_genes,
+	)
+
+	# do clustering and generate all results
+	gp.cluster_input()
+	gp.fit()
+	gp.predict()
+	gp.make_sim_dfs()
+	gp.make_small_edgelist()
+
+	# get human gene prediction results for full input gene set model
+	print(gp.model_info["All-Genes"].results["Human-Combined"].df_probs)
+	# get similarities of the trained model to other models trained with human annotations
+	print(gp.model_info["All-Genes"].results["Human-Combined"].df_sim)
+	# get network connections for the top 50 human genes predicted using the full input gene set model
+	print(gp.model_info["All-Genes"].results["Human-Combined"].df_edge_sym)
+
+	# get mouse gene prediction results for cluster 1 gene set model
+	print(gp.model_info["Cluster-01"].results["Mouse-Combined"].df_probs)
+	# get similarities of the trained model to other models trained with mouse annotations
+	print(gp.model_info["Cluster-01"].results["Mouse-Combined"].df_sim)
+	# get network connections for the top 50 mouse genes predicted using the cluster 1 gene set model
+	print(gp.model_info["Cluster-01"].results["Mouse-Combined"].df_edge_sym)
+
+	# get log2(auPRC/prior) metric for the full input gene set model
+	print(gp.model_info["All-Genes"].avgps)
+
+	# save the class; if output_dir=None, it will try to save to ~/.data/geneplexus_outputs/results
+	gp.save_class(output_dir = None)
+
+- For all items saved in GenePlexus class see :class:`GenePlexus`.
+- For structure of save_class output see :ref:`PyGenePlexus CLI <cli>`.