From 4e5723ee2da2cf87683433018293b199cc01ed89 Mon Sep 17 00:00:00 2001
From: Austin Richardson
Date: Wed, 14 Jan 2026 20:18:51 -0800
Subject: [PATCH] Add type stub

---
 taxonomy.pyi   | 172 +++++++++++++++++++++++++++++++++++++++++++++++++
 test_python.py |  94 +++++++++++++++------------
 2 files changed, 225 insertions(+), 41 deletions(-)
 create mode 100644 taxonomy.pyi

diff --git a/taxonomy.pyi b/taxonomy.pyi
new file mode 100644
index 0000000..fa8a927
--- /dev/null
+++ b/taxonomy.pyi
@@ -0,0 +1,172 @@
+from typing import Any, List, Optional, Tuple, Iterator
+
+class TaxonomyError(Exception):
+    """Raised when an error occurs in the taxonomy library."""
+
+    ...
+
+class TaxonomyNode:
+    """The data returned when looking up a taxonomy by id or by name"""
+
+    id: str
+    name: str
+    parent: Optional[str]
+    rank: str
+
+    def __hash__(self) -> int: ...
+    def __repr__(self) -> str: ...
+    def __getitem__(self, key: str) -> Any: ...
+    def __eq__(self, other: object) -> bool: ...
+    def __ne__(self, other: object) -> bool: ...
+
+class Taxonomy:
+    """
+    The Taxonomy object provides the primary interface for exploring a
+    biological taxonomy.
+    """
+    @property
+    def root(self) -> TaxonomyNode: ...
+    @classmethod
+    def from_gtdb(cls, value: str) -> "Taxonomy":
+        """Load a Taxonomy from a GTDB-encoded string."""
+        ...
+
+    @classmethod
+    def from_json(cls, value: str, json_pointer: Optional[str] = None) -> "Taxonomy":
+        """
+        Load a Taxonomy from a JSON-encoded string. The format can either be
+        of the tree or node_link_data types and will be automatically detected.
+        If `json_pointer` is specified, the JSON will be traversed to that sub-object
+        before being parsed as a taxonomy. `json_pointer` has to be a valid JSON path string.
+        """
+        ...
+
+    @classmethod
+    def from_newick(cls, value: str) -> "Taxonomy":
+        """Load a Taxonomy from a Newick-encoded string."""
+        ...
+
+    @classmethod
+    def from_ncbi(cls, dump_dir: str) -> "Taxonomy":
+        """
+        Load a Taxonomy from a directory.
+        The directory must contain the `nodes.dmp` and `names.dmp` files.
+        """
+        ...
+
+    @classmethod
+    def from_phyloxml(cls, value: str) -> "Taxonomy":
+        """Load a Taxonomy from a PhyloXML-encoded string. Experimental."""
+        ...
+
+    def clone(self) -> "Taxonomy":
+        """Clone the current taxonomy"""
+        ...
+
+    def to_json_tree(self) -> bytes:
+        """Export a Taxonomy as a JSON-encoded byte string in a tree format"""
+        ...
+
+    def to_json_node_links(self) -> bytes:
+        """Export a Taxonomy as a JSON-encoded byte string in a node link format"""
+        ...
+
+    def to_newick(self) -> bytes:
+        """Export a Taxonomy as a Newick-encoded byte string."""
+        ...
+
+    def node(self, tax_id: str) -> Optional[TaxonomyNode]:
+        """Find a node by its id. Returns `None` if not found"""
+        ...
+
+    def find_all_by_name(self, name: str) -> List[TaxonomyNode]:
+        """Find all nodes with the given name. Returns an empty list if none match."""
+        ...
+
+    def parent_with_distance(
+        self, tax_id: str, at_rank: Optional[str] = None
+    ) -> Tuple[Optional[TaxonomyNode], Optional[float]]:
+        """
+        Return the immediate parent taxonomy node of the node id provided and the distance to it.
+        If `at_rank` is provided, scan all the nodes in the node's lineage and return
+        the parent id at that rank.
+        """
+        ...
+
+    def parent(self, tax_id: str, at_rank: Optional[str] = None) -> Optional[TaxonomyNode]:
+        """
+        Return the immediate parent taxonomy node of the node id provided.
+        If `at_rank` is provided, scan all the nodes in the node's lineage and return
+        the parent id at that rank.
+        """
+        ...
+
+    def children(self, tax_id: str) -> List[TaxonomyNode]:
+        """Return a list of direct child taxonomy nodes from the node id provided."""
+        ...
+
+    def descendants(self, tax_id: str) -> List[TaxonomyNode]:
+        """Return a list of all child taxonomy nodes from the node id provided."""
+        ...
+
+    def lineage(self, tax_id: str) -> List[TaxonomyNode]:
+        """
+        Return a list of all the parent taxonomy nodes of the node id provided
+        (including that node itself).
+        """
+        ...
+
+    def internal_index(self, tax_id: str) -> int:
+        """Return the internal integer ID generated by the taxonomy library"""
+        ...
+
+    def parents(self, tax_id: str) -> List[TaxonomyNode]:
+        """
+        Return a list of all the parent taxonomy nodes of the node id provided.
+        It is equivalent to `lineage` except it doesn't include itself
+        """
+        ...
+
+    def lca(self, id1: str, id2: str) -> Optional[TaxonomyNode]:
+        """Return the lowest common ancestor of two taxonomy nodes."""
+        ...
+
+    def prune(
+        self, keep: Optional[List[str]] = None, remove: Optional[List[str]] = None
+    ) -> "Taxonomy":
+        """
+        Return a copy of the taxonomy containing:
+        - only the nodes in `keep` and their parents if provided
+        - all of the nodes except those in remove and their children if provided
+        """
+        ...
+
+    def remove_node(self, tax_id: str) -> None:
+        """Remove the node from the tree."""
+        ...
+
+    def add_node(self, parent_id: str, tax_id: str, name: str, rank: str) -> None:
+        """Add a new node to the tree at the parent provided."""
+        ...
+
+    def edit_node(
+        self,
+        tax_id: str,
+        name: Optional[str] = None,
+        rank: Optional[str] = None,
+        parent_id: Optional[str] = None,
+        parent_distance: Optional[float] = None,
+    ) -> None:
+        """Edit properties on a taxonomy node."""
+        ...
+
+    def __repr__(self) -> str: ...
+    def __len__(self) -> int: ...
+    def __getitem__(self, tax_id: str) -> TaxonomyNode: ...
+    def __delitem__(self, tax_id: str) -> None: ...
+    def __contains__(self, tax_id: str) -> bool: ...
+    def __iter__(self) -> Iterator[str]: ...
+
+class TaxonomyIterator:
+    def __next__(self) -> str: ...
+    def __iter__(self) -> "TaxonomyIterator": ...
diff --git a/test_python.py b/test_python.py index fc1145d..c032d76 100644 --- a/test_python.py +++ b/test_python.py @@ -121,6 +121,7 @@ ] } """ + @pytest.fixture def json_tax(): return Taxonomy.from_json(JSON_DATA) @@ -142,18 +143,18 @@ def gtdb_tax(): return Taxonomy.from_gtdb(file.read()) -def test_json_internal_index(json_tax): +def test_json_internal_index(json_tax: Taxonomy): assert [ json_tax.internal_index(x) for x in ["1", "9", "2", "11", "8", "5", "3", "4", "6", "7", "10"] ] == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] -def test_json_find_all_by_name(json_tax): +def test_json_find_all_by_name(json_tax: Taxonomy): assert sorted([n.id for n in json_tax.find_all_by_name("species 1.1")]) == ["10", "12"] -def test_json_edit_node_parent_updates_children(json_tax): +def test_json_edit_node_parent_updates_children(json_tax: Taxonomy): assert json_tax["5"].parent == "4" json_tax.edit_node("5", parent_id="1") node = json_tax["5"] @@ -162,14 +163,14 @@ def test_json_edit_node_parent_updates_children(json_tax): assert "5" in {n.id for n in json_tax.children("1")} -def test_json_prune_works_after_editing_tree(json_tax): +def test_json_prune_works_after_editing_tree(json_tax: Taxonomy): tax = json_tax.clone() tax.edit_node("5", parent_id="1") pruned = tax.prune(keep=["5"]) assert pruned["5"].parent == "1" -def test_json_to_json_tree(json_tax): +def test_json_to_json_tree(json_tax: Taxonomy): small_tax = json_tax.prune(remove=[str(i) for i in range(3, 12)]) actual = json.loads(small_tax.to_json_tree()) expected = { @@ -207,19 +208,21 @@ def test_json_to_json_node_links_empty_tree(json_tax): assert actual == expected -def test_newick_root(newick_tax): +def test_newick_root(newick_tax: Taxonomy): root = newick_tax.root assert root.id == "F" assert root.parent is None -def test_newick_find_node_by_id(newick_tax): +def test_newick_find_node_by_id(newick_tax: Taxonomy): node = newick_tax.node("A") assert node == newick_tax.node("A") + assert node is not None assert node.id == "A" 
assert node.parent == "F" node = newick_tax.node("D") + assert node is not None assert node.id == "D" assert node.parent == "E" @@ -227,7 +230,7 @@ def test_newick_find_node_by_id(newick_tax): assert node is None -def test_newick_index(newick_tax): +def test_newick_index(newick_tax: Taxonomy): node = newick_tax["A"] assert node.id == "A" assert node.parent == "F" @@ -236,30 +239,33 @@ def test_newick_index(newick_tax): _ = newick_tax["unknown"] -def test_newick_find_all_by_name(newick_tax): +def test_newick_find_all_by_name(newick_tax: Taxonomy): nodes = newick_tax.find_all_by_name("A") assert nodes == [] -def test_newick_parent(newick_tax): +def test_newick_parent(newick_tax: Taxonomy): parent = newick_tax.parent("D") + assert parent is not None assert parent.id == "E" -def test_newick_parent_with_distance(newick_tax): +def test_newick_parent_with_distance(newick_tax: Taxonomy): parent, distance = newick_tax.parent_with_distance("D") + assert parent is not None + assert distance is not None assert parent.id == "E" assert abs(distance - 0.4) < 1e-6 -def test_newick_children(newick_tax): +def test_newick_children(newick_tax: Taxonomy): children = newick_tax.children("E") assert len(children) == 2 assert children[0].id == "C" assert children[1].id == "D" -def test_newick_lineage(newick_tax): +def test_newick_lineage(newick_tax: Taxonomy): lineage = newick_tax.lineage("D") assert len(lineage) == 3 assert lineage[0].id == "D" @@ -267,19 +273,20 @@ def test_newick_lineage(newick_tax): assert lineage[2].id == "F" -def test_newick_parents(newick_tax): +def test_newick_parents(newick_tax: Taxonomy): lineage = newick_tax.parents("D") assert len(lineage) == 2 assert lineage[0].id == "E" assert lineage[1].id == "F" -def test_newick_lca(newick_tax): +def test_newick_lca(newick_tax: Taxonomy): lca = newick_tax.lca("A", "D") + assert lca is not None assert lca.id == "F" -def test_newick_prune(newick_tax): +def test_newick_prune(newick_tax: Taxonomy): new_tax = 
newick_tax.prune(remove=["E"]) assert new_tax.node("D") is None assert new_tax.node("E") is None @@ -290,14 +297,14 @@ def test_newick_prune(newick_tax): assert new_tax.node("F") is not None -def test_newick_remove(newick_tax): +def test_newick_remove(newick_tax: Taxonomy): newick_tax.remove_node("E") assert newick_tax.node("D") is not None assert newick_tax.node("E") is None assert len(newick_tax) == 5 -def test_newick_add(newick_tax): +def test_newick_add(newick_tax: Taxonomy): newick_tax.add_node("D", "G", "something", "species") node = newick_tax["G"] assert node.parent == "D" @@ -307,13 +314,13 @@ def test_newick_add(newick_tax): assert node.parent == "G" -def test_newick_edit_node(newick_tax): +def test_newick_edit_node(newick_tax: Taxonomy): newick_tax.edit_node("D", parent_distance=3) - node, distance = newick_tax.parent_with_distance("D") + _, distance = newick_tax.parent_with_distance("D") assert distance == 3 -def test_newick_can_clone(newick_tax): +def test_newick_can_clone(newick_tax: Taxonomy): tax2 = newick_tax.clone() newick_tax.remove_node("E") @@ -326,20 +333,21 @@ def test_newick_can_clone(newick_tax): assert len(tax2) == 6 -def test_newick_output_uses_tax_ids(newick_tax): +def test_newick_output_uses_tax_ids(newick_tax: Taxonomy): res = newick_tax.to_newick().decode("utf-8") for tax_id in ["A", "B", "C", "D", "E", "F"]: assert tax_id in res -def test_ncbi_root(ncbi_tax): +def test_ncbi_root(ncbi_tax: Taxonomy): root = ncbi_tax.root assert root.id == "1" assert root.parent is None -def test_ncbi_find_node_by_id(ncbi_tax): +def test_ncbi_find_node_by_id(ncbi_tax: Taxonomy): node = ncbi_tax.node("1236") + assert node is not None assert node.id == "1236" assert node.name == "Gammaproteobacteria" assert node.parent == "1224" @@ -348,7 +356,7 @@ def test_ncbi_find_node_by_id(ncbi_tax): assert node is None -def test_ncbi_index(ncbi_tax): +def test_ncbi_index(ncbi_tax: Taxonomy): node = ncbi_tax["1236"] assert node.id == "1236" assert node.name == 
"Gammaproteobacteria" @@ -358,31 +366,34 @@ def test_ncbi_index(ncbi_tax): _ = ncbi_tax["unknown"] -def test_ncbi_find_all_by_name(ncbi_tax): +def test_ncbi_find_all_by_name(ncbi_tax: Taxonomy): nodes = ncbi_tax.find_all_by_name("Escherichia coli") assert [n.id for n in nodes] == ["562"] assert [n.name for n in nodes] == ["Escherichia coli"] assert [n.parent for n in nodes] == ["561"] -def test_ncbi_parent(ncbi_tax): +def test_ncbi_parent(ncbi_tax: Taxonomy): parent = ncbi_tax.parent("562") + assert parent is not None assert parent.id == "561" -def test_ncbi_parent_with_distance(ncbi_tax): +def test_ncbi_parent_with_distance(ncbi_tax: Taxonomy): parent, distance = ncbi_tax.parent_with_distance("562") + assert parent is not None + assert distance is not None assert parent.id == "561" assert abs(distance - 1.0) < 1e-6 -def test_ncbi_children(ncbi_tax): +def test_ncbi_children(ncbi_tax: Taxonomy): children = ncbi_tax.children("561") assert len(children) == 1 assert children[0].id == "562" -def test_ncbi_lineage(ncbi_tax): +def test_ncbi_lineage(ncbi_tax: Taxonomy): lineage = ncbi_tax.lineage("562") assert len(lineage) == 9 assert lineage[0].id == "562" @@ -390,19 +401,20 @@ def test_ncbi_lineage(ncbi_tax): assert lineage[-1].id == "1" -def test_ncbi_parents(ncbi_tax): +def test_ncbi_parents(ncbi_tax: Taxonomy): lineage = ncbi_tax.parents("562") assert len(lineage) == 8 assert lineage[0].id == "561" assert lineage[-1].id == "1" -def test_ncbi_lca(ncbi_tax): +def test_ncbi_lca(ncbi_tax: Taxonomy): lca = ncbi_tax.lca("562", "91347") + assert lca is not None assert lca.id == "91347" -def test_ncbi_prune(ncbi_tax): +def test_ncbi_prune(ncbi_tax: Taxonomy): new_tax = ncbi_tax.prune(remove=["561"]) assert new_tax.node("561") is None assert new_tax.node("562") is None @@ -422,7 +434,7 @@ def test_ncbi_remove(): assert len(tax) == 8 -def test_ncbi_add(ncbi_tax): +def test_ncbi_add(): tax = Taxonomy.from_ncbi("tests/data/") tax.add_node("561", "563", "Listeria", "species") node 
= tax["563"] @@ -437,7 +449,7 @@ def test_ncbi_add(ncbi_tax): assert node.rank == "genus" -def test_ncbi_cannot_add_duplicate_tax_id(ncbi_tax): +def test_ncbi_cannot_add_duplicate_tax_id(): tax = Taxonomy.from_ncbi("tests/data/") tax.add_node("561", "563", "Listeria", "species") @@ -446,35 +458,35 @@ def test_ncbi_cannot_add_duplicate_tax_id(ncbi_tax): assert "563" in str(context.value) -def test_ncbi_edit_node(ncbi_tax): +def test_ncbi_edit_node(): tax = Taxonomy.from_ncbi("tests/data/") tax.edit_node("562", parent_distance=3) - node, distance = tax.parent_with_distance("562") + _, distance = tax.parent_with_distance("562") assert distance == 3 -def test_ncbi_edit_node_parent(ncbi_tax): +def test_ncbi_edit_node_parent(): tax = Taxonomy.from_ncbi("tests/data/") assert tax["562"].parent == "561" tax.edit_node("562", parent_id="1") assert tax["562"].parent == "1" -def test_ncbi_repr(ncbi_tax): +def test_ncbi_repr(): tax = Taxonomy.from_ncbi("tests/data/") assert ( tax["562"].__repr__() == '' ) -def test_gtdb_root(gtdb_tax): +def test_gtdb_root(gtdb_tax: Taxonomy): root = gtdb_tax.root assert root.id == "d__Bacteria" assert root.rank == "domain" assert root.parent is None -def test_gtdb_lineage(gtdb_tax): +def test_gtdb_lineage(gtdb_tax: Taxonomy): assert [n.id for n in gtdb_tax.lineage("d__Bacteria")] == ["d__Bacteria"] assert [n.id for n in gtdb_tax.lineage("c__Bacilli")] == [ "c__Bacilli",