From 74e3080e6752cdb316d0c5d460e584b9da081de0 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Thu, 21 Aug 2025 19:55:58 +0000 Subject: [PATCH] fix: never parse index_col from node data --- spras/dataset.py | 2 +- .../fixtures/toy-372/input-interactome.txt | 2 ++ test/dataset/fixtures/toy-372/input-nodes.txt | 3 +++ test/dataset/test_dataset.py | 22 +++++++++++++++++++ 4 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 test/dataset/fixtures/toy-372/input-interactome.txt create mode 100644 test/dataset/fixtures/toy-372/input-nodes.txt diff --git a/spras/dataset.py b/spras/dataset.py index 891f4d6f9..c2271235c 100644 --- a/spras/dataset.py +++ b/spras/dataset.py @@ -121,7 +121,7 @@ def load_files_from_dict(self, dataset_dict: DatasetDict): # Load generic node tables self.node_table = pd.DataFrame(node_set, columns=[self.NODE_ID]) for node_file in node_data_files: - single_node_table = pd.read_table(os.path.join(data_loc, node_file)) + single_node_table = pd.read_table(os.path.join(data_loc, node_file), index_col=False) # If we have only 1 column, assume this is an indicator variable if len(single_node_table.columns) == 1: single_node_table = pd.read_table( diff --git a/test/dataset/fixtures/toy-372/input-interactome.txt b/test/dataset/fixtures/toy-372/input-interactome.txt new file mode 100644 index 000000000..f252ca4ca --- /dev/null +++ b/test/dataset/fixtures/toy-372/input-interactome.txt @@ -0,0 +1,2 @@ +C D 0.77 U +N O 0.66 U \ No newline at end of file diff --git a/test/dataset/fixtures/toy-372/input-nodes.txt b/test/dataset/fixtures/toy-372/input-nodes.txt new file mode 100644 index 000000000..2efa6f320 --- /dev/null +++ b/test/dataset/fixtures/toy-372/input-nodes.txt @@ -0,0 +1,3 @@ +NODEID prize active dummy sources targets +N +C 5.7 True True diff --git a/test/dataset/test_dataset.py b/test/dataset/test_dataset.py index 4cb988632..52333ca91 100644 --- a/test/dataset/test_dataset.py +++ b/test/dataset/test_dataset.py @@ -2,6 +2,7 @@ import pandas import pytest +import numpy as np from spras.dataset import Dataset @@ -58,3 +59,24 @@ def test_standard(self): }) assert len(dataset.get_interactome()) == 2 + + # 372 is a PR, but for the relevant comment, see + # https://github.com/Reed-CompBio/spras/pull/372/files#r2291953612. + # Note that the input-nodes file has more tabs than the original fixture. + def test_372(self): + dataset = Dataset({ + 'label': 'toy-372', + 'edge_files': ['input-interactome.txt'], + 'node_files': ['input-nodes.txt'], + 'data_dir': FIXTURES_PATH / 'toy-372', + 'other_files': [] + }) + + node_table = dataset.node_table + assert node_table is not None + + assert node_table[node_table[Dataset.NODE_ID] == 'C'].iloc[0]['prize'] == 5.7 + assert node_table[node_table[Dataset.NODE_ID] == 'C'].iloc[0]['active'] == True + + assert np.isnan(node_table[node_table[Dataset.NODE_ID] == 'C'].iloc[0]['sources']) + assert node_table[node_table[Dataset.NODE_ID] == 'C'].iloc[0]['targets'] == True