From 8af5e24ee22278e5ad7af395cea77351276e58b4 Mon Sep 17 00:00:00 2001
From: Florian Huber
Date: Tue, 2 Dec 2025 19:52:50 +0100
Subject: [PATCH 1/2] add graphml import/export

---
Review notes (free text after the three-dash line; not part of the commit):

* Adds Graph.from_graphml (classmethod) and Graph.to_graphml.
  from_graphml reads the file with networkx.read_graphml, builds a CSR
  adjacency in networkx node order from the 'weight' edge attribute,
  collects the union of node attributes into a pandas DataFrame (one row
  per node, columns sorted by name; None when there are no nodes or no
  attributes) and delegates construction to cls.from_csr.
  to_graphml converts via self.to_networkx(), optionally copies the
  graph-level flags into G_nx.graph, and writes with
  networkx.write_graphml.
* NOTE(review): to_graphml stores self.ignore_selfloops and
  self.keep_explicit_zeros unconditionally and its inline comment says
  they may be None. Confirm that networkx.write_graphml accepts None as
  an attribute value; if it does not, skip these keys when None.
* NOTE(review): from_graphml forwards ignore_selfloops=None and
  keep_explicit_zeros=None to from_csr when the file does not carry
  them. Confirm from_csr treats None as "use the default".
* NOTE(review): when the file has no 'weighted' attribute, weighted is
  inferred as number_of_edges() > 0, so an edgeless graph loads with
  weighted=False.
* NOTE(review): both lazy imports catch `Exception`; `ImportError` would
  be the narrower match for a missing dependency.
* NOTE(review): from_graphml uses `pd.DataFrame` and this patch adds no
  import, so it presumably relies on an existing module-level
  `import pandas as pd` in graph.py - confirm.
* NOTE(review): line breaks and indentation below were restored from a
  whitespace-collapsed copy of this series; context-line indentation is
  inferred. Verify with `git apply --check` before applying.

 graphconstructor/graph.py | 131 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 131 insertions(+)

diff --git a/graphconstructor/graph.py b/graphconstructor/graph.py
index dcbd85e..a6a4d5e 100644
--- a/graphconstructor/graph.py
+++ b/graphconstructor/graph.py
@@ -212,6 +212,107 @@ def from_edges(
             keep_explicit_zeros=keep_explicit_zeros,
         )
 
+    @classmethod
+    def from_graphml(
+        cls,
+        path,
+        *,
+        default_mode: str = "similarity",
+    ) -> "Graph":
+        """
+        Load a Graph from a GraphML file.
+
+        Parameters
+        ----------
+        path
+            File path or file-like object accepted by networkx.read_graphml.
+        default_mode
+            Fallback value for the Graph's mode if it is not stored in the
+            GraphML file. Must be either 'distance' or 'similarity'.
+
+        Notes
+        -----
+        - Uses networkx.read_graphml under the hood.
+        - Edge weights are taken from the 'weight' attribute, if present.
+        - Node attributes become columns of the metadata DataFrame.
+        - Graph-level attributes 'mode', 'directed', 'weighted',
+          'ignore_selfloops', and 'keep_explicit_zeros' are honored if present.
+        """
+        try:
+            import networkx as nx  # lazy import
+        except Exception as e:
+            raise ImportError("networkx is required for from_graphml().") from e
+
+        if default_mode not in {"distance", "similarity"}:
+            raise ValueError(
+                f"default_mode must be 'distance' or 'similarity', got '{default_mode}'."
+            )
+
+        G_nx = nx.read_graphml(path)
+
+        # Node ordering: preserve whatever order networkx gives us
+        nodes = list(G_nx.nodes())
+        n = len(nodes)
+
+        # Directedness from the networkx graph
+        directed = G_nx.is_directed()
+
+        # Build adjacency; we assume 'weight' attribute for weighted graphs.
+        # If some edges have no 'weight', networkx will treat them with default=1.0.
+        A = nx.to_scipy_sparse_array(
+            G_nx,
+            nodelist=nodes,
+            dtype=float,
+            weight="weight",
+        ).tocsr()
+
+        # Graph-level attributes (if present)
+        mode = G_nx.graph.get("mode", default_mode)
+        if mode not in {"distance", "similarity"}:
+            raise ValueError(
+                f"GraphML contains invalid or unsupported mode '{mode}'. "
+                "Expected 'distance' or 'similarity'."
+            )
+
+        # Weighted flag: prefer stored graph attribute; otherwise infer
+        weighted_attr = G_nx.graph.get("weighted", None)
+        if weighted_attr is not None:
+            weighted = bool(weighted_attr)
+        else:
+            # Fallback: if there are any edges, assume weighted=True
+            weighted = G_nx.number_of_edges() > 0
+
+        ignore_selfloops = G_nx.graph.get("ignore_selfloops", None)
+        keep_explicit_zeros = G_nx.graph.get("keep_explicit_zeros", None)
+
+        # Node attributes -> metadata DataFrame
+        # Collect union of all attribute keys
+        all_cols = set()
+        for _, attrs in G_nx.nodes(data=True):
+            all_cols.update(attrs.keys())
+        all_cols = sorted(all_cols)
+
+        if n > 0 and all_cols:
+            rows = []
+            for node in nodes:
+                attrs = G_nx.nodes[node]
+                row = {col: attrs.get(col, None) for col in all_cols}
+                rows.append(row)
+            meta = pd.DataFrame(rows)
+        else:
+            meta = None
+
+        # Build Graph via from_csr to respect the usual symmetrization / defaults
+        return cls.from_csr(
+            A,
+            mode=mode,
+            directed=directed,
+            weighted=weighted,
+            meta=meta,
+            ignore_selfloops=ignore_selfloops,
+            keep_explicit_zeros=keep_explicit_zeros,
+        )
+
     # -------- Core properties --------
     @property
     def n_nodes(self) -> int:
@@ -381,6 +482,36 @@ def to_igraph(self):
                 g.vs[col] = self.meta[col].tolist()
         return g
 
+    def to_graphml(self, path, *, include_graph_attrs: bool = True) -> None:
+        """
+        Export the graph to a GraphML file.
+
+        Parameters
+        ----------
+        path
+            File path or file-like object accepted by networkx.write_graphml.
+        include_graph_attrs
+            If True (default), store graph-level attributes such as
+            'mode', 'directed', 'weighted', 'ignore_selfloops',
+            and 'keep_explicit_zeros' in the GraphML file.
+        """
+        try:
+            import networkx as nx  # lazy import
+        except Exception as e:
+            raise ImportError("networkx is required for to_graphml().") from e
+
+        G_nx = self.to_networkx()
+
+        if include_graph_attrs:
+            G_nx.graph["mode"] = self.mode
+            G_nx.graph["directed"] = bool(self.directed)
+            G_nx.graph["weighted"] = bool(self.weighted)
+            # These can be None; GraphML will still store them as data keys
+            G_nx.graph["ignore_selfloops"] = self.ignore_selfloops
+            G_nx.graph["keep_explicit_zeros"] = self.keep_explicit_zeros
+
+        nx.write_graphml(G_nx, path)
+
     # -------- Utilities --------
     def copy(self) -> "Graph":
         return Graph(

From 35e385323beebf13d7ee6909568525666b7ff6cf Mon Sep 17 00:00:00 2001
From: Florian Huber
Date: Tue, 2 Dec 2025 19:52:58 +0100
Subject: [PATCH 2/2] add graphml tests

---
Review notes (free text after the three-dash line; not part of the commit):

* Adds four tests, each skipped when HAS_NX is false: two
  to_graphml -> from_graphml round trips (undirected with metadata,
  directed with mode="distance"), one check that default_mode is used
  when the file carries no 'mode', and one check that node attributes
  become metadata columns.
* NOTE(review): in test_from_graphml_builds_metadata_from_node_attributes
  the assertion `list(...) == [...] or sorted(...) == [...]` is
  redundant; whenever the first comparison holds the second one holds
  too, so `sorted(G.meta.columns) == ["group", "name"]` alone is
  equivalent.
* NOTE(review): the second hunk only adds one blank line before the
  "Distance/similarity conversion" section comment; it looks
  unintentional.
* NOTE(review): line breaks and indentation below were restored from a
  whitespace-collapsed copy of this series; context-line indentation is
  inferred. Verify with `git apply --check` before applying.

 tests/test_graph.py | 114 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 114 insertions(+)

diff --git a/tests/test_graph.py b/tests/test_graph.py
index 86c4057..da3f498 100644
--- a/tests/test_graph.py
+++ b/tests/test_graph.py
@@ -365,6 +365,119 @@ def test_to_networkx_types_and_node_attributes():
     assert pytest.approx(nxG[0][1]["weight"]) == G.adj[0, 1]
 
 
+@pytest.mark.skipif(not HAS_NX, reason="networkx not installed")
+def test_graphml_roundtrip_undirected_with_meta(tmp_path, S_dense, meta_df):
+    """Round-trip via GraphML for undirected, weighted graph with metadata."""
+    G = Graph.from_dense(
+        S_dense,
+        directed=False,
+        weighted=True,
+        mode="similarity",
+        meta=meta_df,
+    )
+    path = tmp_path / "graph_undirected.graphml"
+
+    # Export to GraphML
+    G.to_graphml(path)
+
+    # Import back
+    G2 = Graph.from_graphml(path)
+
+    assert G2.n_nodes == G.n_nodes
+    assert G2.directed == G.directed
+    assert G2.weighted == G.weighted
+    assert G2.mode == G.mode
+
+    # Adjacency must be equal
+    np.testing.assert_array_almost_equal(G2.adj.toarray(), G.adj.toarray())
+
+    # Metadata should be preserved (content-wise)
+    assert G2.meta is not None
+
+    # Compare metadata ignoring column order and dtype
+    meta1 = G2.meta.reindex(sorted(G2.meta.columns), axis=1).reset_index(drop=True)
+    meta2 = G.meta.reindex(sorted(G.meta.columns), axis=1).reset_index(drop=True)
+    pd.testing.assert_frame_equal(meta1, meta2, check_dtype=False)
+
+
+@pytest.mark.skipif(not HAS_NX, reason="networkx not installed")
+def test_graphml_roundtrip_directed(tmp_path):
+    """Round-trip via GraphML for a directed weighted graph."""
+    A = _csr([1.0, 2.0, 3.0], [0, 1, 2], [1, 2, 0], 3)
+    G = Graph.from_csr(A, directed=True, weighted=True, mode="distance")
+    path = tmp_path / "graph_directed.graphml"
+
+    G.to_graphml(path)
+    G2 = Graph.from_graphml(path)
+
+    assert G2.n_nodes == G.n_nodes
+    assert G2.directed is True
+    assert G2.weighted is True
+    assert G2.mode == "distance"
+    np.testing.assert_array_almost_equal(G2.adj.toarray(), G.adj.toarray())
+
+
+@pytest.mark.skipif(not HAS_NX, reason="networkx not installed")
+def test_from_graphml_uses_default_mode_when_missing(tmp_path):
+    """
+    Importing GraphML created directly by networkx without a 'mode' attribute
+    should fall back to default_mode.
+    """
+    import networkx as nx
+
+    G_nx = nx.Graph()
+    G_nx.add_edge(0, 1, weight=1.5)
+    path = tmp_path / "no_mode.graphml"
+    nx.write_graphml(G_nx, path)
+
+    # No 'mode' in graph attributes, so default_mode is used
+    G = Graph.from_graphml(path, default_mode="distance")
+    assert G.mode == "distance"
+    assert not G.directed
+    assert G.weighted
+    assert G.n_nodes == 2
+    np.testing.assert_array_almost_equal(
+        G.adj.toarray(),
+        np.array([[0.0, 1.5], [1.5, 0.0]], dtype=float),
+    )
+
+
+@pytest.mark.skipif(not HAS_NX, reason="networkx not installed")
+def test_from_graphml_builds_metadata_from_node_attributes(tmp_path):
+    """
+    Node attributes in a GraphML file should become meta columns in Graph.
+    """
+    import networkx as nx
+
+    G_nx = nx.Graph()
+    G_nx.add_node(0, name="a", group=1)
+    G_nx.add_node(1, name="b", group=2)
+    G_nx.add_edge(0, 1, weight=2.0)
+
+    path = tmp_path / "with_node_attrs.graphml"
+    nx.write_graphml(G_nx, path)
+
+    G = Graph.from_graphml(path, default_mode="similarity")
+
+    assert G.n_nodes == 2
+    assert G.meta is not None
+    assert list(G.meta.columns) == ["group", "name"] or sorted(G.meta.columns) == ["group", "name"]
+    # Check content, ignoring column order and dtype
+    meta_sorted = G.meta.reindex(sorted(G.meta.columns), axis=1)
+    expected = pd.DataFrame({"name": ["a", "b"], "group": [1, 2]})
+    expected_sorted = expected.reindex(sorted(expected.columns), axis=1)
+    pd.testing.assert_frame_equal(
+        meta_sorted.reset_index(drop=True),
+        expected_sorted,
+        check_dtype=False,
+    )
+    # adjacency matches the edge
+    np.testing.assert_array_almost_equal(
+        G.adj.toarray(),
+        np.array([[0.0, 2.0], [2.0, 0.0]], dtype=float),
+    )
+
+
 @pytest.mark.skipif(not HAS_IG, reason="python-igraph not installed")
 def test_to_igraph_types_and_attributes():
     A = _csr([0.2, 0.9, 0.3], [0, 1, 2], [1, 2, 0], 3)
@@ -382,6 +495,7 @@ def test_to_igraph_types_and_attributes():
     assert "weight" in igG.es.attributes()
 
 
+
 # ----------------- Distance/similarity conversion -----------------
 def test_convert_mode_distance_to_similarity_and_back_dense(S_dense, meta_df):
     G = Graph.from_dense(