Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 131 additions & 0 deletions graphconstructor/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,107 @@ def from_edges(
keep_explicit_zeros=keep_explicit_zeros,
)

@classmethod
def from_graphml(
    cls,
    path,
    *,
    default_mode: str = "similarity",
) -> "Graph":
    """
    Load a Graph from a GraphML file.

    Parameters
    ----------
    path
        File path or file-like object accepted by networkx.read_graphml.
    default_mode
        Fallback value for the Graph's mode if it is not stored in the
        GraphML file. Must be either 'distance' or 'similarity'.

    Returns
    -------
    Graph
        A new instance built through ``cls.from_csr``.

    Raises
    ------
    ValueError
        If ``default_mode`` or the mode stored in the file is neither
        'distance' nor 'similarity'.
    ImportError
        If networkx cannot be imported.

    Notes
    -----
    - Uses networkx.read_graphml under the hood.
    - Edge weights are taken from the 'weight' attribute, if present.
    - Node attributes become columns of the metadata DataFrame
      (columns are sorted by name; missing values are None).
    - Directedness comes from the graph networkx returns
      (``is_directed()``); a stored 'directed' attribute is not consulted.
    - Graph-level attributes 'mode', 'weighted', 'ignore_selfloops', and
      'keep_explicit_zeros' are honored if present. When the two optional
      flags are absent they are not forwarded, so ``from_csr`` applies
      its own defaults.
    """
    valid_modes = {"distance", "similarity"}

    # Validate the cheap argument before touching the optional dependency
    # or the file, so bad calls fail fast with a clear message.
    if default_mode not in valid_modes:
        raise ValueError(
            f"default_mode must be 'distance' or 'similarity', got '{default_mode}'."
        )

    try:
        import networkx as nx  # lazy import
    except Exception as e:
        raise ImportError("networkx is required for from_graphml().") from e

    def _as_bool(value) -> bool:
        # GraphML produced by other tools may declare flags as strings;
        # bool("false") would be True, so recognise textual falsy values.
        if isinstance(value, str):
            return value.strip().lower() not in {"false", "0", "no", ""}
        return bool(value)

    G_nx = nx.read_graphml(path)

    # Node ordering: preserve whatever order networkx gives us
    nodes = list(G_nx.nodes())

    # Build adjacency from the 'weight' edge attribute; edges without it
    # are given networkx's default weight of 1.0.
    A = nx.to_scipy_sparse_array(
        G_nx,
        nodelist=nodes,
        dtype=float,
        weight="weight",
    ).tocsr()

    mode = G_nx.graph.get("mode", default_mode)
    if mode not in valid_modes:
        raise ValueError(
            f"GraphML contains invalid or unsupported mode '{mode}'. "
            "Expected 'distance' or 'similarity'."
        )

    # Weighted flag: prefer the stored attribute; otherwise assume a graph
    # with at least one edge is weighted.
    weighted_attr = G_nx.graph.get("weighted")
    if weighted_attr is None:
        weighted = G_nx.number_of_edges() > 0
    else:
        weighted = _as_bool(weighted_attr)

    # Only forward the optional flags that the file actually stores.
    optional_flags = {}
    for key in ("ignore_selfloops", "keep_explicit_zeros"):
        stored = G_nx.graph.get(key)
        if stored is not None:
            optional_flags[key] = _as_bool(stored)

    # Node attributes -> metadata DataFrame over the union of all keys
    all_cols = sorted({key for _, attrs in G_nx.nodes(data=True) for key in attrs})
    if nodes and all_cols:
        meta = pd.DataFrame(
            [
                {col: G_nx.nodes[node].get(col) for col in all_cols}
                for node in nodes
            ]
        )
    else:
        meta = None

    # Build Graph via from_csr to respect the usual symmetrization / defaults
    return cls.from_csr(
        A,
        mode=mode,
        directed=G_nx.is_directed(),
        weighted=weighted,
        meta=meta,
        **optional_flags,
    )

# -------- Core properties --------
@property
def n_nodes(self) -> int:
Expand Down Expand Up @@ -381,6 +482,36 @@ def to_igraph(self):
g.vs[col] = self.meta[col].tolist()
return g

def to_graphml(self, path, *, include_graph_attrs: bool = True) -> None:
    """
    Export the graph to a GraphML file.

    Parameters
    ----------
    path
        File path or file-like object accepted by networkx.write_graphml.
    include_graph_attrs
        If True (default), store graph-level attributes such as
        'mode', 'directed', 'weighted', 'ignore_selfloops',
        and 'keep_explicit_zeros' in the GraphML file. Attributes whose
        value is None are left out of the file.

    Raises
    ------
    ImportError
        If networkx cannot be imported.
    """
    try:
        import networkx as nx  # lazy import
    except Exception as e:
        raise ImportError("networkx is required for to_graphml().") from e

    G_nx = self.to_networkx()

    if include_graph_attrs:
        graph_attrs = {
            "mode": self.mode,
            "directed": bool(self.directed),
            "weighted": bool(self.weighted),
            "ignore_selfloops": self.ignore_selfloops,
            "keep_explicit_zeros": self.keep_explicit_zeros,
        }
        # networkx's GraphML writer rejects None as a data value, so unset
        # attributes are omitted instead of aborting the export.
        G_nx.graph.update(
            {key: value for key, value in graph_attrs.items() if value is not None}
        )

    nx.write_graphml(G_nx, path)

# -------- Utilities --------
def copy(self) -> "Graph":
return Graph(
Expand Down
114 changes: 114 additions & 0 deletions tests/test_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,119 @@ def test_to_networkx_types_and_node_attributes():
assert pytest.approx(nxG[0][1]["weight"]) == G.adj[0, 1]


@pytest.mark.skipif(not HAS_NX, reason="networkx not installed")
def test_graphml_roundtrip_undirected_with_meta(tmp_path, S_dense, meta_df):
    """An undirected, weighted graph with metadata survives a GraphML round-trip."""

    def _canonical(frame):
        # Sort columns and reset the index so only the content is compared.
        return frame.reindex(sorted(frame.columns), axis=1).reset_index(drop=True)

    original = Graph.from_dense(
        S_dense,
        directed=False,
        weighted=True,
        mode="similarity",
        meta=meta_df,
    )
    graphml_file = tmp_path / "graph_undirected.graphml"

    # Write the graph out, then read it straight back in.
    original.to_graphml(graphml_file)
    restored = Graph.from_graphml(graphml_file)

    # Structural flags must match.
    assert restored.n_nodes == original.n_nodes
    assert restored.directed == original.directed
    assert restored.weighted == original.weighted
    assert restored.mode == original.mode

    # Edge weights must match.
    np.testing.assert_array_almost_equal(
        restored.adj.toarray(), original.adj.toarray()
    )

    # Metadata must match in content, ignoring column order and dtype.
    assert restored.meta is not None
    pd.testing.assert_frame_equal(
        _canonical(restored.meta),
        _canonical(original.meta),
        check_dtype=False,
    )


@pytest.mark.skipif(not HAS_NX, reason="networkx not installed")
def test_graphml_roundtrip_directed(tmp_path):
    """A directed, weighted distance graph survives a GraphML round-trip."""
    adjacency = _csr([1.0, 2.0, 3.0], [0, 1, 2], [1, 2, 0], 3)
    original = Graph.from_csr(
        adjacency, directed=True, weighted=True, mode="distance"
    )
    graphml_file = tmp_path / "graph_directed.graphml"

    original.to_graphml(graphml_file)
    restored = Graph.from_graphml(graphml_file)

    assert restored.n_nodes == original.n_nodes
    assert restored.directed is True
    assert restored.weighted is True
    assert restored.mode == "distance"
    np.testing.assert_array_almost_equal(
        restored.adj.toarray(), original.adj.toarray()
    )


@pytest.mark.skipif(not HAS_NX, reason="networkx not installed")
def test_from_graphml_uses_default_mode_when_missing(tmp_path):
    """
    A GraphML file written directly by networkx carries no 'mode' attribute,
    so the importer must fall back to default_mode.
    """
    import networkx as nx

    source = nx.Graph()
    source.add_edge(0, 1, weight=1.5)
    graphml_file = tmp_path / "no_mode.graphml"
    nx.write_graphml(source, graphml_file)

    # The file stores no 'mode', so the fallback applies.
    loaded = Graph.from_graphml(graphml_file, default_mode="distance")

    assert loaded.mode == "distance"
    assert not loaded.directed
    assert loaded.weighted
    assert loaded.n_nodes == 2

    expected_adj = np.array([[0.0, 1.5], [1.5, 0.0]], dtype=float)
    np.testing.assert_array_almost_equal(loaded.adj.toarray(), expected_adj)


@pytest.mark.skipif(not HAS_NX, reason="networkx not installed")
def test_from_graphml_builds_metadata_from_node_attributes(tmp_path):
    """
    Node attributes stored in a GraphML file must show up as meta columns.
    """
    import networkx as nx

    source = nx.Graph()
    for node_id, name, group in ((0, "a", 1), (1, "b", 2)):
        source.add_node(node_id, name=name, group=group)
    source.add_edge(0, 1, weight=2.0)

    graphml_file = tmp_path / "with_node_attrs.graphml"
    nx.write_graphml(source, graphml_file)

    loaded = Graph.from_graphml(graphml_file, default_mode="similarity")

    assert loaded.n_nodes == 2
    assert loaded.meta is not None
    # Column order is irrelevant; only the set of columns matters.
    assert sorted(loaded.meta.columns) == ["group", "name"]

    # Compare content, ignoring column order and dtype.
    expected = pd.DataFrame({"name": ["a", "b"], "group": [1, 2]})
    actual_sorted = loaded.meta.reindex(sorted(loaded.meta.columns), axis=1)
    expected_sorted = expected.reindex(sorted(expected.columns), axis=1)
    pd.testing.assert_frame_equal(
        actual_sorted.reset_index(drop=True),
        expected_sorted,
        check_dtype=False,
    )

    # The adjacency reflects the single weighted edge.
    expected_adj = np.array([[0.0, 2.0], [2.0, 0.0]], dtype=float)
    np.testing.assert_array_almost_equal(loaded.adj.toarray(), expected_adj)


@pytest.mark.skipif(not HAS_IG, reason="python-igraph not installed")
def test_to_igraph_types_and_attributes():
A = _csr([0.2, 0.9, 0.3], [0, 1, 2], [1, 2, 0], 3)
Expand All @@ -382,6 +495,7 @@ def test_to_igraph_types_and_attributes():
assert "weight" in igG.es.attributes()



# ----------------- Distance/similarity conversion -----------------
def test_convert_mode_distance_to_similarity_and_back_dense(S_dense, meta_df):
G = Graph.from_dense(
Expand Down