Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
- name: Install uv
uses: astral-sh/setup-uv@v5
with:
version: "0.9.5"
version: "0.9.28"
- name: Install dependencies
run: |
uv sync --group docs --no-group dev
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pre-commit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
- name: Install uv
uses: astral-sh/setup-uv@v5
with:
version: "0.9.5"
version: "0.9.28"
- name: Install dependencies (including dev)
run: uv sync --group dev
- name: Run pre-commit
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pypi-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
- name: Install uv
uses: astral-sh/setup-uv@v5
with:
version: "0.8.14"
version: "0.9.28"
- name: Build and publish
run: |
uv build
Expand Down
9 changes: 6 additions & 3 deletions graflo/architecture/actor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1297,8 +1297,8 @@ def remove_descendants_if(self, predicate: Callable[[ActorWrapper], bool]) -> No

Mutates the tree in place: for each DescendActor, filters its
descendants to exclude wrappers matching the predicate, after
recursing into each descendant. Use with find_descendants to
remove actors that reference disconnected vertices.
recursing into each descendant. Intermediate DescendActor
wrappers that become empty after pruning are also removed.

Args:
predicate: Callable(ActorWrapper) -> bool. Descendants for
Expand All @@ -1308,5 +1308,8 @@ def remove_descendants_if(self, predicate: Callable[[ActorWrapper], bool]) -> No
for d in list(self.actor.descendants):
d.remove_descendants_if(predicate=predicate)
self.actor._descendants[:] = [
d for d in self.actor.descendants if not predicate(d)
d
for d in self.actor.descendants
if not predicate(d)
and not (isinstance(d.actor, DescendActor) and d.count() == 0)
]
28 changes: 10 additions & 18 deletions graflo/architecture/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,10 +175,11 @@ def remove_disconnected_vertices(self) -> None:
"""Remove vertices that do not take part in any relation (disconnected).

Builds the set of vertex names that appear as source or target of any
edge, then removes from VertexConfig all other vertices. For each
resource, finds actors that reference disconnected vertices (via
find_descendants) and removes them from the actor tree. Resources
whose root actor references only disconnected vertices are removed.
edge, then removes from VertexConfig all other vertices. For each
resource, removes actors that reference disconnected vertices from the
actor tree. If a resource's root directly references a disconnected
vertex (single-step pipeline) or becomes empty after pruning, the
entire resource is removed.

Mutates this schema in place.
"""
Expand All @@ -189,7 +190,7 @@ def remove_disconnected_vertices(self) -> None:

self.vertex_config.remove_vertices(disconnected)

def mentions_disconnected(wrapper):
def _mentions_disconnected(wrapper) -> bool:
actor = wrapper.actor
if isinstance(actor, VertexActor):
return actor.name in disconnected
Expand All @@ -205,21 +206,12 @@ def mentions_disconnected(wrapper):
to_drop: list[Resource] = []
for resource in self.resources:
root = resource.root
to_remove = set(
root.find_descendants(actor_type=VertexActor, name=disconnected)
+ root.find_descendants(actor_type=TransformActor, vertex=disconnected)
+ root.find_descendants(
predicate=lambda w: isinstance(w.actor, EdgeActor)
and (
w.actor.edge.source in disconnected
or w.actor.edge.target in disconnected
),
)
)
if mentions_disconnected(root):
if _mentions_disconnected(root):
to_drop.append(resource)
continue
root.remove_descendants_if(lambda w: w in to_remove)
root.remove_descendants_if(_mentions_disconnected)
if not any(isinstance(a, VertexActor) for a in root.collect_actors()):
to_drop.append(resource)

for r in to_drop:
self.resources.remove(r)
Expand Down
155 changes: 154 additions & 1 deletion test/architecture/test_schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging

from graflo.architecture.actor import VertexActor
from graflo.architecture.actor import EdgeActor, VertexActor
from graflo.architecture.resource import Resource
from graflo.architecture.schema import Schema

Expand Down Expand Up @@ -70,3 +70,156 @@ def test_remove_disconnected_vertices(vertex_config_kg, edge_config_kg):
# Resource r1 should still exist but without the VertexActor(publication)
assert len(sch.resources) == 1
assert len(root.find_descendants(actor_type=VertexActor, name={"publication"})) == 0


def test_remove_disconnected_vertices_drops_resource(vertex_config_kg, edge_config_kg):
    """Drop a resource whose single pipeline step targets a disconnected vertex.

    Two resources are registered: ``r_connected`` (a vertex step plus an edge
    step) and ``r_only_pub`` (one step for ``publication``). After
    ``remove_disconnected_vertices`` the test expects ``publication`` to be
    gone from the vertex config and only ``r_connected`` to remain.
    """
    connected_resource = {
        "resource_name": "r_connected",
        "apply": [
            {"vertex": "entity"},
            {"source": "mention", "target": "entity"},
        ],
    }
    # Single-step pipeline: its only actor references the vertex that is
    # expected to be disconnected (publication takes part in no edge).
    publication_only_resource = {
        "resource_name": "r_only_pub",
        "apply": [
            {"vertex": "publication"},
        ],
    }
    sch = Schema.from_dict(
        {
            "vertex_config": vertex_config_kg,
            "edge_config": edge_config_kg,
            "resources": [connected_resource, publication_only_resource],
            "general": {"name": "kg"},
        }
    )

    # Precondition: both resources were loaded.
    assert len(sch.resources) == 2

    sch.remove_disconnected_vertices()

    # Only the vertices that take part in an edge are kept.
    assert sch.vertex_config.vertex_set == {"entity", "mention"}

    # r_only_pub is dropped entirely; r_connected is the sole survivor.
    assert len(sch.resources) == 1
    assert sch.resources[0].name == "r_connected"


def test_remove_disconnected_vertices_nested_resource(vertex_config_kg, edge_config_kg):
    """Check pruning of disconnected vertices inside nested descend blocks.

    Three resources exercise three situations:

    * ``r_mixed``: a nested block holds both a disconnected vertex
      (``publication``) and a connected one (``mention``); only the
      disconnected actor is expected to disappear.
    * ``r_nested_empty``: the nested block holds nothing but the
      disconnected vertex, so the block is expected to be dropped while
      the resource survives thanks to its top-level ``entity`` step.
    * ``r_all_disconnected``: the resource consists solely of such a
      nested block and is expected to be removed altogether.
    """

    def vertex_hits(resource, vertex_name):
        # Number of VertexActor wrappers for ``vertex_name`` under the root.
        return len(
            resource.root.find_descendants(actor_type=VertexActor, name={vertex_name})
        )

    # Per the original test notes: the fixtures declare the vertices
    # publication, entity and mention, and every edge runs between entity
    # and mention only, which leaves publication disconnected.
    mixed_spec = {
        "resource_name": "r_mixed",
        "apply": [
            {"vertex": "entity"},
            {
                "key": "items",
                "apply": [
                    {"vertex": "publication"},
                    {"vertex": "mention"},
                ],
            },
            {"source": "mention", "target": "entity"},
        ],
    }
    nested_empty_spec = {
        "resource_name": "r_nested_empty",
        "apply": [
            {"vertex": "entity"},
            {
                "key": "items",
                "apply": [
                    {"vertex": "publication"},
                ],
            },
        ],
    }
    all_disconnected_spec = {
        "resource_name": "r_all_disconnected",
        "apply": [
            {
                "key": "items",
                "apply": [
                    {"vertex": "publication"},
                ],
            },
        ],
    }
    sch = Schema.from_dict(
        {
            "vertex_config": vertex_config_kg,
            "edge_config": edge_config_kg,
            "resources": [mixed_spec, nested_empty_spec, all_disconnected_spec],
            "general": {"name": "kg"},
        }
    )

    # -- preconditions: everything loaded, publication present where declared --
    assert len(sch.resources) == 3
    r_mixed = sch.fetch_resource("r_mixed")
    r_nested_empty = sch.fetch_resource("r_nested_empty")
    assert vertex_hits(r_mixed, "publication") == 1
    assert vertex_hits(r_nested_empty, "publication") == 1

    sch.remove_disconnected_vertices()

    assert sch.vertex_config.vertex_set == {"entity", "mention"}

    # r_mixed: publication is pruned from the nested block, while the
    # mention vertex and the edge step are still there.
    assert vertex_hits(r_mixed, "publication") == 0
    assert vertex_hits(r_mixed, "mention") == 1
    assert len(r_mixed.root.find_descendants(actor_type=EdgeActor)) == 1

    # r_nested_empty: the emptied nested block is gone; the only thing
    # left is the top-level entity vertex.
    assert vertex_hits(r_nested_empty, "publication") == 0
    assert vertex_hits(r_nested_empty, "entity") == 1
    assert r_nested_empty.count() == 1

    # r_all_disconnected: nothing connected remained, so the whole
    # resource has been removed from the schema.
    assert len(sch.resources) == 2
    surviving_names = {r.name for r in sch.resources}
    assert "r_all_disconnected" not in surviving_names