diff --git a/docs/tutorial/_lift_order.ipynb b/docs/tutorial/_lift_order.ipynb deleted file mode 100644 index a4129c470..000000000 --- a/docs/tutorial/_lift_order.ipynb +++ /dev/null @@ -1,1346 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 74, - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "from torch_geometric.utils import cumsum, coalesce, degree, sort_edge_index\n", - "import pathpyG as pp\n", - "from torch_geometric.data import Data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Explanation for order lifting\n", - "\n", - "\n", - "NB: Turns out everything depends from the initial sorting (even more than it seems from just looking at the code). \n", - "The initial sorting by source id leads to **sorted** ho source indices.\n", - "Notice that each node has get edges (ho-nodes) with consective indices (with length equal to its outdegree).\n" - ] - }, - { - "cell_type": "code", - "execution_count": 104, - "metadata": {}, - "outputs": [], - "source": [ - "# num_nodes = 3\n", - "# edge_index = torch.tensor([[2,1,1],[1,0,2]])\n", - "num_nodes = 6\n", - "edge_index = torch.tensor([[0,1,3,4,2,2,5],[2,2,5,5,3,4,0]])" - ] - }, - { - "cell_type": "code", - "execution_count": 105, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "
\n", - "\n", - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 105, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data = Data(edge_index = edge_index)\n", - "\n", - "g = pp.Graph(data,pp.IndexMap(list(\"012345\")))\n", - "pp.plot(g, node_label=g.mapping.node_ids.tolist())" - ] - }, - { - "cell_type": "code", - "execution_count": 138, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "
\n", - "\n", - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 138, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pp.plot(ho_graph)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "first we need to sort the edge index by the source indexes" - ] - }, - { - "cell_type": "code", - "execution_count": 106, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([[0, 1, 2, 2, 3, 4, 5],\n", - " [2, 2, 3, 4, 5, 5, 0]])" - ] - }, - "execution_count": 106, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "edge_index = sort_edge_index(edge_index, num_nodes=num_nodes)\n", - "edge_index" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Then we compute the outdegrees." - ] - }, - { - "cell_type": "code", - "execution_count": 107, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([1, 1, 2, 1, 1, 1])" - ] - }, - "execution_count": 107, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "outdegree = degree(edge_index[0], dtype = torch.long, num_nodes=num_nodes)\n", - "outdegree" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The 'outdegree_per_dst' gives us the number of out edges we would find by arriving to the node through one of the edges that has it as target.\n", - "Indeed, edge_index[1] is a list of all the instubs. \n", - "Associating each in-stub with the outdegree of the corresponding node gives us all 2-paths grouped by the in-edge (in edge is a x-v edge, where v is the node of which we know the outdegree -- Notice that at this time the node x of the x-edge is not considered)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 108, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([2, 2, 1, 1, 1, 1, 1])" - ] - }, - "execution_count": 108, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "outdegree_per_dst = outdegree[edge_index[1]]\n", - "outdegree_per_dst\n", - "\n", - "# nodes 0,2,1" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "From 'outdegree_per_dst' we can unpack the information per in-edge contained in the outdegree_per_dst by using the torch funtion 'repeat_interleave'.\n", - "'repeat_interleave' works like this: \n", - "If the repeats is tensor([n1, n2, n3, …]), then the output will be tensor([0, 0, …, 1, 1, …, 2, 2, …, …]) where 0 appears n1 times, 1 appears n2 times, 2 appears n3 times, etc.\n", - "From the line grap perspective, we can see it as creating the indexes of (lifted, new) source-higher-order nodes.\n", - "In other words, for each (unknow) starting x, a new higher-order node-index is created and repeated a number of time given by the outdegree of v, giving the amount of times x-v appears as a source higher-order node. 
\n" - ] - }, - { - "cell_type": "code", - "execution_count": 109, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([0, 1, 1, 2, 2, 2])" - ] - }, - "execution_count": 109, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "torch.repeat_interleave(torch.tensor([1,2,3]))" - ] - }, - { - "cell_type": "code", - "execution_count": 110, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([0, 0, 1, 1, 2, 3, 4, 5, 6])" - ] - }, - "execution_count": 110, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ho_edge_srcs = torch.repeat_interleave(outdegree_per_dst)\n", - "ho_edge_srcs\n", - "\n", - "# 123 124 - 023 024 - 234 - 234 \n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Finally, we define 'num_new_edges' as the total number of x-v-w. \n", - "This comes from taking each incident edge x-v and considering all the w it can reach (given by its outdegree)\n", - "\n", - "\n", - "...i.e, the number of two paths." 
- ] - }, - { - "cell_type": "code", - "execution_count": 111, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor(9)" - ] - }, - "execution_count": 111, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "num_new_edges = outdegree_per_dst.sum()\n", - "num_new_edges" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "At the end of this first part of the process, we have take the edges incident to each node \n", - "and have then repeated their index a number of time equals to the outdegree of that node" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Then,\n", - "Create destination nodes that start the indexing after the cumulative sum of the outdegree\n", - "of all previous nodes in the ordered sequence of nodes \n", - "\n", - "\n", - "\n", - "This cumsum tells us the number of times we need each index (in teh out-indices?). i.e., \n", - "If the first node has oudegree two, we need to allocate the index twice\n", - "the next node, needs the number reached by the previous plus the outdegree of the next node. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**!!!**\n", - "\n", - "ptrs for each node v (Notice it is applied on the outdegree vector which is in R^|V|) stores a starting index for the edges outgoing from it (i.e. the destination ho-nodes). \n", - "Notice that the indexes of the edges outgoing from v (in position with index i) end where the ones of the next node (at position i+1) start. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 112, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([0, 1, 2, 4, 5, 6])" - ] - }, - "execution_count": 112, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\n", - "\n", - "ptrs = cumsum(outdegree, dim=0)[:-1] # position in the first order tensor where edge indexes of edges startin from each node (let s say w) start\n", - "# 00001112 -> 0,4,7 (related to first order edge indices)\n", - "\n", - "# node 0 needs from 0 to 4, 1 from 4 to 7, and 2... (the rest?)\n", - "\n", - "ptrs\n", - "\n", - "\n", - "# ptrs[edge_index[1]]\n", - "# then we index by the target indexes in fo-nodes: edge_index[1]. \n", - "# this will tell us the starting position of the target node as source node in the sorted fo-edge indices\n", - "# i.e., Target node w if taken as source will have positions starting from ptrs[w] in edge index\n", - "# e.g., edge_index[1] -> [4,4,7] means that the continueation (source) indexes for the the nodes in edge_index[1] start at [4,4,7]\n", - "# notice that this would be impossible if the edge_index weren't sorted\n", - "\n", - "\n", - "# torch.repeat_interleave(ptrs[edge_index[1]], outdegree_per_dst)\n", - "# Then, we repeat that starting index (of the target node w as source) a number of time equal to the nodes' outdegree\n", - "# This will give us the starting index (of the target node w as source) repeated the number of times w is actually used as source in the line-graph\n", - "# Givers the starting index (of w as source) for all edges w->y \n", - "# e.g., [[4,4],[4,4],[7,7,7]] means that a-w_0 and b-w_0 can be continued twice, and c-w_1 three times (using fo-edge-indices starting at 4,4,and 7 ,repsectively)\n", - "# the above gives an initial 'ho_edge_dsts' (pointers to the w_i-k that are ho_edge_dsts)\n", - "\n", - "# ---Index correction ---\n", - "# Until this point we only have the starting (nod) index of the fo-edges that can 
(transitively) propagate an edge with w as target.\n", - "# we need to move to specific --not starting-- (ho-node) indexes of ho-edges.\n", - "# \n", - "# first, we generated a tensor with valeus from 0 to num_new_edges ()\n", - "# then, we compute 'cumsum(outdegree_per_dst, dim=0)' which gives where the indexes of each w_i-k start in 'ho_edge_dsts' \n", - "# e.g., [[4,4],[4,4],[7,7,7]] -> [0,2,5] (dim again equal to number of fo-edges) (WRONG HERE)\n", - "# this says \n", - "# \n", - "# then, we select them based on the ho_edge_srsc.\n", - "# so if ho_edge_srcs = [0,1,1,1,2,2], we ll select [0,2,2,2,5,5] \n", - "# what is this???\n", - "# if we substrat this from the tensor with valeus from 0 to num_new_edges" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Select the number of indexes (repetitions) needed by each ho-target index.\n", - "From which entry to which entry we ll have teh index of a (ho?) node\n", - "\n", - "since the edge indices have been ordered by source index, now ptrs has the " - ] - }, - { - "cell_type": "code", - "execution_count": 113, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([5, 6, 6, 7, 7, 7])" - ] - }, - "execution_count": 113, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "torch.repeat_interleave(torch.tensor([5,6,7]),torch.tensor([1,2,3]))" - ] - }, - { - "cell_type": "code", - "execution_count": 114, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([2, 2, 3, 4, 5, 5, 0])" - ] - }, - "execution_count": 114, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "edge_index[1]" - ] - }, - { - "cell_type": "code", - "execution_count": 115, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([2, 2, 4, 5, 6, 6, 0])" - ] - }, - "execution_count": 115, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# this gives the number\n", - 
"ptrs[edge_index[1]]" - ] - }, - { - "cell_type": "code", - "execution_count": 116, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([2, 2, 1, 1, 1, 1, 1])" - ] - }, - "execution_count": 116, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "outdegree_per_dst" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "ptrs[edge_index[1]]: we use 'edge_index[1]' to couple each target node to the its starting-index-pointer (as obtained in ptrs) \n", - "\n", - "Then, use 'repeat_interleave' to repeat the pointer (containing the starting index) a number of time given by 'outdegree_per_dst'. \n", - "Remember that ' outdegree_per_dst' maps each (target) node v, to its outdegree as a source. \n", - "Therfore, at this point, ho_edge_dsts maps each node v to its starting-index-pointer. \n", - "Succesively, the pointer will be corrected so that they contain the index and not the starting-index-pointer. \n" - ] - }, - { - "cell_type": "code", - "execution_count": 117, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([2, 2, 2, 2, 4, 5, 6, 6, 0])" - ] - }, - "execution_count": 117, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# if I focus on the target indices, I have the number of time we need to repear the index of outstub.\n", - "\n", - "\n", - "# Building new edge_index\n", - "# node in ho is edge\n", - "# degree of fo-ones tells how many new outgoing edges we need to allocate for the ho-node\n", - "# diff from before... (where using outdegree...)\n", - "# ...cause \n", - "\n", - "# the pointers gives where edges start for each node in fo. Indexing by destination, I get \n", - "\n", - "\n", - "ho_edge_dsts = torch.repeat_interleave(ptrs[edge_index[1]], outdegree_per_dst)\n", - "ho_edge_dsts" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The cell above give each (target) node v a list of pointers. 
\n", - "Here we computer an index correction to adjust the indices (go beyond the starting-index-pointer)\n", - "\n", - "\n", - "\n", - "*(outdegree-per_dst was giving the out degree of each target node v in 'edge_index[1]')*\n", - "'cumsum(outdegree_per_dst, dim=0)' stores a starting index for the edges incoming into a node v (in this case). \n", - "Notice that the dimensionality of this tensor is equal to the number of edges (one entry for each time we take outdegree of a target node in 'edges_index[1]')\n", - "\n", - "\n", - "\n", - "Then we select" - ] - }, - { - "cell_type": "code", - "execution_count": 118, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([0, 1, 2, 3, 4, 5, 6, 7, 8])" - ] - }, - "execution_count": 118, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# num_new_edges = outdegree_per_dst.sum()\n", - "idx_correction = torch.arange(num_new_edges, dtype=torch.long, device=edge_index.device)\n", - "idx_correction" - ] - }, - { - "cell_type": "code", - "execution_count": 119, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([[0, 1, 2, 2, 3, 4, 5],\n", - " [2, 2, 3, 4, 5, 5, 0]])" - ] - }, - "execution_count": 119, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "edge_index" - ] - }, - { - "cell_type": "code", - "execution_count": 120, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([2, 2, 1, 1, 1, 1, 1])" - ] - }, - "execution_count": 120, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "outdegree_per_dst" - ] - }, - { - "cell_type": "code", - "execution_count": 121, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([0, 2, 4, 5, 6, 7, 8, 9])" - ] - }, - "execution_count": 121, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cumsum(outdegree_per_dst, dim=0)" - ] - }, - { - "cell_type": "code", - "execution_count": 122, - 
"metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([0, 1, 0, 1, 0, 0, 0, 0, 0])" - ] - }, - "execution_count": 122, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "idx_correction - cumsum(outdegree_per_dst, dim=0)[ho_edge_srcs]" - ] - }, - { - "cell_type": "code", - "execution_count": 123, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([0, 0, 1, 1, 2, 3, 4, 5, 6])" - ] - }, - "execution_count": 123, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ho_edge_srcs" - ] - }, - { - "cell_type": "code", - "execution_count": 124, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([2, 2, 2, 2, 4, 5, 6, 6, 0])" - ] - }, - "execution_count": 124, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ho_edge_dsts" - ] - }, - { - "cell_type": "code", - "execution_count": 125, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([0, 0, 2, 2, 4, 5, 6, 7, 8])" - ] - }, - "execution_count": 125, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# cumsum(outdegree_per_dst, dim=0) gives the number of entries required by each target of each source\n", - "# here, selecting with ho_edge_srcs \n", - "cumsum(outdegree_per_dst, dim=0)[ho_edge_srcs]" - ] - }, - { - "cell_type": "code", - "execution_count": 126, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([0, 1, 0, 1, 0, 0, 0, 0, 0])" - ] - }, - "execution_count": 126, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "idx_correction -= cumsum(outdegree_per_dst, dim=0)[ho_edge_srcs]\n", - "idx_correction" - ] - }, - { - "cell_type": "code", - "execution_count": 127, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([2, 3, 2, 3, 4, 5, 6, 6, 0])" - ] - }, - "execution_count": 127, - "metadata": {}, - "output_type": "execute_result" - } - ], - 
"source": [ - "ho_edge_dsts += idx_correction\n", - "ho_edge_dsts" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "All in all, we initialize 'ho_edge_dsts' entries with the minumum index the target ho-node v-t can get (???)(considering that also these indices are sorted by the source indices).\n", - "\n", - "\n", - "Then, we build upon this.\n", - "We initialize the correction as a tensor 'idx_correction' with the index of each position ('torch.arange(num_new_edges)')\n", - "Then we compute a starting index for the edges incoming into v (or t?); we subtract this from 'idx_correction'.\n", - "This gives the index correction. \n", - "Thus, the index correction ... gives something that ????\n" - ] - }, - { - "cell_type": "code", - "execution_count": 130, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([2, 3, 2, 3, 4, 5, 6, 6, 0])" - ] - }, - "execution_count": 130, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ho_edge_dsts" - ] - }, - { - "cell_type": "code", - "execution_count": 137, - "metadata": {}, - "outputs": [], - "source": [ - "data_ho = Data(edge_index=torch.stack([ho_edge_srcs, ho_edge_dsts], dim=0))\n", - "ho_graph = pp.Graph(data_ho)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/docs/tutorial/_multi_order_concepts.ipynb b/docs/tutorial/_multi_order_concepts.ipynb deleted file mode 100644 index 11671a98c..000000000 --- a/docs/tutorial/_multi_order_concepts.ipynb +++ /dev/null @@ 
-1,91 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# TODO: Create a Notebook that explains the new concepts for order lifting for DAGs and temporal graphs." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def lift_order_edge_index(edge_index: torch.Tensor, num_nodes: int, edge_weights: torch.Tensor) -> torch.Tensor:\n", - " \"\"\"\n", - " Do a line graph transformation on the edge index to lift the order of the graph by one.\n", - "\n", - " Args:\n", - " edge_index: A **sorted** edge index tensor of shape (2, num_edges).\n", - " num_nodes: The number of nodes in the graph.\n", - " \"\"\"\n", - "\n", - " # Since this is a complicated function, we will use the following example to explain the steps:\n", - " # Example:\n", - " # edge_index = [[0, 0, 1, 1, 1, 3, 4, 5, 6],\n", - " # [1, 3, 2, 3, 6, 4, 5, 7, 5]]\n", - "\n", - " # Compute the outdegree of each node used to get all the edge combinations leading to a higher-order edge\n", - " # Example:\n", - " # outdegree = [2, 3, 0, 1, 1, 1, 1, 0]\n", - " outdegree = degree(edge_index[0], dtype=torch.long, num_nodes=num_nodes)\n", - "\n", - " # For each center node, we need to combine each outgoing edge with each incoming edge\n", - " # We achieve this by creating `outdegree` number of edges for each destination node of the old edge index\n", - " # Example:\n", - " # outdegree_per_dst = [3, 1, 0, 1, 1, 1, 1, 0, 1]\n", - " # num_new_edges = 9\n", - " outdegree_per_dst = outdegree[edge_index[1]]\n", - " num_new_edges = outdegree_per_dst.sum()\n", - "\n", - " # Use each edge from the edge index as node and assign the new indices in the order of the original edge index\n", - " # Each higher order node has one outgoing edge for each outgoing edge of the original destination node\n", - " # Since we keep the ordering, we can just repeat each node using the outdegree_per_dst tensor\n", - " # Example:\n", - " # 
ho_edge_srcs = [0, 0, 0, 1, 3, 4, 5, 6, 8]\n", - " ho_edge_srcs = torch.repeat_interleave(outdegree_per_dst)\n", - "\n", - " # For each node, we calculate pointers of shape (num_nodes,) that indicate the start of the original edges\n", - " # (new higher-order nodes) that have the node as source node\n", - " # (Note we use PyG's cumsum function because it adds a 0 at the beginning of the tensor and\n", - " # we want the `left` boundaries of the intervals, so we also remove the last element of the result with [:-1])\n", - " # Example:\n", - " # ptrs = [0, 2, 5, 5, 6, 7, 8, 9]\n", - " ptrs = cumsum(outdegree, dim=0)[:-1]\n", - "\n", - " # Use these pointers to get the start of the edges for each higher-order src and repeat it `outdegree` times\n", - " # Since we keep the ordering, all new higher-order edges that have the same src are indexed consecutively\n", - " # Example:\n", - " # ho_edge_dsts = [2, 2, 2, 5, 5, 8, 6, 7, 7]\n", - " ho_edge_dsts = torch.repeat_interleave(ptrs[edge_index[1]], outdegree_per_dst)\n", - "\n", - " # Since the above only repeats the start of the edges, we need to add (0, 1, 2, 3, ...)\n", - " # for all `outdegree` number of edges consecutively to get the correct destination nodes\n", - " # We can achieve this by starting with a range from (0, 1, ..., num_new_edges)\n", - " # Example:\n", - " # idx_correction = [0, 1, 2, 3, 4, 5, 6, 7, 8]\n", - " idx_correction = torch.arange(num_new_edges, dtype=torch.long, device=edge_index.device)\n", - " # Then, we subtract the cumulative sum of the outdegree for each destination node to get a tensor.\n", - " # Example:\n", - " # idx_correction = [0, 1, 2, 0, 0, 0, 0, 0, 0]\n", - " idx_correction -= cumsum(outdegree_per_dst, dim=0)[ho_edge_srcs]\n", - " # Add this tensor to the destination nodes to get the correct destination nodes for each higher-order edge\n", - " # Example:\n", - " # ho_edge_dsts = [2, 3, 4, 5, 5, 8, 6, 7, 7]\n", - " ho_edge_dsts += idx_correction\n", - " # tensor([[0, 0, 0, 1, 3, 4, 
5, 6, 8],\n", - " # [2, 3, 4, 5, 5, 8, 6, 7, 7]])\n", - " return torch.stack([ho_edge_srcs, ho_edge_dsts], dim=0)" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/docs/tutorial/_new_pathData_test.ipynb b/docs/tutorial/_new_pathData_test.ipynb deleted file mode 100644 index 687420a29..000000000 --- a/docs/tutorial/_new_pathData_test.ipynb +++ /dev/null @@ -1,299 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [], - "source": [ - "from typing import Optional\n", - "\n", - "from tqdm import trange\n", - "import torch\n", - "from torch import Tensor\n", - "from torch_geometric.data import Data\n", - "from torch_geometric.loader import DataLoader\n", - "from torch_geometric.nn import MessagePassing\n", - "from torch_geometric.experimental import disable_dynamic_shapes\n", - "from torch_geometric.nn.aggr import Aggregation\n", - "from torch_geometric.utils import coalesce, degree, cumsum\n", - "from torch_geometric import EdgeIndex\n", - "\n", - "import pathpyG as pp" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [], - "source": [ - "dags = pp.DAGData()\n", - "dags.append(torch.tensor([[3,0,1],[0,1,2]]))\n", - "dags.append(torch.tensor([[1,0,2],[0,2,0]]))\n", - "dags.append(torch.tensor([[0,1],[1,2]]))" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "DAGData with 3 dags and total weight 3\n" - ] - } - ], - "source": [ - "print(dags)" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [], - "source": [ - "def lift_order_edge_index(edge_index: EdgeIndex | torch.Tensor, num_nodes: int | None = None) -> torch.Tensor:\n", - " # Since this is a complicated function, we will use the following example to explain the steps:\n", - " # 
Example:\n", - " # edge_index = [[0, 0, 1, 1, 1, 3, 4, 5, 6],\n", - " # [1, 3, 2, 3, 6, 4, 5, 7, 5]]\n", - "\n", - " # Compute the outdegree of each node which we will use to get all the edge combinations that lead to a higher order edge\n", - " # Example:\n", - " # outdegree = [2, 3, 0, 1, 1, 1, 1, 0]\n", - " outdegree = degree(edge_index[0], dtype=torch.long, num_nodes=num_nodes)\n", - "\n", - " # For each center node, we need to combine each outgoing edge with each incoming edge\n", - " # We achieve this by creating `outdegree` number of edges for each destination node of the old edge index\n", - " # Example:\n", - " # outdegree_per_dst = [3, 1, 0, 1, 1, 1, 1, 0, 1]\n", - " # num_new_edges = 9\n", - " outdegree_per_dst = outdegree[edge_index[1]]\n", - " num_new_edges = outdegree_per_dst.sum()\n", - "\n", - " # We use each edge from the edge index as new node and assign the new indices in the order of the original edge index\n", - " # Each higher order node has one outgoing edge for each outgoing edge of the original destination node\n", - " # Since we keep the ordering, we can just repeat each node using the outdegree_per_dst tensor\n", - " # Example:\n", - " # ho_edge_srcs = [0, 0, 0, 1, 3, 4, 5, 6, 8]\n", - " ho_edge_srcs = torch.repeat_interleave(outdegree_per_dst)\n", - "\n", - " # For each node, we calculate pointers of shape (num_nodes,) that indicate the start of the original edges (new higher order nodes) that have the node as source node\n", - " # (Note we use PyG's cumsum function because it adds a 0 at the beginning of the tensor and we want the `left` boundaries of the intervals, so we also remove the last element of the result with [:-1])\n", - " # Example:\n", - " # ptrs = [0, 2, 5, 5, 6, 7, 8, 9]\n", - " ptrs = cumsum(outdegree, dim=0)[:-1]\n", - "\n", - " # Use these pointers to get the start of the edges for each higher order source node and repeat it `outdegree` times\n", - " # Since we keep the ordering, all new higher order edges that have 
the same source node are indexed consecutively\n", - " # Example:\n", - " # ho_edge_dsts = [2, 2, 2, 5, 5, 8, 6, 7, 7]\n", - " ho_edge_dsts = torch.repeat_interleave(ptrs[edge_index[1]], outdegree_per_dst)\n", - "\n", - " # Since the above only repeats the start of the edges, we need to add (0, 1, 2, 3, ...) for all `outdegree` number of edges consecutively to get the correct destination nodes\n", - " # We can achieve this by starting with a range from (0, 1, ..., num_new_edges)\n", - " # Example: \n", - " # idx_correction = [0, 1, 2, 3, 4, 5, 6, 7, 8]\n", - " idx_correction = torch.arange(num_new_edges, dtype=torch.long, device=edge_index.device)\n", - " # Then, we subtract the cumulative sum of the outdegree for each destination node to get a tensor.\n", - " # Example:\n", - " # idx_correction = [0, 1, 2, 0, 0, 0, 0, 0, 0]\n", - " idx_correction -= cumsum(outdegree_per_dst, dim=0)[ho_edge_srcs]\n", - " # Finally, we add this tensor to the destination nodes to get the correct destination nodes for each higher order edge\n", - " # Example:\n", - " # ho_edge_dsts = [2, 3, 4, 5, 5, 8, 6, 7, 7]\n", - " ho_edge_dsts += idx_correction\n", - " # tensor([[0, 0, 0, 1, 3, 4, 5, 6, 8],\n", - " # [2, 3, 4, 5, 5, 8, 6, 7, 7]])\n", - " return torch.stack([ho_edge_srcs, ho_edge_dsts], dim=0)" - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "metadata": {}, - "outputs": [], - "source": [ - "def map_higher_order_index(edge_indices, k):\n", - " \"\"\"map node indices in k-th order edge index\n", - " to corresponding tensor of k first-order nodes\n", - " \"\"\" \n", - "\n", - " # we need to reverse the node indices\n", - " # to construct an edge_index with k-th order nodes\n", - " \n", - " ei = edge_indices[k].reshape(2,-1,1)\n", - " \n", - " j = 0\n", - " for i in range(k-1, 0, -1):\n", - " src_edge, tgt_edge = ei\n", - " src = edge_indices[i][:,src_edge]\n", - " tgt = edge_indices[i][:,tgt_edge]\n", - " if j == 0:\n", - " ei = torch.cat([src, tgt], dim=2)\n", - " 
else:\n", - " ei = torch.cat([src[:,:,:j], tgt], dim=2)\n", - " j -= 1\n", - " return ei" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def from_DAGs(data: pp.DAGData, max_order: int = 1) -> pp.MultiOrderModel:\n", - " \"\"\"Creates multiple higher-order De Bruijn graphs for paths in DAGData.\"\"\"\n", - " m = pp.MultiOrderModel()\n", - "\n", - " data_list = [Data(edge_index=dag.long()) for dag in data.dags]\n", - " # We use a dataloader from PyG to combine all the edge indices into a single graph with multiple disjoint subgraphs\n", - " # If two paths share a node, the node is duplicated in the resulting graph and the new higher order edges need to be aggregated afterwards\n", - " # Note that due to the `batch_size` parameter, we can also do computations on a set of paths that are too large to fit into memory at once\n", - " dag_graph = next(iter(DataLoader(data_list, batch_size=len(data.dags))))\n", - " dag_edge_index = dag_graph.edge_index\n", - " dag_edge_index = coalesce(dag_edge_index)\n", - "\n", - " print(dag_edge_index)\n", - " print(dag_graph.ptr)\n", - " print(dag_graph.batch)\n", - "\n", - " edge_index = pp.MultiOrderModel.map_batch_indices(dag_edge_index, dag_graph.batch, dag_graph.ptr)\n", - " unique_nodes = torch.unique(edge_index)\n", - " m.layers[1] = pp.Graph(Data(edge_index=edge_index, num_nodes=unique_nodes.size(), fo_nodes=unique_nodes.reshape(-1, 1)))\n", - " print(m.layers[1].data.edge_index)\n", - " print(m.layers[1].data.fo_nodes)\n", - "\n", - " edge_indices = {}\n", - " edge_indices[1] = edge_index\n", - "\n", - " for k in range(2, max_order+1):\n", - " print('=== k={0} ==='.format(k))\n", - " num_nodes = torch.unique(dag_edge_index).size(0)\n", - " print('num nodes = ', num_nodes)\n", - " ho_index = lift_order_edge_index(dag_edge_index, num_nodes = num_nodes)\n", - " edge_indices[k] = ho_index\n", - " print(ho_index)\n", - "\n", - " # Map k-th-order edge index to nodes 
in (k-1)-th order edge index\n", - " # src_edge, tgt_edge = ho_index\n", - " # src = dag_edge_index[:,src_edge]\n", - " # tgt = dag_edge_index[:,tgt_edge]\n", - " # print(src)\n", - " # print(tgt)\n", - "\n", - " #ho_edge_index, inverse = x.unique(dim=0, return_inverse=True)\n", - "\n", - " # weights of the two unique higher-order edges should be N and 3*N\n", - " # weights of k-th element in output = sum of all w at indices where inverse is k\n", - " #weights = torch.zeros(ho_edge_index.size()[0], dtype=torch.long).index_add(0, inverse, w)\n", - " \n", - "\n", - " #m.layers[k] = pp.Graph(data=Data(edge_index=dag_edge_index))\n", - "\n", - " dag_edge_index = coalesce(ho_index)\n", - "\n", - " return m, edge_indices" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "tensor([[0, 1, 3, 4, 5, 6, 7, 8],\n", - " [1, 2, 0, 6, 4, 4, 8, 9]])\n", - "tensor([ 0, 4, 7, 10])\n", - "tensor([0, 0, 0, 0, 1, 1, 1, 2, 2, 2])\n", - "EdgeIndex([[0, 0, 0, 1, 1, 1, 2, 3],\n", - " [1, 2, 1, 2, 0, 2, 0, 0]], sparse_size=(4, 4), nnz=8,\n", - " sort_order=row)\n", - "tensor([[0],\n", - " [1],\n", - " [2],\n", - " [3]])\n", - "=== k=2 ===\n", - "num nodes = 10\n", - "tensor([[0, 2, 3, 4, 5, 6],\n", - " [1, 0, 5, 3, 3, 7]])\n", - "=== k=3 ===\n", - "num nodes = 8\n", - "tensor([[1, 2, 3, 4],\n", - " [0, 4, 2, 2]])\n" - ] - } - ], - "source": [ - "m, edge_indices = from_DAGs(dags, max_order=3)" - ] - }, - { - "cell_type": "code", - "execution_count": 89, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([[[3, 0, 1],\n", - " [0, 2, 0],\n", - " [1, 0, 2],\n", - " [2, 0, 2]],\n", - "\n", - " [[0, 1, 2],\n", - " [2, 0, 2],\n", - " [0, 2, 0],\n", - " [0, 2, 0]]])" - ] - }, - "execution_count": 89, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "map_higher_order_index(edge_indices, k=3)" - ] - }, - { - 
"cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/docs/tutorial/_new_pathData_working.ipynb b/docs/tutorial/_new_pathData_working.ipynb deleted file mode 100644 index e11e33a1c..000000000 --- a/docs/tutorial/_new_pathData_working.ipynb +++ /dev/null @@ -1,218 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from typing import Optional\n", - "\n", - "from tqdm import trange\n", - "import torch\n", - "from torch import Tensor\n", - "from torch_geometric.data import Data\n", - "from torch_geometric.loader import DataLoader\n", - "from torch_geometric.nn import MessagePassing\n", - "from torch_geometric.experimental import disable_dynamic_shapes\n", - "from torch_geometric.nn.aggr import Aggregation\n", - "from torch_geometric.utils import coalesce, degree, cumsum\n", - "from torch_geometric import EdgeIndex\n", - "\n", - "import pathpyG as pp" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "DAGData with 2 dags with total weight 3.0\n" - ] - } - ], - "source": [ - "# Example with walks as node sequences\n", - "g = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c'), ('a', 'c')])\n", - "dags = pp.DAGData(mapping = g.mapping)\n", - "\n", - "dags.append_walk(('a', 'b', 'c', 'b'), weight=1.0)\n", - "dags.append_walk(('a', 'c'), weight = 2.0)\n", - "print(dags)" - ] - }, - { - "cell_type": "code", - 
"execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "DAGData with 3 dags with total weight 3.0\n" - ] - } - ], - "source": [ - "# Example with walks as edge indices (with no mapping)\n", - "dags = pp.DAGData()\n", - "dags.append_dag(torch.tensor([[3,0,1],[0,1,2]]))\n", - "dags.append_dag(torch.tensor([[1,0,2],[0,2,0]]))\n", - "dags.append_dag(torch.tensor([[0,1],[1,2]]))\n", - "print(dags)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Example with mix of walks or dags\n", - "dags = pp.DAGData(mapping = g.mapping)\n", - "\n", - "dags.append_dag(torch.tensor([[0,0,1],[1,2,2]]))\n", - "dags.append_walk(('a', 'b', 'c'))\n", - "print(dags)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "m = pp.MultiOrderModel.from_DAGs(dags, max_order=2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(m.layers[1].data.edge_index)\n", - "print(m.layers[1].data.node_sequences)\n", - "print(m.layers[1].mapping)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(m.layers[2].data.edge_index)\n", - "print(m.layers[2].data.node_sequences)\n", - "print(m.layers[2].mapping)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Real-world example\n", - "dags = pp.DAGData.from_ngram('../data/tube_paths_train.ngram')\n", - "print(dags)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "m = pp.MultiOrderModel.from_DAGs(dags, max_order=10)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(m.layers[3].mapping)" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pp.plot(m.layers[10], node_label=list(map(str, m.layers[1].data.node_sequences.tolist())))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dags.map_node_seq(m.layers[10].data.node_sequences[5].tolist())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(m.layers[2].data.edge_index)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(m.layers[2].data.edge_weights)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(m.layers[2].data.node_sequences)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/docs/tutorial/_new_paths.ipynb b/docs/tutorial/_new_paths.ipynb deleted file mode 100644 index 0a02c46da..000000000 --- a/docs/tutorial/_new_paths.ipynb +++ /dev/null @@ -1,666 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pathpyG import PathData\n", - "\n", - "from torch import IntTensor\n", - "\n", - "import pathpyG as pp" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "
\n", - "\n", - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "g = pp.Graph.from_edge_list([('a', 'c'),\n", - " ('b', 'c'),\n", - " ('c', 'd'),\n", - " ('c','e')])\n", - "pp.plot(g, node_label=g.mapping.node_ids.tolist(), edge_color='gray')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data on Walks" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([[[0],\n", - " [1]],\n", - "\n", - " [[1],\n", - " [3]]], dtype=torch.int32)" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "path = IntTensor([[0,1],\n", - " [1,3]])" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([[[0, 1]],\n", - "\n", - " [[1, 3]]], dtype=torch.int32)" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "WalkData.edge_index_kth_order(path, k=2)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{0: tensor([[0, 1],\n", - " [1, 3]], dtype=torch.int32), 1: tensor([[2, 1],\n", - " [1, 4]], dtype=torch.int32)}\n", - "{0: 2, 1: 2}\n" - ] - } - ], - "source": [ - "paths_1 = WalkData(g.mapping)\n", - "paths_1.add_walk_seq(('a', 'c', 'd'), freq=2)\n", - "paths_1.add_walk_seq(('b', 'c', 'e'), freq=2)\n", - "print(paths_1.paths)\n", - "print(paths_1.path_freq)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([[0, 1, 1, 2],\n", - " [1, 3, 4, 1]], dtype=torch.int32)" - ] - }, - "execution_count": 6, - "metadata": {}, - 
"output_type": "execute_result" - } - ], - "source": [ - "paths_1.edge_index" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(tensor([[0, 1, 1, 2],\n", - " [1, 3, 4, 1]], dtype=torch.int32),\n", - " tensor([2., 2., 2., 2.]))" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "paths_1.edge_index_weighted" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(tensor([[[0, 1],\n", - " [2, 1]],\n", - " \n", - " [[1, 3],\n", - " [1, 4]]], dtype=torch.int32),\n", - " tensor([2., 2.]))" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "paths_1.edge_index_k_weighted(k=2)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{0: tensor([[0, 1],\n", - " [1, 3]], dtype=torch.int32), 1: tensor([[0, 1],\n", - " [1, 4]], dtype=torch.int32), 2: tensor([[2, 1],\n", - " [1, 3]], dtype=torch.int32), 3: tensor([[2, 1],\n", - " [1, 4]], dtype=torch.int32)}\n", - "{0: 1, 1: 1, 2: 1, 3: 1}\n" - ] - } - ], - "source": [ - "paths_2 = WalkData(g.mapping)\n", - "paths_2.add_walk_seq(('a', 'c', 'd'), freq=1)\n", - "paths_2.add_walk_seq(('a', 'c', 'e'), freq=1)\n", - "paths_2.add_walk_seq(('b', 'c', 'd'), freq=1)\n", - "paths_2.add_walk_seq(('b', 'c', 'e'), freq=1)\n", - "print(paths_2.paths)\n", - "print(paths_2.path_freq)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(tensor([[0, 1, 1, 2],\n", - " [1, 3, 4, 1]], dtype=torch.int32),\n", - " tensor([2., 2., 2., 2.]))" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "paths_2.edge_index_weighted" - ] - }, - { - 
"cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(tensor([[[0, 1],\n", - " [0, 1],\n", - " [2, 1],\n", - " [2, 1]],\n", - " \n", - " [[1, 3],\n", - " [1, 4],\n", - " [1, 3],\n", - " [1, 4]]], dtype=torch.int32),\n", - " tensor([1., 1., 1., 1.]))" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "paths_2.edge_index_k_weighted(k=2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Data on directed acyclic graphs" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([[[0, 2],\n", - " [0, 2]],\n", - "\n", - " [[2, 3],\n", - " [2, 4]]], dtype=torch.int32)" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "path = IntTensor([[0,2,2],\n", - " [2,3,4]])\n", - "DAGData.edge_index_kth_order(path, k=2)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "paths_1 = DAGData(g.mapping)\n", - "dag = IntTensor([[0,2,2],\n", - " [2,3,4]])\n", - "paths_1.add(dag, freq=1)\n", - "dag = IntTensor([[1,2,2],\n", - " [2,3,4]])\n", - "paths_1.add(dag, freq=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(tensor([[0, 1, 2, 2],\n", - " [2, 2, 3, 4]], dtype=torch.int32),\n", - " tensor([1., 1., 2., 2.]))" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "paths_1.edge_index_weighted" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(tensor([[[0, 2],\n", - " [0, 2],\n", - " [1, 2],\n", - " [1, 2]],\n", - " \n", - " [[2, 3],\n", - " [2, 4],\n", - " [2, 3],\n", - " [2, 4]]], dtype=torch.int32),\n", - " 
tensor([1., 1., 1., 1.]))" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "paths_1.edge_index_k_weighted(k=2)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/docs/tutorial/_time_respecting_paths_gpu.ipynb b/docs/tutorial/_time_respecting_paths_gpu.ipynb deleted file mode 100644 index 060079a0e..000000000 --- a/docs/tutorial/_time_respecting_paths_gpu.ipynb +++ /dev/null @@ -1,311 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pathpyG as pp\n", - "import torch\n", - "from torch_geometric.utils import cumsum, coalesce, degree, sort_edge_index\n", - "\n", - "from tqdm import tqdm" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Temporal Graph with 327 nodes, 11636 unique edges and 377016 events in [1385982080.0, 1386345600.0]\n", - "\n", - "Graph attributes\n", - "\tdst\t\t -> torch.Size([377016])\n", - "\tsrc\t\t -> torch.Size([377016])\n", - "\tt\t\t -> torch.Size([377016])\n", - "\n", - "1157\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.10/site-packages/torch_geometric/data/storage.py:450: UserWarning: Unable to accurately infer 'num_nodes' from the attribute set '{'dst', 'src', 't'}'. 
Please explicitly set 'num_nodes' as an attribute of 'data' to suppress this warning\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "t_sp = pp.TemporalGraph.from_csv('sociopatterns_highschool_2013.tedges').to_undirected()\n", - "print(t_sp)\n", - "print(torch.unique(t_sp.data.t).size(0))" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Temporal Graph with 5 nodes, 5 unique edges and 5 events in [0.0, 2.0]\n", - "\n", - "Graph attributes\n", - "\tdst\t\t -> torch.Size([5])\n", - "\tsrc\t\t -> torch.Size([5])\n", - "\tt\t\t -> torch.Size([5])\n", - "\n" - ] - } - ], - "source": [ - "t = pp.TemporalGraph.from_edge_list([(0,1,0), (0,2,0), (1,2,1), (1,3,1), (3,4,2)])\n", - "print(t)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "# new memory-efficient code copied from `temporal_shortest_paths.ipynb`\n", - "def lift_order_efficient(g: pp.TemporalGraph, delta: int = 1):\n", - "\n", - " # first-order edge index\n", - " edge_index, timestamps = g.data.edge_index, g.data.t\n", - "\n", - " #print(edge_index)\n", - " #print(timestamps)\n", - "\n", - " indices = torch.arange(0, edge_index.size(1), device=g.data.edge_index.device)\n", - "\n", - " unique_t, reverse_idx = torch.unique(timestamps, sorted=True, return_inverse=True)\n", - " second_order = []\n", - " count = 0\n", - "\n", - " # lift order: find possible continuations for edges in each time stamp\n", - " for i in tqdm(range(unique_t.size(0))):\n", - " t = unique_t[i]\n", - " #print('timestamp index ', i)\n", - " #print('timestamp ', t)\n", - " \n", - " # find indices of all source edges that occur at unique timestamp t\n", - " src_time_mask = (timestamps == t)\n", - " src_edges = edge_index[:,src_time_mask]\n", - " src_edge_idx = indices[src_time_mask]\n", - " #print(src_edges)\n", - " #print(src_edge_idx)\n", - "\n", - " # find indices 
of all edges that can continue edges at tine t for given delta\n", - " dst_time_mask = (timestamps > t) & (timestamps <= t+delta)\n", - " dst_edges = edge_index[:,dst_time_mask] \n", - " dst_edge_idx = indices[dst_time_mask]\n", - " #print(dst_edges)\n", - " #print(dst_edge_idx)\n", - "\n", - " if dst_edge_idx.size(0)>0 and src_edge_idx.size(0)>0:\n", - "\n", - " # compute second-order edges between src and dst idx for all edges where dst in src_edges matches src in dst_edges \n", - " x = torch.cartesian_prod(src_edge_idx, dst_edge_idx).t()\n", - " src_edges = torch.index_select(edge_index, dim=1, index=x[0])\n", - " dst_edges = torch.index_select(edge_index, dim=1, index=x[1])\n", - " #print(src_edges)\n", - " #print(dst_edges)\n", - " ho_edge_index = x[:,torch.where(src_edges[1,:] == dst_edges[0,:])[0]]\n", - " second_order.append(ho_edge_index)\n", - " #print(ho_edge_index) \n", - " \n", - " # #print('dst', dst)\n", - " # src_mask = (edge_index[:,mask][0]==dst)\n", - " # ctd = edge_index[:,mask][:,src_mask]\n", - " # #print('continuations', ctd)\n", - " # ctd_indices = torch.where(edge_index[:,mask][0]==dst)[0] \n", - " # #print('ctd indx', ctd_indices)\n", - " # count += ctd_indices.size(0)\n", - " ho_index = torch.cat(second_order, dim=1) \n", - " return ho_index.size(1), ho_index" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "def time_respecting_paths(g: pp.TemporalGraph, delta: int) -> dict:\n", - " \"\"\"\n", - " Calculate all longest time-respecting paths in a temporal graph.\n", - " \"\"\"\n", - " paths_of_length = {}\n", - "\n", - " node_sequence = torch.arange(g.data.num_nodes, device=g.data.edge_index.device).unsqueeze(1)\n", - " node_sequence = torch.cat([node_sequence[g.data.edge_index[0]], node_sequence[g.data.edge_index[1]][:, -1:]], dim=1)\n", - " edge_index = lift_order_efficient(g, delta)[1]\n", - " \n", - " # calculate degrees\n", - " out_degree = degree(edge_index[0], 
num_nodes=g.m, dtype=torch.long)\n", - " in_degree = degree(edge_index[1], num_nodes=g.m, dtype=torch.long)\n", - " # identify root nodes with in-degree zero\n", - " roots = torch.where(in_degree == 0)[0]\n", - " leafs = (out_degree == 0)\n", - " # print(\"Roots:\", roots)\n", - " # print(\"Leafs:\", leafs)\n", - " paths = node_sequence[roots]\n", - " paths_of_length[1] = paths[leafs[roots]].cpu()\n", - "\n", - " paths = paths[~leafs[roots]]\n", - " nodes = roots[~leafs[roots]]\n", - "\n", - " ptrs = cumsum(out_degree, dim=0)\n", - "\n", - "\n", - " # count all longest time-respecting paths in the temporal graph\n", - " step = 1\n", - " while nodes.size(0) > 0:\n", - " # print(\"step\", step)\n", - " # print(\"Paths: \", paths)\n", - " # print(\"Nodes: \", nodes)\n", - " idx_repeat = torch.repeat_interleave(out_degree[nodes])\n", - " next_idx = torch.repeat_interleave(ptrs[nodes], out_degree[nodes])\n", - " idx_correction = torch.arange(next_idx.size(0), device=edge_index.device) - cumsum(out_degree[nodes], dim=0)[idx_repeat]\n", - " next_idx += idx_correction\n", - " next_nodes = edge_index[1][next_idx]\n", - " paths = torch.cat([paths[idx_repeat], node_sequence[next_nodes, 1:]], dim=1)\n", - " paths_of_length[step] = paths[leafs[next_nodes]].tolist()\n", - " paths = paths[~leafs[next_nodes]]\n", - " nodes = next_nodes[~leafs[next_nodes]]\n", - " step += 1\n", - "\n", - " return paths_of_length\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/1157 [00:00 -> torch.Size([2])\n", - "\tsrc\t\t -> torch.Size([2])\n", - "\tdst\t\t -> torch.Size([2])\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.10/site-packages/torch_geometric/data/storage.py:450: UserWarning: Unable to accurately infer 'num_nodes' from the attribute set '{'t', 'src', 'dst'}'. 
Please explicitly set 'num_nodes' as an attribute of 'data' to suppress this warning\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "g = pp.TemporalGraph.from_edge_list([['a', 'b', 1], ['b', 'c', 3]])\n", - "print(g)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "ename": "AttributeError", - "evalue": "module 'pathpyG.algorithms' has no attribute 'temporal_graph_to_event_dag'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m dag \u001b[38;5;241m=\u001b[39m \u001b[43mpp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43malgorithms\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtemporal_graph_to_event_dag\u001b[49m(g, delta\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2\u001b[39m)\n\u001b[1;32m 2\u001b[0m pp\u001b[38;5;241m.\u001b[39mplot(dag)\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(dag\u001b[38;5;241m.\u001b[39mdata\u001b[38;5;241m.\u001b[39mnode_id)\n", - "\u001b[0;31mAttributeError\u001b[0m: module 'pathpyG.algorithms' has no attribute 'temporal_graph_to_event_dag'" - ] - } - ], - "source": [ - "dag = pp.algorithms.temporal_graph_to_event_dag(g, delta=2)\n", - "pp.plot(dag)\n", - "print(dag.data.node_id)\n", - "print(dag.data.edge_index)" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "tensor([1697, 1697, 1698, 1698, 1698, 1698, 1699, 1699, 1701, 1702, 1702, 1702,\n", - " 1702, 1702, 1702, 1702, 1703, 1703, 1703, 1704, 1704, 1704, 1704, 1704,\n", - " 1704, 1705, 1705, 1705, 1706, 1706, 1706, 1707, 1707, 1707, 1707, 1707,\n", - " 1708, 1708, 1709, 1709, 1709, 1710, 1710, 1710, 1711, 1711, 1711, 1712,\n", - " 1713, 1714, 1715, 1715, 
1717, 1718, 1719, 1719, 1719, 1720, 1720, 1720,\n", - " 1720, 1721, 1721, 1723, 1724, 1724, 1726, 1726, 1727, 1728, 1728, 1728,\n", - " 1728, 1729, 1729, 1730, 1731, 1731, 1732, 1733, 1734, 1735, 1735, 1735,\n", - " 1736, 1736, 1737, 1737, 1737, 1738, 1738, 1739, 1742, 1742, 1743, 1743,\n", - " 1743, 1743, 1744, 1744, 1745, 1745, 1746, 1746, 1747, 1748, 1749],\n", - " device='cuda:0')\n", - "Temporal Graph with 50 nodes 93 edges and 107 time-stamped events in [1697, 1749]\n", - "\n", - "Node attributes\n", - "\tnode_id\t\t\n", - "\n", - "Graph attributes\n", - "\tsrc\t\t -> torch.Size([107])\n", - "\tnum_nodes\t\t\n", - "\tdst\t\t -> torch.Size([107])\n", - "\tt\t\t -> torch.Size([107])\n", - "\n" - ] - } - ], - "source": [ - "g = pp.TemporalGraph.from_csv('../data/ants_1_2_val_small.csv')\n", - "print(g)" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Graph with 147 nodes and 159 edges\n", - "\n", - "Node attributes\n", - "\tnode_idx\t\t\n", - "\tnode_id\t\t\n", - "\tnode_name\t\t\n", - "\n", - "Edge attributes\n", - "\tedge_ts\t\t -> torch.Size([159])\n", - "\n", - "Graph attributes\n", - "\tnum_nodes\t\t\n", - "\n" - ] - } - ], - "source": [ - "dag = pp.algorithms.temporal_graph_to_event_dag(g, delta=30, sparsify=True)\n", - "print(dag)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "PathData with 0 walks and 39 dags\n" - ] - } - ], - "source": [ - "paths = pp.DAGData.from_temporal_dag(dag, detect_walks=False)\n", - "print(paths)" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "tensor([[[ 0, 1],\n", - " [ 0, 1],\n", - " [ 0, 1],\n", - " [ 0, 1],\n", - " [ 0, 1],\n", - " [ 0, 2],\n", - " [ 0, 2],\n", - " [ 0, 21],\n", - " [ 0, 
21],\n", - " [ 0, 21],\n", - " [ 0, 32],\n", - " [ 1, 0],\n", - " [ 1, 0],\n", - " [ 1, 0],\n", - " [ 1, 0],\n", - " [ 1, 13],\n", - " [ 1, 13],\n", - " [ 1, 30],\n", - " [ 4, 5],\n", - " [ 5, 20],\n", - " [ 6, 7],\n", - " [ 6, 7],\n", - " [ 6, 9],\n", - " [ 6, 9],\n", - " [ 7, 39],\n", - " [ 7, 39],\n", - " [ 7, 39],\n", - " [ 8, 0],\n", - " [ 8, 0],\n", - " [ 8, 0],\n", - " [ 8, 0],\n", - " [ 9, 7],\n", - " [ 9, 24],\n", - " [10, 11],\n", - " [10, 11],\n", - " [10, 11],\n", - " [10, 11],\n", - " [10, 29],\n", - " [11, 0],\n", - " [11, 0],\n", - " [11, 0],\n", - " [11, 16],\n", - " [11, 21],\n", - " [11, 21],\n", - " [11, 22],\n", - " [11, 22],\n", - " [11, 29],\n", - " [11, 29],\n", - " [12, 1],\n", - " [12, 1],\n", - " [12, 13],\n", - " [12, 13],\n", - " [12, 30],\n", - " [12, 33],\n", - " [13, 8],\n", - " [20, 14],\n", - " [21, 11],\n", - " [21, 11],\n", - " [21, 11],\n", - " [22, 16],\n", - " [23, 24],\n", - " [23, 24],\n", - " [24, 9],\n", - " [25, 7],\n", - " [25, 26],\n", - " [26, 6],\n", - " [26, 9],\n", - " [26, 9],\n", - " [27, 8],\n", - " [28, 24],\n", - " [28, 29],\n", - " [28, 29],\n", - " [28, 29],\n", - " [28, 29],\n", - " [29, 21],\n", - " [29, 21],\n", - " [29, 21],\n", - " [30, 1],\n", - " [30, 33],\n", - " [31, 22],\n", - " [31, 22],\n", - " [32, 0],\n", - " [34, 35],\n", - " [34, 44],\n", - " [35, 34],\n", - " [37, 38],\n", - " [39, 6],\n", - " [39, 6],\n", - " [39, 45],\n", - " [43, 20],\n", - " [44, 18],\n", - " [45, 6],\n", - " [47, 9]],\n", - "\n", - " [[ 1, 0],\n", - " [ 1, 12],\n", - " [ 1, 13],\n", - " [ 1, 30],\n", - " [ 1, 33],\n", - " [ 2, 0],\n", - " [ 2, 3],\n", - " [21, 11],\n", - " [21, 40],\n", - " [21, 42],\n", - " [32, 0],\n", - " [ 0, 2],\n", - " [ 0, 3],\n", - " [ 0, 21],\n", - " [ 0, 32],\n", - " [13, 1],\n", - " [13, 8],\n", - " [30, 33],\n", - " [ 5, 20],\n", - " [20, 43],\n", - " [ 7, 39],\n", - " [ 7, 43],\n", - " [ 9, 24],\n", - " [ 9, 36],\n", - " [39, 6],\n", - " [39, 25],\n", - " [39, 45],\n", - " [ 0, 2],\n", - " [ 
0, 3],\n", - " [ 0, 21],\n", - " [ 0, 32],\n", - " [ 7, 39],\n", - " [24, 9],\n", - " [11, 0],\n", - " [11, 21],\n", - " [11, 22],\n", - " [11, 29],\n", - " [29, 42],\n", - " [ 0, 2],\n", - " [ 0, 3],\n", - " [ 0, 32],\n", - " [16, 17],\n", - " [21, 40],\n", - " [21, 42],\n", - " [22, 11],\n", - " [22, 16],\n", - " [29, 33],\n", - " [29, 42],\n", - " [ 1, 12],\n", - " [ 1, 30],\n", - " [13, 1],\n", - " [13, 8],\n", - " [30, 1],\n", - " [33, 12],\n", - " [ 8, 27],\n", - " [14, 20],\n", - " [11, 16],\n", - " [11, 22],\n", - " [11, 29],\n", - " [16, 17],\n", - " [24, 9],\n", - " [24, 28],\n", - " [ 9, 49],\n", - " [ 7, 39],\n", - " [26, 6],\n", - " [ 6, 45],\n", - " [ 9, 24],\n", - " [ 9, 36],\n", - " [ 8, 27],\n", - " [24, 9],\n", - " [29, 21],\n", - " [29, 23],\n", - " [29, 33],\n", - " [29, 42],\n", - " [21, 11],\n", - " [21, 40],\n", - " [21, 42],\n", - " [ 1, 30],\n", - " [33, 12],\n", - " [22, 11],\n", - " [22, 16],\n", - " [ 0, 46],\n", - " [35, 34],\n", - " [44, 18],\n", - " [34, 44],\n", - " [38, 37],\n", - " [ 6, 7],\n", - " [ 6, 45],\n", - " [45, 6],\n", - " [20, 14],\n", - " [18, 44],\n", - " [ 6, 45],\n", - " [ 9, 49]]], device='cuda:0')\n", - "93\n", - "tensor([ 4., 1., 2., 1., 1., 3., 6., 10., 2., 2., 6., 3., 1., 4.,\n", - " 1., 1., 1., 3., 1., 1., 6., 1., 1., 1., 4., 4., 8., 3.,\n", - " 1., 4., 1., 3., 2., 3., 4., 2., 2., 1., 3., 1., 1., 3.,\n", - " 1., 1., 1., 1., 3., 3., 1., 1., 1., 1., 1., 1., 2., 1.,\n", - " 3., 6., 4., 3., 1., 1., 4., 3., 1., 1., 1., 2., 1., 1.,\n", - " 3., 1., 1., 1., 2., 1., 1., 1., 3., 1., 1., 6., 1., 1.,\n", - " 1., 1., 2., 1., 3., 1., 1., 3., 1.], device='cuda:0')\n" - ] - } - ], - "source": [ - "index, weights = paths.edge_index_k_weighted(k=2)\n", - "print(index)\n", - "print(index.size(dim=1))\n", - "print(weights)" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [], - "source": [ - "def map_edge_index(edge_index, weights, map):\n", - " paths = []\n", - " for i in 
range(edge_index.size(dim=1)):\n", - " paths.append((map[edge_index[0][i][0].item()], map[edge_index[0][i][1].item()], map[edge_index[1][i][1].item()], weights[i].item()))\n", - " return paths\n" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [], - "source": [ - "paths = map_edge_index(index, weights, g.mapping.idx_to_id)\n", - "with open('ants_1_2_paths.csv', 'w') as f:\n", - " for p in paths:\n", - " f.write('{0};{1}\\n'.format(','.join(p[:-1]), p[-1]))" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "HigherOrderGraph (k=1) with 71 nodes and 591 edges\n", - "\tTotal edge weight = 3230.0\n", - "Edge attributes\n", - "\tedge_weight\t\t -> torch.Size([591])\n", - "\n", - "Graph attributes\n", - "\tnode_id\t\t\n", - "\tnum_nodes\t\t\n", - "\n" - ] - } - ], - "source": [ - "g1 = pp.HigherOrderGraph(paths, order=1)\n", - "print(g1)" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "HigherOrderGraph (k=2) with 80 nodes and 93 edges\n", - "\tTotal edge weight = 202.0\n", - "Edge attributes\n", - "\tedge_weight\t\t -> torch.Size([93])\n", - "\n", - "Graph attributes\n", - "\tnode_id\t\t\n", - "\tnum_nodes\t\t\n", - "\n" - ] - } - ], - "source": [ - "g2 = pp.HigherOrderGraph(paths, order=2)\n", - "print(g2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Toy Example" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "tensor([1, 2, 3, 3], device='cuda:0')\n", - "Temporal Graph with 5 nodes 4 edges and 4 time-stamped events in [1, 3]\n", - "\n", - "Node attributes\n", - "\tnode_id\t\t\n", - "\n", - "Graph attributes\n", - "\tsrc\t\t -> torch.Size([4])\n", - "\tnum_nodes\t\t\n", 
- "\tdst\t\t -> torch.Size([4])\n", - "\tt\t\t -> torch.Size([4])\n", - "\n" - ] - } - ], - "source": [ - "g = pp.TemporalGraph.from_edge_list([['a', 'b', 1], ['b', 'c',2], ['c', 'd',3], ['c', 'e', 3]])\n", - "print(g)" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Graph with 5 nodes and 4 edges\n", - "\n", - "Node attributes\n", - "\tnode_idx\t\t\n", - "\tnode_id\t\t\n", - "\tnode_name\t\t\n", - "\n", - "Edge attributes\n", - "\tedge_ts\t\t -> torch.Size([4])\n", - "\n", - "Graph attributes\n", - "\tnum_nodes\t\t\n", - "\n", - "['a', 'b', 'c', 'd', 'e']\n", - "{0: 'a-1', 1: 'b-2', 2: 'c-3', 3: 'd-4', 4: 'e-4'}\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - "
\n", - "
\n", - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "dag = pp.algorithms.temporal_graph_to_event_dag(g, delta=5, sparsify=True)\n", - "print(dag)\n", - "print(dag.data['node_name'])\n", - "print(dag.node_index_to_id)\n", - "pp.plot(dag, edge_color='lightgray')" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'a-1': tensor([[0, 1, 2, 2],\n", - " [1, 2, 3, 4]], device='cuda:0', dtype=torch.int32)}\n" - ] - } - ], - "source": [ - "x = pp.algorithms.extract_causal_trees(dag)\n", - "print(x)" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "PathData with 0 walks and 1 dags\n", - "{0: 0, 1: 1, 2: 2, 3: 3, 4: 4}\n" - ] - } - ], - "source": [ - "paths = pp.DAGData.from_temporal_dag(dag)\n", - "print(paths)\n", - "print(paths.mapping)" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(tensor([[0, 1, 2, 2],\n", - " [1, 2, 3, 4]], device='cuda:0'),\n", - " tensor([1., 1., 1., 1.], device='cuda:0'))" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "paths.edge_index_k_weighted(k=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "HigherOrderGraph (k=1) with 5 nodes and 4 edges\n", - "\tTotal edge weight = 4.0\n", - "Edge attributes\n", - "\tedge_weight\t\t -> torch.Size([4])\n", - "\n", - "Graph attributes\n", - "\tnode_id\t\t\n", - "\tnum_nodes\t\t\n", - "\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - "
\n", - "
\n", - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "g1 = pp.HigherOrderGraph(paths, order=1)\n", - "print(g1)\n", - "pp.plot(g1)" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(tensor([[[0, 1],\n", - " [1, 2],\n", - " [1, 2]],\n", - " \n", - " [[1, 2],\n", - " [2, 3],\n", - " [2, 4]]], device='cuda:0'),\n", - " tensor([1., 1., 1.], device='cuda:0'))" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "paths.edge_index_k_weighted(k=2)" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "HigherOrderGraph (k=2) with 4 nodes and 3 edges\n", - "\tTotal edge weight = 3.0\n", - "Edge attributes\n", - "\tedge_weight\t\t -> torch.Size([3])\n", - "\n", - "Graph attributes\n", - "\tnode_id\t\t\n", - "\tnum_nodes\t\t\n", - "\n" - ] - } - ], - "source": [ - "g2 = pp.HigherOrderGraph(paths, order=2)\n", - "print(g2)" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - "
\n", - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "pp.plot(g2)" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "tensor([[1, 2]], dtype=torch.int32)\n", - "tensor([[[0, 1, 2]],\n", - "\n", - " [[1, 2, 3]]], dtype=torch.int32)\n" - ] - } - ], - "source": [ - "# edge_index =torch.IntTensor([[0,1,2],[1,2,3]])\n", - "# edge_index = edge_index.reshape(edge_index.size()+(1,))\n", - "# print(edge_index)\n", - "\n", - "a = edge_index[0].unique(dim=0)\n", - "b = edge_index[1].unique(dim=0)\n", - "# intersection of a and b corresponds to all center nodes, which have at least one incoming and one outgoing edge\n", - "combined = torch.cat((a, b))\n", - "uniques, counts = combined.unique(dim=0, return_counts=True)\n", - "center_nodes = uniques[counts > 1]\n", - "print(center_nodes)\n", - "src = []\n", - "dst = []\n", - "for v in center_nodes:\n", - " src_index = torch.all(edge_index[1]==v, axis=1).nonzero().flatten() # type: ignore\n", - " srcs = edge_index[0][src_index]\n", - " # get all successors of v, i.e. 
elements in edge_index[1] where edge_index[0] == v\n", - " dst_index = torch.all(edge_index[0]==v, axis=1).nonzero().flatten() # type: ignore\n", - " dsts = edge_index[1][dst_index]\n", - " for s in srcs:\n", - " for d in dsts:\n", - " src.append(torch.cat((torch.gather(s, 0, torch.tensor([0])), v)))\n", - " dst.append(torch.cat((v, torch.gather(d, 0, torch.tensor([d.size()[0]-1])))))\n", - "edge_index = torch.stack((torch.stack(src), torch.stack(dst)))\n", - "print(edge_index)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "HigherOrderGraph (k=3) with 2 nodes and 1 edges\n", - "\tTotal edge weight = 1.0\n", - "Edge attributes\n", - "\tedge_weight\t\t -> torch.Size([1])\n", - "\n", - "Graph attributes\n", - "\tnum_nodes\t\t\n", - "\tnode_id\t\t\n", - "\n" - ] - } - ], - "source": [ - "g2 = pp.HigherOrderGraph(paths, order=3)\n", - "print(g2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(g2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pp.plot(g2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/docs/tutorial/_higher_order_scalability.ipynb b/docs/tutorial/archive/_higher_order_scalability.ipynb similarity index 100% rename from docs/tutorial/_higher_order_scalability.ipynb rename to 
docs/tutorial/archive/_higher_order_scalability.ipynb diff --git a/docs/tutorial/_scalability_analysis.ipynb b/docs/tutorial/archive/_scalability_analysis.ipynb similarity index 100% rename from docs/tutorial/_scalability_analysis.ipynb rename to docs/tutorial/archive/_scalability_analysis.ipynb diff --git a/docs/tutorial/implementation_concepts.ipynb b/docs/tutorial/implementation_concepts.ipynb new file mode 100644 index 000000000..4018268ab --- /dev/null +++ b/docs/tutorial/implementation_concepts.ipynb @@ -0,0 +1,4439 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "45873078", + "metadata": {}, + "source": [ + "# General Concepts of the Tensor-based Implementations\n", + "\n", + "## Prerequisites\n", + "\n", + "First, we need to set up our Python environment that has PyTorch, PyTorch Geometric and PathpyG installed. Depending on where you are executing this notebook, this might already be (partially) done. E.g. Google Colab has PyTorch installed by default so we only need to install the remaining dependencies. The DevContainer that is part of our GitHub Repository on the other hand already has all of the necessary dependencies installed. \n", + "\n", + "In the following, we install the packages for usage in Google Colab using Jupyter magic commands. For other environments comment in or out the commands as necessary. For more details on how to install `pathpyG` especially if you want to install it with GPU-support, we refer to our [documentation](https://www.pathpy.net/dev/getting_started/). Note that `%%capture` discards the full output of the cell to not clutter this tutorial with unnecessary installation details. If you want to print the output, you can comment `%%capture` out." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "941ea9b9", + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "# !pip install torch\n", + "# !pip install torch_geometric\n", + "# !pip install git+https://github.com/pathpy/pathpyG.git" + ] + }, + { + "cell_type": "markdown", + "id": "86beb338", + "metadata": {}, + "source": [ + "## Motivation and Learning Objectives\n", + "\n", + "The inner workings of the core classes of PathpyG are based on tensor operations provided by PyTorch and PyTorch Geometric. Especially the creation of higher-order structures using the lift-order functions and the `MultiOrderModel` heavily rely on tensor operations for efficiency reasons. While these implementations are highly optimized, they are very hard to read and understand for newcomers. This tutorial aims to explain the general concepts and ideas behind these implementations in a more accessible way. Additionally, we will provide step-by-step explanations of the core functions in the following sections." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "53dd6f73", + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "from torch_geometric.data import Data\n", + "from torch_geometric.utils import cumsum, degree, sort_edge_index\n", + "\n", + "import pathpyG as pp" + ] + }, + { + "cell_type": "markdown", + "id": "72da4526", + "metadata": {}, + "source": [ + "### Order-lifting and Line Graph Transformations\n", + "\n", + "At the core of creating higher-order models is the `lift_order_edge_index` function that is essentially a line graph transformation. Given an edge index of a graph and the number of nodes in the graph, this function creates the edge index for the corresponding line graph. Let's look at an example:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "547a4b15", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mapping = pp.IndexMap(list(\"abcdef\"))\n", + "graph = pp.Graph.from_edge_index(\n", + " edge_index=torch.tensor([[0, 1, 3, 4, 2, 2, 5], [2, 2, 5, 5, 3, 4, 0]]), mapping=mapping\n", + ")\n", + "pp.plot(graph, node_label=graph.nodes)" + ] + }, + { + "cell_type": "markdown", + "id": "dbd54742", + "metadata": {}, + "source": [ + "We can create the line graph for this graph using the `lift_order_edge_index` function as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "04d7df38", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "second_order_edge_index = pp.algorithms.lift_order.lift_order_edge_index(edge_index=graph.data.edge_index, num_nodes=graph.n)\n", + "second_order_mapping = pp.IndexMap(graph.edges)\n", + "second_order_data = Data(edge_index=second_order_edge_index, node_sequence=graph.data.edge_index.t())\n", + "line_graph = pp.Graph(data=second_order_data, mapping=second_order_mapping)\n", + "pp.plot(line_graph, node_label=line_graph.nodes)" + ] + }, + { + "cell_type": "markdown", + "id": "944a71b6", + "metadata": {}, + "source": [ + "To create the higher-order `PathpyG.Graph`, we needed to specify a `node_sequence` in the `Data` object. The node sequence above was given by the original edges of the graph. This `node_sequence` keeps track of which original nodes correspond to which higher-order nodes in the higher-order graph. In a second order graph, each higher-order node corresponds to an edge in the original graph. In a graph of order k, each higher-order node corresponds to a path of length k in the original graph. With this, we can always trace back which higher-order node corresponds to which original nodes.\n", + "\n", + "As long as we have this mapping from higher-order nodes to original nodes, we can always do an additional line graph transformation to create even higher order graphs. Below, we create a third-order graph:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "eab46380", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "third_order_edge_index = pp.algorithms.lift_order.lift_order_edge_index(edge_index=line_graph.data.edge_index, num_nodes=line_graph.n)\n", + "third_order_data = Data(edge_index=third_order_edge_index, node_sequence=torch.cat([line_graph.data.node_sequence[line_graph.data.edge_index[0]], line_graph.data.node_sequence[line_graph.data.edge_index[1]][:, -1:]], dim=1))\n", + "third_order_mapping = pp.IndexMap([tuple(seq) for seq in graph.mapping.to_ids(third_order_data.node_sequence).tolist()])\n", + "third_order_graph = pp.Graph(data=third_order_data, mapping=third_order_mapping)\n", + "pp.plot(third_order_graph, node_label=third_order_graph.nodes)" + ] + }, + { + "cell_type": "markdown", + "id": "ce852a2f", + "metadata": {}, + "source": [ + "Note that above, we constructed the `node_sequence` for the third-order graph by concatenating the sequences of the two nodes that form each edge in the second-order graph. However, only the first node in the sequence of the higher-order source and the last node in the sequence of the higher-order target node are different. The middle nodes are the same for both higher-order nodes since they represent the overlapping part of the paths.\n", + "\n", + "### Under the Hood of `lift_order_edge_index`\n", + "\n", + "Let us now take a closer look at how the `lift_order_edge_index` function works under the hood. 
The whole function essentially only needs 10 lines of code and looks as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "adb82d39", + "metadata": {}, + "outputs": [], + "source": [ + "def lift_order_edge_index(edge_index: torch.Tensor, num_nodes: int ) -> torch.Tensor:\n", + " outdegree = degree(edge_index[0], dtype=torch.long, num_nodes=num_nodes)\n", + " outdegree_per_dst = outdegree[edge_index[1]]\n", + " num_new_edges = outdegree_per_dst.sum()\n", + " ho_edge_srcs = torch.repeat_interleave(outdegree_per_dst)\n", + " ptrs = cumsum(outdegree, dim=0)[:-1]\n", + " ho_edge_dsts = torch.repeat_interleave(ptrs[edge_index[1]], outdegree_per_dst)\n", + " idx_correction = torch.arange(num_new_edges, dtype=torch.long)\n", + " idx_correction -= cumsum(outdegree_per_dst, dim=0)[ho_edge_srcs]\n", + " ho_edge_dsts += idx_correction\n", + " return torch.stack([ho_edge_srcs, ho_edge_dsts], dim=0)" + ] + }, + { + "cell_type": "markdown", + "id": "9b90b7b2", + "metadata": {}, + "source": [ + "However, what the function does exactly is obfuscated by the heavy use of tensor operations. Let us break down the function step-by-step to understand what is happening internally.\n", + "\n", + "
\n", + "

Note

\n", + "

\n", + " Due to the high complexity of the tensor operations, we will maintain two lines of explanation that describe the same concepts in different words. One line of explanation will be added to the code snippets as comments and the other will be provided in the markdown cells between the code snippets.\n", + "

\n", + "
\n", + "\n", + "
\n", + "

Edge index must be sorted!

\n", + "

\n", + " The lift_order_edge_index function assumes that the input edge_index is sorted by source nodes. This is not enforced by the function itself, because we ensure that the edge indices are sorted whenever we create a PathpyG.Graph object. This step is crucial for the correct functioning of the lift_order_edge_index function.\n", + "

\n", + "
\n", + "\n", + "1. The function first computes the outdegree of each node in the graph using the `degree` function from `torch_geometric.utils`. This gives us a tensor containing the number of outgoing edges for each node." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "1263095b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Outdegree per node:\n", + "\ta: 1\n", + "\tb: 1\n", + "\tc: 2\n", + "\td: 1\n", + "\te: 1\n", + "\tf: 1\n" + ] + } + ], + "source": [ + "# Compute the outdegree of each node used to get all the edge combinations leading to a higher-order edge\n", + "outdegree = degree(graph.data.edge_index[0], dtype=torch.long, num_nodes=graph.n)\n", + "print(\"Outdegree per node:\")\n", + "for node in graph.nodes:\n", + " print(f\"\\t{node}: {outdegree[graph.mapping.to_idx(node)].item()}\")" + ] + }, + { + "cell_type": "markdown", + "id": "d9aace27", + "metadata": {}, + "source": [ + "2. Next, we map the outdegree values to the destination nodes of each edge in the edge index. This gives us a tensor where each entry corresponds to the outdegree of the target node of each edge.\n", + "\n", + "
\n", + "

Note

\n", + "

\n", + " This helps us because for the line graph transformation, we need to transform each edge into a node and then connect these nodes (previously edges) if a node in the original graph connects them. Therefore, we need to create a higher-order edge for each combination of incoming and outgoing edges for each node in the original graph. The outdegree of the target node tells us how many outgoing edges there are for each target node, which directly translates to how many higher-order edges we need to create for each incoming edge.\n", + "

\n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "915f0d5f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Outdegree per destination node of each edge:\n", + "\t('a', 'c'): 2\n", + "\t('b', 'c'): 2\n", + "\t('c', 'd'): 1\n", + "\t('c', 'e'): 1\n", + "\t('d', 'f'): 1\n", + "\t('e', 'f'): 1\n", + "\t('f', 'a'): 1\n" + ] + } + ], + "source": [ + "# For each center node, we need to combine each outgoing edge with each incoming edge\n", + "# We achieve this by creating `outdegree` number of edges for each destination node \n", + "# of the old edge index\n", + "outdegree_per_dst = outdegree[graph.data.edge_index[1]]\n", + "print(\"\\nOutdegree per destination node of each edge:\")\n", + "for e, outdeg in zip(graph.edges, outdegree_per_dst.tolist()):\n", + " print(f\"\\t{e}: {outdeg}\")" + ] + }, + { + "cell_type": "markdown", + "id": "be5267b1", + "metadata": {}, + "source": [ + "3. Next, we create the source nodes for the higher-order graph. For this, we create a new index that maps the original edges to its index as a higher-order node. This is done by creating a range from 0 to the number of edges in the original graph. We then repeat each index according to the outdegree of the corresponding target node. This way, we create a source node for each combination of incoming and outgoing edges for each target node, which will be the edges in the higher-order graph." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "1981d572", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Higher-order edge source indices:\n", + " [0, 0, 1, 1, 2, 3, 4, 5, 6]\n", + "Higher-order edge sources:\n", + " [['a' 'c']\n", + " ['a' 'c']\n", + " ['b' 'c']\n", + " ['b' 'c']\n", + " ['c' 'd']\n", + " ['c' 'e']\n", + " ['d' 'f']\n", + " ['e' 'f']\n", + " ['f' 'a']]\n" + ] + } + ], + "source": [ + "# Use each edge from the edge index as node and assign the new indices in the order of the original edge index\n", + "# Each higher order node has one outgoing edge for each outgoing edge of the original destination node\n", + "# Since we keep the ordering, we can just repeat each node using the `outdegree_per_dst` tensor\n", + "ho_edge_srcs = torch.repeat_interleave(outdegree_per_dst)\n", + "print(\"\\nHigher-order edge source indices:\\n\", ho_edge_srcs.tolist())\n", + "print(\"Higher-order edge sources:\\n\", graph.mapping.to_ids(graph.data.edge_index[:, ho_edge_srcs]).T)" + ] + }, + { + "cell_type": "markdown", + "id": "769117a8", + "metadata": {}, + "source": [ + "4. Now, we need to create the target nodes for the higher-order edges. For this, we first need to know where the edges of each node start in the original edge index. We can compute this by calculating the cumulative sum of the outdegree values of all nodes. This gives us a tensor where each entry corresponds to the starting index of the edges for each node in the original edge index.\n", + "\n", + "
\n", + "

Cumulative Sum

\n", + "

\n", + " There is one cumsum implementation in PyTorch and one in PyTorch Geometric. The one in PyTorch Geometric starts with an initial zero value, while the one in PyTorch does not. This means that the torch.cumsum function will give us the end pointers of the edges for each node, while the torch_geometric.utils.cumsum function will give us the start pointers (including a last pointer that is equal to the total number of edges). Therefore, we use the torch_geometric.utils.cumsum function here and remove the last entry afterwards.\n", + "

\n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "0e290e5e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Edge start pointers per node:\n", + " [0, 1, 2, 4, 5, 6]\n" + ] + } + ], + "source": [ + "# For each node, we calculate pointers of shape (num_nodes,) that indicate the start of the original edges\n", + "# (new higher-order nodes) that have the node as source node\n", + "ptrs = cumsum(outdegree, dim=0)[:-1]\n", + "print(\"Edge start pointers per node:\\n\", ptrs.tolist())" + ] + }, + { + "cell_type": "markdown", + "id": "1c171af2", + "metadata": {}, + "source": [ + "5. With the starting pointers of the edges for each node, we can start with the creation of the target nodes for the higher-order edges. Remember that we assigned the node indices based on the order of edges in the original edge index and ordered the higher-order source nodes accordingly. Therefore, we are essentially going through each edge, and combine it with each outgoing edge of the edges target node to create the higher-order edges. Since the edges are **ordered** by source nodes, we are going through all nodes in the original graph in order by going through each outgoing edge of each node. This means that for each edge in the original graph, we can look up where the outgoing edges of its target node start in the original edge index using the `ptrs` tensor we created in the previous step. We then repeat these starting pointers according to the outdegree of the corresponding target node to create a target node for each combination of incoming and outgoing edges for each target node." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "a188b49b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Higher-order edge destination indices (before correction):\n", + " [2, 2, 2, 2, 4, 5, 6, 6, 0]\n" + ] + } + ], + "source": [ + "# Use these pointers to get the start of the edges for each higher-order src and repeat it `outdegree` times\n", + "# Since we keep the ordering, all new higher-order edges that have the same src are indexed consecutively\n", + "ho_edge_dsts = torch.repeat_interleave(ptrs[graph.data.edge_index[1]], outdegree_per_dst)\n", + "print(\"Higher-order edge destination indices (before correction):\\n\", ho_edge_dsts.tolist())" + ] + }, + { + "cell_type": "markdown", + "id": "3cd03e06", + "metadata": {}, + "source": [ + "6. For now, we do not have the correct indices for the higher-order target nodes yet. Since we only repeated the starting pointers of the edges for each target node, we only have the correct offsets for each group of higher-order edges corresponding to each target node. However, within each group, we need to assign the correct indices to the higher-order target nodes. Luckily, we only need to count up from the starting pointer for each group corresponding to one incoming edge in the original graph due to the ordering of the edges. For this, we create a correction index that counts up from 0 to the total number of higher-order edges." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "cb1750ba", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index correction (before adjustment):\n", + " [0, 1, 2, 3, 4, 5, 6, 7, 8]\n" + ] + } + ], + "source": [ + "# Since the above only repeats the start of the edges, we need to add (0, 1, 2, 3, ...)\n", + "# for all `outdegree` number of edges consecutively to get the correct destination nodes\n", + "# We can achieve this by starting with a range from (0, 1, ..., num_new_edges)\n", + "idx_correction = torch.arange(ho_edge_srcs.size(0), dtype=torch.long)\n", + "print(\"Index correction (before adjustment):\\n\", idx_correction.tolist())" + ] + }, + { + "cell_type": "markdown", + "id": "0595260f", + "metadata": {}, + "source": [ + "7. We then subtract the cumulative sum of the outdegree values of the higher-order source nodes from this correction index. This effectively resets the counting for each group of higher-order edges corresponding to each target node." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "d57c6ffe", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index correction (after adjustment):\n", + " [0, 1, 0, 1, 0, 0, 0, 0, 0]\n" + ] + } + ], + "source": [ + "# Then, we subtract the cumulative sum of the outdegree for each destination node\n", + "idx_correction -= cumsum(outdegree_per_dst, dim=0)[ho_edge_srcs]\n", + "print(\"Index correction (after adjustment):\\n\", idx_correction.tolist())" + ] + }, + { + "cell_type": "markdown", + "id": "b891ff8b", + "metadata": {}, + "source": [ + "8. Finally, we add this correction index to the starting pointers of the edges for each target node to get the correct indices for the higher-order target nodes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "f0f7dc5f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Higher-order edge destination indices (after correction):\n", + " [2, 3, 2, 3, 4, 5, 6, 6, 0]\n", + "Higher-order edge destinations:\n", + " [['c' 'd']\n", + " ['c' 'e']\n", + " ['c' 'd']\n", + " ['c' 'e']\n", + " ['d' 'f']\n", + " ['e' 'f']\n", + " ['f' 'a']\n", + " ['f' 'a']\n", + " ['a' 'c']]\n" + ] + } + ], + "source": [ + "# Add this tensor to the destination nodes to get the correct destination nodes for each higher-order edge \n", + "ho_edge_dsts += idx_correction\n", + "print(\"Higher-order edge destination indices (after correction):\\n\", ho_edge_dsts.tolist())\n", + "print(\"Higher-order edge destinations:\\n\", graph.mapping.to_ids(graph.data.edge_index[:, ho_edge_dsts]).T)" + ] + }, + { + "cell_type": "markdown", + "id": "71a8754a", + "metadata": {}, + "source": [ + "This gives us the final higher-order edge index that we can return from the function." + ] + }, + { + "cell_type": "markdown", + "id": "fecb4836", + "metadata": {}, + "source": [ + "### Temporal Order Lifting\n", + "\n", + "One of the core functionalities of PathpyG is the ability to create temporal higher-order models. For this, an extension of the `lift_order_edge_index` function to temporal graphs is needed. We implement this in the `lift_order_temporal` function. This function works similarly to the `lift_order_edge_index` function, but with some additional steps to account for the temporal aspect of the graph. The main difference is that we need to ensure that the higher-order edges respect the temporal ordering of the original edges. Let us take a look at an example:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "b1b7f17d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tedges = [\n", + " (\"a\", \"b\", 1),\n", + " (\"a\", \"b\", 2),\n", + " (\"b\", \"a\", 3),\n", + " (\"b\", \"c\", 3),\n", + " (\"d\", \"c\", 4),\n", + " (\"a\", \"b\", 4),\n", + " (\"c\", \"b\", 4),\n", + " (\"c\", \"d\", 5),\n", + " (\"b\", \"a\", 5),\n", + " (\"c\", \"b\", 6),\n", + "]\n", + "t = pp.TemporalGraph.from_edge_list(tedges)\n", + "pp.plot(t, node_label=t.nodes)" + ] + }, + { + "cell_type": "markdown", + "id": "951a8ffa", + "metadata": {}, + "source": [ + "We can create a second-order graph from this temporal graph using the `lift_order_temporal` function. This second-order graph is typically referred to as an event graph. Each node in the graph is an event (edge) in the original temporal graph and two events are connected if they can follow each other in time respecting a maximum time difference `delta`. Here, we set `delta=2` which means that two events can be connected if the time difference between them is at most 2 time units." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "7c0d6265", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 6/6 [00:00<00:00, 3495.74it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "event_edge_index = pp.algorithms.temporal.lift_order_temporal(t, delta=2)\n", + "event_mapping = pp.IndexMap(t.temporal_edges)\n", + "event_data = Data(edge_index=event_edge_index, node_sequence=t.data.edge_index.t())\n", + "event_graph = pp.Graph(data=event_data, mapping=event_mapping)\n", + "pp.plot(event_graph, node_label=event_graph.nodes)" + ] + }, + { + "cell_type": "markdown", + "id": "1a2075aa", + "metadata": {}, + "source": [ + "Starting with the event graph, we have a static higher-order representation of the temporal graph that we can use to create higher-order models. For each subsequent lift-order transformation, we can use the same principles as described in the previous section on order-lifting and line graph transformations. \n", + "\n", + "#### Internals of the `lift_order_temporal` Function\n", + "\n", + "The simplest way to implement the `lift_order_temporal` function would be to first create the full higher-order edge index using the `lift_order_edge_index` function and then filter out the edges that do not respect the temporal ordering. 
The filter function could look as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "42866664", + "metadata": {}, + "outputs": [], + "source": [ + "def filter_time_respecting_edges(event_edge_index: torch.Tensor, timestamps: torch.Tensor, delta: int) -> torch.Tensor:\n", + " # Subtract timestamps of the two events to get the time difference\n", + " time_diff = timestamps[event_edge_index[1]] - timestamps[event_edge_index[0]]\n", + " # Create masks for filtering\n", + " # Remove non-time-respecting higher-order edges\n", + " non_negative_mask = time_diff > 0\n", + " # Remove edges that are too far apart in time based on delta\n", + " delta_mask = time_diff <= delta\n", + " # Combine masks to get the final time-respecting edges\n", + " time_respecting_mask = non_negative_mask & delta_mask\n", + " # Filter the event_edge_index using the time_respecting_mask\n", + " return event_edge_index[:, time_respecting_mask]" + ] + }, + { + "cell_type": "markdown", + "id": "bdb6d4c0", + "metadata": {}, + "source": [ + "We can combine the above filter function with the `lift_order_edge_index` function to create a lift-order function for temporal graphs as follows:\n", + "\n", + "
\n", + "

Warning

\n", + "

\n", + " If we use the standard lift_order_edge_index function, we need to ensure that the input edge index is sorted by source nodes because the edge_index of a TemporalGraph is sorted by time and not by source nodes.\n", + "

\n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "97f6363f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Sort by source node indices\n", + "sorted_edge_index, time = sort_edge_index(t.data.edge_index.as_tensor(), t.data.time)\n", + "# Lift the edge index to the second order\n", + "second_order_edge_index = pp.algorithms.lift_order.lift_order_edge_index(edge_index=sorted_edge_index, num_nodes=t.n)\n", + "# Filter the edges based on the lifted edge index\n", + "filtered_edge_index = filter_time_respecting_edges(second_order_edge_index, timestamps=time, delta=2)\n", + "# Create `pp.Graph` from the filtered edge index\n", + "filtered_event_mapping = pp.IndexMap([tuple([*t.mapping.to_ids(edge).tolist(), timestamp.item()]) for edge, timestamp in zip(sorted_edge_index.t(), time)])\n", + "filtered_event_data = Data(edge_index=filtered_edge_index, node_sequence=sorted_edge_index.t())\n", + "filtered_event_graph = pp.Graph(data=filtered_event_data, mapping=filtered_event_mapping)\n", + "pp.plot(filtered_event_graph, node_label=filtered_event_graph.nodes)" + ] + }, + { + "cell_type": "markdown", + "id": "fc3ff7c7", + "metadata": {}, + "source": [ + "
\n", + "

Note

\n", + "

\n", + " The indexing of the above implementation is different from the one currently implemented in PathpyG. So while the illustrations look identical, the actual indices of the higher-order nodes will differ.\n", + "

\n", + "
\n", + "\n", + "However, the above implementation has a large memory consumption for graphs with many edges because the full higher-order edge index is created before filtering. Therefore, we implement a more memory-efficient version in PathpyG that constructs the higher-order edges from the temporal graph sequentially for each timestamp. This implementation looks as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "169656bb", + "metadata": {}, + "outputs": [], + "source": [ + "def lift_order_temporal(g: pp.TemporalGraph, delta: int = 1):\n", + " indices = torch.arange(0, g.data.edge_index.size(1))\n", + "\n", + " unique_t = torch.unique(g.data.time)\n", + " second_order = []\n", + "\n", + " # lift order: find possible continuations for edges in each time stamp\n", + " for t in unique_t:\n", + "\n", + " # find indices of all source edges that occur at unique timestamp t\n", + " src_time_mask = g.data.time == t\n", + " src_edge_idx = indices[src_time_mask]\n", + "\n", + " # find indices of all edges that can possibly continue edges occurring at time t for the given delta\n", + " dst_time_mask = (g.data.time > t) & (g.data.time <= t + delta)\n", + " dst_edge_idx = indices[dst_time_mask]\n", + "\n", + " if dst_edge_idx.size(0) > 0 and src_edge_idx.size(0) > 0:\n", + " # compute second-order edges between src and dst idx\n", + " # create all possible combinations of src and dst edges\n", + " x = torch.cartesian_prod(src_edge_idx, dst_edge_idx)\n", + " # filter combinations for real higher-order edges\n", + " # for all edges where dst in src_edges (g.data.edge_index[1, x[:, 0]]) matches src in dst_edges (g.data.edge_index[0, x[:, 1]])\n", + " ho_edge_index = x[g.data.edge_index[1, x[:, 0]] == g.data.edge_index[0, x[:, 1]]]\n", + " second_order.append(ho_edge_index)\n", + "\n", + " ho_index = torch.cat(second_order, dim=0).t().contiguous()\n", + " return ho_index" + ] + }, + { + "cell_type": "markdown", + "id": "9bc347eb", + "metadata": {}, 
+ "source": [ + "Note that above we do not use the same indexing trick that is used in the standard `lift_order_edge_index` function. Instead, we create all possible combinations of incoming and outgoing edges for all incoming edges at each timestamp. Therefore, we need a filtering step afterwards to ensure that only valid higher-order edges are created. However, we can skip the sorting step beforehand because we create all possible edge combinations using the cartesian product.\n", + "\n", + "It is also possible to combine both approaches, i.e., we create the higher-order edges for each timestamp separately using the indexing trick from the standard `lift_order_edge_index` function. While it saves the filtering step, it again requires sorting the edges beforehand which has been shown to be similar in performance to the above method. The code would look as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "b333cd54", + "metadata": {}, + "outputs": [], + "source": [ + "def lift_order_temporal_combined(g: pp.TemporalGraph, delta: int = 1):\n", + " indices = torch.arange(0, g.data.edge_index.size(1))\n", + "\n", + " unique_t = torch.unique(g.data.time)\n", + " second_order = []\n", + "\n", + " # lift order: find possible continuations for edges in each time stamp\n", + " for i in range(unique_t.size(0)):\n", + " t = unique_t[i]\n", + "\n", + " # find indices of all source edges that occur at unique timestamp t\n", + " src_time_mask = g.data.time == t\n", + " src_edge_idx = indices[src_time_mask]\n", + "\n", + " # find indices of all edges that can possibly continue edges occurring at time t for the given delta\n", + " dst_time_mask = (g.data.time > t) & (g.data.time <= t + delta)\n", + " dst_node_mask = torch.isin(g.data.edge_index[0], g.data.edge_index[1, src_edge_idx])\n", + " dst_edge_idx = indices[dst_time_mask & dst_node_mask]\n", + "\n", + " if dst_edge_idx.size(0) > 0 and src_edge_idx.size(0) > 0:\n", + " # get sorted dst edges for 
efficient processing\n", + " src_edges = g.data.edge_index[:, src_edge_idx]\n", + " dst_edges = g.data.edge_index[:, dst_edge_idx]\n", + " sorted_idx = torch.argsort(dst_edges[0])\n", + " dst_edge_idx = dst_edge_idx[sorted_idx]\n", + " dst_edges = dst_edges[:, sorted_idx]\n", + "\n", + " # Use indexing trick to create higher-order edges\n", + " outdegree = degree(dst_edges[0], dtype=torch.long, num_nodes=g.n)\n", + " outdegree_per_dst = outdegree[src_edges[1]]\n", + " num_new_edges = outdegree_per_dst.sum()\n", + " ho_edge_srcs = torch.repeat_interleave(outdegree_per_dst)\n", + " ptrs = cumsum(outdegree, dim=0)[:-1]\n", + " ho_edge_dsts = torch.repeat_interleave(ptrs[src_edges[1]], outdegree_per_dst)\n", + " idx_correction = torch.arange(num_new_edges, dtype=torch.long)\n", + " idx_correction -= cumsum(outdegree_per_dst, dim=0)[ho_edge_srcs]\n", + " ho_edge_dsts += idx_correction\n", + " second_order.append(torch.stack([src_edge_idx[ho_edge_srcs], dst_edge_idx[ho_edge_dsts]], dim=0))\n", + "\n", + " ho_index = torch.cat(second_order, dim=1)\n", + " return ho_index" + ] + }, + { + "cell_type": "markdown", + "id": "da2f6f24", + "metadata": {}, + "source": [ + "In contrast to the `lift_order_edge_index` implementation, the temporal version splits the edges into source and destination edges based on timestamps. For each timestamp, we select the edges that occur at that timestamp as source edges and all edges that occur at later timestamps (within the delta time window) as destination edges. Then, instead of repeating the higher-order source nodes for all edges, we only repeat them for the destination edges. " + ] + }, + { + "cell_type": "markdown", + "id": "4497766e", + "metadata": {}, + "source": [ + "## Paths in PathpyG\n", + "\n", + "One other core functionality of PathpyG is the ability to work with paths. Paths are sequences of nodes that represent a walk through the graph. 
We show an example below:" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "6f2a7edb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "path_mapping = pp.IndexMap(list(\"abcde\"))\n", + "paths = pp.PathData(mapping=path_mapping)\n", + "paths.append_walk(list(\"ab\"))\n", + "paths.append_walk(list(\"abd\"))\n", + "paths.append_walk(list(\"abec\"))\n", + "paths.append_walk(list(\"dbecb\"))\n", + "pp.plot(\n", + " pp.Graph.from_edge_index(paths.data.edge_index), node_label=paths.mapping.to_ids(paths.data.node_sequence).tolist()\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "528a61b3", + "metadata": {}, + "source": [ + "`pp.PathData` is the core class for working with paths in PathpyG. It allows us to gather a collection of paths that are all walks on the same underlying graph. All paths are stored using one `edge_index` internally. Thus, two nodes in a path that both correspond to the same node in the underlying graph will **not** share the same index in the path graph. Instead, each occurrence of a node in a path is represented by a separate node in the path graph. This allows us to represent paths that visit the same node multiple times without ambiguity. The information about the underlying graph is stored in the internal `PathData.data.node_sequence` tensor, similar to higher-order graphs. 
Let us look at the example above to illustrate this:" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "fe8c6e97", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The paths represented using edge index look as follows:\n", + "\tInternal [0, 1]: Underlying graph edge ['a', 'b']\n", + "\tInternal [2, 3]: Underlying graph edge ['a', 'b']\n", + "\tInternal [3, 4]: Underlying graph edge ['b', 'd']\n", + "\tInternal [5, 6]: Underlying graph edge ['a', 'b']\n", + "\tInternal [6, 7]: Underlying graph edge ['b', 'e']\n", + "\tInternal [7, 8]: Underlying graph edge ['e', 'c']\n", + "\tInternal [9, 10]: Underlying graph edge ['d', 'b']\n", + "\tInternal [10, 11]: Underlying graph edge ['b', 'e']\n", + "\tInternal [11, 12]: Underlying graph edge ['e', 'c']\n", + "\tInternal [12, 13]: Underlying graph edge ['c', 'b']\n" + ] + } + ], + "source": [ + "print(\"The paths represented using edge index look as follows:\")\n", + "for edge in paths.data.edge_index.t():\n", + " print(\n", + " f\"\\tInternal {edge.tolist()}: Underlying graph edge {paths.mapping.to_ids(paths.data.node_sequence[edge].view(-1)).tolist()}\"\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "c265f3f1", + "metadata": {}, + "source": [ + "`PathData` additionally stores some metadata about the paths so that you can easily access information about which nodes belong to which path. 
This includes\n", + "\n", + "- `dag_weight`: A tensor that stores the weight of each path (i.e., the number of times the path was observed).\n", + "- `dag_num_edges`: A tensor that stores the number of edges in each path.\n", + "- `dag_num_nodes`: A tensor that stores the number of nodes in each path.\n", + "\n", + "Using this information, you can, e.g., access the second path in the collection as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "ad15e43f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['a', 'b', 'd']" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "start = paths.data.dag_num_nodes[:1].sum().item()\n", + "end = start + paths.data.dag_num_nodes[1].item()\n", + "paths.mapping.to_ids(paths.data.node_sequence[start:end].view(-1)).tolist()" + ] + }, + { + "cell_type": "markdown", + "id": "8cca15ee", + "metadata": {}, + "source": [ + "Lastly, since we are using an `edge_index` internally, the `lift_order_edge_index` function works out-of-the-box for paths. A second-order representation of the paths can be created as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "993d5278", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "second_order_edge_index = pp.algorithms.lift_order.lift_order_edge_index(\n", + " edge_index=paths.data.edge_index, num_nodes=paths.data.num_nodes\n", + ")\n", + "second_order_paths = pp.Graph.from_edge_index(edge_index=second_order_edge_index)\n", + "pp.plot(\n", + " second_order_paths,\n", + " node_label=paths.mapping.to_ids(paths.data.node_sequence[paths.data.edge_index.t()].squeeze()).tolist(),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "de496c21", + "metadata": {}, + "source": [ + "## Multi-Order Models\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "8ac717b0", + "metadata": {}, + "source": [ + "With the concepts above, we can now create multi-order models using the `MultiOrderModel` class. This class allows us to create higher-order models of arbitrary order from a given base temporal graph or paths. Let's look at an example of creating a multi-order model from a temporal graph:" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "20835496", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 6/6 [00:00<00:00, 2312.82it/s]\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m_t = pp.MultiOrderModel.from_temporal_graph(t, max_order=2)\n", + "pp.plot(m_t.layers[2], node_label=m_t.layers[2].nodes)" + ] + }, + { + "cell_type": "markdown", + "id": "94f6db38", + "metadata": {}, + "source": [ + "We can see that the second-order graph created by the `MultiOrderModel` is different from the one created by the `lift_order_temporal` function directly. This is because the `MultiOrderModel` uses a higher-order DeBruijn graph representation. This representation merges higher-order nodes that correspond to the same path in the original graph. This means that temporal edges that appear in the event graph as different nodes will be merged into one node in the DeBruijn graph if they correspond to the same path in the original graph. This results in a more compact representation of the higher-order graph.\n", + "\n", + "\n", + "The same is true for paths. We can create a multi-order model from a collection of paths as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "55aef58f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "m_p = pp.MultiOrderModel.from_path_data(paths, max_order=2)\n", + "pp.plot(m_p.layers[2], node_label=m_p.layers[2].nodes)" + ] + }, + { + "cell_type": "markdown", + "id": "9f29b62d", + "metadata": {}, + "source": [ + "We can see that the higher-order node `a->b` which appeared thrice in the second-order graph created by the `lift_order_edge_index` function is now merged into one node in the DeBruijn graph representation." + ] + }, + { + "cell_type": "markdown", + "id": "90dcd56d", + "metadata": {}, + "source": [ + "### Internals of the `MultiOrderModel` Class\n", + "\n", + "Let us now take a closer look at how the `MultiOrderModel` class works under the hood. We already saw that the `MultiOrderModel` merges higher-order nodes from the line/event graph transformations. \n", + "\n", + "This is done in 3 distinct steps which we will go through using the paths example above:\n", + "1. **Order Lifting**: First, we create the higher-order edge index using the appropriate lift-order function (`lift_order_edge_index` or `lift_order_temporal`) depending on whether we are working with paths or temporal graphs in the first order and `lift_order_edge_index` for the second order and beyond regardless of the input type.\n", + "\n", + "
\n", + "

Note

\n", + "

\n", + " While we merge the higher-order nodes and aggregate the higher-order edges for each order, we need to use the original higher-order edge index to create the next order. This is because the transitivity of paths is only preserved in the original higher-order edge index.\n", + "

\n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "4e7463d5", + "metadata": {}, + "outputs": [], + "source": [ + "# We create the third-order representation of the paths\n", + "third_order_edge_index = pp.algorithms.lift_order.lift_order_edge_index(\n", + " edge_index=second_order_paths.data.edge_index, num_nodes=second_order_paths.n\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "2e5c2031", + "metadata": {}, + "source": [ + "2. **Update Node Sequences**: Next, we need to update the internal `node_sequence` tensor to reflect the new higher-order nodes. For this, we create a new `node_sequence` by concatenating the last node of the target node sequence to the source node sequence. This way, we create a new sequence that corresponds to the paths represented by the next order nodes." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "9fbb7d0f", + "metadata": {}, + "outputs": [], + "source": [ + "second_order_node_sequence = paths.data.node_sequence[paths.data.edge_index.t()].squeeze()\n", + "third_order_node_sequence = torch.cat([\n", + " second_order_node_sequence[second_order_paths.data.edge_index[0]],\n", + " second_order_node_sequence[second_order_paths.data.edge_index[1]][:, -1:]\n", + "], dim=1)" + ] + }, + { + "cell_type": "markdown", + "id": "32bd3968", + "metadata": {}, + "source": [ + "3. **Merge Higher-Order Nodes**: Finally, we need to merge the higher-order nodes that correspond to the same path in the original graph. For this, we create a unique mapping from the new `node_sequence` to unique indices. We can then use this mapping to update the higher-order edge index to reflect the merged nodes and then aggregate duplicate edges." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "7e7c2bdd", + "metadata": {}, + "outputs": [], + "source": [ + "third_order_paths = pp.algorithms.lift_order.aggregate_edge_index(\n", + " edge_index=third_order_edge_index, node_sequence=third_order_node_sequence\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "63b367c9", + "metadata": {}, + "source": [ + "After performing these steps, we can again visualize the resulting higher-order graph:" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "1aaec9ff", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "third_order_paths.mapping = pp.IndexMap([tuple(mapping.to_ids(v).tolist()) for v in third_order_paths.data.node_sequence])\n", + "pp.plot(third_order_paths, node_label=third_order_paths.nodes)" + ] + }, + { + "cell_type": "markdown", + "id": "1c7e4d0f", + "metadata": {}, + "source": [ + "These steps can be repeated for each order until we reach the desired maximum order for the `MultiOrderModel`.\n", + "\n", + "## Other Tensor-based Implementations\n", + "\n", + "The concepts from above can also be useful to implement other functionalities using tensor operations. \n", + "\n", + "### Longest Path Extraction\n", + "\n", + "One example is the extraction of all longest paths from a directed acyclic graph (DAG). This can be done by iterating through all nodes in the DAG in topological order at the same time. 
We provide an example implementation below:" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "06f0820e", + "metadata": {}, + "outputs": [], + "source": [ + "def get_all_paths_DAG(g: pp.Graph) -> dict:\n", + " \"\"\"Calculate all existing paths from any root node to any leaf node in a directed acyclic graph (DAG).\"\"\"\n", + " paths_of_length = {}\n", + " edge_index = g.data.edge_index.as_tensor()\n", + "\n", + " # calculate degrees\n", + " out_degree = degree(edge_index[0], num_nodes=g.n, dtype=torch.long)\n", + " in_degree = degree(edge_index[1], num_nodes=g.n, dtype=torch.long)\n", + "\n", + " # identify root nodes with in-degree zero\n", + " roots = torch.where(in_degree == 0)[0]\n", + " leafs = out_degree == 0\n", + "\n", + " # create path tensor that contains all paths that are not yet at a leaf node\n", + " paths = roots.unsqueeze(1)\n", + " # remove all paths that are already at a leaf node\n", + " paths_of_length[1] = paths[leafs[roots]].cpu().tolist()\n", + " # continue all paths that are not at a leaf node\n", + " paths = paths[~leafs[roots]]\n", + " # remember nodes that haven't been traversed yet\n", + " nodes = roots[~leafs[roots]]\n", + "\n", + " ptrs = cumsum(out_degree, dim=0)\n", + "\n", + " # count all longest paths in DAG\n", + " step = 1\n", + " while nodes.size(0) > 0 or step > g.n:\n", + " idx_repeat = torch.repeat_interleave(out_degree[nodes])\n", + " next_idx = torch.repeat_interleave(ptrs[nodes], out_degree[nodes])\n", + " idx_correction = (\n", + " torch.arange(next_idx.size(0), device=edge_index.device) - cumsum(out_degree[nodes], dim=0)[idx_repeat]\n", + " )\n", + " next_idx += idx_correction\n", + " next_nodes = edge_index[1][next_idx]\n", + " paths = torch.cat([paths[idx_repeat], next_nodes.unsqueeze(1)], dim=1)\n", + " paths_of_length[step] = paths[leafs[next_nodes]].tolist()\n", + " paths = paths[~leafs[next_nodes]]\n", + " nodes = next_nodes[~leafs[next_nodes]]\n", + " step += 1\n", + "\n", + " return 
paths_of_length" + ] + }, + { + "cell_type": "markdown", + "id": "7ec1f513", + "metadata": {}, + "source": [ + "The function above starts at all root nodes (nodes with no incoming edges) and iteratively traverses all possible next nodes while keeping track of all current paths. Whenever a path reaches a leaf node (a node with no outgoing edges), it is added to the list of longest paths and removed from the current paths. This continues until all paths have reached a leaf node.\n", + "\n", + "
\n", + "

Tip

\n", + "

\n", + " Getting the next nodes for all current paths is done using a similar indexing trick as in the lift_order_edge_index function. This allows us to efficiently get all next nodes for all current paths in one go using tensor operations.\n", + "

\n", + "
" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pathpyg", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/tutorial/temporal_betweenness.ipynb b/docs/tutorial/temporal_betweenness.ipynb deleted file mode 100644 index 07e4f0859..000000000 --- a/docs/tutorial/temporal_betweenness.ipynb +++ /dev/null @@ -1,1805 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pathpyG as pp\n", - "from torch_geometric.utils import cumsum, coalesce, degree, sort_edge_index\n", - "import torch\n", - "\n", - "from collections import deque\n", - "\n", - "from scipy.sparse.csgraph import bellman_ford, dijkstra\n", - "import numpy as np\n", - "from time import time\n", - "from collections import defaultdict\n", - "\n", - "\n", - "from tqdm import tqdm\n", - "\n", - "from pathpyG.utils import to_numpy" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Temporal Graph with 327 nodes, 8950 unique edges and 220378 events in [1385982080.0, 1386163840.0]\n", - "\n", - "Graph attributes\n", - "\tt\t\t -> torch.Size([220378])\n", - "\tsrc\t\t -> torch.Size([220378])\n", - "\tdst\t\t -> torch.Size([220378])\n", - "\n", - "579\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.10/site-packages/torch_geometric/data/storage.py:450: UserWarning: Unable to accurately infer 'num_nodes' from the attribute set '{'t', 'src', 'dst'}'. 
Please explicitly set 'num_nodes' as an attribute of 'data' to suppress this warning\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "t_sp = pp.TemporalGraph.from_csv('../data/sociopatterns_highschool_2013_train.tedges')\n", - "print(t_sp)\n", - "print(torch.unique(t_sp.data.t).size(0))" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "MultiOrderModel with max. order 2\n" - ] - } - ], - "source": [ - "m = pp.MultiOrderModel.from_temporal_graph(t_sp, delta=3600, max_order=2)\n", - "print(m)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Temporal Graph with 68 nodes, 506 unique edges and 1045 events in [899.0, 1796.0]\n", - "\n", - "Graph attributes\n", - "\tt\t\t -> torch.Size([1045])\n", - "\tsrc\t\t -> torch.Size([1045])\n", - "\tdst\t\t -> torch.Size([1045])\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.10/site-packages/torch_geometric/data/storage.py:450: UserWarning: Unable to accurately infer 'num_nodes' from the attribute set '{'t', 'src', 'dst'}'. Please explicitly set 'num_nodes' as an attribute of 'data' to suppress this warning\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "t_ants = pp.TemporalGraph.from_csv('../data/ants_2_2_val.tedges')\n", - "print(t_ants)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "MultiOrderModel with max. 
order 10\n", - "tensor([ 2., 1., 1., 3., 3., 1., 2., 1., 1., 1., 1., 2., 1., 1.,\n", - " 1., 1., 1., 1., 4., 1., 1., 1., 1., 5., 2., 6., 6., 1.,\n", - " 1., 3., 3., 1., 1., 3., 1., 2., 9., 3., 1., 1., 4., 1.,\n", - " 1., 1., 4., 2., 3., 2., 1., 4., 3., 2., 4., 1., 1., 3.,\n", - " 1., 2., 3., 1., 1., 4., 2., 9., 5., 1., 2., 4., 1., 2.,\n", - " 1., 1., 1., 3., 4., 1., 2., 3., 4., 1., 1., 3., 4., 4.,\n", - " 3., 4., 3., 1., 2., 3., 1., 2., 4., 2., 1., 1., 1., 1.,\n", - " 1., 1., 1., 2., 1., 3., 1., 4., 1., 1., 1., 1., 5., 2.,\n", - " 1., 1., 2., 1., 1., 2., 1., 4., 3., 4., 1., 2., 1., 1.,\n", - " 1., 2., 3., 1., 4., 1., 3., 3., 1., 1., 3., 1., 2., 1.,\n", - " 1., 4., 1., 1., 1., 1., 1., 1., 1., 3., 1., 3., 1., 1.,\n", - " 1., 1., 3., 2., 2., 2., 2., 1., 1., 2., 4., 3., 1., 2.,\n", - " 2., 3., 5., 5., 2., 1., 2., 1., 1., 5., 2., 3., 2., 1.,\n", - " 1., 2., 5., 1., 1., 2., 2., 6., 1., 2., 4., 1., 2., 5.,\n", - " 4., 5., 1., 1., 2., 1., 3., 6., 2., 1., 1., 1., 2., 2.,\n", - " 12., 7., 4., 1., 1., 1., 4., 2., 1., 1., 1., 3., 2., 1.,\n", - " 1., 1., 2., 2., 2., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", - " 1., 6., 1., 4., 2., 2., 1., 2., 2., 3., 3., 2., 4., 4.,\n", - " 1., 7., 5., 1., 2., 2., 1., 2., 4., 4., 5., 6., 4., 1.,\n", - " 2., 2., 1., 4., 3., 1., 1., 1., 5., 7., 1., 3., 5., 1.,\n", - " 1., 4., 1., 1., 2., 1., 6., 4., 2., 2., 1., 4., 1., 1.,\n", - " 5., 1., 2., 3., 4., 1., 1., 1., 2., 1., 2., 2., 3., 1.,\n", - " 2., 7., 1., 7., 1., 2., 1., 2., 1., 3., 1., 3., 3., 1.,\n", - " 3., 3., 1., 4., 3., 2., 2., 1., 1., 1., 3., 1., 1., 3.,\n", - " 1., 1., 3., 1., 6., 1., 3., 3., 1., 1., 1., 3., 1., 1.,\n", - " 1., 1., 1., 1., 1., 1., 2., 3., 5., 2., 4., 1., 3., 4.,\n", - " 3., 1., 2., 2., 2., 1., 2., 1., 3., 2., 2., 1., 5., 1.,\n", - " 3., 1., 2., 3., 2., 2., 1., 1., 1., 1., 2., 1., 1., 2.,\n", - " 1., 2., 3., 2., 2., 1., 1., 1., 1., 1., 1., 2., 3., 4.,\n", - " 1., 2., 1., 5., 1., 1., 2., 1., 1., 1., 1., 1., 2., 1.,\n", - " 2., 1., 2., 2., 2., 3., 1., 1., 1., 1., 1., 1., 2., 
2.,\n", - " 3., 2., 1., 2., 1., 1., 3., 1., 1., 1., 3., 1., 1., 1.,\n", - " 2., 1., 1., 3., 5., 1., 1., 1., 4., 1., 1., 1., 4., 5.,\n", - " 1., 2., 1., 1., 1., 1., 1., 2., 3., 1., 1., 1., 1., 1.,\n", - " 3., 2., 1., 1., 3., 2., 1., 2., 1., 1., 1., 1., 1., 1.,\n", - " 2., 1., 1., 1., 2., 1., 1., 4., 2., 1., 3., 3., 3., 3.,\n", - " 1., 1.])\n" - ] - } - ], - "source": [ - "m = pp.MultiOrderModel.from_temporal_graph(t_ants, delta=30, max_order=10)\n", - "print(m)\n", - "print(m.layers[1].data.edge_weight)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 579/579 [00:53<00:00, 10.79it/s]\n", - " 31%|███ | 102/327 [26:29<58:26, 15.59s/it] \n" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m bw \u001b[38;5;241m=\u001b[39m \u001b[43mpp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43malgorithms\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcentrality\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtemporal_betweenness_centrality\u001b[49m\u001b[43m(\u001b[49m\u001b[43mt_sp\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdelta\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m3600\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(bw)\n", - "File \u001b[0;32m/workspaces/pathpyG/src/pathpyG/algorithms/centrality.py:255\u001b[0m, in \u001b[0;36mtemporal_betweenness_centrality\u001b[0;34m(g, delta)\u001b[0m\n\u001b[1;32m 253\u001b[0m v \u001b[38;5;241m=\u001b[39m Q\u001b[38;5;241m.\u001b[39mpopleft()\n\u001b[1;32m 254\u001b[0m \u001b[38;5;66;03m# for all successor events 
within delta\u001b[39;00m\n\u001b[0;32m--> 255\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m w \u001b[38;5;129;01min\u001b[39;00m event_graph\u001b[38;5;241m.\u001b[39msuccessors(v):\n\u001b[1;32m 256\u001b[0m \n\u001b[1;32m 257\u001b[0m \u001b[38;5;66;03m# we dicover w for the first time\u001b[39;00m\n\u001b[1;32m 258\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m dist[w] \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 259\u001b[0m dist[w] \u001b[38;5;241m=\u001b[39m dist[v] \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m\n", - "File \u001b[0;32m/workspaces/pathpyG/src/pathpyG/core/Graph.py:295\u001b[0m, in \u001b[0;36mGraph.successors\u001b[0;34m(self, node)\u001b[0m\n\u001b[1;32m 283\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msuccessors\u001b[39m(\u001b[38;5;28mself\u001b[39m, node: Union[\u001b[38;5;28mint\u001b[39m, \u001b[38;5;28mstr\u001b[39m] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28mtuple\u001b[39m) \\\n\u001b[1;32m 284\u001b[0m \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Generator[Union[\u001b[38;5;28mint\u001b[39m, \u001b[38;5;28mstr\u001b[39m] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28mtuple\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;28;01mNone\u001b[39;00m]:\n\u001b[1;32m 285\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Return all successors of a given node.\u001b[39;00m\n\u001b[1;32m 286\u001b[0m \n\u001b[1;32m 287\u001b[0m \u001b[38;5;124;03m This method returns a generator object that yields all successors of a\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 292\u001b[0m \u001b[38;5;124;03m node: Index or string ID of node for which successors shall be returned.\u001b[39;00m\n\u001b[1;32m 293\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m \n\u001b[0;32m--> 295\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m j \u001b[38;5;129;01min\u001b[39;00m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_successors\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmapping\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_idx\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnode\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m: \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 296\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmapping\u001b[38;5;241m.\u001b[39mto_id(j\u001b[38;5;241m.\u001b[39mitem())\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "bw = pp.algorithms.centrality.temporal_betweenness_centrality(t_sp, delta=3600)\n", - "print(bw)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 594/594 [00:00<00:00, 4581.51it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "defaultdict(. 
at 0x7f9453f36cb0>, {0: 9.083333333333336, 7: 15.0, 10: 114.12687232787852, 1: 5.0, 6: 9.5, 12: 35.060389610389606, 22: 58.61201533244884, 49: 2.500000000000001, 2: 346.2515994755158, 27: 249.9532851737187, 4: 140.7880952380952, 42: 28.220839755359876, 28: 146.0366185070518, 65: 15.791666666666675, 29: 190.04444444444454, 24: 26.736796536796536, 5: 126.14722222222221, 3: 40.32903828197944, 35: 3.9999999999999996, 9: 7.5, 17: 78.59657287157289, 20: 68.21558441558443, 48: 27.916666666666664, 11: 99.25000000000001, 15: 53.07553688141922, 8: 5.8571428571428585, 26: 118.34098235785545, 37: -0.33333333333333304, 34: 58.120919946926136, 23: 49.347619047619034, 16: -0.1666666666666714, 32: 1.3333333333333286, 46: 2.40126050420168, 19: -1.8611111111111098, 36: 1.0000000000000004, 39: 2.0, 14: 25.33333333333333, 31: 1.5, 21: 0.5555555555555536, 33: 2.1666666666666705, 13: 0.0, 18: -5.19444444444445, 25: -1.9984014443252818e-15, 41: 14.888888888888886, 57: 1.0, 52: 13.833333333333332, 51: 2.916666666666661, 44: 2.999999999999999, 47: 1.5, 38: 0.0, 40: 0.0, 62: -7.444444444444445, 56: 0.0, 61: 0.0, 67: 2.220446049250313e-16})\n" - ] - } - ], - "source": [ - "bw = pp.algorithms.centrality.temporal_betweenness_centrality(t_ants, delta=30)\n", - "print(bw)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 17/17 [00:00<00:00, 4453.94it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created temporal event DAG with 38 nodes and 47 edges\n", - "{'a': 12.0, 'b': 16.0, 'c': 16.0, 'd': 14.666666666666666, 'e': 14.666666666666666, 'f': 24.0, 'g': 14.666666666666666, 'h': 28.0, 'i': 24.0}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "tedges = [('a', 'b', 1), ('b', 'c', 5), ('c', 'd', 9), ('c', 'e', 9),\n", - " ('c', 'f', 11), ('f', 'a', 13), ('a', 'g', 18), ('b', 'f', 
21),\n", - " ('a', 'g', 26), ('c', 'f', 27), ('h', 'f', 27), ('g', 'h', 28),\n", - " ('a', 'c', 30), ('a', 'b', 31), ('c', 'h', 32), ('f', 'h', 33),\n", - " ('b', 'i', 42), ('i', 'b', 42), ('c', 'i', 47), ('h', 'i', 50)]\n", - "t_long = pp.TemporalGraph.from_edge_list(tedges)\n", - "c = pp.algorithms.centrality.temporal_closeness_centrality(t_long, 5)\n", - "print(c)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/1157 [00:00 1\u001b[0m c \u001b[38;5;241m=\u001b[39m \u001b[43mpp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43malgorithms\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcentrality\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtemporal_closeness_centrality\u001b[49m\u001b[43m(\u001b[49m\u001b[43mt_sp\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m3600\u001b[39;49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/workspaces/pathpyG/src/pathpyG/algorithms/centrality.py:186\u001b[0m, in \u001b[0;36mtemporal_closeness_centrality\u001b[0;34m(g, delta)\u001b[0m\n\u001b[1;32m 172\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Calculates the closeness of nodes based on observed shortest paths\u001b[39;00m\n\u001b[1;32m 173\u001b[0m \u001b[38;5;124;03mbetween all nodes. Following the definition by M. A. 
Beauchamp 1965\u001b[39;00m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;124;03m(https://doi.org/10.1002/bs.3830100205).\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[38;5;124;03mdict\u001b[39;00m\n\u001b[1;32m 184\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 185\u001b[0m centralities \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m()\n\u001b[0;32m--> 186\u001b[0m dist, _ \u001b[38;5;241m=\u001b[39m \u001b[43mtemporal_shortest_paths\u001b[49m\u001b[43m(\u001b[49m\u001b[43mg\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdelta\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 187\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m g\u001b[38;5;241m.\u001b[39mnodes:\n\u001b[1;32m 188\u001b[0m centralities[x] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28msum\u001b[39m((g\u001b[38;5;241m.\u001b[39mN \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m) \u001b[38;5;241m/\u001b[39m dist[_np\u001b[38;5;241m.\u001b[39marange(g\u001b[38;5;241m.\u001b[39mN) \u001b[38;5;241m!=\u001b[39m g\u001b[38;5;241m.\u001b[39mmapping\u001b[38;5;241m.\u001b[39mto_idx(x), g\u001b[38;5;241m.\u001b[39mmapping\u001b[38;5;241m.\u001b[39mto_idx(x)])\n", - "File \u001b[0;32m/workspaces/pathpyG/src/pathpyG/algorithms/temporal.py:59\u001b[0m, in \u001b[0;36mtemporal_shortest_paths\u001b[0;34m(g, delta)\u001b[0m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mtemporal_shortest_paths\u001b[39m(g: TemporalGraph, delta: \u001b[38;5;28mint\u001b[39m):\n\u001b[1;32m 58\u001b[0m \u001b[38;5;66;03m# generate temporal event DAG\u001b[39;00m\n\u001b[0;32m---> 59\u001b[0m edge_index \u001b[38;5;241m=\u001b[39m \u001b[43mlift_order_temporal\u001b[49m\u001b[43m(\u001b[49m\u001b[43mg\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdelta\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 61\u001b[0m \u001b[38;5;66;03m# Add indices of g.n first-order nodes as source nodes of paths in 
augmented TEG\u001b[39;00m\n\u001b[1;32m 62\u001b[0m src_edges_src \u001b[38;5;241m=\u001b[39m g\u001b[38;5;241m.\u001b[39mdata\u001b[38;5;241m.\u001b[39medge_index[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m+\u001b[39m g\u001b[38;5;241m.\u001b[39mM\n", - "File \u001b[0;32m/workspaces/pathpyG/src/pathpyG/algorithms/temporal.py:51\u001b[0m, in \u001b[0;36mlift_order_temporal\u001b[0;34m(g, delta)\u001b[0m\n\u001b[1;32m 49\u001b[0m src_edges \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mindex_select(edge_index, dim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m, index\u001b[38;5;241m=\u001b[39mx[\u001b[38;5;241m0\u001b[39m])\n\u001b[1;32m 50\u001b[0m dst_edges \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mindex_select(edge_index, dim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m, index\u001b[38;5;241m=\u001b[39mx[\u001b[38;5;241m1\u001b[39m])\n\u001b[0;32m---> 51\u001b[0m ho_edge_index \u001b[38;5;241m=\u001b[39m x[:,\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwhere\u001b[49m\u001b[43m(\u001b[49m\u001b[43msrc_edges\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mdst_edges\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m[\u001b[38;5;241m0\u001b[39m]]\n\u001b[1;32m 52\u001b[0m second_order\u001b[38;5;241m.\u001b[39mappend(ho_edge_index)\n\u001b[1;32m 54\u001b[0m ho_index \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mcat(second_order, dim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m) \n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "c = pp.algorithms.centrality.temporal_closeness_centrality(t_sp, 3600)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": 
[ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'454': 41671.338095238054, '640': 42220.10476190471, '1': 31384.485714285773, '939': 43385.1666666666, '185': 46485.271428571396, '258': 42662.533333333326, '55': 41131.62698412695, '170': 43822.54999999994, '9': 55537.204761904715, '453': 42306.650000000016, '45': 54390.77142857138, '14': 42915.57142857141, '190': 51695.83809523803, '400': 37502.9365079365, '637': 40505.24126984126, '255': 43516.342857142794, '275': 53779.13333333323, '176': 52638.13333333328, '533': 36436.70952380954, '116': 46335.46666666662, '151': 46517.87142857134, '866': 51019.77619047604, '280': 40957.63095238092, '484': 38709.39523809525, '243': 42567.83809523808, '687': 43355.67142857139, '54': 44738.06666666662, '364': 43477.92142857135, '374': 41239.258730158705, '295': 37942.38968253968, '441': 44693.04761904753, '101': 46261.46984126977, '425': 40808.355411255354, '47': 34439.18333333336, '241': 40734.0880952381, '179': 51910.84285714278, '202': 47549.81666666654, '63': 48038.816666666564, '564': 38769.93809523808, '577': 34370.49047619052, '265': 42888.016666666605, '494': 40898.7698412698, '443': 42818.935714285675, '209': 33025.35238095244, '843': 39757.76984126982, '222': 31847.612698412748, '205': 43918.02142857136, '894': 39705.247619047565, '1359': 54767.9999999999, '1383': 37680.55476190476, '376': 53616.13333333324, '638': 41859.176190476144, '1238': 42806.904761904734, '1260': 38609.26666666665, '487': 46472.59365079362, '984': 45297.69999999997, '226': 42690.47619047611, '353': 46454.99999999993, '1342': 44222.67619047608, '1518': 59688.27142857133, '122': 48889.13333333324, '1067': 42078.83809523807, '1324': 41852.96666666664, '70': 44801.455555555476, '132': 47628.98809523799, '779': 42758.522222222164, '279': 42601.21428571425, '908': 36329.59523809524, '510': 29415.290476190487, '545': 44361.355555555485, '634': 49410.73333333322, '1332': 57435.76666666657, '1401': 48332.21666666656, '582': 
44306.50476190468, '605': 55865.5333333332, '252': 45385.02142857135, '3': 50445.78333333325, '884': 48080.34285714278, '339': 41450.89999999998, '691': 48490.171428571346, '869': 47797.033333333246, '72': 48899.99999999993, '954': 46840.76666666663, '160': 47314.2428571428, '117': 46867.93333333327, '346': 41061.76984126978, '111': 45678.421428571404, '124': 43084.39285714283, '276': 42882.58333333332, '621': 39236.16984126983, '39': 41416.11255411249, '871': 44988.38809523805, '694': 49321.4714285713, '778': 49137.51428571418, '513': 39579.116666666654, '236': 35266.21428571434, '883': 40068.50476190471, '1594': 46784.104761904644, '1828': 41670.949999999946, '1214': 48579.43333333323, '196': 47528.471428571334, '201': 44797.83333333326, '245': 53325.838095237974, '390': 44972.08809523807, '938': 44610.77142857142, '923': 37550.54285714288, '106': 58207.29999999987, '272': 55849.23333333323, '753': 43170.54999999998, '486': 35035.81507936507, '531': 41168.10793650794, '254': 48965.199999999975, '382': 44808.699999999975, '119': 45078.038095238015, '240': 46369.36031746025, '447': 45610.11666666662, '649': 42277.54285714276, '1204': 46224.08333333323, '466': 32600.38809523813, '841': 37388.707142857165, '199': 43105.73809523804, '674': 54227.77142857136, '857': 38546.395238095225, '945': 40750.388095238064, '1218': 47321.61666666656, '1512': 51657.80476190465, '653': 49398.44365079353, '502': 44192.40476190468, '587': 29551.511904761897, '626': 41647.27619047614, '420': 45023.704761904715, '504': 40343.27619047614, '311': 46414.63809523806, '267': 44915.038095238066, '177': 48121.22222222215, '480': 40095.671428571455, '771': 23901.880158730197, '312': 46976.59999999993, '612': 43187.238095238026, '450': 36801.13095238098, '89': 47408.54999999997, '322': 49383.56666666663, '520': 33164.678571428616, '15': 45297.70000000002, '211': 47992.633333333324, '366': 37022.86269841269, '227': 45639.99999999996, '440': 48286.033333333275, '41': 47324.33333333326, '388': 
47221.100000000006, '219': 42089.70476190474, '658': 48671.79999999991, '220': 42779.73809523808, '576': 39435.90952380952, '642': 46664.57142857137, '391': 37692.58571428572, '777': 35597.647619047646, '20': 39938.88095238094, '958': 50489.638095238006, '103': 29371.176984127007, '61': 47813.333333333234, '274': 36857.53412698415, '147': 51953.53333333325, '277': 32935.18492063496, '702': 30786.81904761907, '242': 44846.73333333334, '38': 52752.233333333206, '438': 37185.73333333332, '387': 46914.11666666656, '1295': 49073.866666666545, '1412': 52219.76666666659, '492': 47707.3833333333, '1345': 46802.733333333235, '1212': 51198.29999999987, '28': 45031.466666666616, '327': 45873.63333333326, '1216': 50562.59999999988, '372': 52590.526984126904, '720': 45705.19999999992, '1784': 41720.23809523804, '27': 46151.50952380945, '171': 34138.926984126985, '1336': 43007.54999999995, '1423': 52198.03333333327, '1366': 37297.11666666668, '407': 48333.3809523809, '1320': 46319.16666666659, '1805': 47327.438095237994, '1237': 41225.8047619047, '974': 29733.65793650794, '464': 45232.49999999995, '477': 42176.24999999997, '763': 36072.546825396836, '1894': 45776.221428571334, '1201': 43844.28333333326, '1228': 50051.86666666656, '786': 37304.61984126989, '886': 43075.46666666661, '797': 47585.13333333329, '959': 34756.25714285715, '1485': 41606.13809523808, '210': 46916.83333333329, '4': 42904.70476190475, '790': 30436.757142857183, '285': 53224.9333333333, '544': 43005.609523809464, '333': 50127.93333333329, '622': 35464.14285714289, '429': 45130.042857142784, '46': 39267.476190476154, '343': 37518.719047619066, '867': 36621.44285714288, '615': 44610.7714285714, '977': 42285.30476190473, '90': 46425.50476190473, '269': 44197.83809523808, '603': 46936.23809523808, '335': 43720.48095238093, '765': 43121.64999999997, '257': 41894.10476190469, '268': 39532.9333333333, '214': 43410.00476190473, '491': 43702.240476190455, '181': 39791.01666666664, '650': 40985.18571428571, '85': 
46765.993650793615, '325': 43602.49999999998, '941': 39712.62142857144, '356': 40737.58095238093, '744': 37414.70952380953, '1543': 46257.07142857135, '145': 43442.992857142846, '173': 41173.02380952376, '909': 33217.84761904766, '79': 48995.083333333285, '854': 38707.45476190475, '527': 43498.49047619046, '475': 39621.94826839824, '471': 43425.269841269816, '681': 27453.98650793652, '465': 36373.57936507936, '446': 35151.33809523811, '58': 49894.29999999995, '32': 42986.204761904744, '991': 44181.53809523805, '725': 39806.15238095237, '859': 40474.45238095238, '798': 41875.088095238054, '256': 49459.63333333331, '306': 39031.514285714235, '131': 41163.70952380949, '677': 38990.11746031743, '960': 34713.56666666667, '769': 37148.47619047621, '248': 38814.18095238095, '125': 46992.89999999995, '917': 38007.71904761903, '120': 49546.566666666615, '115': 45982.68809523806, '1519': 34333.75079365083, '970': 40702.264285714264, '213': 46737.6744588744, '424': 48813.06666666662, '428': 42515.5746031746, '488': 47845.93333333331, '498': 44651.5214285714, '809': 40044.96031746028, '92': 42035.37142857144, '845': 43173.26666666668, '655': 34575.92222222225, '156': 47349.171428571295, '413': 44792.400000000016, '21': 38381.0666666667, '1232': 35705.92619047622, '290': 40728.654761904734, '71': 41315.0666666667, '65': 44592.919047619034, '791': 34942.5428571429, '874': 43733.67619047614, '448': 45072.60476190477, '496': 49872.56666666665, '921': 36919.888095238115, '497': 35312.785714285754, '627': 36676.94047619046, '194': 44672.866666666676, '927': 43220.614285714255, '232': 52527.138095238035, '172': 45822.40476190471, '165': 26068.48650793651, '87': 42138.604761904746, '253': 39907.833333333365, '706': 47116.05555555552, '134': 48729.23809523806, '624': 38011.988095238106, '548': 33913.83174603179, '893': 44520.73333333333, '920': 41900.31428571425, '836': 44773.91255411253, '80': 47968.5714285714, '743': 40576.13333333335, '826': 46025.76666666668, '184': 
49569.076190476095, '601': 51230.89999999998, '1870': 34552.11904761907, '200': 45851.899999999994, '784': 39126.209523809506, '751': 34508.53477633482, '434': 23954.014285714296, '979': 35987.0365079365, '647': 36782.50238095241, '246': 36560.12380952383, '489': 43239.24285714284, '998': 42595.00476190477, '435': 38087.278571428564, '468': 21858.55058830061, '48': 42347.78809523811, '1339': 44040.27142857138, '159': 40869.921428571426, '149': 25296.729725829755, '1819': 39140.180952380964, '525': 45662.50952380946, '882': 28492.9174603175, '34': 43807.543650793574, '239': 31866.888095238148, '62': 20550.960028860038, '452': 43745.06031746028, '445': 28096.385447885485}\n" - ] - } - ], - "source": [ - "print(c)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "def temporal_event_graph(g: pp.TemporalGraph, delta=1): \n", - "\n", - " print(g.data.edge_index)\n", - "\n", - " # generate temporal event DAG\n", - " edge_index = pp.algorithms.lift_order_temporal(g, delta)\n", - " print(edge_index)\n", - "\n", - " # Add indices of first-order nodes as src and dst of paths in augmented\n", - " # temporal event DAG\n", - " src_edges_src = g.data.edge_index[0] + g.data.edge_index.size(1)\n", - " print(src_edges_src)\n", - " src_edges_dst = torch.arange(0, g.data.edge_index.size(1))\n", - "\n", - " dst_edges_src = torch.arange(0, g.data.edge_index.size(1))\n", - " dst_edges_dst = g.data.edge_index[1] + g.data.edge_index.size(1) + g.n\n", - " print(dst_edges_dst)\n", - "\n", - " # add edges from source to edges and from edges to destinations\n", - " src_edges = torch.stack([src_edges_src, src_edges_dst])\n", - " dst_edges = torch.stack([dst_edges_src, dst_edges_dst])\n", - " edge_index = torch.cat([edge_index, src_edges, dst_edges], dim=1)\n", - "\n", - " # create sparse scipy matrix\n", - " event_graph = pp.Graph.from_edge_index(edge_index) \n", - " return event_graph" - ] - }, - { - "cell_type": "code", - 
"execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 17/17 [00:00<00:00, 3068.65it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "tensor([[ 0, 1, 1, 4, 5, 8, 12],\n", - " [ 1, 2, 3, 5, 6, 11, 14]])\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "eg = pp.algorithms.lift_order_temporal(t_long, delta=5)\n", - "print(eg)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "tensor([[0, 1, 2, 2, 2, 5, 0, 1, 0, 7, 2, 6, 0, 0, 2, 5, 1, 8, 2, 7],\n", - " [1, 2, 3, 4, 5, 0, 6, 5, 6, 5, 5, 7, 2, 1, 7, 7, 8, 1, 8, 8]])\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 17/17 [00:00<00:00, 3405.44it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "tensor([[ 0, 1, 1, 4, 5, 8, 12],\n", - " [ 1, 2, 3, 5, 6, 11, 14]])\n", - "tensor([20, 21, 22, 22, 22, 25, 20, 21, 20, 27, 22, 26, 20, 20, 22, 25, 21, 28,\n", - " 22, 27])\n", - "tensor([30, 31, 32, 33, 34, 29, 35, 34, 35, 34, 34, 36, 31, 30, 36, 36, 37, 30,\n", - " 37, 37])\n", - "Directed graph with 38 nodes and 47 edges\n", - "\n", - "Graph attributes\n", - "\tnum_nodes\t\t\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "print(temporal_event_graph(t_long, delta=5))" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "def betweenness_brandes(g: pp.Graph, sources = None):\n", - " bw = defaultdict(lambda: 0.0)\n", - "\n", - " if sources == None:\n", - " sources = [v for v in g.nodes]\n", - "\n", - " for s in tqdm(sources):\n", - " S = list()\n", - " P = defaultdict(list)\n", - "\n", - " sigma = defaultdict(lambda: 0) \n", - " sigma[s] = 1\n", - 
"\n", - " d = defaultdict(lambda: -1) \n", - " d[s] = 0\n", - "\n", - " Q = [s]\n", - " while Q:\n", - " v = Q.pop(0)\n", - " S.append(v)\n", - " for w in g.successors(v):\n", - " if d[w] < 0:\n", - " Q.append(w)\n", - " d[w] = d[v] + 1\n", - " if d[w] == d[v] + 1:\n", - " # we found shortest path from s via v to w\n", - " sigma[w] = sigma[w] + sigma[v]\n", - " P[w].append(v)\n", - " delta = defaultdict(lambda: 0.0)\n", - " while S:\n", - " w = S.pop()\n", - " for v in P[w]:\n", - " delta[v] = delta[v] + sigma[v]/sigma[w] * (1 + delta[w])\n", - " if v != w:\n", - " bw[w] = bw[w] + delta[w]\n", - " return bw" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "5it [00:00, 3583.04it/s]\n" - ] - }, - { - "data": { - "text/plain": [ - "defaultdict(.()>,\n", - " {'e': 0.0, 'd': 0.0, 'c': 4.0})" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "g = pp.Graph.from_edge_list([('a', 'c'), ('b', 'c'), ('c', 'd'), ('c', 'e')])\n", - "betweenness_brandes(g, g.nodes)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "def temporal_event_graph(g: pp.TemporalGraph, delta = 1):\n", - " # generate temporal event DAG\n", - " edge_index = pp.algorithms.lift_order_temporal(g, delta) \n", - "\n", - " # Add indices of first-order nodes as src and dst of paths in augmented\n", - " # temporal event DAG\n", - " print(g.data.edge_index)\n", - " edges = [f'{v}-{w}-{t}' for v, w, t in g.temporal_edges]\n", - " print(edges)\n", - " src_edges_src = g.data.edge_index[0] + g.m\n", - " src_edges_dst = torch.arange(0, g.data.edge_index.size(1))\n", - "\n", - " src = [f'{v}-src' for v in g.nodes]\n", - " tgt = [f'{v}-tgt' for v in g.nodes]\n", - "\n", - " dst_edges_src = torch.arange(0, g.data.edge_index.size(1))\n", - " dst_edges_dst = g.data.edge_index[1] + g.m + g.n\n", 
- "\n", - " # add edges from source to edges and from edges to destinations\n", - " src_edges = torch.stack([src_edges_src, src_edges_dst])\n", - " dst_edges = torch.stack([dst_edges_src, dst_edges_dst])\n", - " edge_index = torch.cat([edge_index, src_edges, dst_edges], dim=1)\n", - "\n", - " # create sparse scipy matrix\n", - " mapping = pp.IndexMap(edges + src + tgt)\n", - " event_graph = pp.Graph.from_edge_index(edge_index, mapping) \n", - " return event_graph" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "# def fo_node(v, g, src_indices) -> int:\n", - "# # if v is one of the source nodes, return corresponding first-order node\n", - "\n", - " \n", - "# def fo_src(v, g, src_indices):\n", - "# if v in src_indices:\n", - "# return v - g.m\n", - "# else:\n", - "# return g.data.edge_index[0,v].item()\n", - "\n", - "def temporal_betweenness_brandes(g: pp.TemporalGraph, delta=1):\n", - "\n", - " print(g.data.edge_index)\n", - "\n", - " # generate temporal event DAG\n", - " edge_index = pp.algorithms.lift_order_temporal(g, delta)\n", - "\n", - " # Add indices of first-order nodes as src and dst of paths in augmented\n", - " # temporal event DAG\n", - " # print(g.data.edge_index)\n", - " #edges = [f'{v}-{w}-{t}' for v, w, t in g.temporal_edges]\n", - "# print(edges)\n", - " src_edges_src = g.data.edge_index[0] + g.m\n", - " src_edges_dst = torch.arange(0, g.data.edge_index.size(1))\n", - "\n", - " #src = [f'{v}-src' for v in g.nodes]\n", - " #tgt = [f'{v}-tgt' for v in g.nodes]\n", - "\n", - " # dst_edges_src = torch.arange(0, g.data.edge_index.size(1))\n", - " # dst_edges_dst = g.data.edge_index[1] + g.m + g.n\n", - "\n", - " # add edges from source to edges and from edges to destinations\n", - " src_edges = torch.stack([src_edges_src, src_edges_dst])\n", - " # dst_edges = torch.stack([dst_edges_src, dst_edges_dst])\n", - " edge_index = torch.cat([edge_index, src_edges], dim=1)\n", - "\n", - " # create 
sparse scipy matrix\n", - " #mapping = pp.IndexMap(edges + src + tgt)\n", - " event_graph = pp.Graph.from_edge_index(edge_index, num_nodes=g.m+g.n)\n", - " print(event_graph)\n", - " #pp.plot(event_graph, node_label=[i for i in event_graph.nodes])\n", - " #print(edge_index)\n", - "\n", - " # # sources = first-order source nodes in temporal event graph\n", - " src_indices = set(torch.unique(src_edges_src).tolist())\n", - " #print(src_edges_src-g.m)\n", - " # tgt_indices = set(torch.unique(dst_edges_dst).tolist())\n", - " #print(src_indices)\n", - " # print(tgt_indices)\n", - "\n", - " e_i = to_numpy(g.data.edge_index)\n", - "\n", - " fo_nodes = dict()\n", - " for v in event_graph.nodes:\n", - " if v in src_indices:\n", - " fo_nodes[v] = v - g.m\n", - " else: # return first-order target node otherwise\n", - " fo_nodes[v] = e_i[1,v]\n", - "\n", - " # start from indegree zero nodes\n", - " #roots = torch.where((degree(g.data.edge_index[1])==0))[0]\n", - " #dist, _ = pp.algorithms.temporal_shortest_paths(g, delta=delta)\n", - " #print(dist)\n", - " bw = defaultdict(lambda: 0.0)\n", - "\n", - " # for all first-order nodes\n", - " for s in tqdm(src_indices):\n", - " t_start = time()\n", - " print('source', g.mapping.to_id(fo_nodes[s]))\n", - "\n", - " # for any given s, d[v] is the shortest path distance from s to v\n", - " # Note that here we calculate topological distances from sources to events (i.e. 
time-stamped edges)\n", - " delta_ = defaultdict(lambda: 0.0)\n", - "\n", - " # for any given s, sigma[v] counts shortest paths from s to v\n", - " sigma = defaultdict(lambda: 0.0) \n", - " sigma[s] = 1\n", - "\n", - " sigma_fo = defaultdict(lambda: 0.0) \n", - " sigma_fo[fo_nodes[s]] = 1\n", - "\n", - " dist = defaultdict(lambda: -1)\n", - " dist[s] = 0\n", - "\n", - " dist_fo = defaultdict(lambda: -1)\n", - " dist_fo[fo_nodes[s]] = 0\n", - " \n", - " # for any given s, P[v] is the set of predecessors of v on shortest paths from s\n", - " P = defaultdict(list)\n", - "\n", - " # Q is a queue, so we append at the end and pop from the start\n", - " Q = deque() \n", - " Q.append(s)\n", - "\n", - " # S is a stack, so we append at the end and pop from the end\n", - " S = list()\n", - " \n", - " # dijkstra with path counting\n", - " while Q:\n", - " v = Q.popleft()\n", - " #print('popped ', v)\n", - " # for all successor events within delta\n", - " for w in event_graph.successors(v):\n", - "\n", - " # we dicover w for the first time\n", - " if dist[w] == -1:\n", - " dist[w] = dist[v] + 1\n", - " if dist_fo[fo_nodes[w]] == -1:\n", - " dist_fo[fo_nodes[w]] = dist[v] + 1\n", - " S.append(w)\n", - " Q.append(w)\n", - " # we found a shortest path to event w via event v\n", - " if dist[w] == dist[v] + 1:\n", - " sigma[w] += sigma[w] + sigma[v]\n", - " P[w].append(v)\n", - " # we found a shortest path to first-order node of event w\n", - " if dist[w] == dist_fo[fo_nodes[w]]:\n", - " sigma_fo[fo_nodes[w]] += sigma[v]\n", - " #print('S =', S)\n", - " #print('P', P)\n", - " #print('d', dist)\n", - " print('finished BFS ', (time()- t_start))\n", - " c = 0\n", - " for i in dist_fo:\n", - " if dist_fo[i] >=0:\n", - " c+= 1\n", - " bw[fo_nodes[s]] = bw[fo_nodes[s]] - c + 1\n", - " #print(bw[fo_node(s, g, src_indices)])\n", - "\n", - " # We computed top. 
shortest path distances and shortest path counts from (first-order) source nodes to all temporal events\n", - " # we must now project this to the first-order target nodes of those events\n", - " while S:\n", - " w = S.pop()\n", - " # work backwards through paths to all targets and sum delta and sigma\n", - "\n", - " # check whether shortest path from s to event w is also shortest path to first-order target of w\n", - " # if d[w] == d_fo[w_fo]: \n", - " if dist[w] == dist_fo[fo_nodes[w]]:\n", - " # v_fo = fo_tgt(v, g, src_indices, tgt_indices)\n", - " delta_[w] += (sigma[w]/sigma_fo[fo_nodes[w]])\n", - " for v in P[w]:\n", - " delta_[v] += (sigma[v]/sigma[w]) * delta_[w]\n", - " bw[fo_nodes[v]] += delta_[w] * (sigma[v]/sigma[w])\n", - " bw_id = defaultdict(lambda: 0.0)\n", - " for idx in bw:\n", - " bw_id[g.mapping.to_id(idx)] = bw[idx]\n", - " return bw_id\n" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "tensor([[ 43, 43, 44, ..., 202, 162, 76],\n", - " [ 45, 44, 43, ..., 262, 76, 162]])\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/579 [00:00\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/327 [00:00 1\u001b[0m \u001b[43mtemporal_betweenness_brandes\u001b[49m\u001b[43m(\u001b[49m\u001b[43mt_sp\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdelta\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m3600\u001b[39;49m\u001b[43m)\u001b[49m\n", - "Cell \u001b[0;32mIn[25], line 103\u001b[0m, in \u001b[0;36mtemporal_betweenness_brandes\u001b[0;34m(g, delta)\u001b[0m\n\u001b[1;32m 100\u001b[0m v \u001b[38;5;241m=\u001b[39m Q\u001b[38;5;241m.\u001b[39mpopleft()\n\u001b[1;32m 101\u001b[0m \u001b[38;5;66;03m#print('popped ', v)\u001b[39;00m\n\u001b[1;32m 102\u001b[0m \u001b[38;5;66;03m# for all successor events within delta\u001b[39;00m\n\u001b[0;32m--> 
103\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m w \u001b[38;5;129;01min\u001b[39;00m event_graph\u001b[38;5;241m.\u001b[39msuccessors(v):\n\u001b[1;32m 104\u001b[0m \n\u001b[1;32m 105\u001b[0m \u001b[38;5;66;03m# we dicover w for the first time\u001b[39;00m\n\u001b[1;32m 106\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m dist[w] \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 107\u001b[0m dist[w] \u001b[38;5;241m=\u001b[39m dist[v] \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m\n", - "File \u001b[0;32m/workspaces/pathpyG/src/pathpyG/core/Graph.py:296\u001b[0m, in \u001b[0;36mGraph.successors\u001b[0;34m(self, node)\u001b[0m\n\u001b[1;32m 285\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Return all successors of a given node.\u001b[39;00m\n\u001b[1;32m 286\u001b[0m \n\u001b[1;32m 287\u001b[0m \u001b[38;5;124;03mThis method returns a generator object that yields all successors of a\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 292\u001b[0m \u001b[38;5;124;03m node: Index or string ID of node for which successors shall be returned.\u001b[39;00m\n\u001b[1;32m 293\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m \n\u001b[1;32m 295\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m j \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_successors(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmapping\u001b[38;5;241m.\u001b[39mto_idx(node)): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[0;32m--> 296\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmapping\u001b[38;5;241m.\u001b[39mto_id(\u001b[43mj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mitem\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m)\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "temporal_betweenness_brandes(t_sp, delta=3600)" - ] - }, - { - "cell_type": "code", - "execution_count": 61, 
- "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "tensor([[0, 1, 1, 2, 3],\n", - " [1, 2, 3, 4, 4]])\n", - "a -> 0\n", - "b -> 1\n", - "c -> 2\n", - "d -> 3\n", - "e -> 4\n", - "\n", - "tensor([[0, 1, 1, 2, 3],\n", - " [1, 2, 3, 4, 4]])\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 3/3 [00:00<00:00, 2574.77it/s]\n", - "100%|██████████| 4/4 [00:00<00:00, 993.62it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "source d\n", - "source a\n", - "source b\n", - "source c\n", - "a 0.0\n", - "b 3.0\n", - "c 1.0\n", - "d 1.0\n", - "e 0.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "# Example with two shortest time-respecting paths from a to e via c or d\n", - "t = pp.TemporalGraph.from_edge_list([('a', 'b', 1), ('b', 'c', 2), ('b', 'd', 2), ('c', 'e', 3), ('d', 'e', 3)])\n", - "print(t.data.edge_index)\n", - "print(t.mapping)\n", - "\n", - "bw_1 = temporal_betweenness_brandes(t, delta=1)\n", - "\n", - "for v in t.nodes:\n", - " print(v, bw_1[t.mapping.to_idx(v)])" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "tensor([[0, 1, 2, 2, 2, 5, 0, 1, 0, 7, 2, 6, 0, 0, 2, 5, 1, 8, 2, 7],\n", - " [1, 2, 3, 4, 5, 0, 6, 5, 6, 5, 5, 7, 2, 1, 7, 7, 8, 1, 8, 8]])\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 17/17 [00:00<00:00, 3854.64it/s]\n", - "100%|██████████| 7/7 [00:00<00:00, 1022.18it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "source a\n", - "source b\n", - "source c\n", - "source f\n", - "source g\n", - "source h\n", - "source i\n", - "a 2.0\n", - "b 2.0\n", - "c 4.5\n", - "d 0.0\n", - "e 0.0\n", - "f 2.0\n", - "g 0.5\n", - "h 0.0\n", - "i 0.0\n" - ] - }, - { - "name": "stderr", - 
"output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "bw = temporal_betweenness_brandes(t_long, 5)\n", - "for v in t_long.nodes:\n", - " print(v, bw[t_long.mapping.to_idx(v)])" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "tensor([[0, 1, 2, 1],\n", - " [1, 2, 3, 3]])\n", - "a -> 0\n", - "b -> 1\n", - "c -> 2\n", - "d -> 3\n", - "\n", - "tensor([[0, 1, 2, 1],\n", - " [1, 2, 3, 3]])\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 3/3 [00:00<00:00, 3437.95it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{4, 5, 6}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 3/3 [00:00<00:00, 875.52it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "source a\n", - "source b\n", - "source c\n", - "tensor([[0, 1, 2, 1],\n", - " [1, 2, 3, 3]])\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 3/3 [00:00<00:00, 2423.05it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{4, 5, 6}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 3/3 [00:00<00:00, 1125.28it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "source a\n", - "source b\n", - "source c\n", - "a 0.0 -1.0\n", - "b 1.0 1.0\n", - "c 1.0 0.0\n", - "d 0.0 0.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "t = pp.TemporalGraph.from_edge_list([('a', 'b', 1), ('b', 'c', 2), ('c', 'd', 3), ('b', 'd', 3)])\n", - "print(t.data.edge_index)\n", - "print(t.mapping)\n", - "\n", - "bw_1 = temporal_betweenness_brandes(t, delta=1)\n", - "bw_2 = temporal_betweenness_brandes(t, delta=2)\n", - "\n", - "for v in t.nodes:\n", - " print(v, 
bw_1[t.mapping.to_idx(v)], bw_2[t.mapping.to_idx(v)])" - ] - }, - { - "cell_type": "code", - "execution_count": 93, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 3/3 [00:00<00:00, 2388.10it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created temporal event DAG with 12 nodes and 11 edges\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "data": { - "text/plain": [ - "(array([[ 0., 1., 2., 2.],\n", - " [inf, 0., 1., 1.],\n", - " [inf, inf, 0., 1.],\n", - " [inf, inf, inf, 0.]]),\n", - " array([[-9999, 0, 1, 3],\n", - " [-9999, -9999, 1, 3],\n", - " [-9999, -9999, -9999, 2],\n", - " [-9999, -9999, -9999, -9999]], dtype=int32))" - ] - }, - "execution_count": 93, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pp.algorithms.temporal_shortest_paths(t, delta=2)" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 3/3 [00:00<00:00, 3233.02it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created temporal event DAG with 12 nodes and 10 edges\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "data": { - "text/plain": [ - "(array([[ 0., 1., 2., 3.],\n", - " [inf, 0., 1., 1.],\n", - " [inf, inf, 0., 1.],\n", - " [inf, inf, inf, 0.]]),\n", - " array([[-9999, 0, 1, 2],\n", - " [-9999, -9999, 1, 3],\n", - " [-9999, -9999, -9999, 2],\n", - " [-9999, -9999, -9999, -9999]], dtype=int32))" - ] - }, - "execution_count": 57, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pp.algorithms.temporal_shortest_paths(t, delta=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 313, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ 
- "a -> 0\n", - "c -> 1\n", - "d -> 2\n", - "b -> 3\n", - "e -> 4\n", - "\n", - "tensor([[0, 1, 3, 1],\n", - " [1, 2, 1, 4]])\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 4/4 [00:00<00:00, 3559.77it/s]\n", - "100%|██████████| 3/3 [00:00<00:00, 2406.37it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "10\n", - "[0]\n", - "11\n", - "[1]\n", - "13\n", - "[]\n", - "10\n", - "[]\n", - "11\n", - "[1]\n", - "13\n", - "[3]\n", - "10\n", - "[2]\n", - "11\n", - "[]\n", - "13\n", - "[3]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "data": { - "text/plain": [ - "defaultdict(.()>,\n", - " {1: 0.0, 2: 0.0, 4: 0.0})" - ] - }, - "execution_count": 313, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "t = pp.TemporalGraph.from_edge_list([('a', 'c', 1), ('c', 'd', 2), ('b', 'c', 3), ('c', 'e', 4)])\n", - "print(t.mapping)\n", - "\n", - "temporal_betweenness_brandes(t, delta=1)\n", - "# 4 = a_src\n", - "# 5 = c_src\n", - "# 7 = b_src\n", - "\n", - "# 10 = c_tgt\n", - "# 11 = d_tgt\n", - "# 13 = e_tgt" - ] - }, - { - "cell_type": "code", - "execution_count": 253, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "a -> 0\n", - "c -> 1\n", - "d -> 2\n", - "b -> 3\n", - "e -> 4\n", - "\n", - "tensor([[0, 0, 1, 1, 3, 3, 1, 1],\n", - " [1, 1, 2, 4, 1, 1, 4, 2]])\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 4/4 [00:00<00:00, 2609.21it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "tensor([[0, 0, 1, 1, 4, 4, 5, 5],\n", - " [2, 3, 2, 3, 6, 7, 6, 7]])\n", - "{8, 9, 11}\n", - "{17, 14, 15}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "8it [00:00, 3661.15it/s]\n" - ] - }, - { - "data": { - "text/plain": [ - "defaultdict(.()>,\n", - " {4: 0.0, 2: 0.0})" - ] - }, - 
"execution_count": 253, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "t = pp.TemporalGraph.from_edge_list([('a', 'c', 1), ('c', 'd', 2), ('b', 'c', 3), ('c', 'e', 4),('a', 'c', 1), ('c', 'e', 2), ('b', 'c', 3), ('c', 'd', 4)])\n", - "print(t.mapping)\n", - "\n", - "temporal_betweenness_brandes(t, delta=1)\n", - "# 4 = a_src\n", - "# 5 = c_src\n", - "# 7 = b_src\n", - "\n", - "# 10 = c_tgt\n", - "# 11 = d_tgt\n", - "# 13 = e_tgt" - ] - }, - { - "cell_type": "code", - "execution_count": 138, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 4/4 [00:00<00:00, 3337.42it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "tensor([[0, 1, 3, 1],\n", - " [1, 2, 1, 4]])\n", - "['a-c-1.0', 'c-d-2.0', 'b-c-3.0', 'c-e-4.0']\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - "\n", - "
\n", - "\n", - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 138, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "event_graph = temporal_event_graph(t, delta=1)\n", - "pp.plot(event_graph, node_label = [v for v in event_graph.nodes])" - ] - }, - { - "cell_type": "code", - "execution_count": 179, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 3/3 [00:00<00:00, 2910.69it/s]\n" - ] - }, - { - "data": { - "text/plain": [ - "defaultdict(.()>,\n", - " {'d-tgt': 0.0,\n", - " 'c-tgt': 0.0,\n", - " 'c-d-2.0': 2.0,\n", - " 'a-c-1.0': 3.0,\n", - " 'e-tgt': 0.0,\n", - " 'c-e-4.0': 2.0,\n", - " 'b-c-3.0': 3.0})" - ] - }, - "execution_count": 179, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "betweenness_brandes(event_graph, sources=['a-src', 'b-src', 'c-src'])" - ] - }, - { - "cell_type": "code", - "execution_count": 182, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "tensor([[0, 1, 3, 1],\n", - " [1, 2, 1, 4]])\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 4/4 [00:00<00:00, 2712.57it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "tensor([[ 0, 2, 4, 5, 7, 5, 0, 1, 2, 3],\n", - " [ 1, 3, 0, 1, 2, 3, 10, 11, 10, 13]])\n", - "{4, 5, 7}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/3 [00:00 1\u001b[0m \u001b[43mtemporal_betweenness_brandes\u001b[49m\u001b[43m(\u001b[49m\u001b[43mt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdelta\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\n", - "Cell \u001b[0;32mIn[180], line 56\u001b[0m, in \u001b[0;36mtemporal_betweenness_brandes\u001b[0;34m(g, delta)\u001b[0m\n\u001b[1;32m 53\u001b[0m 
bw \u001b[38;5;241m=\u001b[39m defaultdict(\u001b[38;5;28;01mlambda\u001b[39;00m: \u001b[38;5;241m0.0\u001b[39m)\n\u001b[1;32m 54\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m s \u001b[38;5;129;01min\u001b[39;00m tqdm(src_indices):\n\u001b[0;32m---> 56\u001b[0m fo_src \u001b[38;5;241m=\u001b[39m \u001b[43mfo_src\u001b[49m(s, g, src_indices, tgt_indices) \n\u001b[1;32m 57\u001b[0m dist_eg \u001b[38;5;241m=\u001b[39m defaultdict(\u001b[38;5;28;01mlambda\u001b[39;00m: \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 58\u001b[0m dist_eg[s] \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n", - "\u001b[0;31mUnboundLocalError\u001b[0m: local variable 'fo_src' referenced before assignment" - ] - } - ], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 75, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 1157/1157 [01:15<00:00, 15.43it/s]\n", - " 8%|▊ | 25/327 [13:32<2:43:34, 32.50s/it]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Unexpected exception formatting exception. 
Falling back to standard exception\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "Traceback (most recent call last):\n", - " File \"/opt/conda/lib/python3.10/site-packages/IPython/core/interactiveshell.py\", line 3526, in run_code\n", - " exec(code_obj, self.user_global_ns, self.user_ns)\n", - " File \"/tmp/ipykernel_3320/540718842.py\", line 1, in \n", - " temporal_betweenness_brandes(t_sp, delta=3600)\n", - " File \"/tmp/ipykernel_3320/1211725645.py\", line 36, in temporal_betweenness_brandes\n", - " return betweenness_brandes(event_graph, src_indices)\n", - " File \"/tmp/ipykernel_3320/2494254184.py\", line -1, in betweenness_brandes\n", - "KeyboardInterrupt\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/opt/conda/lib/python3.10/site-packages/IPython/core/interactiveshell.py\", line 2120, in showtraceback\n", - " stb = self.InteractiveTB.structured_traceback(\n", - " File \"/opt/conda/lib/python3.10/site-packages/IPython/core/ultratb.py\", line 1435, in structured_traceback\n", - " return FormattedTB.structured_traceback(\n", - " File \"/opt/conda/lib/python3.10/site-packages/IPython/core/ultratb.py\", line 1326, in structured_traceback\n", - " return VerboseTB.structured_traceback(\n", - " File \"/opt/conda/lib/python3.10/site-packages/IPython/core/ultratb.py\", line 1173, in structured_traceback\n", - " formatted_exception = self.format_exception_as_a_whole(etype, evalue, etb, number_of_lines_of_context,\n", - " File \"/opt/conda/lib/python3.10/site-packages/IPython/core/ultratb.py\", line 1088, in format_exception_as_a_whole\n", - " frames.append(self.format_record(record))\n", - " File \"/opt/conda/lib/python3.10/site-packages/IPython/core/ultratb.py\", line 970, in format_record\n", - " frame_info.lines, Colors, self.has_colors, lvals\n", - " File \"/opt/conda/lib/python3.10/site-packages/IPython/core/ultratb.py\", 
line 792, in lines\n", - " return self._sd.lines\n", - " File \"/opt/conda/lib/python3.10/site-packages/stack_data/utils.py\", line 145, in cached_property_wrapper\n", - " value = obj.__dict__[self.func.__name__] = self.func(obj)\n", - " File \"/opt/conda/lib/python3.10/site-packages/stack_data/core.py\", line 698, in lines\n", - " pieces = self.included_pieces\n", - " File \"/opt/conda/lib/python3.10/site-packages/stack_data/utils.py\", line 145, in cached_property_wrapper\n", - " value = obj.__dict__[self.func.__name__] = self.func(obj)\n", - " File \"/opt/conda/lib/python3.10/site-packages/stack_data/core.py\", line 649, in included_pieces\n", - " pos = scope_pieces.index(self.executing_piece)\n", - " File \"/opt/conda/lib/python3.10/site-packages/stack_data/utils.py\", line 145, in cached_property_wrapper\n", - " value = obj.__dict__[self.func.__name__] = self.func(obj)\n", - " File \"/opt/conda/lib/python3.10/site-packages/stack_data/core.py\", line 628, in executing_piece\n", - " return only(\n", - " File \"/opt/conda/lib/python3.10/site-packages/executing/executing.py\", line 164, in only\n", - " raise NotOneValueFound('Expected one value, found 0')\n", - "executing.executing.NotOneValueFound: Expected one value, found 0\n" - ] - } - ], - "source": [ - "temporal_betweenness_brandes(t_sp, delta=3600)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/docs/tutorial/temporal_clusters.html b/docs/tutorial/temporal_clusters.html deleted file mode 100644 index 
c7af53cbd..000000000 --- a/docs/tutorial/temporal_clusters.html +++ /dev/null @@ -1,306 +0,0 @@ - - -
- - \ No newline at end of file diff --git a/docs/tutorial/temporal_shortest_paths.ipynb b/docs/tutorial/temporal_shortest_paths.ipynb deleted file mode 100644 index fd99737ae..000000000 --- a/docs/tutorial/temporal_shortest_paths.ipynb +++ /dev/null @@ -1,845 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pathpyG as pp\n", - "from torch_geometric.utils import cumsum, coalesce, degree, sort_edge_index\n", - "import torch\n", - "\n", - "from scipy.sparse.csgraph import bellman_ford, dijkstra\n", - "import numpy as np\n", - "\n", - "from collections import defaultdict\n", - "\n", - "\n", - "from tqdm import tqdm" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Temporal Graph with 327 nodes, 11636 unique edges and 754032 events in [1385982080.0, 1386345600.0]\n", - "\n", - "Graph attributes\n", - "\tdst\t\t -> torch.Size([754032])\n", - "\tsrc\t\t -> torch.Size([754032])\n", - "\tt\t\t -> torch.Size([754032])\n", - "\n", - "1157\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.10/site-packages/torch_geometric/data/storage.py:450: UserWarning: Unable to accurately infer 'num_nodes' from the attribute set '{'dst', 'src', 't'}'. 
Please explicitly set 'num_nodes' as an attribute of 'data' to suppress this warning\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "t_sp = pp.io.read_csv_temporal_graph('sociopatterns_highschool_2013.tedges', header=False).to_undirected()\n", - "print(t_sp)\n", - "print(torch.unique(t_sp.data.t).size(0))" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Temporal Graph with 68 nodes, 752 unique edges and 2090 events in [899.0, 1796.0]\n", - "\n", - "Graph attributes\n", - "\tdst\t\t -> torch.Size([2090])\n", - "\tsrc\t\t -> torch.Size([2090])\n", - "\tt\t\t -> torch.Size([2090])\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.10/site-packages/torch_geometric/data/storage.py:450: UserWarning: Unable to accurately infer 'num_nodes' from the attribute set '{'dst', 'src', 't'}'. Please explicitly set 'num_nodes' as an attribute of 'data' to suppress this warning\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "t_ants = pp.io.read_csv_temporal_graph('../data/ants_2_2_val.tedges', header=False)\n", - "print(t_ants)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 594/594 [00:00<00:00, 5479.47it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created temporal event DAG with 1181 nodes and 4023 edges\n", - "[[ 0. 1. inf ... inf inf inf]\n", - " [ 1. 0. inf ... inf inf inf]\n", - " [ 5. 3. 0. ... 2. inf inf]\n", - " ...\n", - " [inf inf inf ... 0. inf inf]\n", - " [inf inf inf ... inf 0. inf]\n", - " [inf inf inf ... 1. 
inf 0.]]\n", - "{'JJJJ': 1399.0180458430464, 'WGG_': 1491.1753968253968, '_Y_B': 1461.7166666666667, 'HHHH': 996.0666666666666, 'WGRB': 1834.2047619047619, 'WYWY': 1540.441666666667, 'WY_G': 761.1371794871794, 'XXXX': 1670.8789682539682, 'LLLL': 1182.7095238095237, 'FFFF': 1062.2448773448773, 'WYG_': 1978.7333333333331, 'WW__': 1790.2027777777776, 'WRWB': 1743.196428571429, 'AAAA': 581.3047619047619, 'WGYW': 1155.8297619047619, 'WBYY': 968.8944444444444, '_R__': 880.7575396825396, 'WYBG': 1448.1039682539683, 'W__W': 1546.319877344877, 'RRRR': 924.1214285714285, 'WYRW': 1601.938095238095, 'WYYB': 865.6825396825396, 'WG_W': 1494.8178571428573, 'WRR_': 1195.2853174603176, 'W__G': 867.9182900432901, '_WRR': 622.8873015873016, 'WY_R': 1549.3750000000002, '_YYY': 1706.9047619047617, 'WRGG': 1571.4158730158733, 'WWGY': 1374.6964285714284, 'WW_W': 1325.6428571428573, 'W_W_': 842.7908730158728, 'WYYR': 798.6825396825395, 'ZZZZ': 662.777922077922, 'W_RG': 1339.8936507936507, 'WBGW': 512.55, 'WBGG': 1543.3130952380955, 'WWRY': 965.0658730158731, 'W___': 518.640909090909, 'VVVV': 394.82142857142856, 'WGGY': 402.0, 'WG__': 402.0, 'WY__': 1094.4130952380951, 'W_GY': 847.5990842490843, 'WYWW': 383.8191197691197, 'OOOO': 866.3738095238094, 'W_BG': 1306.0214285714287, 'TTTT': 549.4, 'WBWY': 1183.2944444444443, 'WWY_': 1060.354761904762, 'WBGR': 67.0, 'WGWY': 597.4166666666666, 'PPPP': 1146.8166666666664, 'WGGW': 917.4214285714285, 'EEEE': 617.1976190476189, '__YR': 134.0, 'WYYG': 548.8972582972583, 'WGGG': 207.70000000000002, 'IIII': 409.81666666666666, 'MMMM': 201.0, 'UUUU': 67.0, 'W_WG': 67.0, 'WYY_': 134.0, 'WWR_': 134.0, 'QQQQ': 415.4, 'WR__': 1117.5440476190474, 'W_GW': 167.5, 'AAAB': 0.0}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "c = pp.algorithms.centrality.temporal_closeness_centrality(t_ants, delta=60)\n", - "print(c)" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - 
"outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 17/17 [00:00<00:00, 5773.07it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created temporal event DAG with 38 nodes and 47 edges\n", - "(9, 38)\n", - "(9, 9)\n", - "[[ 0. 1. 1. 3. 3. inf 1. 2. inf]\n", - " [inf 0. 1. 2. 2. 1. inf inf 1.]\n", - " [ 2. inf 0. 1. 1. 1. 3. 1. 1.]\n", - " [inf inf inf 0. inf inf inf inf inf]\n", - " [inf inf inf inf 0. inf inf inf inf]\n", - " [ 1. inf inf inf inf 0. 2. 1. inf]\n", - " [inf inf inf inf inf inf 0. 1. inf]\n", - " [inf inf inf inf inf 1. inf 0. 1.]\n", - " [inf 1. inf inf inf inf inf inf 0.]]\n", - "{'a': 12.0, 'b': 16.0, 'c': 16.0, 'd': 14.666666666666666, 'e': 14.666666666666666, 'f': 24.0, 'g': 14.666666666666666, 'h': 28.0, 'i': 24.0}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "tedges = [('a', 'b', 1), ('b', 'c', 5), ('c', 'd', 9), ('c', 'e', 9),\n", - " ('c', 'f', 11), ('f', 'a', 13), ('a', 'g', 18), ('b', 'f', 21),\n", - " ('a', 'g', 26), ('c', 'f', 27), ('h', 'f', 27), ('g', 'h', 28),\n", - " ('a', 'c', 30), ('a', 'b', 31), ('c', 'h', 32), ('f', 'h', 33),\n", - " ('b', 'i', 42), ('i', 'b', 42), ('c', 'i', 47), ('h', 'i', 50)]\n", - "t = pp.TemporalGraph.from_edge_list(tedges)\n", - "c = pp.algorithms.centrality.temporal_closeness_centrality(t, 5)\n", - "print(c)" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Temporal Graph with 5 nodes, 6 unique edges and 6 events in [0.0, 3.0]\n", - "\n", - "Graph attributes\n", - "\tdst\t\t -> torch.Size([6])\n", - "\tt\t\t -> torch.Size([6])\n", - "\tsrc\t\t -> torch.Size([6])\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.10/site-packages/torch_geometric/data/storage.py:450: UserWarning: Unable to accurately 
infer 'num_nodes' from the attribute set '{'dst', 't', 'src'}'. Please explicitly set 'num_nodes' as an attribute of 'data' to suppress this warning\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "t = pp.TemporalGraph.from_edge_list([(0,1,0), (0,2,0), (1,2,1), (1,3,1), (3,4,2), (1,4,3)])\n", - "print(t)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 4/4 [00:00<00:00, 262.99it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created temporal event DAG with 17 nodes and 15 edges\n", - "{0.0: 0.0, 1.0: 4.0, 2.0: 8.0, 3.0: 6.0, 4.0: 9.333333333333332}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "c = pp.algorithms.centrality.temporal_closeness_centrality(t, delta=1)\n", - "print(c)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# old code with explosive memory usage due to computation of all second-order edges irrespective of time stamps\n", - "def lift_order_not_efficient(g: pp.TemporalGraph, delta=1):\n", - " # first-order edge index\n", - " edge_index, timestamps = sort_edge_index(g.data.edge_index, g.data.t)\n", - " node_sequence = torch.arange(g.data.num_nodes, device=edge_index.device).unsqueeze(1)\n", - " print(edge_index)\n", - " # second-order edge index with time-respective filtering\n", - " null_model_edge_index = pp.MultiOrderModel.lift_order_edge_index(edge_index, num_nodes=node_sequence.size(0)) \n", - " # Update node sequences\n", - " node_sequence = torch.cat([node_sequence[edge_index[0]], node_sequence[edge_index[1]][:, -1:]], dim=1)\n", - " # Remove non-time-respecting higher-order edges\n", - " time_diff = timestamps[null_model_edge_index[1]] - timestamps[null_model_edge_index[0]]\n", - " non_negative_mask = time_diff > 0\n", - " delta_mask = time_diff <= 
delta\n", - " time_respecting_mask = non_negative_mask & delta_mask\n", - " edge_index = null_model_edge_index[:, time_respecting_mask]\n", - " return edge_index" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# new memory-efficient code\n", - "def lift_order_efficient(g: pp.TemporalGraph, delta: int = 1):\n", - "\n", - " # first-order edge index\n", - " edge_index, timestamps = g.data.edge_index, g.data.t\n", - " # print(edge_index)\n", - "\n", - " indices = torch.arange(0, edge_index.size(1), device=g.data.edge_index.device)\n", - "\n", - " unique_t = torch.unique(timestamps, sorted=True)\n", - " second_order = []\n", - "\n", - " # lift order: find possible continuations for edges in each time stamp\n", - " for i in tqdm(range(unique_t.size(0))):\n", - " t = unique_t[i]\n", - " #print('timestamp index ', i)\n", - " #print('timestamp ', t)\n", - " \n", - " # find indices of all source edges that occur at unique timestamp t\n", - " src_time_mask = (timestamps == t)\n", - " src_edges = edge_index[:,src_time_mask]\n", - " src_edge_idx = indices[src_time_mask]\n", - " #print(src_edges)\n", - " #print(src_edge_idx)\n", - "\n", - " # find indices of all edges that can possibly continue edges occurring at time t for the given delta\n", - " dst_time_mask = (timestamps > t) & (timestamps <= t+delta)\n", - " dst_edges = edge_index[:,dst_time_mask] \n", - " dst_edge_idx = indices[dst_time_mask]\n", - " #print(dst_edges)\n", - " #print(dst_edge_idx)\n", - "\n", - " if dst_edge_idx.size(0)>0 and src_edge_idx.size(0)>0:\n", - "\n", - " # compute second-order edges between src and dst idx for all edges where dst in src_edges matches src in dst_edges \n", - " x = torch.cartesian_prod(src_edge_idx, dst_edge_idx).t()\n", - " src_edges = torch.index_select(edge_index, dim=1, index=x[0])\n", - " dst_edges = torch.index_select(edge_index, dim=1, index=x[1])\n", - " #print(src_edges)\n", - " #print(dst_edges)\n", - " 
ho_edge_index = x[:,torch.where(src_edges[1,:] == dst_edges[0,:])[0]]\n", - " second_order.append(ho_edge_index)\n", - " #print(ho_edge_index) \n", - " \n", - " # #print('dst', dst)\n", - " # src_mask = (edge_index[:,mask][0]==dst)\n", - " # ctd = edge_index[:,mask][:,src_mask]\n", - " # #print('continuations', ctd)\n", - " # ctd_indices = torch.where(edge_index[:,mask][0]==dst)[0] \n", - " # #print('ctd indx', ctd_indices)\n", - " # count += ctd_indices.size(0)\n", - " ho_index = torch.cat(second_order, dim=1) \n", - " return ho_index" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "def fo_nodes(ho_edge, g):\n", - " src_edge = ho_edge[0]\n", - " dst_edge = ho_edge[1]\n", - " return g.data.edge_index[:,src_edge][0], g.data.edge_index[:,dst_edge][0], g.data.edge_index[:,dst_edge][1]\n", - "\n", - "\n", - "def temporal_shortest_paths_all(g: pp.TemporalGraph, delta: int):\n", - " # generate temporal event DAG\n", - " edge_index = lift_order_efficient(g, delta)\n", - "\n", - " # Add indices of first-order nodes as src and dst of paths in TEG\n", - " src_edges_src = g.data.edge_index[0,:] + g.data.edge_index.size(1)\n", - " src_edges_dst = torch.arange(0, g.data.edge_index.size(1)) \n", - " dst_edges_src = torch.arange(0, g.data.edge_index.size(1))\n", - " dst_edges_dst = g.data.edge_index[1,:] + 2*g.data.edge_index.size(1)\n", - "\n", - " src_edges = torch.stack([src_edges_src, src_edges_dst])\n", - " dst_edges = torch.stack([dst_edges_src, dst_edges_dst])\n", - " edge_index = torch.cat([edge_index, src_edges, dst_edges], dim=1)\n", - "\n", - " event_graph = pp.Graph.from_edge_index(edge_index)\n", - " \n", - " # initialize distance matrix \n", - " dist = torch.full((g.n, event_graph.n), float(\"inf\"), device=g.data.edge_index.device)\n", - "\n", - " # predecessor lists\n", - " pred = defaultdict(lambda: defaultdict(list))\n", - "\n", - " # Fastest known single source SP in DAG (Cormen, Leiserson): single 
scan of edges in DAG\n", - " # trick: index of second-order nodes = topological sorting of event DAG assuming that edges are given in chronological order \n", - " # scan second-order nodes in topological order and relax distances between first-order nodes\n", - "\n", - " # TODO: correct algorithm\n", - " for src in tqdm(g.nodes):\n", - " dist[g.mapping.to_idx(src), g.mapping.to_idx(src) + g.data.edge_index.size(1)] = 0\n", - " for v in event_graph.nodes:\n", - " for w in event_graph.successors(v):\n", - " dist[g.mapping.to_idx(src), w] = min(dist[g.mapping.to_idx(src), w], dist[g.mapping.to_idx(src), v]+1)\n", - " \n", - " dist_fo = dist[:,2*g.m:] - 1\n", - " dist_fo.fill_diagonal_(0)\n", - " return dist_fo, pred\n", - "\n", - "\n", - "def temporal_shortest_paths(g: pp.TemporalGraph, delta: int):\n", - " # generate temporal event DAG\n", - " edge_index = lift_order_efficient(g, delta) \n", - "\n", - " # Add indices of g.n first-order nodes as source nodes of paths in augmented TEG\n", - " src_edges_src = g.m + g.data.edge_index[0,:]\n", - " src_edges_dst = torch.arange(0, g.data.edge_index.size(1))\n", - "\n", - " # Add indices of g.n first-order nodes as target nodes of paths in augmented TEG\n", - " dst_edges_src = torch.arange(0, g.data.edge_index.size(1))\n", - " dst_edges_dst = g.m + g.n + g.data.edge_index[1,:]\n", - "\n", - " src_edges = torch.stack([src_edges_src, src_edges_dst])\n", - " dst_edges = torch.stack([dst_edges_src, dst_edges_dst])\n", - " edge_index = torch.cat([edge_index, src_edges, dst_edges], dim=1)\n", - "\n", - " event_graph = pp.Graph.from_edge_index(edge_index, num_nodes=g.m + 2 * g.n)\n", - " m = event_graph.sparse_adj_matrix()\n", - " print(m.shape)\n", - " # compute shortest paths from all source nodes to all nodes \n", - " dist, pred = dijkstra(m, directed=True, indices = np.arange(g.m, g.m+g.n), return_predecessors=True, unweighted=True)\n", - " print(dist.shape)\n", - " print(g.n + g.m)\n", - " # we are only interested in target 
nodes, whose indices start at G.m + G.n\n", - " dist_fo = dist[:,g.m+g.n:] - 1\n", - " np.fill_diagonal(dist_fo, 0)\n", - " pred_fo = pred[:,g.n+g.m:]\n", - " return dist_fo, pred_fo\n", - "\n", - "\n", - " \n", - "def temporal_closeness_centrality(g: pp.TemporalGraph, delta: int) -> dict:\n", - "\n", - " centralities = dict()\n", - " dist, _ = temporal_shortest_paths(g, delta)\n", - " for x in g.nodes:\n", - " centralities[x] = sum((g.n - 1) / dist[np.arange(g.n)!=x, g.mapping.to_idx(x)])\n", - "\n", - " return centralities" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 594/594 [00:00<00:00, 6304.91it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(1181, 1181)\n", - "(68, 1181)\n", - "1113\n", - "(68, 68)\n", - "68\n", - "1045\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "dist, pred = temporal_shortest_paths(t_ants, delta=30)\n", - "print(dist.shape)\n", - "print(t_ants.n)\n", - "print(t_ants.m)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'idx' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[11], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43midx\u001b[49m[:,\u001b[38;5;241m1\u001b[39m]\n", - "\u001b[0;31mNameError\u001b[0m: name 'idx' is not defined" - ] - } - ], - "source": [ - "idx[:,1]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "edge_index = lift_order_efficient(t)\n", - "print(edge_index)" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - 
"metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "tensor([[0, 0, 1, 1, 3, 1],\n", - " [1, 2, 2, 3, 4, 4]])\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 4/4 [00:00<00:00, 2955.30it/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(16, 16)\n", - "(5, 16)\n", - "11\n", - "[[ 0. 1. 1. 2. 3.]\n", - " [inf 0. 1. 1. 1.]\n", - " [inf inf 0. inf inf]\n", - " [inf inf inf 0. 1.]\n", - " [inf inf inf inf 0.]]\n", - "[[-9999 0 1 3 4]\n", - " [-9999 -9999 2 3 5]\n", - " [-9999 -9999 -9999 -9999 -9999]\n", - " [-9999 -9999 -9999 -9999 4]\n", - " [-9999 -9999 -9999 -9999 -9999]]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "print(t.data.edge_index)\n", - "dist, pred = temporal_shortest_paths(t, delta=1)\n", - "\n", - "print(dist)\n", - "print(pred)" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 3., 1., inf, 1., 0.])" - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dist[:,4]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "t.mapping.node_ids" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(temporal_closeness_centrality(t, delta=1))\n", - "print(t.n)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "temporal_shortest_paths(t_sp, delta=3600)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - 
"edge_index[0,:]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "t.data.edge_index[:,edge_index[0,:]][0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "t.data.edge_index[:,edge_index[1,:]][1]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#print(t.data.edge_index)\n", - "print(t_sp)\n", - "g = temporal_shortest_paths(t_sp, delta=300)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "indeg = degree(g.data.edge_index[1])\n", - "roots = torch.where(indeg==0)[0]\n", - "print(roots)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def traverse(g, path):\n", - " if g.get_successors(path[-1]).size(0) == 0:\n", - " pass\n", - " else:\n", - " for w in g.successors(path[-1]):\n", - " traverse(g, path + (w,))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "i = 0\n", - "for x in roots:\n", - " print(x)\n", - " traverse(g, (x,))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ho_index = lift_order_not_efficient(t, delta=1)\n", - "print(ho_index)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ho_index = lift_order_efficient(t, delta=1)\n", - "print(ho_index)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(t.data.edge_index)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "node_sequence = torch.arange(t.data.num_nodes, device=t.data.edge_index.device).unsqueeze(1)\n", - "print(node_sequence)\n", - 
"node_sequence = torch.cat([node_sequence[t.data.edge_index[0]], node_sequence[t.data.edge_index[1]][:, -1:]], dim=1)\n", - "print(node_sequence)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "lift_order_not_efficient(t_sp, delta=300)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "lift_order_efficient(t_sp, delta=300)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "lift_order_not_efficient(t_sp, delta=300)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "x = torch.cartesian_prod(torch.tensor([0,1]), torch.tensor([1,3])).t()\n", - "# edge 0 = 0->1\n", - "# edge 1 = 1->2\n", - "# edge 2 = 0->1\n", - "\n", - "# combination 0,1: 0->1, 1->2\n", - "# combination 0,2: 0->1, 0->1\n", - "print(x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "src_edges = torch.index_select(t.data.edge_index, dim=1, index=x[0])\n", - "print(src_edges)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dst_edges = torch.index_select(t.data.edge_index, dim=1, index=x[1])\n", - "print(dst_edges)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - " #select all indices where \n", - "torch.where(src_edges[1,:] == dst_edges[0,:])[0]\n", - "x[:,torch.where(src_edges[1,:] == dst_edges[0,:])[0]]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - 
"cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/docs/tutorial/wl.ipynb b/docs/tutorial/wl.ipynb deleted file mode 100644 index 31b21ad07..000000000 --- a/docs/tutorial/wl.ipynb +++ /dev/null @@ -1,250 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pathpyG as pp\n", - "import torch\n", - "from torch_geometric import EdgeIndex" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "EdgeIndex([[0, 1, 1],\n", - " [1, 2, 3]], sparse_size=(4, 4), nnz=3, sort_order=row)\n", - "EdgeIndex([[0, 1, 1],\n", - " [1, 2, 3]], sparse_size=(4, 4), nnz=3, sort_order=row)\n", - "EdgeIndex([[0, 0, 1, 1, 1, 1],\n", - " [1, 1, 2, 3, 2, 3]], sparse_size=(4, 4), nnz=6, sort_order=row)\n", - "Directed graph with 4 nodes and 6 edges\n", - "\n", - "Graph attributes\n", - "\tnum_nodes\t\t\n", - "\n" - ] - } - ], - "source": [ - "g1 = pp.Graph.from_edge_index(torch.tensor([[0,1,1],[1,2,3]]), mapping=pp.IndexMap(['a', 'b', 'c', 'd']))\n", - "print(g1.data.edge_index)\n", - "g2 = pp.Graph.from_edge_index(torch.tensor([[0,1,1],[1,2,3]]), mapping=pp.IndexMap(['a', 'b', 'c', 'd']))\n", - "print(g2.data.edge_index)\n", - "g = g1 + g2\n", - "print(g.data.edge_index)\n", - "print(g)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"EdgeIndex([[0, 1, 1],\n", - " [1, 2, 3]], sparse_size=(4, 4), nnz=3, sort_order=row)\n", - "EdgeIndex([[0, 1, 1],\n", - " [1, 2, 3]], sparse_size=(4, 4), nnz=3, sort_order=row)\n", - "EdgeIndex([[0, 1, 1, 4, 5, 5],\n", - " [1, 2, 3, 5, 6, 7]], sparse_size=(8, 8), nnz=6, sort_order=row)\n", - "Directed graph with 8 nodes and 6 edges\n", - "\n", - "Graph attributes\n", - "\tnum_nodes\t\t\n", - "\n" - ] - } - ], - "source": [ - "g1 = pp.Graph.from_edge_index(torch.tensor([[0,1,1],[1,2,3]]), mapping=pp.IndexMap(['a', 'b', 'c', 'd']))\n", - "print(g1.data.edge_index)\n", - "g2 = pp.Graph.from_edge_index(torch.tensor([[0,1,1],[1,2,3]]), mapping=pp.IndexMap(['e', 'f', 'g', 'h']))\n", - "print(g2.data.edge_index)\n", - "g = g1 + g2\n", - "print(g.data.edge_index)\n", - "print(g)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "EdgeIndex([[0, 1, 1],\n", - " [1, 2, 3]], sparse_size=(4, 4), nnz=3, sort_order=row)\n", - "EdgeIndex([[0, 1, 1],\n", - " [1, 2, 3]], sparse_size=(4, 4), nnz=3, sort_order=row)\n", - "EdgeIndex([[0, 0, 1, 1, 1, 1],\n", - " [1, 1, 2, 3, 4, 5]], sparse_size=(6, 6), nnz=6, sort_order=row)\n", - "Directed graph with 6 nodes and 6 edges\n", - "\n", - "Graph attributes\n", - "\tnum_nodes\t\t\n", - "\n" - ] - } - ], - "source": [ - "g1 = pp.Graph.from_edge_index(torch.tensor([[0,1,1],[1,2,3]]), mapping=pp.IndexMap(['a', 'b', 'c', 'd']))\n", - "print(g1.data.edge_index)\n", - "g2 = pp.Graph.from_edge_index(torch.tensor([[0,1,1],[1,2,3]]), mapping=pp.IndexMap(['a', 'b', 'g', 'h']))\n", - "print(g2.data.edge_index)\n", - "g = g1 + g2\n", - "print(g.data.edge_index)\n", - "print(g)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "def num_labels(d):\n", - " return len(set(d.values()))" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [], - 
"source": [ - "from typing import Tuple, List" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [], - "source": [ - "def WL_test(g1: pp.Graph, g2: pp.Graph) -> Tuple[bool, List[str], List[str]]:\n", - " \"\"\"Run Weisfeiler-Leman test on two graphs\"\"\"\n", - " if g1.mapping is None or g2.mapping is None:\n", - " raise Exception('Graphs must contain IndexMap that assigns node IDs')\n", - " if len(set(g1.mapping.node_ids).intersection(g2.mapping.node_ids)) > 0:\n", - " raise Exception('node identifiers of graphs must not overlap')\n", - " g_combined = g1 + g2\n", - " # initialize labels of all ndoes to zero\n", - " fingerprint = { v:'0' for v in g_combined.nodes }\n", - " labels = {} \n", - " label_count = 1\n", - " stop = False\n", - " while not stop:\n", - " new_fingerprint = {} \n", - " for node in g_combined.nodes:\n", - " # create new label based on own label and sorted labels of all neighbors\n", - " n_label = [fingerprint[x] for x in g_combined.successors(node)]\n", - " n_label.sort()\n", - " label = str(fingerprint[node]) + str(n_label)\n", - " # previously unknown label\n", - " if label not in labels:\n", - " # create a new label based on next consecutive number\n", - " labels[label] = label_count\n", - " label_count += 1 \n", - " new_fingerprint[node] = labels[label] \n", - " if len(set(fingerprint.values())) == len(set(new_fingerprint.values())):\n", - " # we processed all nodes in both graphs without encountering a new label, so we stop\n", - " stop = True\n", - " else:\n", - " # update fingerprint and continue\n", - " fingerprint = new_fingerprint.copy()\n", - " fingerprint_1 = [fingerprint[v] for v in g1.nodes]\n", - " fingerprint_1.sort()\n", - " fingerprint_2 = [fingerprint[v] for v in g2.nodes]\n", - " fingerprint_2.sort()\n", - " if fingerprint_1 == fingerprint_2:\n", - " return True, fingerprint_1, fingerprint_2\n", - " return False, fingerprint_1, fingerprint_2" - ] - }, - { - "cell_type": "code", - 
"execution_count": 46, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(True, [3, 4, 5], [3, 4, 5])" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "g1 = pp.Graph.from_edge_list([('a', 'b'), ('b', 'c')])\n", - "g2 = pp.Graph.from_edge_list([('y', 'z'), ('x', 'y')])\n", - "WL_test(g1, g2)" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d = {'a': 0, 'b': 1, 'c': 0}\n", - "num_labels(d)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/docs/tutorial/xx_temporal_centralities.ipynb b/docs/tutorial/xx_temporal_centralities.ipynb deleted file mode 100644 index 147599da9..000000000 --- a/docs/tutorial/xx_temporal_centralities.ipynb +++ /dev/null @@ -1,522 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running on cpu\n" - ] - } - ], - "source": [ - "import torch\n", - "\n", - "import pathpyG as pp" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# Put this as his in conftest as 'simple_paths_centralities'\n", - "paths = pp.WalkData()\n", - "paths.add(torch.tensor([[2, 1, 3], [1, 3, 5]])) \n", - 
"paths.add(torch.tensor([[0, 1], [1, 3]])) \n", - "paths.add(torch.tensor([[3], [4]]))\n", - "\n", - "simple_paths_centralities = paths" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# paths = pp.PathData()\n", - "# paths.add_walk(torch.tensor([[0,2,3],[2,3,4]]),freq=3) # A -> C -> D\n", - "# paths.add_walk(torch.tensor([[0,2],[2,3]])) # A -> C -> D\n", - "# paths.add_walk(torch.tensor([[1,2],[2,4]])) # B -> C -> E\n", - "# paths.add_walk(torch.tensor([[4],[5]]))\n", - "# paths.add_walk(torch.tensor([[1,2],[2,4]])) # B -> C -> E\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(tensor([[[0, 1],\n", - " [1, 3],\n", - " [2, 1]],\n", - " \n", - " [[1, 3],\n", - " [3, 5],\n", - " [1, 3]]]),\n", - " tensor([1., 1., 1.]))" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "index, edge_weights = paths.edge_index_k_weighted(k=2)\n", - "index, edge_weights" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "index, edge_weights = paths.edge_index_k_weighted(k=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "from collections import defaultdict\n", - "\n", - "def node_traversals(paths):\n", - " \"\"\"Calculates the number of times any path traverses each of the nodes.\n", - "\n", - " Parameters\n", - " ----------\n", - " paths: Paths\n", - "\n", - " Returns\n", - " -------\n", - " dict\n", - " \"\"\"\n", - " traversals = defaultdict(lambda: 0)\n", - " for path_id, path_edgelist in paths.paths.items():\n", - " path_seq = paths.walk_to_node_seq(path_edgelist)\n", - " for node in path_seq:\n", - " traversals[node.item()] += paths.path_freq[path_id]\n", - " return traversals" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": 
{}, - "outputs": [], - "source": [ - "from pathpyG.algorithms.centrality import node_traversals" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "ename": "RecursionError", - "evalue": "maximum recursion depth exceeded while calling a Python object", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mRecursionError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[11], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mnode_traversals\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpaths\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/workspaces/pathpyG/src/pathpyG/algorithms/centrality.py:311\u001b[0m, in \u001b[0;36m__getattr__..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 309\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m r\n\u001b[1;32m 310\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 311\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mwrapper\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/workspaces/pathpyG/src/pathpyG/algorithms/centrality.py:311\u001b[0m, in \u001b[0;36m__getattr__..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 309\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m r\n\u001b[1;32m 310\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 311\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mwrapper\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - " \u001b[0;31m[... 
skipping similar frames: __getattr__..wrapper at line 311 (2968 times)]\u001b[0m\n", - "File \u001b[0;32m/workspaces/pathpyG/src/pathpyG/algorithms/centrality.py:311\u001b[0m, in \u001b[0;36m__getattr__..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 309\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m r\n\u001b[1;32m 310\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 311\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mwrapper\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/workspaces/pathpyG/src/pathpyG/algorithms/centrality.py:299\u001b[0m, in \u001b[0;36m__getattr__..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 298\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrapper\u001b[39m(\u001b[38;5;241m*\u001b[39margs: Any, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Any:\n\u001b[0;32m--> 299\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 300\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mDid not find method \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m with no arguments\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 301\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(args[\u001b[38;5;241m0\u001b[39m], TemporalGraph):\n", - "\u001b[0;31mRecursionError\u001b[0m: maximum recursion depth exceeded while calling a Python object" - ] - } - ], - "source": [ - 
"node_traversals(paths)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "from pathpyG.algorithms.centrality import visitation_probabilities" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "ename": "RecursionError", - "evalue": "maximum recursion depth exceeded while calling a Python object", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mRecursionError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[14], line 11\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m traversals_dict[\u001b[38;5;241m4\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m\u001b[38;5;241m/\u001b[39m\u001b[38;5;241m9\u001b[39m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m traversals_dict[\u001b[38;5;241m5\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m\u001b[38;5;241m/\u001b[39m\u001b[38;5;241m9\u001b[39m\n\u001b[0;32m---> 11\u001b[0m \u001b[43mtest_visitation_probabilities\u001b[49m\u001b[43m(\u001b[49m\u001b[43msimple_paths_centralities\u001b[49m\u001b[43m)\u001b[49m\n", - "Cell \u001b[0;32mIn[14], line 2\u001b[0m, in \u001b[0;36mtest_visitation_probabilities\u001b[0;34m(simple_paths_centralities)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mtest_visitation_probabilities\u001b[39m(simple_paths_centralities):\n\u001b[0;32m----> 2\u001b[0m traversals_dict \u001b[38;5;241m=\u001b[39m \u001b[43mvisitation_probabilities\u001b[49m\u001b[43m(\u001b[49m\u001b[43msimple_paths_centralities\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mset\u001b[39m(traversals_dict\u001b[38;5;241m.\u001b[39mkeys()) \u001b[38;5;241m==\u001b[39m 
{\u001b[38;5;241m0\u001b[39m,\u001b[38;5;241m1\u001b[39m,\u001b[38;5;241m2\u001b[39m,\u001b[38;5;241m3\u001b[39m,\u001b[38;5;241m4\u001b[39m,\u001b[38;5;241m5\u001b[39m}\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m traversals_dict[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m\u001b[38;5;241m/\u001b[39m\u001b[38;5;241m9\u001b[39m\n", - "File \u001b[0;32m/workspaces/pathpyG/src/pathpyG/algorithms/centrality.py:311\u001b[0m, in \u001b[0;36m__getattr__..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 309\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m r\n\u001b[1;32m 310\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 311\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mwrapper\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/workspaces/pathpyG/src/pathpyG/algorithms/centrality.py:311\u001b[0m, in \u001b[0;36m__getattr__..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 309\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m r\n\u001b[1;32m 310\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 311\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mwrapper\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - " \u001b[0;31m[... 
skipping similar frames: __getattr__..wrapper at line 311 (2967 times)]\u001b[0m\n", - "File \u001b[0;32m/workspaces/pathpyG/src/pathpyG/algorithms/centrality.py:311\u001b[0m, in \u001b[0;36m__getattr__..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 309\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m r\n\u001b[1;32m 310\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 311\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mwrapper\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m/workspaces/pathpyG/src/pathpyG/algorithms/centrality.py:299\u001b[0m, in \u001b[0;36m__getattr__..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 298\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrapper\u001b[39m(\u001b[38;5;241m*\u001b[39margs: Any, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Any:\n\u001b[0;32m--> 299\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 300\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mDid not find method \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m with no arguments\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 301\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(args[\u001b[38;5;241m0\u001b[39m], TemporalGraph):\n", - "\u001b[0;31mRecursionError\u001b[0m: maximum recursion depth exceeded while calling a Python object" - ] - } - ], - "source": [ - 
"def test_visitation_probabilities(simple_paths_centralities):\n", - " traversals_dict = visitation_probabilities(simple_paths_centralities)\n", - " assert set(traversals_dict.keys()) == {0,1,2,3,4,5}\n", - " assert traversals_dict[0] == 1/9\n", - " assert traversals_dict[1] == 2/9\n", - " assert traversals_dict[2] == 1/9\n", - " assert traversals_dict[3] == 3/9\n", - " assert traversals_dict[4] == 1/9\n", - " assert traversals_dict[5] == 1/9\n", - "\n", - "test_visitation_probabilities(simple_paths_centralities)" - ] - }, - { - "cell_type": "code", - "execution_count": 339, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "IndexError occurred. Reached maximum path length of 4\n" - ] - } - ], - "source": [ - "test_shortest_paths(paths)" - ] - }, - { - "cell_type": "code", - "execution_count": 340, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "IndexError occurred. Reached maximum path length of 4\n" - ] - }, - { - "data": { - "text/plain": [ - "defaultdict(.()>,\n", - " {1: 3.0, 3: 2.0, 0: 0, 2: 0, 4: 0, 5: 0})" - ] - }, - "execution_count": 340, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# @betweenness.register(Paths)\n", - "def betweenness(paths, normalized=False):\n", - " \"\"\"Calculates the betweenness of nodes based on observed shortest paths\n", - " between all pairs of nodes\n", - "\n", - " Parameters\n", - " ----------\n", - " paths:\n", - " Paths object\n", - " normalized: bool\n", - " normalize such that largest value is 1.0\n", - "\n", - " Returns\n", - " -------\n", - " dict\n", - " \"\"\"\n", - " assert isinstance(paths, pp.PathData), \"argument must be an instance of pathpy.Paths\"\n", - " node_centralities = defaultdict(lambda: 0)\n", - "\n", - " # Log.add('Calculating betweenness in paths ...', Severity.INFO)\n", - "\n", - " all_paths = shortest_paths(paths)\n", - "\n", - " for s in all_paths:\n", - " for d in 
all_paths[s]:\n", - " for p in all_paths[s][d]:\n", - " for x in p[1:-1]:\n", - " if s != d != x:\n", - " node_centralities[x.item()] += 1.0 / len(all_paths[s][d])\n", - " if normalized:\n", - " max_centr = max(node_centralities.values())\n", - " for v in node_centralities:\n", - " node_centralities[v] /= max_centr\n", - " # assign zero values to nodes not occurring on shortest paths\n", - " nodes = [v.item() for v in paths.edge_index.reshape(-1).unique(dim=0)]\n", - " for v in nodes:\n", - " node_centralities[v] += 0\n", - " # Log.add('finished.')\n", - " return node_centralities\n", - "\n", - "betweenness(paths,normalized=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 346, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "IndexError occurred. Reached maximum path length of 4\n" - ] - } - ], - "source": [ - "def test_betweenness_paths(simple_paths_centralities):\n", - " bw = betweenness(simple_paths_centralities,normalized=False)\n", - " # 1 is in the shortest path between 0-5,2-3,2-5\n", - " assert bw[1] == 3.0\n", - " # 1 is in the shortest path between 2-5,1-5\n", - " assert bw[3] == 2.0\n", - "\n", - "test_betweenness_paths(paths)" - ] - }, - { - "cell_type": "code", - "execution_count": 347, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "IndexError occurred. 
Reached maximum path length of 4\n" - ] - }, - { - "data": { - "text/plain": [ - "defaultdict(.()>,\n", - " {0: defaultdict(...()>,\n", - " {0: 0, 1: 1, 3: 2}),\n", - " 1: defaultdict(...()>,\n", - " {1: 0, 3: 1, 5: 2}),\n", - " 2: defaultdict(...()>,\n", - " {2: 0, 1: 1, 3: 2, 5: 3}),\n", - " 3: defaultdict(...()>,\n", - " {3: 0, 4: 1, 5: 1}),\n", - " 4: defaultdict(...()>,\n", - " {4: 0}),\n", - " 5: defaultdict(...()>,\n", - " {5: 0})})" - ] - }, - "execution_count": 347, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\n", - "def distance_matrix(paths):\n", - " \"\"\"\n", - " Calculates shortest path distances between all pairs of\n", - " nodes based on the observed shortest paths (and subpaths)\n", - " \"\"\"\n", - " dist = defaultdict(lambda: defaultdict(lambda: _np.inf))\n", - " # Log.add('Calculating distance matrix based on empirical paths ...', Severity.INFO)\n", - " nodes = [v.item() for v in paths.edge_index.reshape(-1).unique(dim=0)] # NOTE: modify once set of nodes can be obtained from path obeject\n", - " for v in nodes:\n", - " dist[v][v] = 0\n", - "\n", - " p_length = 1\n", - " index, edge_weights = paths.edge_index_k_weighted(k=p_length)\n", - " sources = index[0]\n", - " destinations = index[-1]\n", - " for e, (s, d) in enumerate(zip(sources, destinations)):\n", - " s = s.item()\n", - " d = d.item()\n", - " dist[s][d] = p_length\n", - " # s_p[s][d] = set({torch.tensor([s,d])})\n", - " p_length += 1\n", - " while True: # until max path length\n", - " try:\n", - " index, edge_weights = paths.edge_index_k_weighted(k=p_length)\n", - " sources = index[0, :, 0]\n", - " destinations = index[1, :, -1]\n", - " for e, (s, d) in enumerate(zip(sources, destinations)):\n", - " s = s.item()\n", - " d = d.item()\n", - " if p_length < dist[s][d]:\n", - " # update shortest path length\n", - " dist[s][d] = p_length\n", - " p_length += 1\n", - " except IndexError:\n", - " print(f\"IndexError occurred. 
Reached maximum path length of {p_length}\")\n", - " break\n", - " return dist\n", - "distance_matrix(paths)\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 352, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "IndexError occurred. Reached maximum path length of 4\n" - ] - } - ], - "source": [ - "def test_distance_matrix_paths(simple_paths_centralities):\n", - " dm = distance_matrix(simple_paths_centralities)\n", - " assert dm[0] == {0: 0, 1: 1, 3: 2}\n", - " assert dm[1] == {1: 0, 3: 1, 5: 2}\n", - " assert dm[2] == {2: 0, 1: 1, 3: 2, 5: 3}\n", - " assert dm[3] == {3: 0, 4: 1, 5: 1}\n", - " assert dm[4] == {4: 0}\n", - " assert dm[5] == {5: 0}\n", - "\n", - "test_distance_matrix_paths(paths)" - ] - }, - { - "cell_type": "code", - "execution_count": 355, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "IndexError occurred. Reached maximum path length of 4\n" - ] - }, - { - "data": { - "text/plain": [ - "defaultdict(.()>,\n", - " {1: 2.0, 3: 2.0, 4: 1.0, 5: 1.8333333333333333, 0: 0.0, 2: 0.0})" - ] - }, - "execution_count": 355, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def closeness(paths, normalized=False):\n", - " \"\"\"Calculates the closeness of nodes based on observed shortest paths\n", - " between all nodes\n", - "\n", - " Parameters\n", - " ----------\n", - " paths: Paths\n", - " normalized: bool\n", - " normalize such that largest value is 1.0\n", - "\n", - " Returns\n", - " -------\n", - " dict\n", - " \"\"\"\n", - " node_centralities = defaultdict(lambda: 0)\n", - " distances = distance_matrix(paths)\n", - " nodes = [v.item() for v in paths.edge_index.reshape(-1).unique(dim=0)] # NOTE: modify once set of nodes can be obtained from path obeject\n", - "\n", - " for x in nodes:\n", - " # calculate closeness centrality of x\n", - " for d in nodes:\n", - " if x != d and distances[d][x] < _np.inf:\n", - " 
node_centralities[x] += 1.0 / distances[d][x]\n", - "\n", - " # assign zero values to nodes not occurring\n", - " \n", - " for v in nodes:\n", - " node_centralities[v] += 0.0\n", - "\n", - " if normalized:\n", - " m = max(node_centralities.values())\n", - " for v in nodes:\n", - " node_centralities[v] /= m\n", - "\n", - " return node_centralities\n", - "closeness(paths, normalized=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 360, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "IndexError occurred. Reached maximum path length of 4\n" - ] - } - ], - "source": [ - "def test_closeness_paths(simple_paths_centralities):\n", - " c = closeness(simple_paths_centralities, normalized=False)\n", - " assert c[0] == 0.0\n", - " # 1 reachable from 0 and 2 in one step\n", - " assert c[1] == 1/1 + 1/1\n", - " assert c[2] == 0\n", - " # 3 reachable from 1 in one step, from 0 and 3 in two steps\n", - " assert c[3] == 1 + 1/2 + 1/2\n", - " assert c[4] == 1\n", - " # 5 reachable from 3 in one step, from 1 in two steps, from 2 in three steps\n", - " assert c[5] == 1 + 1/2 + 1/3\n", - "test_closeness_paths(paths)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/docs/tutorial/xx_test_random_walks.ipynb b/docs/tutorial/xx_test_random_walks.ipynb deleted file mode 100644 index 2f0900d04..000000000 --- a/docs/tutorial/xx_test_random_walks.ipynb +++ /dev/null @@ -1,2423 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ 
- "Running on cpu\n" - ] - } - ], - "source": [ - "import pathpyG as pp\n", - "import torch\n", - "from pathpyG.processes.random_walk import RandomWalk, HigherOrderRandomWalk\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Undirected graph with 34 nodes and 154 (directed) edges\n", - "\n", - "Node attributes\n", - "\tnode__pos\t\t\n", - "\tnode_name\t\t\n", - "\tnode_groups\t\t\n", - "\n", - "Graph attributes\n", - "\turl\t\t\n", - "\tcitation\t\t\n", - "\tdescription\t\t\n", - "\tname\t\t\n", - "\ttags\t\t\n", - "\tnum_nodes\t\t\n", - "\n" - ] - } - ], - "source": [ - "g = pp.io.read_netzschleuder_network('karate', '77')\n", - "print(g)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "rw = RandomWalk(g)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 34/34 [00:00<00:00, 347.70it/s]\n" - ] - } - ], - "source": [ - "data = rw.run_experiment(steps=100,runs=range(34))" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
run_idseedtimenodestate
00000True
10001False
20002False
30003False
40004False
..................
79513333981False
79523333990True
795333339921False
795433331003True
795533331000False
\n", - "

7956 rows × 5 columns

\n", - "
" - ], - "text/plain": [ - " run_id seed time node state\n", - "0 0 0 0 0 True\n", - "1 0 0 0 1 False\n", - "2 0 0 0 2 False\n", - "3 0 0 0 3 False\n", - "4 0 0 0 4 False\n", - "... ... ... ... ... ...\n", - "7951 33 33 98 1 False\n", - "7952 33 33 99 0 True\n", - "7953 33 33 99 21 False\n", - "7954 33 33 100 3 True\n", - "7955 33 33 100 0 False\n", - "\n", - "[7956 rows x 5 columns]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0, 8, 32, 31, 32, 22, 32, 8, 33, 30, 1, 30, 32, 20, 33, 14, 32, 15, 33, 13, 1, 21, 1, 30, 33, 13, 1, 19, 1, 7, 0, 13, 33, 9, 33, 18, 33, 13, 0, 5, 6, 0, 1, 2, 7, 0, 12, 0, 12, 0, 17, 1, 30, 32, 23, 32, 22, 32, 23, 25, 24, 25, 23, 33, 23, 29, 33, 27, 24, 25, 23, 25, 24, 27, 23, 25, 23, 27, 24, 31, 33, 23, 33, 13, 2, 7, 0, 19, 0, 6, 16, 5, 10, 0, 5, 10, 0, 19, 33, 27, 24]\n", - "[1, 0, 31, 0, 5, 6, 4, 6, 16, 6, 16, 6, 0, 1, 30, 32, 31, 32, 30, 1, 30, 33, 20, 32, 2, 8, 2, 28, 2, 0, 10, 4, 6, 4, 10, 0, 8, 32, 20, 32, 18, 32, 31, 24, 25, 31, 25, 31, 32, 2, 27, 2, 3, 1, 3, 13, 33, 29, 23, 29, 32, 29, 33, 30, 33, 23, 29, 32, 20, 33, 29, 26, 33, 28, 31, 25, 23, 29, 32, 14, 32, 14, 33, 32, 2, 28, 2, 3, 2, 32, 8, 32, 18, 33, 9, 2, 28, 33, 27, 33, 29]\n", - "[2, 0, 19, 33, 31, 32, 30, 1, 3, 0, 21, 1, 7, 0, 19, 1, 3, 2, 27, 2, 13, 2, 27, 33, 23, 33, 29, 32, 33, 20, 32, 20, 33, 32, 33, 31, 25, 23, 27, 33, 9, 33, 30, 8, 30, 8, 0, 19, 33, 15, 32, 29, 23, 25, 24, 27, 33, 27, 2, 28, 31, 24, 25, 24, 27, 24, 25, 31, 33, 27, 24, 31, 32, 2, 32, 15, 33, 23, 25, 23, 25, 31, 0, 3, 0, 5, 0, 11, 0, 4, 6, 4, 10, 0, 11, 0, 2, 9, 33, 28, 31]\n", - "[3, 7, 0, 5, 16, 6, 16, 6, 5, 6, 16, 6, 0, 3, 0, 3, 13, 1, 7, 0, 7, 3, 2, 1, 19, 0, 11, 0, 31, 33, 9, 33, 19, 33, 27, 23, 29, 26, 29, 32, 8, 0, 11, 0, 17, 1, 17, 1, 2, 27, 24, 31, 25, 23, 32, 20, 
32, 23, 32, 22, 32, 29, 33, 26, 33, 18, 33, 20, 32, 22, 32, 29, 26, 33, 28, 2, 8, 30, 8, 2, 27, 24, 27, 23, 32, 30, 33, 20, 33, 28, 33, 27, 23, 29, 26, 33, 18, 32, 30, 32, 29]\n", - "[4, 10, 0, 12, 3, 1, 0, 2, 0, 7, 1, 0, 19, 33, 26, 33, 13, 3, 0, 7, 0, 3, 2, 28, 31, 0, 11, 0, 6, 4, 6, 4, 10, 0, 12, 0, 11, 0, 3, 0, 31, 33, 28, 2, 8, 2, 28, 33, 8, 30, 32, 18, 32, 33, 23, 27, 33, 14, 32, 29, 23, 25, 23, 29, 33, 15, 32, 14, 33, 32, 30, 32, 8, 0, 2, 27, 23, 27, 24, 25, 23, 27, 23, 29, 26, 33, 13, 2, 32, 31, 32, 2, 28, 33, 13, 3, 0, 10, 0, 2, 9]\n", - "[5, 6, 0, 10, 5, 10, 0, 5, 0, 11, 0, 4, 6, 16, 5, 0, 21, 1, 0, 13, 0, 1, 17, 0, 31, 32, 20, 33, 14, 33, 28, 33, 20, 32, 20, 32, 15, 32, 14, 33, 29, 26, 29, 32, 18, 32, 2, 9, 33, 26, 33, 26, 33, 26, 29, 23, 27, 33, 31, 32, 8, 30, 1, 0, 2, 13, 0, 10, 5, 6, 0, 31, 24, 31, 33, 31, 0, 2, 1, 19, 1, 17, 0, 4, 0, 17, 1, 17, 1, 13, 3, 0, 10, 0, 5, 6, 4, 0, 17, 0, 19]\n", - "[6, 0, 10, 5, 16, 6, 4, 6, 5, 6, 0, 31, 32, 2, 7, 3, 12, 3, 0, 13, 33, 31, 25, 31, 33, 20, 33, 23, 27, 33, 18, 32, 22, 32, 30, 33, 13, 1, 0, 6, 5, 16, 6, 5, 0, 7, 0, 13, 2, 13, 2, 3, 1, 19, 1, 17, 0, 6, 5, 0, 3, 0, 6, 5, 6, 4, 6, 0, 3, 1, 21, 0, 4, 10, 5, 16, 5, 6, 16, 5, 0, 12, 0, 31, 32, 31, 25, 31, 32, 30, 33, 20, 32, 30, 33, 15, 32, 22, 32, 23, 33]\n", - "[7, 0, 8, 2, 7, 2, 9, 33, 15, 32, 29, 23, 29, 32, 23, 25, 24, 25, 23, 27, 23, 32, 15, 32, 30, 8, 0, 7, 1, 19, 0, 17, 0, 21, 0, 4, 6, 16, 6, 0, 10, 4, 10, 5, 10, 5, 10, 0, 6, 0, 11, 0, 5, 10, 4, 6, 16, 5, 10, 5, 0, 17, 1, 30, 8, 2, 32, 33, 29, 33, 13, 3, 2, 0, 10, 0, 1, 19, 33, 27, 23, 32, 15, 32, 33, 30, 8, 0, 21, 1, 21, 1, 13, 33, 26, 33, 19, 0, 8, 2, 9]\n", - "[8, 32, 33, 26, 29, 32, 15, 32, 20, 32, 31, 24, 27, 33, 27, 23, 25, 24, 25, 24, 25, 31, 24, 31, 24, 25, 23, 32, 29, 33, 31, 25, 24, 27, 23, 27, 2, 3, 7, 2, 32, 29, 33, 27, 33, 23, 33, 23, 27, 33, 23, 25, 24, 25, 31, 32, 14, 33, 31, 28, 31, 33, 15, 33, 27, 23, 29, 33, 20, 33, 28, 33, 13, 3, 2, 27, 24, 31, 0, 19, 33, 14, 33, 31, 0, 1, 19, 33, 32, 33, 
28, 33, 13, 0, 8, 32, 8, 32, 22, 32, 8]\n", - "[9, 33, 23, 33, 31, 25, 23, 25, 24, 31, 25, 24, 25, 24, 25, 23, 29, 23, 25, 24, 25, 31, 28, 2, 13, 0, 7, 2, 3, 0, 11, 0, 21, 0, 17, 0, 8, 30, 1, 7, 1, 3, 7, 3, 12, 3, 2, 8, 2, 0, 2, 32, 23, 29, 33, 28, 2, 7, 1, 30, 1, 0, 19, 33, 32, 8, 30, 33, 13, 0, 31, 25, 23, 27, 24, 31, 28, 2, 32, 33, 9, 2, 1, 3, 1, 21, 1, 2, 27, 23, 33, 23, 32, 30, 8, 2, 13, 1, 0, 11, 0]\n", - "[10, 4, 10, 4, 10, 4, 10, 4, 10, 4, 0, 13, 33, 28, 31, 24, 31, 25, 24, 27, 23, 32, 15, 32, 23, 27, 24, 25, 23, 29, 23, 27, 23, 32, 22, 32, 2, 7, 0, 5, 0, 4, 6, 5, 16, 5, 16, 6, 0, 19, 33, 26, 29, 32, 8, 30, 8, 32, 14, 33, 18, 32, 20, 32, 31, 28, 31, 25, 31, 0, 5, 16, 5, 10, 4, 0, 31, 0, 10, 5, 6, 5, 16, 6, 4, 6, 16, 5, 6, 5, 6, 0, 11, 0, 4, 10, 5, 16, 5, 16, 5]\n", - "[11, 0, 2, 9, 33, 27, 24, 27, 2, 7, 1, 21, 0, 5, 0, 1, 19, 33, 20, 32, 23, 29, 26, 29, 33, 18, 33, 13, 0, 5, 0, 6, 16, 5, 6, 4, 6, 0, 5, 10, 0, 11, 0, 4, 6, 5, 0, 31, 25, 31, 24, 25, 24, 25, 24, 27, 33, 15, 33, 30, 1, 3, 12, 0, 8, 30, 33, 29, 26, 29, 33, 31, 25, 31, 33, 23, 25, 23, 32, 8, 32, 2, 9, 33, 9, 2, 32, 15, 33, 14, 32, 8, 30, 33, 29, 32, 2, 32, 33, 9, 2]\n", - "[12, 0, 6, 16, 6, 0, 21, 1, 30, 8, 2, 0, 4, 10, 5, 6, 5, 0, 3, 12, 0, 1, 0, 8, 30, 1, 19, 1, 21, 1, 21, 0, 17, 0, 13, 33, 29, 23, 33, 30, 1, 21, 1, 13, 33, 14, 33, 32, 2, 32, 18, 32, 18, 33, 18, 32, 15, 33, 18, 33, 32, 14, 32, 30, 1, 13, 1, 17, 0, 5, 6, 16, 6, 4, 6, 0, 8, 33, 27, 2, 9, 33, 30, 33, 13, 0, 1, 21, 0, 1, 13, 1, 17, 0, 7, 2, 27, 2, 32, 30, 32]\n", - "[13, 3, 2, 3, 12, 3, 0, 11, 0, 1, 2, 27, 2, 1, 2, 28, 33, 29, 33, 18, 33, 31, 28, 2, 0, 17, 1, 3, 1, 21, 1, 17, 1, 17, 0, 1, 7, 1, 13, 1, 2, 8, 32, 14, 33, 31, 28, 31, 28, 2, 7, 2, 28, 2, 27, 33, 15, 32, 14, 33, 23, 33, 32, 20, 32, 8, 33, 8, 30, 1, 2, 1, 13, 0, 6, 0, 13, 1, 2, 27, 23, 29, 23, 27, 2, 27, 2, 8, 0, 13, 1, 3, 0, 5, 16, 5, 16, 5, 10, 0, 13]\n", - "[14, 32, 8, 30, 32, 33, 23, 29, 32, 33, 8, 30, 33, 27, 24, 27, 33, 8, 0, 31, 24, 27, 24, 31, 25, 23, 32, 33, 19, 
33, 19, 1, 17, 0, 5, 16, 6, 0, 8, 30, 32, 23, 25, 31, 28, 31, 0, 4, 6, 16, 6, 4, 10, 5, 10, 0, 7, 0, 19, 1, 19, 1, 2, 8, 2, 8, 30, 8, 0, 5, 16, 5, 0, 17, 0, 17, 1, 21, 0, 19, 1, 30, 33, 31, 24, 25, 24, 31, 28, 2, 13, 2, 9, 33, 19, 1, 3, 13, 0, 11, 0]\n", - "[15, 33, 26, 33, 27, 23, 29, 23, 27, 2, 8, 33, 15, 33, 27, 23, 25, 31, 33, 9, 33, 29, 32, 23, 33, 20, 33, 31, 33, 30, 1, 21, 1, 13, 3, 1, 2, 28, 31, 25, 24, 27, 33, 8, 30, 1, 2, 13, 33, 30, 32, 31, 28, 33, 30, 8, 0, 2, 0, 31, 25, 23, 33, 27, 23, 32, 18, 33, 28, 31, 25, 23, 32, 8, 33, 20, 32, 23, 33, 8, 33, 18, 33, 31, 24, 27, 23, 27, 23, 33, 8, 0, 8, 2, 32, 14, 33, 19, 1, 0, 6]\n", - "[16, 5, 16, 6, 5, 6, 4, 10, 0, 2, 28, 33, 19, 1, 19, 0, 7, 2, 7, 2, 7, 0, 7, 1, 2, 7, 0, 19, 1, 13, 0, 5, 10, 0, 13, 3, 13, 2, 27, 24, 27, 24, 25, 31, 0, 8, 33, 8, 32, 2, 9, 33, 18, 32, 29, 23, 25, 31, 33, 29, 33, 30, 8, 2, 13, 2, 3, 13, 33, 19, 33, 29, 32, 2, 1, 21, 0, 2, 27, 23, 32, 14, 33, 20, 32, 31, 32, 18, 33, 9, 2, 27, 24, 31, 32, 15, 33, 13, 2, 3, 1]\n", - "[17, 0, 10, 0, 7, 0, 12, 3, 12, 3, 2, 0, 19, 33, 29, 23, 29, 23, 33, 26, 33, 20, 33, 9, 2, 1, 19, 0, 8, 30, 8, 32, 22, 32, 15, 32, 20, 33, 32, 23, 27, 2, 27, 24, 27, 23, 29, 23, 33, 29, 32, 15, 32, 14, 33, 32, 22, 32, 15, 32, 23, 29, 33, 19, 1, 21, 1, 2, 13, 1, 7, 1, 0, 31, 25, 24, 27, 23, 33, 30, 8, 32, 14, 33, 31, 0, 6, 16, 6, 16, 6, 16, 5, 16, 5, 10, 4, 6, 16, 5, 0]\n", - "[18, 32, 20, 33, 29, 23, 29, 26, 29, 26, 29, 26, 29, 32, 2, 8, 32, 15, 33, 9, 33, 19, 1, 7, 0, 13, 2, 28, 31, 33, 26, 29, 23, 32, 15, 32, 30, 33, 26, 33, 19, 0, 10, 5, 6, 4, 10, 5, 6, 16, 6, 4, 6, 4, 0, 10, 5, 0, 17, 1, 0, 21, 1, 19, 33, 32, 22, 32, 29, 23, 32, 18, 33, 8, 30, 1, 2, 32, 20, 33, 31, 33, 20, 32, 23, 32, 30, 33, 26, 33, 28, 33, 15, 33, 26, 29, 33, 13, 33, 28, 31]\n", - "[19, 0, 12, 0, 19, 0, 17, 0, 13, 1, 3, 12, 3, 2, 0, 2, 8, 32, 2, 27, 23, 33, 32, 31, 25, 31, 28, 33, 28, 33, 19, 0, 31, 24, 27, 24, 25, 24, 31, 33, 23, 27, 23, 33, 28, 31, 25, 31, 33, 32, 15, 33, 18, 33, 29, 33, 32, 20, 
32, 15, 32, 30, 1, 30, 32, 33, 23, 27, 2, 13, 1, 21, 1, 30, 32, 2, 9, 33, 18, 33, 19, 1, 13, 2, 9, 33, 32, 29, 33, 8, 33, 32, 14, 32, 20, 32, 8, 0, 3, 1, 30]\n", - "[20, 33, 8, 32, 23, 25, 31, 33, 8, 30, 32, 2, 8, 33, 14, 32, 8, 30, 1, 17, 0, 5, 16, 5, 0, 17, 0, 3, 12, 0, 19, 1, 17, 0, 13, 0, 6, 5, 16, 5, 0, 31, 24, 27, 24, 31, 33, 30, 1, 7, 1, 19, 1, 21, 1, 17, 0, 3, 12, 3, 13, 2, 0, 11, 0, 2, 32, 18, 33, 9, 33, 13, 0, 6, 16, 6, 0, 31, 25, 23, 32, 15, 33, 32, 31, 28, 2, 0, 11, 0, 8, 30, 32, 29, 32, 22, 32, 29, 23, 25, 31]\n", - "[21, 1, 30, 32, 18, 32, 33, 15, 32, 8, 33, 32, 29, 33, 15, 33, 27, 2, 32, 22, 32, 30, 8, 32, 8, 2, 27, 24, 27, 24, 27, 23, 27, 2, 13, 0, 17, 0, 2, 13, 0, 21, 1, 30, 1, 2, 7, 3, 7, 1, 2, 13, 0, 17, 1, 13, 0, 1, 19, 0, 8, 30, 1, 17, 1, 21, 1, 2, 9, 33, 15, 32, 33, 20, 33, 15, 32, 20, 32, 30, 33, 13, 3, 13, 3, 13, 1, 30, 32, 31, 24, 25, 24, 27, 2, 28, 2, 8, 30, 32, 29]\n", - "[22, 32, 8, 32, 33, 29, 32, 22, 32, 18, 32, 29, 32, 31, 24, 25, 23, 29, 33, 28, 33, 28, 2, 3, 13, 0, 7, 2, 28, 33, 14, 32, 14, 33, 15, 32, 31, 28, 31, 28, 33, 26, 33, 8, 30, 1, 30, 33, 20, 33, 32, 20, 32, 2, 27, 2, 9, 33, 26, 33, 18, 32, 20, 33, 29, 23, 29, 26, 29, 33, 27, 2, 32, 30, 8, 30, 33, 27, 23, 29, 32, 20, 32, 18, 32, 33, 26, 29, 32, 14, 32, 15, 32, 31, 33, 20, 32, 23, 33, 15, 32]\n", - "[23, 25, 23, 32, 29, 26, 33, 9, 2, 1, 17, 1, 21, 1, 7, 0, 21, 1, 30, 32, 15, 33, 9, 33, 9, 33, 27, 24, 25, 23, 33, 27, 33, 32, 14, 32, 22, 32, 31, 28, 31, 25, 31, 32, 14, 33, 27, 23, 25, 24, 27, 24, 31, 25, 24, 25, 23, 27, 2, 27, 23, 32, 18, 33, 19, 0, 1, 7, 0, 10, 4, 10, 5, 16, 6, 16, 6, 5, 6, 16, 5, 10, 5, 16, 5, 6, 4, 10, 4, 10, 5, 16, 5, 6, 5, 10, 0, 2, 1, 19, 1]\n", - "[24, 25, 23, 25, 31, 32, 29, 33, 30, 32, 15, 32, 23, 32, 30, 8, 30, 33, 31, 28, 2, 9, 2, 3, 2, 8, 2, 27, 23, 32, 14, 33, 19, 1, 3, 12, 3, 0, 12, 3, 13, 3, 1, 17, 1, 21, 1, 13, 3, 12, 3, 13, 1, 21, 1, 21, 1, 13, 3, 1, 7, 1, 19, 1, 2, 7, 1, 0, 5, 6, 0, 2, 7, 2, 0, 12, 3, 7, 1, 2, 3, 7, 2, 3, 12, 3, 1, 19, 0, 11, 
0, 17, 1, 30, 8, 32, 20, 32, 30, 1, 17]\n", - "[25, 31, 0, 11, 0, 31, 24, 25, 24, 31, 24, 27, 33, 19, 33, 20, 32, 14, 32, 2, 8, 2, 3, 12, 3, 1, 2, 0, 13, 2, 7, 1, 30, 32, 2, 0, 10, 0, 6, 0, 8, 32, 31, 28, 2, 13, 0, 1, 17, 0, 10, 4, 0, 2, 7, 0, 17, 0, 21, 1, 13, 2, 1, 7, 2, 1, 30, 33, 23, 29, 23, 32, 2, 7, 3, 1, 21, 0, 12, 0, 13, 1, 3, 7, 3, 0, 4, 10, 5, 10, 0, 3, 12, 3, 12, 0, 5, 6, 5, 0, 8]\n", - "[26, 29, 23, 27, 24, 25, 24, 25, 31, 0, 2, 9, 33, 8, 32, 22, 32, 22, 32, 33, 32, 14, 33, 26, 33, 23, 29, 26, 33, 27, 33, 28, 31, 24, 31, 0, 11, 0, 13, 33, 27, 2, 9, 2, 1, 0, 2, 13, 33, 19, 33, 15, 33, 15, 32, 22, 32, 23, 25, 24, 25, 24, 31, 0, 31, 0, 13, 2, 0, 3, 0, 3, 2, 27, 24, 25, 24, 25, 24, 31, 32, 33, 32, 29, 33, 9, 2, 32, 22, 32, 14, 32, 8, 33, 14, 33, 26, 33, 31, 28, 33]\n", - "[27, 24, 27, 2, 7, 2, 0, 12, 0, 10, 5, 10, 5, 16, 5, 16, 6, 0, 13, 0, 1, 17, 1, 7, 2, 0, 12, 0, 10, 0, 21, 1, 30, 33, 30, 32, 31, 0, 6, 4, 0, 12, 0, 19, 33, 20, 32, 29, 26, 33, 8, 0, 2, 13, 3, 1, 13, 2, 27, 24, 27, 2, 3, 7, 1, 30, 8, 30, 1, 2, 32, 22, 32, 23, 33, 8, 32, 31, 32, 33, 8, 32, 31, 33, 8, 32, 2, 0, 17, 0, 21, 0, 7, 3, 2, 1, 17, 0, 11, 0, 8]\n", - "[28, 31, 24, 31, 33, 30, 1, 3, 2, 7, 2, 3, 2, 8, 2, 9, 33, 23, 29, 26, 33, 27, 33, 18, 33, 29, 33, 13, 0, 10, 0, 11, 0, 4, 0, 4, 10, 0, 31, 24, 27, 33, 15, 32, 22, 32, 30, 32, 29, 26, 29, 23, 33, 14, 32, 14, 32, 22, 32, 14, 32, 23, 29, 23, 29, 32, 22, 32, 14, 32, 8, 30, 8, 32, 23, 25, 31, 28, 2, 0, 7, 0, 17, 0, 7, 3, 0, 17, 0, 31, 33, 29, 23, 29, 32, 30, 32, 8, 2, 27, 33]\n", - "[29, 26, 29, 33, 19, 33, 15, 32, 8, 2, 1, 2, 27, 23, 25, 23, 27, 23, 32, 2, 27, 33, 31, 32, 31, 0, 2, 27, 23, 27, 2, 3, 0, 2, 3, 0, 3, 13, 33, 19, 0, 4, 0, 12, 0, 3, 7, 0, 6, 0, 17, 0, 13, 2, 27, 2, 7, 2, 27, 23, 29, 33, 27, 33, 20, 33, 31, 28, 31, 25, 31, 24, 25, 24, 31, 0, 31, 32, 31, 24, 31, 28, 2, 27, 2, 1, 0, 5, 6, 0, 13, 0, 6, 0, 19, 33, 27, 2, 0, 13, 33]\n", - "[30, 33, 9, 2, 3, 13, 3, 13, 0, 10, 5, 6, 16, 6, 4, 0, 2, 28, 2, 9, 2, 3, 13, 33, 26, 29, 33, 
31, 24, 27, 24, 31, 28, 31, 33, 13, 0, 13, 3, 13, 1, 30, 1, 17, 0, 12, 3, 12, 0, 2, 3, 12, 3, 0, 10, 4, 0, 2, 13, 2, 3, 2, 9, 33, 13, 3, 2, 9, 33, 32, 23, 25, 24, 25, 23, 32, 23, 29, 32, 15, 32, 8, 0, 17, 0, 19, 1, 2, 9, 2, 13, 3, 2, 13, 33, 19, 1, 30, 32, 33, 20]\n", - "[31, 0, 3, 2, 0, 4, 6, 0, 13, 1, 19, 33, 13, 0, 8, 33, 23, 29, 26, 29, 26, 29, 33, 27, 24, 25, 31, 0, 13, 1, 30, 1, 7, 0, 7, 2, 28, 31, 25, 23, 32, 14, 33, 28, 2, 3, 0, 7, 2, 13, 3, 1, 0, 5, 16, 5, 0, 21, 1, 0, 21, 0, 21, 1, 13, 1, 21, 0, 2, 32, 18, 33, 30, 32, 8, 2, 27, 24, 31, 33, 19, 1, 0, 17, 0, 12, 0, 4, 10, 5, 10, 4, 10, 4, 6, 0, 6, 5, 6, 16, 6]\n", - "[32, 22, 32, 29, 26, 33, 20, 33, 9, 2, 13, 3, 0, 5, 16, 6, 16, 6, 5, 10, 5, 6, 5, 16, 6, 0, 2, 9, 2, 9, 33, 20, 33, 15, 33, 30, 8, 30, 33, 18, 33, 14, 33, 28, 2, 13, 33, 20, 32, 33, 30, 33, 23, 25, 24, 27, 23, 33, 23, 27, 33, 19, 33, 18, 32, 29, 32, 23, 27, 24, 31, 33, 18, 33, 9, 2, 32, 23, 29, 33, 31, 28, 33, 27, 23, 29, 23, 32, 22, 32, 33, 26, 29, 33, 28, 33, 27, 24, 27, 2, 7]\n", - "[33, 15, 33, 20, 32, 2, 27, 24, 31, 28, 31, 32, 22, 32, 30, 33, 32, 30, 8, 32, 20, 32, 23, 29, 26, 33, 19, 1, 21, 0, 6, 16, 6, 0, 1, 0, 3, 0, 4, 6, 5, 16, 6, 4, 6, 16, 6, 5, 6, 4, 6, 16, 6, 4, 10, 5, 0, 13, 0, 17, 0, 6, 0, 4, 10, 0, 10, 4, 6, 16, 6, 4, 0, 7, 1, 17, 0, 8, 33, 13, 2, 32, 8, 30, 32, 14, 33, 14, 33, 9, 2, 13, 3, 1, 30, 1, 3, 1, 21, 0, 3]\n" - ] - } - ], - "source": [ - "paths = rw.get_paths(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,\n", - " 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,\n", - " 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,\n", - " 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,\n", - " 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,\n", - " 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83,\n", - " 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 
96, 97,\n", - " 98, 99],\n", - " [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,\n", - " 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,\n", - " 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,\n", - " 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,\n", - " 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70,\n", - " 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,\n", - " 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,\n", - " 99, 100]])" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "paths.dags[0].edge_index" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Directed graph with 154 nodes and 1005 edges\n", - "\n", - "Node attributes\n", - "\tnode_sequences\t\t -> torch.Size([154, 2])\n", - "\n", - "Edge attributes\n", - "\tedge_weight\t\t -> torch.Size([1005])\n", - "\n", - "Graph attributes\n", - "\tnum_nodes\t\t\n", - "\n" - ] - } - ], - "source": [ - "m = pp.MultiOrderModel.from_DAGs(paths, max_order=3)\n", - "g2 = m.layers[2]\n", - "print(g2)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "#print(g_ho)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "#for v in g_ho.nodes:\n", - "# print(v)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "g2= pp.Graph.from_edge_list([\n", - " ['a', 'b'],\n", - " ['b', 'c'],\n", - " 
['c', 'd'],\n", - " ['d', 'e'],\n", - " ['e', 'f'],\n", - " ['f', 'g'],\n", - " ['g', 'h'],\n", - " ['h', 'i'],\n", - " ['i', 'j'],\n", - " ['j', 'k'],\n", - " ['k', 'l'],\n", - " ['l', 'm'],\n", - " ['m', 'n'],\n", - " ['n', 'o'],\n", - " ['o', 'a']\n", - "])" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "g2.mapping.to_idx('b')" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "rw2 = RandomWalk(g2)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 2/2 [00:00<00:00, 1900.02it/s]\n" - ] - } - ], - "source": [ - "data2 = rw2.run_experiment(steps=20,runs=['a','b'])" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
run_idseedtimenodestate
00a0aTrue
10a0bFalse
20a0cFalse
30a0dFalse
40a0eFalse
..................
1051b18dFalse
1061b19fTrue
1071b19eFalse
1081b20gTrue
1091b20fFalse
\n", - "

110 rows × 5 columns

\n", - "
" - ], - "text/plain": [ - " run_id seed time node state\n", - "0 0 a 0 a True\n", - "1 0 a 0 b False\n", - "2 0 a 0 c False\n", - "3 0 a 0 d False\n", - "4 0 a 0 e False\n", - ".. ... ... ... ... ...\n", - "105 1 b 18 d False\n", - "106 1 b 19 f True\n", - "107 1 b 19 e False\n", - "108 1 b 20 g True\n", - "109 1 b 20 f False\n", - "\n", - "[110 rows x 5 columns]" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data2" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{0: tensor([[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, 1, 2,\n", - " 3, 4],\n", - " [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, 1, 2, 3,\n", - " 4, 5]], dtype=torch.int32),\n", - " 1: tensor([[ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, 1, 2, 3,\n", - " 4, 5],\n", - " [ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, 1, 2, 3, 4,\n", - " 5, 6]], dtype=torch.int32)}" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "paths2 = rw2.get_paths(data2)\n", - "\n", - "paths2.paths" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "
\n", - "\n", - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "pp.plot(g2);" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{0: 'a',\n", - " 1: 'b',\n", - " 2: 'c',\n", - " 3: 'd',\n", - " 4: 'e',\n", - " 5: 'f',\n", - " 6: 'g',\n", - " 7: 'h',\n", - " 8: 'i',\n", - " 9: 'j',\n", - " 10: 'k',\n", - " 11: 'l',\n", - " 12: 'm',\n", - " 13: 'n',\n", - " 14: 'o'}" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "g2.mapping.idx_to_id" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "g2_ho = pp.HigherOrderGraph(paths2, order = 2)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "
\n", - "\n", - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "pp.plot(g2_ho,node_label=[g2_ho.mapping.to_id(x) for x in range(g2_ho.n)]);" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Higher Order Random Walk" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "g3 = pp.Graph.from_edge_list([\n", - " ['a','b'],\n", - " ['b','c'],\n", - " ['c','a'],\n", - " ['c','d'],\n", - " ['d','a']\n", - " ])\n", - "\n", - "g3.data['edge_weight'] = torch.tensor([[1],[1],[2],[1],[1]])" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "
\n", - "\n", - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "pp.plot(g3, node_label= [g3.mapping.to_id(x) for x in range(g3.n)]);" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'a': 0, 'b': 1, 'c': 2, 'd': 3}" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "g3.mapping.id_to_idx" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "paths = pp.DAGData(g3.mapping)\n", - "paths.append_walk(['a','b','c'],weight=1)\n", - "paths.append_walk(['b','c','a'],weight=1)\n", - "paths.append_walk(['b','c','d'],weight=0.2)\n", - "paths.append_walk(['c','a','b'],weight=1)\n", - "paths.append_walk(['c','d','a'],weight=0.2)\n", - "paths.append_walk(['d','a','b'],weight=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[Data(edge_index=[2, 2], node_sequences=[3, 1], num_nodes=3, weight=1),\n", - " Data(edge_index=[2, 2], node_sequences=[3, 1], num_nodes=3, weight=1),\n", - " Data(edge_index=[2, 2], node_sequences=[3, 1], num_nodes=3, weight=0.20000000298023224),\n", - " Data(edge_index=[2, 2], node_sequences=[3, 1], num_nodes=3, weight=1),\n", - " Data(edge_index=[2, 2], node_sequences=[3, 1], num_nodes=3, weight=0.20000000298023224),\n", - " Data(edge_index=[2, 2], node_sequences=[3, 1], num_nodes=3, weight=1)]" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "paths.dags" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "m = pp.MultiOrderModel.from_DAGs(paths, max_order=3)\n", - "g3_ho = m.layers[2]" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - 
"\n", - "\n", - "
\n", - "\n", - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "pp.plot(g3_ho, node_label = [g3_ho.mapping.to_id(x) for x in range(g3_ho.n)]);" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "rw = HigherOrderRandomWalk(g3_ho, g3, weight=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 5/5 [00:00<00:00, 511.38it/s]\n" - ] - } - ], - "source": [ - "data = rw.run_experiment(steps=100, runs=list(g3_ho.nodes))" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
run_idseedtimenodestate
00(a, b)0(a, b)True
10(a, b)0(b, c)False
20(a, b)0(c, a)False
30(a, b)0(c, d)False
40(a, b)0(d, a)False
..................
10204(d, a)98(b, c)False
10214(d, a)99(d, a)True
10224(d, a)99(c, d)False
10234(d, a)100(a, b)True
10244(d, a)100(d, a)False
\n", - "

1025 rows × 5 columns

\n", - "
" - ], - "text/plain": [ - " run_id seed time node state\n", - "0 0 (a, b) 0 (a, b) True\n", - "1 0 (a, b) 0 (b, c) False\n", - "2 0 (a, b) 0 (c, a) False\n", - "3 0 (a, b) 0 (c, d) False\n", - "4 0 (a, b) 0 (d, a) False\n", - "... ... ... ... ... ...\n", - "1020 4 (d, a) 98 (b, c) False\n", - "1021 4 (d, a) 99 (d, a) True\n", - "1022 4 (d, a) 99 (c, d) False\n", - "1023 4 (d, a) 100 (a, b) True\n", - "1024 4 (d, a) 100 (d, a) False\n", - "\n", - "[1025 rows x 5 columns]" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "DAGData with 2 dags with total weight 2.0\n" - ] - } - ], - "source": [ - "path = rw.get_paths(data,[0,1])\n", - "print(path)" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "('a',\n", - " 'b',\n", - " 'c',\n", - " 'd',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'd',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'd',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'd',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'd',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'd',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'd',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'd',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'd',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'a',\n", - 
" 'b',\n", - " 'c',\n", - " 'd',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'd',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'd',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'd',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'a',\n", - " 'b',\n", - " 'c',\n", - " 'a',\n", - " 'b')" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "path.get_walk(0)" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{0: 'a', 1: 'b', 2: 'c', 3: 'd'}" - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "g3.mapping.idx_to_id" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Current node = c\n", - "[0. 1. 1. 0.]\n", - "Current node = a\n", - "[1. 1. 1. 0.]\n", - "Current node = b\n", - "[1. 2. 1. 0.]\n", - "Current node = c\n", - "[1. 2. 2. 0.]\n", - "Current node = a\n", - "[2. 2. 2. 0.]\n", - "Current node = b\n", - "[2. 3. 2. 0.]\n", - "Current node = c\n", - "[2. 3. 3. 0.]\n", - "Current node = a\n", - "[3. 3. 3. 0.]\n", - "Current node = b\n", - "[3. 4. 3. 0.]\n", - "Current node = c\n", - "[3. 4. 4. 0.]\n", - "Current node = a\n", - "[4. 4. 4. 0.]\n", - "Current node = b\n", - "[4. 5. 4. 0.]\n", - "Current node = c\n", - "[4. 5. 5. 0.]\n", - "Current node = d\n", - "[4. 5. 5. 1.]\n", - "Current node = a\n", - "[5. 5. 5. 1.]\n", - "Current node = b\n", - "[5. 6. 5. 1.]\n", - "Current node = c\n", - "[5. 6. 6. 1.]\n", - "Current node = d\n", - "[5. 6. 6. 2.]\n", - "Current node = a\n", - "[6. 6. 6. 2.]\n", - "Current node = b\n", - "[6. 7. 6. 2.]\n", - "Current node = c\n", - "[6. 7. 7. 2.]\n", - "Current node = a\n", - "[7. 7. 7. 2.]\n", - "Current node = b\n", - "[7. 8. 7. 2.]\n", - "Current node = c\n", - "[7. 8. 8. 
2.]\n", - "Current node = a\n", - "[8. 8. 8. 2.]\n", - "Current node = b\n", - "[8. 9. 8. 2.]\n", - "Current node = c\n", - "[8. 9. 9. 2.]\n", - "Current node = d\n", - "[8. 9. 9. 3.]\n", - "Current node = a\n", - "[9. 9. 9. 3.]\n", - "Current node = b\n", - "[ 9. 10. 9. 3.]\n", - "Current node = c\n", - "[ 9. 10. 10. 3.]\n", - "Current node = a\n", - "[10. 10. 10. 3.]\n", - "Current node = b\n", - "[10. 11. 10. 3.]\n", - "Current node = c\n", - "[10. 11. 11. 3.]\n", - "Current node = a\n", - "[11. 11. 11. 3.]\n", - "Current node = b\n", - "[11. 12. 11. 3.]\n", - "Current node = c\n", - "[11. 12. 12. 3.]\n", - "Current node = a\n", - "[12. 12. 12. 3.]\n", - "Current node = b\n", - "[12. 13. 12. 3.]\n", - "Current node = c\n", - "[12. 13. 13. 3.]\n", - "Current node = a\n", - "[13. 13. 13. 3.]\n", - "Current node = b\n", - "[13. 14. 13. 3.]\n", - "Current node = c\n", - "[13. 14. 14. 3.]\n", - "Current node = a\n", - "[14. 14. 14. 3.]\n", - "Current node = b\n", - "[14. 15. 14. 3.]\n", - "Current node = c\n", - "[14. 15. 15. 3.]\n", - "Current node = d\n", - "[14. 15. 15. 4.]\n", - "Current node = a\n", - "[15. 15. 15. 4.]\n", - "Current node = b\n", - "[15. 16. 15. 4.]\n", - "Current node = c\n", - "[15. 16. 16. 
4.]\n" - ] - } - ], - "source": [ - "for time,_ in rw.simulation_run(steps=50, seed=('a','b')):\n", - " print('Current node = {0}'.format(rw.first_order_node(rw.current_node)))\n", - " print(rw._first_order_visitations)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/mkdocs.yml b/mkdocs.yml index 778162ef7..b0f4900cf 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -26,7 +26,8 @@ nav: - Path Data and Higher-Order Models: tutorial/paths_higher_order.ipynb - Higher-Order Models for Time-Respecting Paths: tutorial/trp_higher_order.ipynb - Causality-Aware GNNs: tutorial/dbgnn.ipynb - - Generative Models for Random Graphs: tutorial/generative_models.ipynb + - Generative Models for Random Graphs: tutorial/generative_models.ipynb + - Implementation Concepts: tutorial/implementation_concepts.ipynb - Develop your own plot Functions: plot_tutorial.md - Code Reference: reference/ # The rest is done automatically by literate-nav - Contributing: diff --git a/src/pathpyG/algorithms/lift_order.py b/src/pathpyG/algorithms/lift_order.py index 2df796077..9ad74cdac 100644 --- a/src/pathpyG/algorithms/lift_order.py +++ b/src/pathpyG/algorithms/lift_order.py @@ -66,7 +66,6 @@ def lift_order_edge_index(edge_index: torch.Tensor, num_nodes: int | None = None # Map outdegree to each destination node to create an edge for each combination # of incoming and outgoing edges for each destination node outdegree_per_dst = outdegree[edge_index[1]] - num_new_edges = outdegree_per_dst.sum() 
# Create sources of the new higher-order edges ho_edge_srcs = torch.repeat_interleave(outdegree_per_dst) @@ -74,7 +73,7 @@ def lift_order_edge_index(edge_index: torch.Tensor, num_nodes: int | None = None # of all previous nodes in the ordered sequence of nodes ptrs = cumsum(outdegree, dim=0)[:-1] ho_edge_dsts = torch.repeat_interleave(ptrs[edge_index[1]], outdegree_per_dst) - idx_correction = torch.arange(num_new_edges, dtype=torch.long, device=edge_index.device) + idx_correction = torch.arange(ho_edge_srcs.size(0), dtype=torch.long, device=edge_index.device) idx_correction -= cumsum(outdegree_per_dst, dim=0)[ho_edge_srcs] ho_edge_dsts += idx_correction return torch.stack([ho_edge_srcs, ho_edge_dsts], dim=0) diff --git a/src/pathpyG/algorithms/temporal.py b/src/pathpyG/algorithms/temporal.py index 5f2a42146..24d55db79 100644 --- a/src/pathpyG/algorithms/temporal.py +++ b/src/pathpyG/algorithms/temporal.py @@ -1,20 +1,29 @@ """Algorithms for the analysis of time-respecting paths in temporal graphs.""" from __future__ import annotations -from typing import TYPE_CHECKING, Dict, Union, List, Tuple + +from typing import Tuple import numpy as np -from tqdm import tqdm import torch from scipy.sparse.csgraph import dijkstra +from tqdm import tqdm from pathpyG import Graph -from pathpyG.utils import to_numpy from pathpyG.core.temporal_graph import TemporalGraph +from pathpyG.utils import to_numpy def lift_order_temporal(g: TemporalGraph, delta: int = 1): + """Lift a temporal graph to a second-order temporal event graph. + Args: + g: Temporal graph to lift. + delta: Maximum time difference between events to consider them connected. + + Returns: + ho_index: Edge index of the second-order temporal event graph. 
+ """ # first-order edge index edge_index, timestamps = g.data.edge_index, g.data.time @@ -26,7 +35,6 @@ def lift_order_temporal(g: TemporalGraph, delta: int = 1): # lift order: find possible continuations for edges in each time stamp for t in tqdm(unique_t): - # find indices of all source edges that occur at unique timestamp t src_time_mask = timestamps == t src_edge_idx = indices[src_time_mask] @@ -36,7 +44,6 @@ def lift_order_temporal(g: TemporalGraph, delta: int = 1): dst_edge_idx = indices[dst_time_mask] if dst_edge_idx.size(0) > 0 and src_edge_idx.size(0) > 0: - # compute second-order edges between src and dst idx # for all edges where dst in src_edges (edge_index[1, x[:, 0]]) matches src in dst_edges (edge_index[0, x[:, 1]]) x = torch.cartesian_prod(src_edge_idx, dst_edge_idx)