From dc16098f49deea42766a68e5c142c17631bddf7c Mon Sep 17 00:00:00 2001 From: Tim LaRock Date: Wed, 6 Mar 2019 19:31:26 -0500 Subject: [PATCH 01/10] Refactored degree_dist to degree_hist, since it computes a histogram. --- pathpy/algorithms/statistics.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pathpy/algorithms/statistics.py b/pathpy/algorithms/statistics.py index 6d7de32..66b49a2 100644 --- a/pathpy/algorithms/statistics.py +++ b/pathpy/algorithms/statistics.py @@ -95,8 +95,8 @@ def mean_degree(network, degree='degree'): return _np.mean([network.nodes[x][degree] for x in network.nodes]) -def degree_dist(network, degree='degree'): - r"""Calculates the (in/out)-degree distribution of a directed or undirected network. +def degree_hist(network, degree='degree'): + r"""Calculates the (in/out)-degree histogram of a directed or undirected network. Parameters ---------- @@ -120,7 +120,7 @@ def degree_moment(network, k, degree='degree'): network: Network The network in which to calculate the k-th moment of the degree distribution """ - p_k = degree_dist(network, degree) + p_k = degree_hist(network, degree) mom = 0 for x in p_k: mom += x**k * p_k[x] @@ -173,7 +173,7 @@ def generating_func(network, x, degree='degree'): assert isinstance(x, (float, list, _np.ndarray)), \ 'Argument can only be float, list or numpy.ndarray' - p_k = degree_dist(network, degree) + p_k = degree_hist(network, degree) if isinstance(x, float): x_range = [x] From 5c56a28a7ebfbd4a78583b05d1aaa4e33825ef87 Mon Sep 17 00:00:00 2001 From: Tim LaRock Date: Wed, 6 Mar 2019 19:48:07 -0500 Subject: [PATCH 02/10] Added degree distribution function to statistics.py --- pathpy/algorithms/statistics.py | 64 +++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/pathpy/algorithms/statistics.py b/pathpy/algorithms/statistics.py index 66b49a2..6ef687d 100644 --- a/pathpy/algorithms/statistics.py +++ b/pathpy/algorithms/statistics.py @@ -201,3 +201,67 @@ def 
molloy_reed_fraction(network, degree='degree'): The network in which to calculate the Molloy-Reed fraction """ return degree_moment(network, k=2, degree=degree)/degree_moment(network, k=1, degree=degree) + + +def get_bins(values, num_bins, log_bins): + min_val = values.min() + max_val = values.max() + + if log_bins: + bins = _np.logspace(_np.log10(min_val), _np.log10(max_val), num_bins+1) + else: + bins = _np.linspace(min_val, max_val, num_bins+1) + + return bins + + +def degree_distribution(network, num_bins=30, degree='degree', log_bins=True, is_pmf=True): + ''' + Take a pathpy.network object and return the degree distribution. + + Parameters + --------- + network: Network + The network to compute the degree distribution + num_bins: int + Number of bins in the histogram + degree: str + Type of degree. Options are degree (total), indegree, outdegree + log_bins: logical + Bin degrees logarithmically or linearly + is_pmf: logical + Compute probability mass function or density + + Returns + ------- + x: np.array + centers of the bins + y: np.array + Heights of the bins + + ''' + assert degree is 'degree' or degree is 'indegree' or degree is 'outdegree',\ + 'Unknown degree property' + + if degree == 'degree': + degrees = _np.array([attr['indegree']+attr['outdegree'] for _,attr in network.nodes.items()]) + else: + degrees = _np.array([attr[degree] for _,attr in network.nodes.items()]) + + + degrees = degrees[degrees>0] + bins = get_bins(degrees, num_bins, log_bins) + + if is_pmf: + y, _ = _np.histogram(degrees, bins=bins, density=False) + p = y/float(y.sum()) + else: + p, _ = _np.histogram(degrees, bins=bins, density=True) + + x = bins[1:] - _np.diff(bins)/2.0 + + x = x[p>0] + p = p[p>0] + + return x, p + From 1b2fb3878b51e9be2e06dd28ed6a80d11bb91c54 Mon Sep 17 00:00:00 2001 From: Tim LaRock Date: Wed, 6 Mar 2019 19:54:58 -0500 Subject: [PATCH 03/10] Documented get_bins function. 
--- pathpy/algorithms/statistics.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/pathpy/algorithms/statistics.py b/pathpy/algorithms/statistics.py index 6ef687d..3169c1e 100644 --- a/pathpy/algorithms/statistics.py +++ b/pathpy/algorithms/statistics.py @@ -203,7 +203,24 @@ def molloy_reed_fraction(network, degree='degree'): return degree_moment(network, k=2, degree=degree)/degree_moment(network, k=1, degree=degree) -def get_bins(values, num_bins, log_bins): +def get_bins(values, num_bins, log_bins=False): + ''' + Compute (linear or logarithmic) bins for values. + + Parameters + --------- + values: np.array + values to be binned + num_bins: int + number of bins to use + log_bins: logical + If True, use logarithmic bins. Default is linear bins. + + Returns + ------- + bins: np.array + edges of num_bins bins + ''' min_val = values.min() max_val = values.max() From 84526754fb3a3e78a4a94487a130125cc9c65ab7 Mon Sep 17 00:00:00 2001 From: Tim LaRock Date: Wed, 6 Mar 2019 20:17:59 -0500 Subject: [PATCH 04/10] Added clustering_by_degree to statistics.py. --- pathpy/algorithms/statistics.py | 60 +++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/pathpy/algorithms/statistics.py b/pathpy/algorithms/statistics.py index 3169c1e..5f125fb 100644 --- a/pathpy/algorithms/statistics.py +++ b/pathpy/algorithms/statistics.py @@ -282,3 +282,63 @@ def degree_distribution(network, num_bins=30, degree='degree', log_bins=True, is return x, p + +def clustering_by_degree(network, num_bins=20, degree='degree', log_bins=False): + ''' + Compute binned clustering by degree. + + Parameters + ---------- + network: pp.Network + Network object + num_bins: int + Number of bins to use + degree: str + Which degree to use for binning + log_bins: logical + If True, use logarithmic bins. Default is linear bins. 
+ + Returns + ------- + x: np.array + Centers of bins + y: np.array + Heights of bins + + ''' + assert degree is 'degree' or degree is 'indegree' or degree is 'outdegree',\ + 'Unknown degree property' + + if degree == 'degree': + degrees_dict = {node:attr['indegree']+attr['outdegree'] for node, attr in network.nodes.items()} + else: + degrees_dict = {node:attr[degree] for node, attr in network.nodes()} + + ## Get degrees + degrees = _np.array(list(degrees_dict.values())) + degrees = degrees[degrees>0] + + ## Get bins + bins = get_bins(degrees, num_bins, log_bins) + start = bins[:-1] + end = bins[1:] + center = start + (end-start)*0.5 + + cc_k = dict((k,0.0) for k in range(len(center))) + counts = dict((k,0.0) for k in range(len(center))) + + for node, k in degrees_dict.items(): + ## get the bin + index = _np.argmax((k>=start) & (k<end)) + cc_k[index] += local_clustering_coefficient(network, node) + counts[index] += 1 + + x, y = [], [] + for index in range(len(center)): + if counts[index] > 0: + x.append(center[index]) + y.append(cc_k[index]/counts[index]) + + x,y = _np.array(x), _np.array(y) + return x,y + From 1c6e4d0f327a81efa9bcaf5e601639ed99195b0c Mon Sep 17 00:00:00 2001 From: Tim LaRock Date: Fri, 8 Mar 2019 17:18:37 -0500 Subject: [PATCH 05/10] Updated options for computing distributions based on degree types. 
--- pathpy/algorithms/statistics.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/pathpy/algorithms/statistics.py b/pathpy/algorithms/statistics.py index 5f125fb..f7ecb90 100644 --- a/pathpy/algorithms/statistics.py +++ b/pathpy/algorithms/statistics.py @@ -257,15 +257,17 @@ def degree_distribution(network, num_bins=30, degree='degree', log_bins=True, is Heights of the bins ''' - assert degree is 'degree' or degree is 'indegree' or degree is 'outdegree',\ + assert degree in ['degree', 'indegree', 'outdegree', 'inweight', 'outweight', 'weight'],\ 'Unknown degree property' - if degree == 'degree': - degrees = _np.array([attr['indegree']+attr['outdegree'] for _,attr in network.nodes.items()]) + if network.directed: + if degree == 'degree': + degrees = _np.array([attr['indegree']+attr['outdegree'] for _,attr in network.nodes.items()]) + else: + degrees = _np.array([attr[degree] for _,attr in network.nodes.items()]) else: degrees = _np.array([attr[degree] for _,attr in network.nodes.items()]) - degrees = degrees[degrees>0] bins = get_bins(degrees, num_bins, log_bins) @@ -306,13 +308,16 @@ def clustering_by_degree(network, num_bins=20, degree='degree', log_bins=False): Heights of bins ''' - assert degree is 'degree' or degree is 'indegree' or degree is 'outdegree',\ + assert degree in ['degree', 'indegree', 'outdegree', 'inweight', 'outweight', 'weight'],\ 'Unknown degree property' - if degree == 'degree': - degrees_dict = {node:attr['indegree']+attr['outdegree'] for node, attr in network.nodes.items()} + if network.directed: + if degree == 'degree': + degrees_dict = {node:attr['indegree']+attr['outdegree'] for node, attr in network.nodes.items()} + else: + degrees_dict = {node:attr[degree] for node, attr in network.nodes.items()} else: - degrees_dict = {node:attr[degree] for node, attr in network.nodes()} + degrees_dict = {node:attr[degree] for node, attr in network.nodes.items()} ## Get degrees degrees = 
_np.array(list(degrees_dict.values())) From 08c31cb8c333e2fd3c7d895c32e23cb25921a68f Mon Sep 17 00:00:00 2001 From: Tim LaRock Date: Fri, 8 Mar 2019 17:23:30 -0500 Subject: [PATCH 06/10] Fixed weight options. --- pathpy/algorithms/statistics.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pathpy/algorithms/statistics.py b/pathpy/algorithms/statistics.py index f7ecb90..59dce6d 100644 --- a/pathpy/algorithms/statistics.py +++ b/pathpy/algorithms/statistics.py @@ -263,6 +263,8 @@ def degree_distribution(network, num_bins=30, degree='degree', log_bins=True, is if network.directed: if degree == 'degree': degrees = _np.array([attr['indegree']+attr['outdegree'] for _,attr in network.nodes.items()]) + elif degree == 'weight': + degrees = _np.array([attr['inweight']+attr['outweight'] for _,attr in network.nodes.items()]) else: degrees = _np.array([attr[degree] for _,attr in network.nodes.items()]) else: @@ -314,6 +316,8 @@ def clustering_by_degree(network, num_bins=20, degree='degree', log_bins=False): if network.directed: if degree == 'degree': degrees_dict = {node:attr['indegree']+attr['outdegree'] for node, attr in network.nodes.items()} + elif degree == 'weight': + degrees_dict = {node:attr['inweight']+attr['outweight'] for node, attr in network.nodes.items()} else: degrees_dict = {node:attr[degree] for node, attr in network.nodes.items()} else: From b7f0cb803d1ce8ddb5b9c3c723d1a5cbfadf3719 Mon Sep 17 00:00:00 2001 From: Tim LaRock Date: Tue, 26 Mar 2019 09:33:37 -0400 Subject: [PATCH 07/10] Changed degree_dist and degree_dist_binned to match previous API. 
--- pathpy/algorithms/statistics.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pathpy/algorithms/statistics.py b/pathpy/algorithms/statistics.py index 59dce6d..f6f1a74 100644 --- a/pathpy/algorithms/statistics.py +++ b/pathpy/algorithms/statistics.py @@ -95,7 +95,7 @@ def mean_degree(network, degree='degree'): return _np.mean([network.nodes[x][degree] for x in network.nodes]) -def degree_hist(network, degree='degree'): +def degree_dist(network, degree='degree'): r"""Calculates the (in/out)-degree histogram of a directed or undirected network. Parameters @@ -120,7 +120,7 @@ def degree_moment(network, k, degree='degree'): network: Network The network in which to calculate the k-th moment of the degree distribution """ - p_k = degree_hist(network, degree) + p_k = degree_dist(network, degree) mom = 0 for x in p_k: mom += x**k * p_k[x] @@ -173,7 +173,7 @@ def generating_func(network, x, degree='degree'): assert isinstance(x, (float, list, _np.ndarray)), \ 'Argument can only be float, list or numpy.ndarray' - p_k = degree_hist(network, degree) + p_k = degree_dist(network, degree) if isinstance(x, float): x_range = [x] @@ -232,7 +232,7 @@ def get_bins(values, num_bins, log_bins=False): return bins -def degree_distribution(network, num_bins=30, degree='degree', log_bins=True, is_pmf=True): +def degree_dist_binned(network, num_bins=30, degree='degree', log_bins=True, is_pmf=True): ''' Take a pathpy.network object and return the degree distribution. From e711d19bf915ba532974c5fe2658fac67c365c98 Mon Sep 17 00:00:00 2001 From: Tim LaRock Date: Tue, 26 Mar 2019 13:41:46 -0400 Subject: [PATCH 08/10] Updated clustering by degree api. 
--- pathpy/algorithms/statistics.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/pathpy/algorithms/statistics.py b/pathpy/algorithms/statistics.py index f6f1a74..356f764 100644 --- a/pathpy/algorithms/statistics.py +++ b/pathpy/algorithms/statistics.py @@ -287,7 +287,7 @@ def degree_dist_binned(network, num_bins=30, degree='degree', log_bins=True, is_ return x, p -def clustering_by_degree(network, num_bins=20, degree='degree', log_bins=False): +def clustering_by_degree(network, num_bins=20, degree='degree', binned=True, log_bins=False): ''' Compute binned clustering by degree. @@ -325,13 +325,19 @@ def clustering_by_degree(network, num_bins=20, degree='degree', log_bins=False): ## Get degrees degrees = _np.array(list(degrees_dict.values())) - degrees = degrees[degrees>0] - ## Get bins - bins = get_bins(degrees, num_bins, log_bins) - start = bins[:-1] - end = bins[1:] - center = start + (end-start)*0.5 + if binned: + degrees = degrees[degrees>0] + ## Get bins + bins = get_bins(degrees, num_bins, log_bins) + start = bins[:-1] + end = bins[1:] + center = start + (end-start)*0.5 + else: + bins = _np.unique(degrees) + start = bins[:-1] + end = bins[1:] + center = start + (end-start)*0.5 cc_k = dict((k,0.0) for k in range(len(center))) counts = dict((k,0.0) for k in range(len(center))) From 98eff7671baebc5d1ca9b5c9b4c584c6990f6cd8 Mon Sep 17 00:00:00 2001 From: Tim LaRock Date: Wed, 27 Mar 2019 09:43:52 -0400 Subject: [PATCH 09/10] Documentation and spacing improvements. --- pathpy/algorithms/statistics.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/pathpy/algorithms/statistics.py b/pathpy/algorithms/statistics.py index 356f764..8180921 100644 --- a/pathpy/algorithms/statistics.py +++ b/pathpy/algorithms/statistics.py @@ -43,9 +43,9 @@ def local_clustering_coefficient(network, v): as zero. 
For all other nodes, it is defined as: cc(c) := 2*k(i)/(d_i(d_i-1)) - + or - + cc(c) := k(i)/(d_out_i(d_out_i-1)) in undirected and directed networks respectively. @@ -101,7 +101,7 @@ def degree_dist(network, degree='degree'): Parameters ---------- network: Network - The network for which to calculate the degree distribution + The network for which to calculate the degree histogram """ assert degree is 'degree' or degree is 'indegree' or degree is 'outdegree',\ 'Unknown degree property' @@ -160,7 +160,7 @@ def generating_func(network, x, degree='degree'): >>> n.add_edge('d', 'e') >>> n.add_edge('d', 'f') >>> n.add_edge('e', 'f') - + >>> # print single value f(x) >>> print(pp.statistics.generating_func(n, 0.3)) @@ -207,6 +207,9 @@ def get_bins(values, num_bins, log_bins=False): ''' Compute (linear or logarithmic) bins for values. + NOTE: If log_bins is True, 0s should be removed from values _before_ + calling this function. + Parameters --------- values: np.array @@ -236,6 +239,8 @@ def degree_dist_binned(network, num_bins=30, degree='degree', log_bins=True, is_ ''' Take a pathpy.network object and return the degree distribution. + NOTE: Ignores singleton (degree 0) nodes. + Parameters --------- network: Network @@ -291,16 +296,20 @@ def clustering_by_degree(network, num_bins=20, degree='degree', binned=True, log ''' Compute binned clustering by degree. + NOTE: Ignores singleton (degree 0) nodes. + Parameters ---------- network: pp.Network - Network object + Network (or HigherOrderNetwork) object num_bins: int - Number of bins to use + Number of bins to use. Default 20. degree: str - Which degree to use for binning + Which degree to use for binning. Default is total degree. + binned: logical + If True, bin the distribution. Default is True. log_bins: logical - If True, use logarithmic bins. Default is linear bins. + If True, use logarithmic bins. Ignored when binned=False. Default is linear bins. 
Returns ------- @@ -325,9 +334,9 @@ def clustering_by_degree(network, num_bins=20, degree='degree', binned=True, log ## Get degrees degrees = _np.array(list(degrees_dict.values())) + degrees = degrees[degrees>0] if binned: - degrees = degrees[degrees>0] ## Get bins bins = get_bins(degrees, num_bins, log_bins) start = bins[:-1] From 2d29f588dc56221f6b925a4176e8b6e9a08351cd Mon Sep 17 00:00:00 2001 From: Tim LaRock Date: Wed, 27 Mar 2019 09:46:13 -0400 Subject: [PATCH 10/10] Documentation update. --- pathpy/algorithms/statistics.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/pathpy/algorithms/statistics.py b/pathpy/algorithms/statistics.py index 8180921..567bc7d 100644 --- a/pathpy/algorithms/statistics.py +++ b/pathpy/algorithms/statistics.py @@ -204,8 +204,7 @@ def molloy_reed_fraction(network, degree='degree'): def get_bins(values, num_bins, log_bins=False): - ''' - Compute (linear or logarithmic) bins for values. + r"""Compute (linear or logarithmic) bins for values. NOTE: If log_bins is True, 0s should be removed from values _before_ calling this function. @@ -223,7 +222,7 @@ def get_bins(values, num_bins, log_bins=False): ------- bins: np.array edges of num_bins bins - ''' + """ min_val = values.min() max_val = values.max() @@ -236,8 +235,7 @@ def get_bins(values, num_bins, log_bins=False): def degree_dist_binned(network, num_bins=30, degree='degree', log_bins=True, is_pmf=True): - ''' - Take a pathpy.network object and return the degree distribution. + r"""Take a pathpy.network object and return the degree distribution. NOTE: Ignores singleton (degree 0) nodes. 
@@ -261,7 +259,7 @@ def degree_dist_binned(network, num_bins=30, degree='degree', log_bins=True, is_ y: np.array Heights of the bins - ''' + """ assert degree in ['degree', 'indegree', 'outdegree', 'inweight', 'outweight', 'weight'],\ 'Unknown degree property' @@ -293,8 +291,7 @@ def degree_dist_binned(network, num_bins=30, degree='degree', log_bins=True, is_ def clustering_by_degree(network, num_bins=20, degree='degree', binned=True, log_bins=False): - ''' - Compute binned clustering by degree. + r"""Compute average local clustering coefficient by degree. NOTE: Ignores singleton (degree 0) nodes. @@ -318,7 +315,7 @@ def clustering_by_degree(network, num_bins=20, degree='degree', binned=True, log y: np.array Heights of bins - ''' + """ assert degree in ['degree', 'indegree', 'outdegree', 'inweight', 'outweight', 'weight'],\ 'Unknown degree property'