From ce230dba7ec01dc4bed42848035c4c8559909f70 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Wed, 5 May 2021 17:05:56 +0200 Subject: [PATCH 01/53] Class Point / K-means algorithm --- pyske/core/util/point.py | 51 ++++++++++++ pyske/examples/list/k_means.py | 121 ++++++++++++++++++++++++++++ pyske/examples/list/k_means_main.py | 25 ++++++ pyske/examples/list/util.py | 14 ++++ 4 files changed, 211 insertions(+) create mode 100644 pyske/core/util/point.py create mode 100644 pyske/examples/list/k_means.py create mode 100644 pyske/examples/list/k_means_main.py diff --git a/pyske/core/util/point.py b/pyske/core/util/point.py new file mode 100644 index 0000000..0a5d2fc --- /dev/null +++ b/pyske/core/util/point.py @@ -0,0 +1,51 @@ +""" +A module to represent a point +""" + +from math import sqrt + + +class Point(object): + """A class to represent a point""" + + def __init__(self, x, y): + self.__x = x + self.__y = y + + def __repr__(self): + return "(%s, %s)" % (self.__x, self.__y) + + def __eq__(self, other): + if isinstance(other, Point): + return self.__x == other.x and self.__y == other.__y + return False + + @property + def x(self): + """X getter""" + return self.__x + + @property + def y(self): + """Y getter""" + return self.__y + + def distance(self, other: 'Point'): + """ + Returns the distance from another point. + + Examples:: + + >>> from pyske.core.util.point import Point + >>> p1 = Point(5,5) + >>> p2 = Point(5,7) + >>> p1.distance(p2) + 2.0 + + :param other: a point + :return: distance from other point + + """ + dx = self.__x - other.x + dy = self.__y - other.y + return sqrt(dx ** 2 + dy ** 2) diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py new file mode 100644 index 0000000..69dbdbc --- /dev/null +++ b/pyske/examples/list/k_means.py @@ -0,0 +1,121 @@ +""" +K-Means +""" + +from pyske.core.interface import List +from pyske.core.list import SList +import random +import matplotlib.pyplot as plt +from pyske.core.util.point import Point + + +def cluster_index(p, centroids): + """ + Get the centroid index of the closest centroid + """ + min_dist = float("inf") + p_centroid = centroids[0] + for c in centroids: + if p.distance(c) < min_dist: + min_dist = p.distance(c) + p_centroid = c + return centroids.index(p_centroid) + + +def make_clusters(input_list, centroids): + """ + Append all points to the cluster with the minimal distance from its centroid + """ + clusters = [[] for c in centroids] + for p in input_list.to_seq(): + index = cluster_index(p, centroids) + clusters[index].append(p) + return clusters + + +def coords_average(cluster): + """ + Get the coordinates average of all points in one cluster + """ + x_average = sum([p.x for p in cluster]) / len(cluster) + y_average = sum([p.y for p in cluster]) / len(cluster) + return Point(x_average, y_average) + + +def get_new_centroid(cluster): + """ + Get closest point to average of point coordinates + """ + average_point = coords_average(cluster) + min_dist = float("inf") + new_centroid = cluster[0] + for p in cluster: + if p.distance(average_point) < min_dist: + min_dist = p.distance(average_point) + new_centroid = p + return new_centroid + + +def define_centroids(clusters): + """ + Redefine centroids of clusters + """ + centroids = [] + for cluster in clusters: + centroids.append(get_new_centroid(cluster)) + return centroids + + +def k_means_init(input_list: List, n_cluster: int): + """ + K-means++ initialisation + + :param input_list: a list of point + :param n_cluster: number of cluster + + :return: n_cluster centroids + """ + centroids = SList([]) + c1 = input_list.to_seq()[random.randint(0, input_list.length() - 1)] + centroids.append(c1) + + for c in range(n_cluster - 1): + dist = input_list.map(lambda x: x.distance(centroids[0])) + for i in range(1, len(centroids)): + temp_dist = input_list.map(lambda x: x.distance(centroids[i])) + dist = dist.map2(lambda x, y: min(x, y), temp_dist) + + index_max = [i for i, x in enumerate(dist.to_seq()) if x == max(dist.to_seq())] + next_centroid = input_list.to_seq()[index_max[0]] + centroids.append(next_centroid) + + return centroids + + +def k_means(input_list: List, n_cluster: int, max_iter: int = 10): + """ + K-means algorithm on a list of point + + :param input_list: a list of point + :param n_cluster: number of cluster + :param max_iter: number of iteration + + :return: a list of class + """ + centroids = k_means_init(input_list, n_cluster) + j = 0 + while j < max_iter: + clusters = make_clusters(input_list, centroids) + plt.scatter([point.x for point in input_list.to_seq()], [point.y for point in input_list.to_seq()], + c='yellow') + clusters_color = ['green', 'blue', 'black', 'purple', 'brown'] + for i in range(len(clusters)): + plt.scatter([point.x for point in clusters[i]], [point.y for point in clusters[i]], + c=clusters_color[i]) + + centroids = define_centroids(clusters) + plt.scatter([point.x for point in centroids], [point.y for point in centroids], c='red') + plt.show() + j = j + 1 + + return clusters diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py new file mode 100644 index 0000000..f2b8c0c --- /dev/null +++ b/pyske/examples/list/k_means_main.py @@ -0,0 +1,25 @@ +""" +Execution of k_means +""" +import gc + +from pyske.core import Timing +from pyske.examples.list.k_means import k_means +from pyske.examples.list import util + + +if __name__ == '__main__': + size, num_iter, choice = util.standard_parse_command_line() + pyske_list_class = util.select_pyske_list(choice) + input_list = util.rand_point_list(pyske_list_class, size) + timing = Timing() + execute = util.select_execute(choice) + example = k_means + execute(lambda: print('Version:\t', choice)) + gc.disable() + for iteration in range(1, 1 + num_iter): + timing.start() + result = example(input_list, 5) + timing.stop() + gc.collect() + util.print_experiment(result, timing.get(), execute, iteration) \ No newline at end of file diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index 57bed0d..26dbb69 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -89,6 +89,20 @@ def rand_list(cls, size): return cls.init(lambda _: float(random.randint(-100, 100)), size) +def rand_point_list(cls, size): + """ + Return a randomly generated list of points. + + :param cls: the class of the generated list. + :param size: a positive number + Precondition: size >= 0 + :return: a list of the given class + """ + from pyske.core.util.point import Point + import random + return cls.init(lambda _: Point(random.randint(0, size), random.randint(0, size)), size) + + def print_experiment(result, timing, execute, iteration=None): """ Print the result and timing of the experiment. From 0b33eacce3ceaa0dc1ed32ee8bad7b99b30d75e8 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Thu, 6 May 2021 16:33:41 +0200 Subject: [PATCH 02/53] number of clusters in parameters / test on datasets --- pyske/examples/list/k_means.py | 11 ++--------- pyske/examples/list/k_means_main.py | 27 +++++++++++++++++++++++---- pyske/examples/list/util.py | 11 ++++++++--- 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index 69dbdbc..d500fbb 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -100,22 +100,15 @@ def k_means(input_list: List, n_cluster: int, max_iter: int = 10): :param n_cluster: number of cluster :param max_iter: number of iteration - :return: a list of class + :return: 2 dimension list of points """ centroids = k_means_init(input_list, n_cluster) j = 0 while j < max_iter: clusters = make_clusters(input_list, centroids) - plt.scatter([point.x for point in input_list.to_seq()], [point.y for point in input_list.to_seq()], - c='yellow') - clusters_color = ['green', 'blue', 'black', 'purple', 'brown'] - for i in range(len(clusters)): - plt.scatter([point.x for point in clusters[i]], [point.y for point in clusters[i]], - c=clusters_color[i]) centroids = define_centroids(clusters) - plt.scatter([point.x for point in centroids], [point.y for point in centroids], c='red') - plt.show() + # plt.scatter([point.x for point in centroids], [point.y for point in centroids], c='red') j = j + 1 return clusters diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py index f2b8c0c..0367361 100644 --- a/pyske/examples/list/k_means_main.py +++ b/pyske/examples/list/k_means_main.py @@ -6,12 +6,28 @@ from pyske.core import Timing from pyske.examples.list.k_means import k_means from pyske.examples.list import util +import matplotlib.pyplot as plt +import argparse +PAR = 'parallel' +SEQ = 'sequential' if __name__ == '__main__': - size, num_iter, choice = util.standard_parse_command_line() + + parser = argparse.ArgumentParser() + parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000_000) + parser.add_argument("--iter", help="number of iterations", type=int, default=30) + parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ) + parser.add_argument("--clusters", help="number of clusters", type=int, default=3) + + args = parser.parse_args() + size = args.size + num_iter = args.iter + choice = args.data + clusters = args.clusters + pyske_list_class = util.select_pyske_list(choice) - input_list = util.rand_point_list(pyske_list_class, size) + input_list = util.rand_point_list(pyske_list_class, size, clusters) timing = Timing() execute = util.select_execute(choice) example = k_means @@ -19,7 +35,10 @@ gc.disable() for iteration in range(1, 1 + num_iter): timing.start() - result = example(input_list, 5) + result = example(input_list, clusters) timing.stop() gc.collect() - util.print_experiment(result, timing.get(), execute, iteration) \ No newline at end of file + util.print_experiment("", timing.get(), execute, iteration) + for i in range(len(result)): + plt.scatter([point.x for point in result[i]], [point.y for point in result[i]]) + plt.show() diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index 26dbb69..965067f 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -2,6 +2,8 @@ Utility functions for PySke examples """ +from sklearn.datasets import make_blobs + PAR = 'parallel' SEQ = 'sequential' @@ -89,18 +91,21 @@ def rand_list(cls, size): return cls.init(lambda _: float(random.randint(-100, 100)), size) -def rand_point_list(cls, size): +def rand_point_list(cls, size, clusters): """ Return a randomly generated list of points. :param cls: the class of the generated list. :param size: a positive number Precondition: size >= 0 + :param clusters: number of clusters :return: a list of the given class """ from pyske.core.util.point import Point - import random - return cls.init(lambda _: Point(random.randint(0, size), random.randint(0, size)), size) + x, y_true = make_blobs(n_samples=size, centers=clusters) + x = x.tolist() + x = list(map(lambda y: Point(y[0], y[1]), x)) + return cls.from_seq(x) def print_experiment(result, timing, execute, iteration=None): From 8b89af61f688b9b756ccfc0c29747c933ce106e3 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Tue, 11 May 2021 09:55:38 +0200 Subject: [PATCH 03/53] radon cc in k_means_init / pylinting --- pyske/examples/list/k_means.py | 29 +++++++++++++++++++---------- pyske/examples/list/k_means_main.py | 6 +++--- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index d500fbb..4b9ed26 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -1,11 +1,9 @@ """ K-Means """ - +import random from pyske.core.interface import List from pyske.core.list import SList -import random -import matplotlib.pyplot as plt from pyske.core.util.point import Point @@ -65,6 +63,17 @@ def define_centroids(clusters): centroids.append(get_new_centroid(cluster)) return centroids +def index_max_value(input_list: List): + """ + Return the index of the maximum value + """ + index_max = 0 + max_dist = 0 + for i in range(len(input_list.to_seq())): + if input_list.to_seq()[i] > max_dist: + max_dist = input_list.to_seq()[i] + index_max = i + return index_max def k_means_init(input_list: List, n_cluster: int): """ @@ -79,14 +88,14 @@ def k_means_init(input_list: List, n_cluster: int): c1 = input_list.to_seq()[random.randint(0, input_list.length() - 1)] centroids.append(c1) - for c in range(n_cluster - 1): + for _ in range(n_cluster - 1): dist = input_list.map(lambda x: x.distance(centroids[0])) for i in range(1, len(centroids)): - temp_dist = input_list.map(lambda x: x.distance(centroids[i])) - dist = dist.map2(lambda x, y: min(x, y), temp_dist) + temp_dist = input_list.map(lambda x, index=i: x.distance(centroids[index])) + dist = dist.map2(lambda x, y: y if y < x else x, temp_dist) - index_max = [i for i, x in enumerate(dist.to_seq()) if x == max(dist.to_seq())] - next_centroid = input_list.to_seq()[index_max[0]] + index_max = index_max_value(dist) + next_centroid = input_list.to_seq()[index_max] centroids.append(next_centroid) return centroids @@ -100,13 +109,13 @@ def k_means(input_list: List, n_cluster: int, max_iter: int = 10): :param n_cluster: number of cluster :param max_iter: number of iteration - :return: 2 dimension list of points + :return: 2 dimensions list of points """ + centroids = k_means_init(input_list, n_cluster) j = 0 while j < max_iter: clusters = make_clusters(input_list, centroids) - centroids = define_centroids(clusters) # plt.scatter([point.x for point in centroids], [point.y for point in centroids], c='red') j = j + 1 diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py index 0367361..f08a918 100644 --- a/pyske/examples/list/k_means_main.py +++ b/pyske/examples/list/k_means_main.py @@ -2,12 +2,12 @@ Execution of k_means """ import gc +import argparse +import matplotlib.pyplot as plt from pyske.core import Timing from pyske.examples.list.k_means import k_means from pyske.examples.list import util -import matplotlib.pyplot as plt -import argparse PAR = 'parallel' SEQ = 'sequential' @@ -15,7 +15,7 @@ if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000_000) + parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000) parser.add_argument("--iter", help="number of iterations", type=int, default=30) parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ) parser.add_argument("--clusters", help="number of clusters", type=int, default=3) From 4c7d8727d5ea23062681d352e0dc49987c64bf5a Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Tue, 25 May 2021 15:47:05 +0200 Subject: [PATCH 04/53] Default constructor, addition between two points, multiplication by scalar and by a point --- pyske/core/util/point.py | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/pyske/core/util/point.py b/pyske/core/util/point.py index 0a5d2fc..cb55684 100644 --- a/pyske/core/util/point.py +++ b/pyske/core/util/point.py @@ -8,7 +8,7 @@ class Point(object): """A class to represent a point""" - def __init__(self, x, y): + def __init__(self, x=0, y=0): self.__x = x self.__y = y @@ -20,6 +20,38 @@ def __eq__(self, other): return self.__x == other.x and self.__y == other.__y return False + def __add__(self, other): + """ + Addition of two points + + Examples:: + + >>> p1 = Point(5,5) + >>> p2 = Point(5,7) + >>> p1 + p2 + (10, 12) + """ + if isinstance(other, Point): + return Point(self.x + other.x, self.y + other.y) + + def __mul__(self, other): + """ + Multiplication by a point or a scalar + + Examples:: + + >>> p1 = Point(5,5) + >>> p2 = Point(5,7) + >>> p1 * 5 + (25, 25) + >>> p1 * p2 + (25, 35) + """ + if isinstance(other, Point): + return Point(self.x * other.x, self.y * other.y) + if isinstance(other, int) or isinstance(other, float): + return Point(self.x * other, self.y * other) + @property def x(self): """X getter""" From ee146229998184612db64bc0cb01ae1160fe0b80 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Tue, 25 May 2021 16:13:40 +0200 Subject: [PATCH 05/53] parrallel optimization in k_means_init --- pyske/examples/list/k_means.py | 41 +++++++++++++++++----------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index 4b9ed26..d7ab21f 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -2,20 +2,21 @@ K-Means """ import random +from typing import Callable, Tuple from pyske.core.interface import List from pyske.core.list import SList from pyske.core.util.point import Point -def cluster_index(p, centroids): +def cluster_index(point, centroids): """ Get the centroid index of the closest centroid """ min_dist = float("inf") p_centroid = centroids[0] for c in centroids: - if p.distance(c) < min_dist: - min_dist = p.distance(c) + if point.distance(c) < min_dist: + min_dist = point.distance(c) p_centroid = c return centroids.index(p_centroid) @@ -54,7 +55,7 @@ def get_new_centroid(cluster): return new_centroid -def define_centroids(clusters): +def define_centroids(clusters): # Pas utile car tuple ( num_cluster, point ) """ Redefine centroids of clusters """ @@ -63,17 +64,16 @@ def define_centroids(clusters): centroids.append(get_new_centroid(cluster)) return centroids -def index_max_value(input_list: List): + +def max_dist(pair_a: Tuple[Point, float], pair_b: Tuple[Point, float]): """ - Return the index of the maximum value + Return the tuple with the maximum distance """ - index_max = 0 - max_dist = 0 - for i in range(len(input_list.to_seq())): - if input_list.to_seq()[i] > max_dist: - max_dist = input_list.to_seq()[i] - index_max = i - return index_max + if pair_a[1] > pair_b[1]: + return pair_a + else: + return pair_b + def k_means_init(input_list: List, n_cluster: int): """ @@ -94,30 +94,31 @@ def k_means_init(input_list: List, n_cluster: int): temp_dist = input_list.map(lambda x, index=i: x.distance(centroids[index])) dist = dist.map2(lambda x, y: y if y < x else x, temp_dist) - index_max = index_max_value(dist) - next_centroid = input_list.to_seq()[index_max] + zip_list = input_list.zip(dist) + next_centroid = zip_list.reduce(max_dist)[0] centroids.append(next_centroid) return centroids -def k_means(input_list: List, n_cluster: int, max_iter: int = 10): +def k_means(input_list: List, init_function: Callable[[List, int], List], n_cluster: int, + max_iter: int = 10): """ K-means algorithm on a list of point :param input_list: a list of point :param n_cluster: number of cluster :param max_iter: number of iteration + :param init_function: a function that initialize centroids :return: 2 dimensions list of points """ - centroids = k_means_init(input_list, n_cluster) + centroids = init_function(input_list, n_cluster) j = 0 while j < max_iter: - clusters = make_clusters(input_list, centroids) - centroids = define_centroids(clusters) - # plt.scatter([point.x for point in centroids], [point.y for point in centroids], c='red') + clusters = make_clusters(input_list, centroids) # assign_cluster + centroids = define_centroids(clusters) # update_centroids j = j + 1 return clusters From 057457c98096184e89eac1af93d6ec433eba9428 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Wed, 26 May 2021 17:04:36 +0200 Subject: [PATCH 06/53] fix: init instead of from_seq --- pyske/examples/list/util.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index 965067f..d6517fa 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -102,10 +102,11 @@ def rand_point_list(cls, size, clusters): :return: a list of the given class """ from pyske.core.util.point import Point + print(clusters) x, y_true = make_blobs(n_samples=size, centers=clusters) x = x.tolist() x = list(map(lambda y: Point(y[0], y[1]), x)) - return cls.from_seq(x) + return cls.init(lambda i: x[i], size) def print_experiment(result, timing, execute, iteration=None): From 87000f9a8ec99b89a26c528e8ba3184838591f38 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Wed, 26 May 2021 17:05:11 +0200 Subject: [PATCH 07/53] Division of a point --- pyske/core/util/point.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyske/core/util/point.py b/pyske/core/util/point.py index cb55684..b4c43f0 100644 --- a/pyske/core/util/point.py +++ b/pyske/core/util/point.py @@ -52,6 +52,10 @@ def __mul__(self, other): if isinstance(other, int) or isinstance(other, float): return Point(self.x * other, self.y * other) + def __truediv__(self, other): + if isinstance(other, int): + return Point(self.x / other, self.y / other) + @property def x(self): """X getter""" From 0d9b023430548a3f91a804dd060c465f686e68df Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Wed, 26 May 2021 17:06:15 +0200 Subject: [PATCH 08/53] parallel optimization, assign_cluster and update_cluster --- pyske/examples/list/k_means.py | 62 +++++++++++------------------ pyske/examples/list/k_means_main.py | 15 ++++--- 2 files changed, 30 insertions(+), 47 deletions(-) diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index d7ab21f..89cf643 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -18,51 +18,32 @@ def cluster_index(point, centroids): if point.distance(c) < min_dist: min_dist = point.distance(c) p_centroid = c - return centroids.index(p_centroid) + return point, centroids.index(p_centroid) -def make_clusters(input_list, centroids): +def assign_clusters(input_list, centroids): """ - Append all points to the cluster with the minimal distance from its centroid + Assign to each point to a cluster """ - clusters = [[] for c in centroids] - for p in input_list.to_seq(): - index = cluster_index(p, centroids) - clusters[index].append(p) - return clusters - -def coords_average(cluster): - """ - Get the coordinates average of all points in one cluster - """ - x_average = sum([p.x for p in cluster]) / len(cluster) - y_average = sum([p.y for p in cluster]) / len(cluster) - return Point(x_average, y_average) + return input_list.map(lambda x: cluster_index(x, centroids)) -def get_new_centroid(cluster): +def update_centroids(clusters, centroids): """ - Get closest point to average of point coordinates + Update centroids of clusters """ - average_point = coords_average(cluster) - min_dist = float("inf") - new_centroid = cluster[0] - for p in cluster: - if p.distance(average_point) < min_dist: - min_dist = p.distance(average_point) - new_centroid = p - return new_centroid - - -def define_centroids(clusters): # Pas utile car tuple ( num_cluster, point ) - """ - Redefine centroids of clusters - """ - centroids = [] - for cluster in clusters: - centroids.append(get_new_centroid(cluster)) - return centroids + new_centroids = SList([]) + i = 0 + while i < len(centroids): + cluster = clusters.filter(lambda x: x[1] == i) + sum_cluster = cluster.map(lambda x: x[0]).reduce(lambda x, y: x + y) + average_point = sum_cluster / cluster.length() + centroid = clusters.reduce( + lambda x, y: x if average_point.distance(x[0]) < average_point.distance(y[0]) else y)[0] + new_centroids.append(centroid) + i += 1 + return new_centroids def max_dist(pair_a: Tuple[Point, float], pair_b: Tuple[Point, float]): @@ -113,12 +94,15 @@ def k_means(input_list: List, init_function: Callable[[List, int], List], n_clus :return: 2 dimensions list of points """ - centroids = init_function(input_list, n_cluster) + j = 0 + while j < max_iter: - clusters = make_clusters(input_list, centroids) # assign_cluster - centroids = define_centroids(clusters) # update_centroids + clusters = assign_clusters(input_list, centroids) + + centroids = update_centroids(clusters, centroids) + j = j + 1 return clusters diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py index f08a918..0a3b171 100644 --- a/pyske/examples/list/k_means_main.py +++ b/pyske/examples/list/k_means_main.py @@ -1,13 +1,13 @@ """ Execution of k_means """ -import gc import argparse import matplotlib.pyplot as plt from pyske.core import Timing -from pyske.examples.list.k_means import k_means +from pyske.examples.list.k_means import k_means, k_means_init from pyske.examples.list import util +from pyske.core.support import parallel PAR = 'parallel' SEQ = 'sequential' @@ -32,13 +32,12 @@ execute = util.select_execute(choice) example = k_means execute(lambda: print('Version:\t', choice)) - gc.disable() for iteration in range(1, 1 + num_iter): timing.start() - result = example(input_list, clusters) + result = example(input_list, k_means_init, clusters) timing.stop() - gc.collect() util.print_experiment("", timing.get(), execute, iteration) - for i in range(len(result)): - plt.scatter([point.x for point in result[i]], [point.y for point in result[i]]) - plt.show() + #if parallel.PID == 0: + # for i in range((len(result))): + # plt.scatter([point.x for point in result[i]], [point.y for point in result[i]]) + # plt.show() From da4a4d6a5a62b4de6e5434b72c9862e89be67d4d Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Thu, 27 May 2021 14:34:55 +0200 Subject: [PATCH 09/53] pylinting, typing --- pyske/examples/list/k_means.py | 39 ++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index 89cf643..f5da8a0 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -3,33 +3,33 @@ """ import random from typing import Callable, Tuple + from pyske.core.interface import List from pyske.core.list import SList from pyske.core.util.point import Point -def cluster_index(point, centroids): +def cluster_index(point: Point, centroids: SList[Point]) -> Tuple[Point, int]: """ Get the centroid index of the closest centroid """ min_dist = float("inf") p_centroid = centroids[0] - for c in centroids: - if point.distance(c) < min_dist: - min_dist = point.distance(c) - p_centroid = c + for centroid in centroids: + if point.distance(centroid) < min_dist: + min_dist = point.distance(centroid) + p_centroid = centroid return point, centroids.index(p_centroid) -def assign_clusters(input_list, centroids): +def assign_clusters(input_list: List[Point], centroids: SList[Point]) -> List[Tuple[Point, int]]: """ - Assign to each point to a cluster + Assign each point to a cluster """ - return input_list.map(lambda x: cluster_index(x, centroids)) -def update_centroids(clusters, centroids): +def update_centroids(clusters: List[Tuple[Point, int]], centroids: SList[Point]): """ Update centroids of clusters """ @@ -52,11 +52,10 @@ def max_dist(pair_a: Tuple[Point, float], pair_b: Tuple[Point, float]): """ if pair_a[1] > pair_b[1]: return pair_a - else: - return pair_b + return pair_b -def k_means_init(input_list: List, n_cluster: int): +def k_means_init(input_list: List[Point], n_cluster: int) -> SList[Point]: """ K-means++ initialisation @@ -66,8 +65,8 @@ def k_means_init(input_list: List, n_cluster: int): :return: n_cluster centroids """ centroids = SList([]) - c1 = input_list.to_seq()[random.randint(0, input_list.length() - 1)] - centroids.append(c1) + first_centroid = input_list.to_seq()[random.randint(0, input_list.length() - 1)] + centroids.append(first_centroid) for _ in range(n_cluster - 1): dist = input_list.map(lambda x: x.distance(centroids[0])) @@ -82,8 +81,8 @@ def k_means_init(input_list: List, n_cluster: int): return centroids -def k_means(input_list: List, init_function: Callable[[List, int], List], n_cluster: int, - max_iter: int = 10): +def k_means(input_list: List[Point], init_function: Callable[[List, int], List], n_cluster: int, + max_iter: int = 10) -> SList[SList[Point]]: """ K-means algorithm on a list of point @@ -97,7 +96,6 @@ def k_means(input_list: List, init_function: Callable[[List, int], List], n_clus centroids = init_function(input_list, n_cluster) j = 0 - while j < max_iter: clusters = assign_clusters(input_list, centroids) @@ -105,4 +103,9 @@ def k_means(input_list: List, init_function: Callable[[List, int], List], n_clus j = j + 1 - return clusters + clusters2d = SList([]) + for i in range(len(centroids)): + clusters2d.append(clusters.filter(lambda x, num_cluster=i: x[1] == num_cluster) + .map(lambda x: x[0]).to_seq() + ) + return clusters2d From 9f1e0fcead895f6aa292a0e497f56a99506e2306 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Thu, 27 May 2021 14:39:37 +0200 Subject: [PATCH 10/53] FIX: bad list initialization parallel list --- pyske/examples/list/util.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index d6517fa..703dfbb 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -102,11 +102,13 @@ def rand_point_list(cls, size, clusters): :return: a list of the given class """ from pyske.core.util.point import Point - print(clusters) - x, y_true = make_blobs(n_samples=size, centers=clusters) + from pyske.core import Distribution + + x, _ = make_blobs(n_samples=size, centers=clusters) x = x.tolist() x = list(map(lambda y: Point(y[0], y[1]), x)) - return cls.init(lambda i: x[i], size) + distr = Distribution().balanced(size) + return cls.from_seq(x).distribute(distr) def print_experiment(result, timing, execute, iteration=None): From a8d0385a486216141b46fb267616ca56fb1d4816 Mon Sep 17 00:00:00 2001 From: Evan MULUMBA Date: Thu, 27 May 2021 20:39:52 +0200 Subject: [PATCH 11/53] Changing sample type from custom type "Point" to Tuple --- pyske/examples/list/k_means.py | 43 ++++++++++++++++++++++------- pyske/examples/list/k_means_main.py | 3 +- pyske/examples/list/util.py | 17 ++++++++++++ 3 files changed, 52 insertions(+), 11 deletions(-) diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index 89cf643..dbddc8e 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -1,24 +1,47 @@ """ K-Means """ +import operator import random +from math import sqrt from typing import Callable, Tuple from pyske.core.interface import List from pyske.core.list import SList from pyske.core.util.point import Point -def cluster_index(point, centroids): +def distance2D(sample_1, sample_2): + """ + return distance between 2d sample. + + Examples:: + + >>> from pyske.core.util.point import Point + >>> p1 = Point(5,5) + >>> p2 = Point(5,7) + >>> p1.distance(p2) + 2.0 + + :param other: a point + :return: distance from other point + + """ + dx = sample_1[0] - sample_2[0] + dy = sample_1[1] - sample_2[1] + return sqrt(dx ** 2 + dy ** 2) + + +def cluster_index(sample, centroids): """ Get the centroid index of the closest centroid """ min_dist = float("inf") p_centroid = centroids[0] for c in centroids: - if point.distance(c) < min_dist: - min_dist = point.distance(c) + if distance2D(sample, c) < min_dist: + min_dist = distance2D(sample, c) p_centroid = c - return point, centroids.index(p_centroid) + return sample, centroids.index(p_centroid) def assign_clusters(input_list, centroids): @@ -37,16 +60,16 @@ def update_centroids(clusters, centroids): i = 0 while i < len(centroids): cluster = clusters.filter(lambda x: x[1] == i) - sum_cluster = cluster.map(lambda x: x[0]).reduce(lambda x, y: x + y) - average_point = sum_cluster / cluster.length() + sum_cluster = cluster.map(lambda x: x[0]).reduce(lambda a, b: tuple(map(operator.add, a, b))) + average_point = [x/clusters.length() for x in sum_cluster] centroid = clusters.reduce( - lambda x, y: x if average_point.distance(x[0]) < average_point.distance(y[0]) else y)[0] + lambda x, y: x if distance2D(average_point, x[0]) < distance2D(average_point, y[0]) else y)[0] new_centroids.append(centroid) i += 1 return new_centroids -def max_dist(pair_a: Tuple[Point, float], pair_b: Tuple[Point, float]): +def max_dist(pair_a, pair_b): """ Return the tuple with the maximum distance """ @@ -70,9 +93,9 @@ def k_means_init(input_list: List, n_cluster: int): centroids.append(c1) for _ in range(n_cluster - 1): - dist = input_list.map(lambda x: x.distance(centroids[0])) + dist = input_list.map(lambda sample: distance2D(sample, centroids[0])) for i in range(1, len(centroids)): - temp_dist = input_list.map(lambda x, index=i: x.distance(centroids[index])) + temp_dist = input_list.map(lambda sample, index=i: distance2D(sample, centroids[index])) dist = dist.map2(lambda x, y: y if y < x else x, temp_dist) zip_list = input_list.zip(dist) diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py index 0a3b171..65febba 100644 --- a/pyske/examples/list/k_means_main.py +++ b/pyske/examples/list/k_means_main.py @@ -27,7 +27,8 @@ clusters = args.clusters pyske_list_class = util.select_pyske_list(choice) - input_list = util.rand_point_list(pyske_list_class, size, clusters) + #input_list = util.rand_point_list(pyske_list_class, size, clusters) + input_list = util.rand_2D_sample_list(pyske_list_class, size, clusters) timing = Timing() execute = util.select_execute(choice) example = k_means diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index d6517fa..2a7327b 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -109,6 +109,23 @@ def rand_point_list(cls, size, clusters): return cls.init(lambda i: x[i], size) +def rand_2D_sample_list(cls, size , clusters): + """ + Return a randomly generated list of 2D sample. + + :param cls: the class of the generated list. + :param size: a positive number + Precondition: size >= 0 + :param clusters: number of clusters + :return: a list of the given class + """ + print(clusters) + x, y_true = make_blobs(n_samples=size, centers=clusters) + x = x.tolist() + x = list(map(lambda y: (y[0], y[1]), x)) + return cls.init(lambda i: x[i], size) + + def print_experiment(result, timing, execute, iteration=None): """ Print the result and timing of the experiment. From c617aade25a15c196258ad3464930c6ebf67b37d Mon Sep 17 00:00:00 2001 From: Evan MULUMBA Date: Fri, 28 May 2021 14:16:39 +0200 Subject: [PATCH 12/53] Add point_interface and changing the class Point to Point_2D --- pyske/core/util/{point.py => point_2D.py} | 35 ++++++++++++----------- pyske/core/util/point_Interface.py | 25 ++++++++++++++++ pyske/examples/list/k_means.py | 16 +++++------ pyske/examples/list/util.py | 4 +-- 4 files changed, 53 insertions(+), 27 deletions(-) rename pyske/core/util/{point.py => point_2D.py} (61%) create mode 100644 pyske/core/util/point_Interface.py diff --git a/pyske/core/util/point.py b/pyske/core/util/point_2D.py similarity index 61% rename from pyske/core/util/point.py rename to pyske/core/util/point_2D.py index b4c43f0..6a0dfd5 100644 --- a/pyske/core/util/point.py +++ b/pyske/core/util/point_2D.py @@ -3,10 +3,11 @@ """ from math import sqrt +from pyske.core.util.point_Interface import Point_Interface -class Point(object): - """A class to represent a point""" +class Point_2D(Point_Interface): + """A class to represent a 2D point""" def __init__(self, x=0, y=0): self.__x = x @@ -16,7 +17,7 @@ def __repr__(self): return "(%s, %s)" % (self.__x, self.__y) def __eq__(self, other): - if isinstance(other, Point): + if isinstance(other, Point_2D): return self.__x == other.x and self.__y == other.__y return False @@ -26,13 +27,13 @@ def __add__(self, other): Examples:: - >>> p1 = Point(5,5) - >>> p2 = Point(5,7) + >>> p1 = Point_2D(5,5) + >>> p2 = Point_2D(5,7) >>> p1 + p2 (10, 12) """ - if isinstance(other, Point): - return Point(self.x + other.x, self.y + other.y) + if isinstance(other, Point_2D): + return Point_2D(self.x + other.x, self.y + other.y) def __mul__(self, other): """ @@ -40,21 +41,21 @@ def __mul__(self, other): Examples:: - >>> p1 = Point(5,5) - >>> p2 = Point(5,7) + >>> p1 = Point_2D(5,5) + >>> p2 = Point_2D(5,7) >>> p1 * 5 (25, 25) >>> p1 * p2 (25, 35) """ - if isinstance(other, Point): - return Point(self.x * other.x, self.y * other.y) + if isinstance(other, Point_2D): + return Point_2D(self.x * other.x, self.y * other.y) if isinstance(other, int) or isinstance(other, float): - return Point(self.x * other, self.y * other) + return Point_2D(self.x * other, self.y * other) def __truediv__(self, other): if isinstance(other, int): - return Point(self.x / other, self.y / other) + return Point_2D(self.x / other, self.y / other) @property def x(self): @@ -66,15 +67,15 @@ def y(self): """Y getter""" return self.__y - def distance(self, other: 'Point'): + def distance(self, other: 'Point_2D'): """ Returns the distance from another point. Examples:: - >>> from pyske.core.util.point import Point - >>> p1 = Point(5,5) - >>> p2 = Point(5,7) + >>> from pyske.core.util.point_2D import Point_2D + >>> p1 = Point_2D(5,5) + >>> p2 = Point_2D(5,7) >>> p1.distance(p2) 2.0 diff --git a/pyske/core/util/point_Interface.py b/pyske/core/util/point_Interface.py new file mode 100644 index 0000000..aa3be5d --- /dev/null +++ b/pyske/core/util/point_Interface.py @@ -0,0 +1,25 @@ +""" +A module to represent a point +""" + + +class Point_Interface: + """Point interface to represent point of n dimensions""" + + def __repr__(self): + pass + + def __eq__(self, other): + pass + + def __add__(self, other): + pass + + def __mul__(self, other): + pass + + def __truediv__(self, other): + pass + + def distance(self, other): + pass diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index f5da8a0..42af105 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -6,10 +6,10 @@ from pyske.core.interface import List from pyske.core.list import SList -from pyske.core.util.point import Point +from pyske.core.util.point_2D import Point_2D -def cluster_index(point: Point, centroids: SList[Point]) -> Tuple[Point, int]: +def cluster_index(point: Point_2D, centroids: SList[Point_2D]) -> Tuple[Point_2D, int]: """ Get the centroid index of the closest centroid """ @@ -22,14 +22,14 @@ def cluster_index(point: Point, centroids: SList[Point]) -> Tuple[Point, int]: return point, centroids.index(p_centroid) -def assign_clusters(input_list: List[Point], centroids: SList[Point]) -> List[Tuple[Point, int]]: +def assign_clusters(input_list: List[Point_2D], centroids: SList[Point_2D]) -> List[Tuple[Point_2D, int]]: """ Assign each point to a cluster """ return input_list.map(lambda x: cluster_index(x, centroids)) -def update_centroids(clusters: List[Tuple[Point, int]], centroids: SList[Point]): +def update_centroids(clusters: List[Tuple[Point_2D, int]], centroids: SList[Point_2D]): """ Update centroids of clusters """ @@ -46,7 +46,7 @@ def update_centroids(clusters: List[Tuple[Point, int]], centroids: SList[Point] return new_centroids -def max_dist(pair_a: Tuple[Point, float], pair_b: Tuple[Point, float]): +def max_dist(pair_a: Tuple[Point_2D, float], pair_b: Tuple[Point_2D, float]): """ Return the tuple with the maximum distance """ @@ -55,7 +55,7 @@ def max_dist(pair_a: Tuple[Point, float], pair_b: Tuple[Point, float]): return pair_b -def k_means_init(input_list: List[Point], n_cluster: int) -> SList[Point]: +def k_means_init(input_list: List[Point_2D], n_cluster: int) -> SList[Point_2D]: """ K-means++ initialisation @@ -81,8 +81,8 @@ def k_means_init(input_list: List[Point], n_cluster: int) -> SList[Point]: return centroids -def k_means(input_list: List[Point], init_function: Callable[[List, int], List], n_cluster: int, - max_iter: int = 10) -> SList[SList[Point]]: +def k_means(input_list: List[Point_2D], init_function: Callable[[List, int], List], n_cluster: int, + max_iter: int = 10) -> SList[SList[Point_2D]]: """ K-means algorithm on a list of point diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index 703dfbb..e9e9e3c 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -101,12 +101,12 @@ def rand_point_list(cls, size, clusters): :param clusters: number of clusters :return: a list of the given class """ - from pyske.core.util.point import Point + from pyske.core.util.point_2D import Point_2D from pyske.core import Distribution x, _ = make_blobs(n_samples=size, centers=clusters) x = x.tolist() - x = list(map(lambda y: Point(y[0], y[1]), x)) + x = list(map(lambda y: Point_2D(y[0], y[1]), x)) distr = Distribution().balanced(size) return cls.from_seq(x).distribute(distr) From ff39d0b502325d502a00fb9dbd26b2c33bbfdf82 Mon Sep 17 00:00:00 2001 From: Evan MULUMBA Date: Fri, 28 May 2021 14:22:12 +0200 Subject: [PATCH 13/53] FIX: input_list type form Tuple to Point_2D --- pyske/examples/list/k_means_main.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py index 65febba..0a3b171 100644 --- a/pyske/examples/list/k_means_main.py +++ b/pyske/examples/list/k_means_main.py @@ -27,8 +27,7 @@ clusters = args.clusters pyske_list_class = util.select_pyske_list(choice) - #input_list = util.rand_point_list(pyske_list_class, size, clusters) - input_list = util.rand_2D_sample_list(pyske_list_class, size, clusters) + input_list = util.rand_point_list(pyske_list_class, size, clusters) timing = Timing() execute = util.select_execute(choice) example = k_means From 32f557ec6cb0fa29af29e5e097b620498d8448ee Mon Sep 17 00:00:00 2001 From: Evan MULUMBA Date: Fri, 28 May 2021 14:38:04 +0200 Subject: [PATCH 14/53] Add class point_3D.py --- pyske/core/util/point_2D.py | 4 +- pyske/core/util/point_3D.py | 79 +++++++++++++++++++++++++++++ pyske/examples/list/k_means_main.py | 2 +- pyske/examples/list/util.py | 26 ++++------ 4 files changed, 92 insertions(+), 19 deletions(-) create mode 100644 pyske/core/util/point_3D.py diff --git a/pyske/core/util/point_2D.py b/pyske/core/util/point_2D.py index 6a0dfd5..f6f5f7f 100644 --- a/pyske/core/util/point_2D.py +++ b/pyske/core/util/point_2D.py @@ -1,5 +1,5 @@ """ -A module to represent a point +A module to represent a 2D point """ from math import sqrt @@ -18,7 +18,7 @@ def __repr__(self): def __eq__(self, other): if isinstance(other, Point_2D): - return self.__x == other.x and self.__y == other.__y + return self.__x == other.__x and self.__y == other.__y return False def __add__(self, other): diff --git a/pyske/core/util/point_3D.py b/pyske/core/util/point_3D.py new file mode 100644 index 0000000..9cde20f --- /dev/null +++ b/pyske/core/util/point_3D.py @@ -0,0 +1,79 @@ +""" +A module to represent a 3D point +""" + +from math import sqrt +from pyske.core.util.point_Interface import Point_Interface + + +class Point_3D(Point_Interface): + """A class to represent a 3D point""" + + def __init__(self, x=0, y=0, z=0): + self.__x = x + self.__y = y + self.__z = z + + def __repr__(self): + return "(%s, %s, %s)" % (self.__x, self.__y, self.__z) + + def __eq__(self, other): + if isinstance(other, Point_3D): + return self.__x == other.__x and self.__y == other.__y and self.__z == other.__z + return False + + def __add__(self, other): + """ + Addition of two points + + Examples:: + + >>> p1 = Point_3D(5,5,2) + >>> p2 = Point_3D(5,7,1) + >>> p1 + p2 + (10, 12, 3) + """ + if isinstance(other, Point_3D): + return Point_3D(self.x + other.x, self.y + other.y, self.z + other.z) + + def __mul__(self, other): + pass + + def __truediv__(self, other): + pass + + @property + def x(self): + """X getter""" + return self.__x + + @property + def y(self): + """Y getter""" + return self.__y + + @property + def z(self): + """Z getter""" + return self.z + + def distance(self, other): + """ + Returns the distance from another 3D point. + + Examples:: + + >>> from pyske.core.util.point_2D import Point_2D + >>> p1 = Point_3D(5,5,2) + >>> p2 = Point_3D(5,7,1) + >>> p1.distance(p2) + 2.24 + + :param other: a point + :return: distance from other point + + """ + dx = self.__x - other.x + dy = self.__y - other.y + dz = self.__x - other.z + return sqrt(dx ** 2 + dy ** 2 + dz ** 2) diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py index 0a3b171..b6b20fa 100644 --- a/pyske/examples/list/k_means_main.py +++ b/pyske/examples/list/k_means_main.py @@ -27,7 +27,7 @@ clusters = args.clusters pyske_list_class = util.select_pyske_list(choice) - input_list = util.rand_point_list(pyske_list_class, size, clusters) + input_list = util.rand_point_2D_list(pyske_list_class, size, clusters) timing = Timing() execute = util.select_execute(choice) example = k_means diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index bdb68a7..648fe9d 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -91,9 +91,9 @@ def rand_list(cls, size): return cls.init(lambda _: float(random.randint(-100, 100)), size) -def rand_point_list(cls, size, clusters): +def rand_point_2D_list(cls, size, clusters): """ - Return a randomly generated list of points. + Return a randomly generated list of 2D points. :param cls: the class of the generated list. :param size: a positive number @@ -110,22 +110,16 @@ def rand_point_list(cls, size, clusters): distr = Distribution().balanced(size) return cls.from_seq(x).distribute(distr) - -def rand_2D_sample_list(cls, size , clusters): +def rand_point_3D_list(cls, size, clusters): """ - Return a randomly generated list of 2D sample. + Return a randomly generated list of 3D points. - :param cls: the class of the generated list. - :param size: a positive number - Precondition: size >= 0 - :param clusters: number of clusters - :return: a list of the given class - """ - print(clusters) - x, y_true = make_blobs(n_samples=size, centers=clusters) - x = x.tolist() - x = list(map(lambda y: (y[0], y[1]), x)) - return cls.init(lambda i: x[i], size) + :param cls: the class of the generated list. + :param size: a positive number + Precondition: size >= 0 + :param clusters: number of clusters + :return: a list of the given class + """ def print_experiment(result, timing, execute, iteration=None): From 7e6966d8534c82393966b83456aa3974822c4c61 Mon Sep 17 00:00:00 2001 From: Evan MULUMBA Date: Fri, 28 May 2021 14:42:05 +0200 Subject: [PATCH 15/53] rand_point_2D_list / rand_point_3D_list --- pyske/examples/list/util.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index 648fe9d..8124598 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -110,16 +110,25 @@ def rand_point_2D_list(cls, size, clusters): distr = Distribution().balanced(size) return cls.from_seq(x).distribute(distr) + def rand_point_3D_list(cls, size, clusters): """ - Return a randomly generated list of 3D points. + Return a randomly generated list of 2D points. + + :param cls: the class of the generated list. + :param size: a positive number + Precondition: size >= 0 + :param clusters: number of clusters + :return: a list of the given class + """ + from pyske.core.util.point_3D import Point_3D + from pyske.core import Distribution - :param cls: the class of the generated list. - :param size: a positive number - Precondition: size >= 0 - :param clusters: number of clusters - :return: a list of the given class - """ + x, _ = make_blobs(n_samples=size, centers=clusters) + x = x.tolist() + x = list(map(lambda y: Point_3D(y[0], y[1], y[2]), x)) + distr = Distribution().balanced(size) + return cls.from_seq(x).distribute(distr) def print_experiment(result, timing, execute, iteration=None): From 9b147eba58a6549ee59202f2ad5060918133674c Mon Sep 17 00:00:00 2001 From: Evan MULUMBA Date: Fri, 28 May 2021 15:30:48 +0200 Subject: [PATCH 16/53] Point_3D update --- pyske/core/util/point_3D.py | 22 +++++++++++++++++++--- pyske/examples/list/k_means_main.py | 7 +++++-- pyske/examples/list/util.py | 2 +- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/pyske/core/util/point_3D.py b/pyske/core/util/point_3D.py index 9cde20f..c6e1aa5 100644 --- a/pyske/core/util/point_3D.py +++ b/pyske/core/util/point_3D.py @@ -37,10 +37,26 @@ def __add__(self, other): return Point_3D(self.x + other.x, self.y + other.y, self.z + other.z) def __mul__(self, other): - pass + """ + Multiplication by a point or a scalar + + Examples:: + + >>> p1 = Point_3D(5,5,2) + >>> p2 = Point_3D(5,7,1) + >>> p1 * 5 + (25, 25, 10) + >>> p1 * p2 + (25, 35, 2) + """ + if isinstance(other, Point_3D): + return Point_3D(self.x * other.x, self.y * other.y, self.z * other.z) + if isinstance(other, int) or isinstance(other, float): + return Point_3D(self.x * other, self.y * other, self.z * other.z) def __truediv__(self, other): - pass + if isinstance(other, int): + return Point_3D(self.x / other, self.y / other, self.z / other) @property def x(self): @@ -55,7 +71,7 @@ def y(self): @property def z(self): """Z getter""" - return self.z + return self.__z def distance(self, other): """ diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py index b6b20fa..29eb81d 100644 --- a/pyske/examples/list/k_means_main.py +++ b/pyske/examples/list/k_means_main.py @@ -27,7 +27,10 @@ clusters = args.clusters pyske_list_class = util.select_pyske_list(choice) - input_list = util.rand_point_2D_list(pyske_list_class, size, clusters) + + # input_list = util.rand_point_2D_list(pyske_list_class, size, clusters) + input_list = util.rand_point_3D_list(pyske_list_class, size, clusters) + timing = Timing() execute = util.select_execute(choice) example = k_means @@ -37,7 +40,7 @@ result = example(input_list, k_means_init, clusters) timing.stop() util.print_experiment("", timing.get(), execute, iteration) - #if parallel.PID == 0: + # if parallel.PID == 0: # for i in range((len(result))): # plt.scatter([point.x for point in result[i]], [point.y for point in result[i]]) # plt.show() diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index 8124598..5279386 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -124,7 +124,7 @@ def rand_point_3D_list(cls, size, clusters): from pyske.core.util.point_3D import Point_3D from pyske.core import Distribution - x, _ = make_blobs(n_samples=size, centers=clusters) + x, _ = make_blobs(n_samples=size, centers=clusters, n_features=3) x = x.tolist() x = list(map(lambda y: Point_3D(y[0], y[1], y[2]), x)) distr = Distribution().balanced(size) From 5b89f49ec00fb655c9dd5794c5cd75a731b7644f Mon Sep 17 00:00:00 2001 From: Evan MULUMBA Date: Fri, 28 May 2021 15:43:55 +0200 Subject: [PATCH 17/53] Typing Point_2D -> Point_Interface --- pyske/examples/list/k_means.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index 42af105..52b4e2b 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -6,10 +6,10 @@ from pyske.core.interface import List from pyske.core.list import SList -from pyske.core.util.point_2D import Point_2D +from pyske.core.util.point_Interface import Point_Interface -def cluster_index(point: Point_2D, centroids: SList[Point_2D]) -> Tuple[Point_2D, int]: +def cluster_index(point: Point_Interface, centroids: SList[Point_Interface]) -> Tuple[Point_Interface, int]: """ Get the centroid index of the closest centroid """ @@ -22,14 +22,14 @@ def cluster_index(point: Point_2D, centroids: SList[Point_2D]) -> Tuple[Point_2D return point, centroids.index(p_centroid) -def assign_clusters(input_list: List[Point_2D], centroids: SList[Point_2D]) -> List[Tuple[Point_2D, int]]: +def assign_clusters(input_list: List[Point_Interface], centroids: SList[Point_Interface]) -> List[Tuple[Point_Interface, int]]: """ Assign each point to a cluster """ return input_list.map(lambda x: cluster_index(x, centroids)) -def update_centroids(clusters: List[Tuple[Point_2D, int]], centroids: SList[Point_2D]): +def update_centroids(clusters: List[Tuple[Point_Interface, int]], centroids: SList[Point_Interface]): """ Update centroids of clusters """ @@ -46,7 +46,7 @@ def update_centroids(clusters: List[Tuple[Point_2D, int]], centroids: SList[Poin return new_centroids -def max_dist(pair_a: Tuple[Point_2D, float], pair_b: Tuple[Point_2D, float]): +def max_dist(pair_a: Tuple[Point_Interface, float], pair_b: Tuple[Point_Interface, float]): """ Return the tuple with the maximum distance """ @@ -55,7 +55,7 @@ def max_dist(pair_a: Tuple[Point_2D, float], pair_b: Tuple[Point_2D, float]): return pair_b -def k_means_init(input_list: List[Point_2D], n_cluster: int) -> SList[Point_2D]: +def k_means_init(input_list: List[Point_Interface], n_cluster: int) -> SList[Point_Interface]: """ K-means++ initialisation @@ -81,8 +81,8 @@ def k_means_init(input_list: List[Point_2D], n_cluster: int) -> SList[Point_2D]: return centroids -def k_means(input_list: List[Point_2D], init_function: Callable[[List, int], List], n_cluster: int, - max_iter: int = 10) -> SList[SList[Point_2D]]: +def k_means(input_list: List[Point_Interface], init_function: Callable[[List, int], List], n_cluster: int, + max_iter: int = 10) -> SList[SList[Point_Interface]]: """ K-means algorithm on a list of point From eb792d573bd6ccd067c78620e2236a0972c83fc7 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Fri, 28 May 2021 17:11:19 +0200 Subject: [PATCH 18/53] optimization update_centroids --- pyske/examples/list/k_means.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index f5da8a0..cc28b11 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -29,20 +29,18 @@ def assign_clusters(input_list: List[Point], centroids: SList[Point]) -> List[Tu return input_list.map(lambda x: cluster_index(x, centroids)) -def update_centroids(clusters: List[Tuple[Point, int]], centroids: SList[Point]): +def update_centroids(clusters: List[Tuple[Point, int]], centroids: SList[Point]): """ Update centroids of clusters """ - new_centroids = SList([]) - i = 0 - while i < len(centroids): - cluster = clusters.filter(lambda x: x[1] == i) - sum_cluster = cluster.map(lambda x: x[0]).reduce(lambda x, y: x + y) - average_point = sum_cluster / cluster.length() - centroid = clusters.reduce( - lambda x, y: x if average_point.distance(x[0]) < average_point.distance(y[0]) else y)[0] - new_centroids.append(centroid) - i += 1 + + new_centroids = SList.init(lambda _: (Point(), _, _), len(centroids)) + + new_centroids = new_centroids.mapi(lambda i, x: clusters.map_reduce(lambda w: (w[0], w[1], 1), + lambda y, z: (y[0] + z[0], y[1], y[2] + z[2]) if y[1] == i and z[1] == i else ( + z if y[1] != i else y))) + new_centroids = new_centroids.map(lambda x: x[0] / x[2]) + return new_centroids From 467b33bf8134354324827be18c3c9a96d707935c Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Fri, 28 May 2021 17:37:05 +0200 Subject: [PATCH 19/53] refactoring because of new point implementation --- pyske/examples/list/k_means.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index 7a47e0b..6d1ac4e 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -34,7 +34,7 @@ def update_centroids(clusters: List[Tuple[Point_Interface, int]], centroids: SLi Update centroids of clusters """ - new_centroids = SList.init(lambda _: (Point(), _, _), len(centroids)) + new_centroids = SList.init(lambda _: (_, _, _), len(centroids)) new_centroids = new_centroids.mapi(lambda i, x: clusters.map_reduce(lambda w: (w[0], w[1], 1), lambda y, z: (y[0] + z[0], y[1], y[2] + z[2]) if y[1] == i and z[1] == i else ( From 8c2cf82d97f2f2462b60b3b6502326dd72958f25 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Mon, 31 May 2021 11:16:25 +0200 Subject: [PATCH 20/53] use of parallelism random choice first centroid --- pyske/examples/list/k_means.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index 6d1ac4e..1b224f8 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -7,6 +7,7 @@ from pyske.core.interface import List from pyske.core.list import SList from pyske.core.util.point_Interface import Point_Interface +from pyske.core.util.par import procs def cluster_index(point: Point_Interface, centroids: SList[Point_Interface]) -> Tuple[Point_Interface, int]: @@ -63,7 +64,9 @@ def k_means_init(input_list: List[Point_Interface], n_cluster: int) -> SList[Poi :return: n_cluster centroids """ centroids = SList([]) - first_centroid = input_list.to_seq()[random.randint(0, input_list.length() - 1)] + first_centroid = input_list.get_partition()\ + .map(lambda l: l[random.randint(0, l.length() - 1)])\ + .to_seq()[random.randint(0, list(procs())[len(list(procs())) - 1])] centroids.append(first_centroid) for _ in range(n_cluster - 1): From 27a5039d749c16598b4fb8aa8dbbfb5191cc1ddd Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Tue, 1 Jun 2021 17:16:47 +0200 Subject: [PATCH 21/53] add point dimensions in k-means-main's options --- pyske/examples/list/k_means_main.py | 6 ++-- pyske/examples/list/util.py | 45 ++++++++++++++--------------- 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py index 29eb81d..163f761 100644 --- a/pyske/examples/list/k_means_main.py +++ b/pyske/examples/list/k_means_main.py @@ -19,17 +19,17 @@ parser.add_argument("--iter", help="number of iterations", type=int, default=30) parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ) parser.add_argument("--clusters", help="number of clusters", type=int, default=3) + parser.add_argument("--dimensions", help="point dimensions", type=int, default=2) args = parser.parse_args() size = args.size num_iter = args.iter choice = args.data clusters = args.clusters + dimensions = args.dimensions pyske_list_class = util.select_pyske_list(choice) - - # input_list = util.rand_point_2D_list(pyske_list_class, size, clusters) - input_list = util.rand_point_3D_list(pyske_list_class, size, clusters) + input_list = util.rand_point_list(pyske_list_class, size, clusters, dimensions) timing = Timing() execute = util.select_execute(choice) diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index 5279386..ef79d51 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -3,6 +3,7 @@ """ from sklearn.datasets import make_blobs +from pyske.core import Distribution PAR = 'parallel' SEQ = 'sequential' @@ -90,43 +91,39 @@ def rand_list(cls, size): import random return cls.init(lambda _: float(random.randint(-100, 100)), size) - -def rand_point_2D_list(cls, size, clusters): +def select_point_dimensions(dimensions): """ - Return a randomly generated list of 2D points. + Return a PySke list class. - :param cls: the class of the generated list. - :param size: a positive number - Precondition: size >= 0 - :param clusters: number of clusters - :return: a list of the given class + :param dimensions: point dimensions + Precondition: dimensions >= 2 + :return: a Point """ - from pyske.core.util.point_2D import Point_2D - from pyske.core import Distribution - - x, _ = make_blobs(n_samples=size, centers=clusters) - x = x.tolist() - x = list(map(lambda y: Point_2D(y[0], y[1]), x)) - distr = Distribution().balanced(size) - return cls.from_seq(x).distribute(distr) - + # pylint: disable=import-outside-toplevel + if dimensions == 2: + from pyske.core.util.point_2D import Point_2D as PointClass + elif dimensions == 3: + from pyske.core.util.point_3D import Point_3D as PointClass + else: + from pyske.core.util.point_2D import Point_2D as PointClass + return PointClass -def rand_point_3D_list(cls, size, clusters): +def rand_point_list(cls, size, clusters, dimensions): """ - Return a randomly generated list of 2D points. + Return a randomly generated list of points. :param cls: the class of the generated list. :param size: a positive number Precondition: size >= 0 :param clusters: number of clusters + :param dimensions: point dimensions + Precondition: dimensions >= 2 :return: a list of the given class """ - from pyske.core.util.point_3D import Point_3D - from pyske.core import Distribution - - x, _ = make_blobs(n_samples=size, centers=clusters, n_features=3) + x, _ = make_blobs(n_samples=size, centers=clusters, n_features=dimensions) x = x.tolist() - x = list(map(lambda y: Point_3D(y[0], y[1], y[2]), x)) + pointclass = select_point_dimensions(dimensions) + x = list(map(lambda y: pointclass(*y), x)) distr = Distribution().balanced(size) return cls.from_seq(x).distribute(distr) From 84f2daaa554b88110374334e5c98ce5c9421bd15 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Wed, 2 Jun 2021 11:42:41 +0200 Subject: [PATCH 22/53] interface convention --- pyske/core/util/point_Interface.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyske/core/util/point_Interface.py b/pyske/core/util/point_Interface.py index aa3be5d..6196c47 100644 --- a/pyske/core/util/point_Interface.py +++ b/pyske/core/util/point_Interface.py @@ -1,9 +1,9 @@ """ A module to represent a point """ +from abc import ABC - -class Point_Interface: +class Point_Interface(ABC): """Point interface to represent point of n dimensions""" def __repr__(self): From 82b7a7decbd6dcfffa6ca5a6709940948c173a08 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Fri, 4 Jun 2021 14:55:56 +0200 Subject: [PATCH 23/53] parallel optimization update_centroids --- pyske/examples/list/k_means.py | 66 +++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index 1b224f8..f238ffa 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -10,7 +10,8 @@ from pyske.core.util.par import procs -def cluster_index(point: Point_Interface, centroids: SList[Point_Interface]) -> Tuple[Point_Interface, int]: +def cluster_index(point: Point_Interface, centroids: SList[Point_Interface]) -> \ + Tuple[Point_Interface, int]: """ Get the centroid index of the closest centroid """ @@ -23,24 +24,37 @@ def cluster_index(point: Point_Interface, centroids: SList[Point_Interface]) -> return point, centroids.index(p_centroid) -def assign_clusters(input_list: List[Point_Interface], centroids: SList[Point_Interface]) -> List[Tuple[Point_Interface, int]]: +def assign_clusters(input_list: List[Point_Interface], centroids: SList[Point_Interface]) -> \ + List[Tuple[Point_Interface, int]]: """ Assign each point to a cluster """ return input_list.map(lambda x: cluster_index(x, centroids)) -def update_centroids(clusters: List[Tuple[Point_Interface, int]], centroids: SList[Point_Interface]): +def update_centroids(clusters: List[Tuple[Point_Interface, int]], + centroids: SList[Point_Interface]): """ Update centroids of clusters """ - new_centroids = SList.init(lambda _: (_, _, _), len(centroids)) - - new_centroids = new_centroids.mapi(lambda i, x: clusters.map_reduce(lambda w: (w[0], w[1], 1), - lambda y, z: (y[0] + z[0], y[1], y[2] + z[2]) if y[1] == i and z[1] == i else ( - z if y[1] != i else y))) - new_centroids = new_centroids.map(lambda x: x[0] / x[2]) + def centroids_list_update(list_to_update, item): + if isinstance(item, SList): + list_to_update = list_to_update.map2(lambda a_pair, b_pair: (a_pair[0] + b_pair[0], + a_pair[1] + b_pair[1]), + item) + else: + index = item[1] + point = item[0] + list_to_update[index] = (list_to_update[index][0] + point, + list_to_update[index][1] + 1) + return list_to_update + + point_class = type(centroids[0]) + neutral_list = SList.init(lambda _: (point_class(), 0), len(centroids)) + new_centroids = clusters.reduce(lambda a_item, b_item: + centroids_list_update(a_item, b_item), neutral_list) + new_centroids = new_centroids.map(lambda x: x[0] / x[1]) return new_centroids @@ -58,15 +72,15 @@ def k_means_init(input_list: List[Point_Interface], n_cluster: int) -> SList[Poi """ K-means++ initialisation - :param input_list: a list of point - :param n_cluster: number of cluster + :param input_list: a list of points + :param n_cluster: number of clusters - :return: n_cluster centroids + :return: list of centroids """ centroids = SList([]) - first_centroid = input_list.get_partition()\ - .map(lambda l: l[random.randint(0, l.length() - 1)])\ - .to_seq()[random.randint(0, list(procs())[len(list(procs())) - 1])] + first_centroid = input_list.get_partition() \ + .map(lambda l: l[random.randint(0, l.length() - 1)]) \ + .to_seq()[random.randint(0, list(procs())[len(list(procs())) - 1])] centroids.append(first_centroid) for _ in range(n_cluster - 1): @@ -82,17 +96,18 @@ def k_means_init(input_list: List[Point_Interface], n_cluster: int) -> SList[Poi return centroids -def k_means(input_list: List[Point_Interface], init_function: Callable[[List, int], List], n_cluster: int, - max_iter: int = 10) -> SList[SList[Point_Interface]]: +def k_means(input_list: List[Point_Interface], init_function: Callable[[List, int], List], + n_cluster: int, + max_iter: int = 10) -> List[Tuple[Point_Interface, int]]: """ - K-means algorithm on a list of point + K-means algorithm on a list of points - :param input_list: a list of point - :param n_cluster: number of cluster - :param max_iter: number of iteration + :param input_list: a list of points + :param n_cluster: number of clusters + :param max_iter: number of iterations :param init_function: a function that initialize centroids - :return: 2 dimensions list of points + :return: a list of tuples with the point and his cluster index """ centroids = init_function(input_list, n_cluster) @@ -104,9 +119,4 @@ def k_means(input_list: List[Point_Interface], init_function: Callable[[List, in j = j + 1 - clusters2d = SList([]) - for i in range(len(centroids)): - clusters2d.append(clusters.filter(lambda x, num_cluster=i: x[1] == num_cluster) - .map(lambda x: x[0]).to_seq() - ) - return clusters2d + return clusters From 08a4dd6bde88d90e1329ae6ce4387f5fb35c9bdd Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Fri, 4 Jun 2021 15:11:08 +0200 Subject: [PATCH 24/53] adding option to show clusters graph of 2D points --- pyske/examples/list/k_means_main.py | 11 +++++------ pyske/examples/list/util.py | 21 +++++++++++++++++---- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py index 163f761..41fddc4 100644 --- a/pyske/examples/list/k_means_main.py +++ b/pyske/examples/list/k_means_main.py @@ -2,12 +2,10 @@ Execution of k_means """ import argparse -import matplotlib.pyplot as plt from pyske.core import Timing from pyske.examples.list.k_means import k_means, k_means_init from pyske.examples.list import util -from pyske.core.support import parallel PAR = 'parallel' SEQ = 'sequential' @@ -20,6 +18,8 @@ parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ) parser.add_argument("--clusters", help="number of clusters", type=int, default=3) parser.add_argument("--dimensions", help="point dimensions", type=int, default=2) + parser.add_argument("--show-clusters", help="display the clusters graph of 2D points", + action="store_true") args = parser.parse_args() size = args.size @@ -27,6 +27,7 @@ choice = args.data clusters = args.clusters dimensions = args.dimensions + show_clusters = args.show_clusters pyske_list_class = util.select_pyske_list(choice) input_list = util.rand_point_list(pyske_list_class, size, clusters, dimensions) @@ -40,7 +41,5 @@ result = example(input_list, k_means_init, clusters) timing.stop() util.print_experiment("", timing.get(), execute, iteration) - # if parallel.PID == 0: - # for i in range((len(result))): - # plt.scatter([point.x for point in result[i]], [point.y for point in result[i]]) - # plt.show() + if show_clusters and dimensions == 2: + util.print_2D_result(result.to_seq()) diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index ef79d51..e560a16 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -1,9 +1,13 @@ """ Utility functions for PySke examples """ +from typing import Tuple +import matplotlib.pyplot as plt from sklearn.datasets import make_blobs -from pyske.core import Distribution +from pyske.core import Distribution, SList +from pyske.core.support import parallel +from pyske.core.util.point_2D import Point_2D PAR = 'parallel' SEQ = 'sequential' @@ -100,9 +104,7 @@ def select_point_dimensions(dimensions): :return: a Point """ # pylint: disable=import-outside-toplevel - if dimensions == 2: - from pyske.core.util.point_2D import Point_2D as PointClass - elif dimensions == 3: + if dimensions == 3: from pyske.core.util.point_3D import Point_3D as PointClass else: from pyske.core.util.point_2D import Point_2D as PointClass @@ -127,6 +129,17 @@ def rand_point_list(cls, size, clusters, dimensions): distr = Distribution().balanced(size) return cls.from_seq(x).distribute(distr) +def print_2D_result(clusters_list: SList[Tuple[Point_2D, int]]): + """ + Print experiment of 2 dimension points k-means clustering + """ + if parallel.PID == 0: + x = clusters_list.map(lambda pair: pair[0].x) + y = clusters_list.map(lambda pair: pair[0].y) + colors = clusters_list.map(lambda pair: pair[1]) + plt.scatter(x, y, c=colors) + plt.show() + def print_experiment(result, timing, execute, iteration=None): """ From f6f46cd4b3e85fa943a627d868330eef03f5eb90 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Mon, 7 Jun 2021 16:05:27 +0200 Subject: [PATCH 25/53] k-means clustering documentation --- docs/api.rst | 60 ++++++++++++++++++++++++++++- docs/conf.py | 10 +++-- pyske/examples/list/k_means.py | 4 +- pyske/examples/list/k_means_main.py | 11 +----- pyske/examples/list/util.py | 17 +++++++- 5 files changed, 84 insertions(+), 18 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 2a9f345..19a69e2 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,2 +1,60 @@ PySke API -========= \ No newline at end of file +========= + +Pyske API offer applications implemented with list and tree skeletons. +The user can use the sequential or parallel version. +The parallel version allows a faster execution time when its launched on several processors or computers. + +Dot Product +----------- + +Discrete Fast Fourier Transform +------------------------------- + +K-means Clustering +------------------ + +K-means clustering is an unsupervised algorithm that aims to partition group of points in k clusters. + +K-means function +^^^^^^^^^^^^^^^^ + +.. py:module:: pyske.examples.list.k_means + +.. autofunction:: k_means + +Here the implementation of the 2 dimensions point class. + +.. autoclass:: pyske.core.util.point_2D.Point_2D + :members: + :special-members: + :show-inheritance: + :private-members: + :member-order: bysource + +Initialization functions +^^^^^^^^^^^^^^^^^^^^^^^^ + +.. autofunction:: k_means_init + +Running Example +^^^^^^^^^^^^^^^^^^^^ + +.. argparse:: + :module: pyske.examples.list.util + :func: k_means_parser + :prog: python3 k_means_main.py + + +Maximum Prefix Sum +------------------ + +Maximum Segment Sum +------------------- + +Parallel Regular Sampling Sort +------------------------------ + +Variance Example +---------------- + diff --git a/docs/conf.py b/docs/conf.py index 8e83820..fe8f596 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -10,9 +10,9 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # -# import os -# import sys -# sys.path.insert(0, os.path.abspath('.')) +import os +import sys +sys.path.insert(0, os.path.abspath('../.')) # -- Project information ----------------------------------------------------- @@ -31,6 +31,8 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ + "sphinx.ext.autodoc", + "sphinxarg.ext" ] # Add any paths that contain templates here, relative to this directory. @@ -52,4 +54,4 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] \ No newline at end of file +html_static_path = ['_static'] diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index f238ffa..a1eab32 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -70,7 +70,7 @@ def max_dist(pair_a: Tuple[Point_Interface, float], pair_b: Tuple[Point_Interfac def k_means_init(input_list: List[Point_Interface], n_cluster: int) -> SList[Point_Interface]: """ - K-means++ initialisation + K-means++ initialization :param input_list: a list of points :param n_cluster: number of clusters @@ -103,9 +103,9 @@ def k_means(input_list: List[Point_Interface], init_function: Callable[[List, in K-means algorithm on a list of points :param input_list: a list of points + :param init_function: a function that initialize centroids :param n_cluster: number of clusters :param max_iter: number of iterations - :param init_function: a function that initialize centroids :return: a list of tuples with the point and his cluster index """ diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py index 41fddc4..3687f0c 100644 --- a/pyske/examples/list/k_means_main.py +++ b/pyske/examples/list/k_means_main.py @@ -1,7 +1,6 @@ """ Execution of k_means """ -import argparse from pyske.core import Timing from pyske.examples.list.k_means import k_means, k_means_init @@ -10,16 +9,10 @@ PAR = 'parallel' SEQ = 'sequential' + if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000) - parser.add_argument("--iter", help="number of iterations", type=int, default=30) - parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ) - parser.add_argument("--clusters", help="number of clusters", type=int, default=3) - parser.add_argument("--dimensions", help="point dimensions", type=int, default=2) - parser.add_argument("--show-clusters", help="display the clusters graph of 2D points", - action="store_true") + parser = util. k_means_parser() args = parser.parse_args() size = args.size diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index e560a16..2e3da87 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -3,6 +3,7 @@ """ from typing import Tuple import matplotlib.pyplot as plt +import argparse from sklearn.datasets import make_blobs from pyske.core import Distribution, SList @@ -26,8 +27,6 @@ def standard_parse_command_line(size_arg=True, iter_arg=True, data_arg=True): :param data_arg: (default True) flag to select argument --data :return: (size, iter, ['parallel' | 'sequential']) """ - # pylint: disable=import-outside-toplevel - import argparse parser = argparse.ArgumentParser() if size_arg: parser.add_argument("--size", help="size of the list to generate", @@ -50,6 +49,20 @@ def standard_parse_command_line(size_arg=True, iter_arg=True, data_arg=True): return size, num_iter, data_type +def k_means_parser(): + """ + Parse command line for k-means example. + """ + parser = argparse.ArgumentParser() + parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000) + parser.add_argument("--iter", help="number of iterations", type=int, default=30) + parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ) + parser.add_argument("--clusters", help="number of clusters", type=int, default=3) + parser.add_argument("--dimensions", help="point dimensions", type=int, default=2) + parser.add_argument("--show-clusters", help="display the clusters graph of 2D points", + action="store_true") + return parser + def select_pyske_list(choice): """ Return a PySke list class. From 42c750632199f47d51a286655363ba8ce929a3ca Mon Sep 17 00:00:00 2001 From: Evan MULUMBA Date: Tue, 8 Jun 2021 12:18:44 +0200 Subject: [PATCH 26/53] 3d representation for Point_3D clusters --- pyske/examples/list/k_means_main.py | 4 ++++ pyske/examples/list/util.py | 22 ++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py index 3687f0c..93c885a 100644 --- a/pyske/examples/list/k_means_main.py +++ b/pyske/examples/list/k_means_main.py @@ -36,3 +36,7 @@ util.print_experiment("", timing.get(), execute, iteration) if show_clusters and dimensions == 2: util.print_2D_result(result.to_seq()) + elif show_clusters and dimensions == 3: + util.print_3D_result(result.to_seq()) + + diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index 2e3da87..721f855 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -3,12 +3,14 @@ """ from typing import Tuple import matplotlib.pyplot as plt +from mpl_toolkits.mplot3d import axes3d import argparse from sklearn.datasets import make_blobs from pyske.core import Distribution, SList from pyske.core.support import parallel from pyske.core.util.point_2D import Point_2D +from pyske.core.util.point_3D import Point_3D PAR = 'parallel' SEQ = 'sequential' @@ -153,6 +155,26 @@ def print_2D_result(clusters_list: SList[Tuple[Point_2D, int]]): plt.scatter(x, y, c=colors) plt.show() +def print_3D_result(clusters_list: SList[Tuple[Point_3D, int]]): + """ + Print experiment of 3 dimension points k-means clustering + """ + if parallel.PID == 0: + x = clusters_list.map(lambda pair: pair[0].x) + y = clusters_list.map(lambda pair: pair[0].y) + z = clusters_list.map(lambda pair: pair[0].z) + colors = clusters_list.map(lambda pair: pair[1]) + + # Tracé du résultat en 3D + fig = plt.figure() + ax = fig.gca(projection='3d') # Affichage en 3D + ax.scatter(x, y, z, label='Courbe', marker='d') # Tracé des points 3D + plt.title("Points 3D") + ax.set_xlabel('X') + ax.set_ylabel('Y') + ax.set_zlabel('Z') + plt.tight_layout() + plt.show() def print_experiment(result, timing, execute, iteration=None): """ From 810c54b30394e297e2c62ef2a5987b086bbd2d8f Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Tue, 8 Jun 2021 12:57:39 +0200 Subject: [PATCH 27/53] error subtraction in distance --- pyske/core/util/point_3D.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyske/core/util/point_3D.py b/pyske/core/util/point_3D.py index c6e1aa5..678710d 100644 --- a/pyske/core/util/point_3D.py +++ b/pyske/core/util/point_3D.py @@ -91,5 +91,5 @@ def distance(self, other): """ dx = self.__x - other.x dy = self.__y - other.y - dz = self.__x - other.z + dz = self.__z - other.z return sqrt(dx ** 2 + dy ** 2 + dz ** 2) From 5cba1e966b00bf01344537f1fe4c9e5fc82babd6 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Tue, 8 Jun 2021 14:31:27 +0200 Subject: [PATCH 28/53] adding colors 3D graph result, fix warning matplotlib --- pyske/examples/list/k_means_main.py | 12 ++++++------ pyske/examples/list/util.py | 13 +++++++------ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py index 93c885a..c8b7782 100644 --- a/pyske/examples/list/k_means_main.py +++ b/pyske/examples/list/k_means_main.py @@ -12,7 +12,7 @@ if __name__ == '__main__': - parser = util. k_means_parser() + parser = util.k_means_parser() args = parser.parse_args() size = args.size @@ -34,9 +34,9 @@ result = example(input_list, k_means_init, clusters) timing.stop() util.print_experiment("", timing.get(), execute, iteration) - if show_clusters and dimensions == 2: - util.print_2D_result(result.to_seq()) - elif show_clusters and dimensions == 3: - util.print_3D_result(result.to_seq()) - + if show_clusters: + if dimensions == 2: + util.print_2D_result(result.to_seq()) + if dimensions == 3: + util.print_3D_result(result.to_seq()) diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index 721f855..1ddca0f 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -1,10 +1,11 @@ """ Utility functions for PySke examples """ + from typing import Tuple -import matplotlib.pyplot as plt -from mpl_toolkits.mplot3d import axes3d + import argparse +import matplotlib.pyplot as plt from sklearn.datasets import make_blobs from pyske.core import Distribution, SList @@ -157,8 +158,8 @@ def print_2D_result(clusters_list: SList[Tuple[Point_2D, int]]): def print_3D_result(clusters_list: SList[Tuple[Point_3D, int]]): """ - Print experiment of 3 dimension points k-means clustering - """ + Print experiment of 3 dimension points k-means clustering + """ if parallel.PID == 0: x = clusters_list.map(lambda pair: pair[0].x) y = clusters_list.map(lambda pair: pair[0].y) @@ -167,8 +168,8 @@ def print_3D_result(clusters_list: SList[Tuple[Point_3D, int]]): # Tracé du résultat en 3D fig = plt.figure() - ax = fig.gca(projection='3d') # Affichage en 3D - ax.scatter(x, y, z, label='Courbe', marker='d') # Tracé des points 3D + ax = fig.add_subplot(projection='3d') # Affichage en 3D + ax.scatter(x, y, z, label='Courbe', marker='d', c=colors) # Tracé des points 3D plt.title("Points 3D") ax.set_xlabel('X') ax.set_ylabel('Y') From eb16d4c72ed89bec88826fa8125774fb60807dfb Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Tue, 8 Jun 2021 15:05:23 +0200 Subject: [PATCH 29/53] adding Point Interface section --- docs/api.rst | 24 +++++++++++++++--------- pyske/core/util/point_2D.py | 3 +++ 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 19a69e2..864486e 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -3,7 +3,7 @@ PySke API Pyske API offer applications implemented with list and tree skeletons. The user can use the sequential or parallel version. -The parallel version allows a faster execution time when its launched on several processors or computers. +The parallel version allows a faster execution time when its launched on several processors, cores or computers. Dot Product ----------- @@ -23,20 +23,26 @@ K-means function .. autofunction:: k_means -Here the implementation of the 2 dimensions point class. +Initialization functions +^^^^^^^^^^^^^^^^^^^^^^^^ + +This is the standard method that initializes the centroids. This method chooses the centroids in order that each point is as far as possible from the other. + +.. autofunction:: k_means_init + + +Point Interface +^^^^^^^^^^^^^^^ + +K-means algorithm takes a list of points in parameters. For now two versions implement this class, one for 2 dimension points and another for 3 dimension points. + +Point 2D class implementation: .. autoclass:: pyske.core.util.point_2D.Point_2D :members: :special-members: - :show-inheritance: - :private-members: :member-order: bysource -Initialization functions -^^^^^^^^^^^^^^^^^^^^^^^^ - -.. autofunction:: k_means_init - Running Example ^^^^^^^^^^^^^^^^^^^^ diff --git a/pyske/core/util/point_2D.py b/pyske/core/util/point_2D.py index f6f5f7f..d0bfeca 100644 --- a/pyske/core/util/point_2D.py +++ b/pyske/core/util/point_2D.py @@ -17,6 +17,9 @@ def __repr__(self): return "(%s, %s)" % (self.__x, self.__y) def __eq__(self, other): + """ + Equality between two points + """ if isinstance(other, Point_2D): return self.__x == other.__x and self.__y == other.__y return False From 529498ef384ae91989a9d88ff4817518f58e4be9 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Tue, 8 Jun 2021 15:15:33 +0200 Subject: [PATCH 30/53] change show-clusters display message --- pyske/examples/list/util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index 1ddca0f..4964c93 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -57,12 +57,12 @@ def k_means_parser(): Parse command line for k-means example. """ parser = argparse.ArgumentParser() - parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000) + parser.add_argument("--size", help="size of the list to generate", type=int, default=5_000) parser.add_argument("--iter", help="number of iterations", type=int, default=30) parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ) parser.add_argument("--clusters", help="number of clusters", type=int, default=3) parser.add_argument("--dimensions", help="point dimensions", type=int, default=2) - parser.add_argument("--show-clusters", help="display the clusters graph of 2D points", + parser.add_argument("--show-clusters", help="display the clusters graph of 2D or 3D points", action="store_true") return parser From 34ff5ceaff07ecd98292ed0d75ef9e63df368e01 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Tue, 15 Jun 2021 15:31:43 +0200 Subject: [PATCH 31/53] array module --- pyske/core/array/__init__.py | 0 pyske/core/array/parray2d.py | 10 ++++++++++ 2 files changed, 10 insertions(+) create mode 100644 pyske/core/array/__init__.py create mode 100644 pyske/core/array/parray2d.py diff --git a/pyske/core/array/__init__.py b/pyske/core/array/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py new file mode 100644 index 0000000..dcb1c5d --- /dev/null +++ b/pyske/core/array/parray2d.py @@ -0,0 +1,10 @@ +""" +A module of parallel arrays and associated skeletons + +class PArray2D: parallel arrays. +""" + +class PArray2D: + """ + Distributed arrays + """ From d072ac2c40afae7c2cb6a132db1ec00ba6d183d3 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Wed, 16 Jun 2021 17:01:54 +0200 Subject: [PATCH 32/53] init, str method --- pyske/core/array/parray2d.py | 46 ++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py index dcb1c5d..4d0366b 100644 --- a/pyske/core/array/parray2d.py +++ b/pyske/core/array/parray2d.py @@ -3,8 +3,54 @@ class PArray2D: parallel arrays. """ +from typing import Callable + +from pyske.core.support import parallel as parimpl + +_PID: int = parimpl.PID +_NPROCS: int = parimpl.NPROCS +_COMM = parimpl.COMM + class PArray2D: + # pylint: disable=protected-access """ Distributed arrays """ + + def __init__(self): + self.__global_index = 0 + self.__local_index = 0 + self.__content = [] + self.__distribution = [0 for _ in range(0, _NPROCS)] + + def __str__(self) -> str: + return "PID[" + str(_PID) + "]:\n" + \ + " global_index: " + str(self.__global_index) + "\n" + \ + " local_index: " + str(self.__local_index) + "\n" + \ + " distribution: " + str(self.__distribution) + "\n" + \ + " content: " + str(self.__content) + "\n" + + @staticmethod + def init(value_at: Callable[[int], int], col_size: int = _NPROCS, line_size: int = _NPROCS): + assert _NPROCS <= col_size + assert _NPROCS <= line_size + + parray2d = PArray2D() + parray2d.__global_index = ((0, line_size - 1), (0, col_size - 1)) + + local_line_size = parimpl.local_size(_PID, line_size) + + lines_start_index = local_line_size * _PID + lines_stop_index = lines_start_index + local_line_size - 1 + colums_start_index = 0 + colums_stop_index = col_size - 1 + + parray2d.__local_index = ((lines_start_index, lines_stop_index), + (colums_start_index, colums_stop_index)) + + parray2d.__content = [value_at(i) for i in range(lines_start_index * col_size, + (lines_stop_index + 1) * col_size)] + parray2d.__distribution = [parimpl.local_size(_PID, line_size) * col_size for _ in + range(0, _NPROCS)] + return parray2d From c0203ad99c4f698386d175ff92e4f7be9e7c1370 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Wed, 16 Jun 2021 17:02:48 +0200 Subject: [PATCH 33/53] array hello_world --- pyske/examples/array/__init__.py | 0 pyske/examples/array/hello_world.py | 14 ++++++++++++++ 2 files changed, 14 insertions(+) create mode 100644 pyske/examples/array/__init__.py create mode 100644 pyske/examples/array/hello_world.py diff --git a/pyske/examples/array/__init__.py b/pyske/examples/array/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pyske/examples/array/hello_world.py b/pyske/examples/array/hello_world.py new file mode 100644 index 0000000..886e553 --- /dev/null +++ b/pyske/examples/array/hello_world.py @@ -0,0 +1,14 @@ +""" +Example: various manipulations on a parallel array +""" + +from pyske.core.array.parray2d import PArray2D + + +def __main(): + parray2d = PArray2D.init(lambda x: x, 6, 12) + print(parray2d) + + +if __name__ == '__main__': + __main() From 1779397eecc585e0dc6a484f62ea5d11aac7eb16 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Thu, 17 Jun 2021 15:12:01 +0200 Subject: [PATCH 34/53] allgather for distribution --- pyske/core/array/parray2d.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py index 4d0366b..d7042dc 100644 --- a/pyske/core/array/parray2d.py +++ b/pyske/core/array/parray2d.py @@ -51,6 +51,6 @@ def init(value_at: Callable[[int], int], col_size: int = _NPROCS, line_size: int parray2d.__content = [value_at(i) for i in range(lines_start_index * col_size, (lines_stop_index + 1) * col_size)] - parray2d.__distribution = [parimpl.local_size(_PID, line_size) * col_size for _ in - range(0, _NPROCS)] + parray2d.__distribution = _COMM.allgather(parray2d.__local_index) + return parray2d From 42fc4fc45261048b5163442db167592adf8171fd Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Thu, 17 Jun 2021 18:56:01 +0200 Subject: [PATCH 35/53] distribution lines to colums --- pyske/core/array/parray2d.py | 49 +++++++++++++++++++++++++++-- pyske/examples/array/hello_world.py | 2 ++ 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py index d7042dc..2a48299 100644 --- a/pyske/core/array/parray2d.py +++ b/pyske/core/array/parray2d.py @@ -40,9 +40,14 @@ def init(value_at: Callable[[int], int], col_size: int = _NPROCS, line_size: int parray2d.__global_index = ((0, line_size - 1), (0, col_size - 1)) local_line_size = parimpl.local_size(_PID, line_size) - - lines_start_index = local_line_size * _PID + if _PID == 0: + lines_start_index = local_line_size * _PID + else: + lines_start_index = _COMM.recv(source=_PID - 1) + 1 lines_stop_index = lines_start_index + local_line_size - 1 + if _PID != _NPROCS - 1: + _COMM.send(lines_stop_index, _PID + 1) + colums_start_index = 0 colums_stop_index = col_size - 1 @@ -54,3 +59,43 @@ def init(value_at: Callable[[int], int], col_size: int = _NPROCS, line_size: int parray2d.__distribution = _COMM.allgather(parray2d.__local_index) return parray2d + + def distribute(self: 'PArray2D') -> 'PArray2D': + parray2d = PArray2D() + parray2d.__global_index = self.__global_index + + col_size = self.__global_index[1][1] - self.__global_index[1][0] + 1 + line_size = self.__global_index[0][1] - self.__global_index[0][0] + 1 + + local_col_size = parimpl.local_size(_PID, col_size) + if _PID == 0: + colums_start_index = local_col_size * _PID + else: + colums_start_index = _COMM.recv(source=_PID - 1) + 1 + colums_stop_index = colums_start_index + local_col_size - 1 + if _PID != _NPROCS - 1: + _COMM.send(colums_stop_index, _PID + 1) + + lines_start_index = 0 + lines_stop_index = line_size - 1 + + parray2d.__local_index = ((lines_start_index, lines_stop_index), + (colums_start_index, colums_stop_index)) + + parray2d.__distribution = _COMM.allgather(parray2d.__local_index) + + # update content for each process + for i in range(0, _NPROCS): + content_to_send = [] + for j in range(len(self.__content)): + if j % col_size in range(parray2d.__distribution[i][1][0], + parray2d.__distribution[i][1][1] + 1): + content_to_send.append(self.__content[j]) + if i == _PID: + parray2d.__content = _COMM.gather(content_to_send, i) + # flatten the list + parray2d.__content = [item for items in parray2d.__content for item in items] + else: + _COMM.gather(content_to_send, i) + + return parray2d diff --git a/pyske/examples/array/hello_world.py b/pyske/examples/array/hello_world.py index 886e553..66cabec 100644 --- a/pyske/examples/array/hello_world.py +++ b/pyske/examples/array/hello_world.py @@ -7,6 +7,8 @@ def __main(): parray2d = PArray2D.init(lambda x: x, 6, 12) + #print(parray2d) + parray2d = parray2d.distribute() print(parray2d) From 2acc26a07d4250d65023d911866ab13776b98197 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Mon, 21 Jun 2021 11:27:13 +0200 Subject: [PATCH 36/53] use of enum for distribution choice --- pyske/core/array/parray2d.py | 72 ++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 32 deletions(-) diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py index 2a48299..0e8e517 100644 --- a/pyske/core/array/parray2d.py +++ b/pyske/core/array/parray2d.py @@ -4,6 +4,7 @@ class PArray2D: parallel arrays. """ from typing import Callable +from enum import Enum from pyske.core.support import parallel as parimpl @@ -11,6 +12,34 @@ class PArray2D: parallel arrays. _NPROCS: int = parimpl.NPROCS _COMM = parimpl.COMM +class Distribution(Enum): + LINE = 'LINE' + COLUMN = 'COLUMN' + + +def _local_index(distribution: str, col_size: int, line_size: int): + if distribution == Distribution.LINE: + local_size = parimpl.local_size(_PID, line_size) + b_start_index = 0 + b_stop_index = col_size - 1 + else: + local_size = parimpl.local_size(_PID, col_size) + b_start_index = 0 + b_stop_index = line_size - 1 + + if _PID == 0: + a_start_index = local_size * _PID + else: + a_start_index = _COMM.recv(source=_PID - 1) + 1 + a_stop_index = a_start_index + local_size - 1 + if _PID != _NPROCS - 1: + _COMM.send(a_stop_index, _PID + 1) + + if distribution == Distribution.LINE: + return (a_start_index, a_stop_index), (b_start_index, b_stop_index) + else: + return (b_start_index, b_stop_index), (a_start_index, a_stop_index) + class PArray2D: # pylint: disable=protected-access @@ -19,10 +48,11 @@ class PArray2D: """ def __init__(self): - self.__global_index = 0 - self.__local_index = 0 + self.__global_index = ((-1, -1), (-1, -1)) + self.__local_index = ((-1, -1), (-1, -1)) self.__content = [] self.__distribution = [0 for _ in range(0, _NPROCS)] + self.__distribution_direction = Distribution.LINE def __str__(self) -> str: return "PID[" + str(_PID) + "]:\n" + \ @@ -39,50 +69,28 @@ def init(value_at: Callable[[int], int], col_size: int = _NPROCS, line_size: int parray2d = PArray2D() parray2d.__global_index = ((0, line_size - 1), (0, col_size - 1)) - local_line_size = parimpl.local_size(_PID, line_size) - if _PID == 0: - lines_start_index = local_line_size * _PID - else: - lines_start_index = _COMM.recv(source=_PID - 1) + 1 - lines_stop_index = lines_start_index + local_line_size - 1 - if _PID != _NPROCS - 1: - _COMM.send(lines_stop_index, _PID + 1) + parray2d.__local_index = _local_index(Distribution.LINE, col_size, line_size) - colums_start_index = 0 - colums_stop_index = col_size - 1 - - parray2d.__local_index = ((lines_start_index, lines_stop_index), - (colums_start_index, colums_stop_index)) - - parray2d.__content = [value_at(i) for i in range(lines_start_index * col_size, - (lines_stop_index + 1) * col_size)] + parray2d.__content = [value_at(i) for i in range(parray2d.__local_index[0][0] * col_size, + (parray2d.__local_index[0][1] + 1) * col_size)] parray2d.__distribution = _COMM.allgather(parray2d.__local_index) return parray2d def distribute(self: 'PArray2D') -> 'PArray2D': + """ + Distribute line to column + """ parray2d = PArray2D() parray2d.__global_index = self.__global_index col_size = self.__global_index[1][1] - self.__global_index[1][0] + 1 line_size = self.__global_index[0][1] - self.__global_index[0][0] + 1 - local_col_size = parimpl.local_size(_PID, col_size) - if _PID == 0: - colums_start_index = local_col_size * _PID - else: - colums_start_index = _COMM.recv(source=_PID - 1) + 1 - colums_stop_index = colums_start_index + local_col_size - 1 - if _PID != _NPROCS - 1: - _COMM.send(colums_stop_index, _PID + 1) - - lines_start_index = 0 - lines_stop_index = line_size - 1 - - parray2d.__local_index = ((lines_start_index, lines_stop_index), - (colums_start_index, colums_stop_index)) + parray2d.__local_index = _local_index(Distribution.COLUMN, col_size, line_size) parray2d.__distribution = _COMM.allgather(parray2d.__local_index) + parray2d.__distribution_direction = Distribution.COLUMN # update content for each process for i in range(0, _NPROCS): From 3d219178a50c47e57fb2214cdf801bd3227f1233 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Mon, 21 Jun 2021 14:27:26 +0200 Subject: [PATCH 37/53] changes in local_index --- pyske/core/array/parray2d.py | 48 ++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py index 0e8e517..b4814b8 100644 --- a/pyske/core/array/parray2d.py +++ b/pyske/core/array/parray2d.py @@ -6,6 +6,7 @@ class PArray2D: parallel arrays. from typing import Callable from enum import Enum +from pyske.core import SList from pyske.core.support import parallel as parimpl _PID: int = parimpl.PID @@ -17,28 +18,17 @@ class Distribution(Enum): COLUMN = 'COLUMN' -def _local_index(distribution: str, col_size: int, line_size: int): +def _local_index(distribution: Enum, col_size: int, line_size: int, pid: int): + local_sizes = SList([]) + for i in range(_NPROCS): + if distribution == Distribution.LINE: + local_sizes.append(parimpl.local_size(i, line_size)) + else: + local_sizes.append(parimpl.local_size(i, col_size)) + start_indexes = local_sizes.scanl(lambda x, y: x + y, 0) if distribution == Distribution.LINE: - local_size = parimpl.local_size(_PID, line_size) - b_start_index = 0 - b_stop_index = col_size - 1 - else: - local_size = parimpl.local_size(_PID, col_size) - b_start_index = 0 - b_stop_index = line_size - 1 - - if _PID == 0: - a_start_index = local_size * _PID - else: - a_start_index = _COMM.recv(source=_PID - 1) + 1 - a_stop_index = a_start_index + local_size - 1 - if _PID != _NPROCS - 1: - _COMM.send(a_stop_index, _PID + 1) - - if distribution == Distribution.LINE: - return (a_start_index, a_stop_index), (b_start_index, b_stop_index) - else: - return (b_start_index, b_stop_index), (a_start_index, a_stop_index) + return (start_indexes[pid], start_indexes[pid] + local_sizes[pid] - 1), (0, col_size - 1) + return (0, line_size - 1), (start_indexes[pid], start_indexes[pid] + local_sizes[pid] - 1) class PArray2D: @@ -69,11 +59,14 @@ def init(value_at: Callable[[int], int], col_size: int = _NPROCS, line_size: int parray2d = PArray2D() parray2d.__global_index = ((0, line_size - 1), (0, col_size - 1)) - parray2d.__local_index = _local_index(Distribution.LINE, col_size, line_size) + parray2d.__local_index = _local_index(Distribution.LINE, col_size, line_size, _PID) parray2d.__content = [value_at(i) for i in range(parray2d.__local_index[0][0] * col_size, - (parray2d.__local_index[0][1] + 1) * col_size)] - parray2d.__distribution = _COMM.allgather(parray2d.__local_index) + (parray2d.__local_index[0][ + 1] + 1) * col_size)] + parray2d.__distribution = [ + _local_index(parray2d.__distribution_direction, col_size, line_size, i) for i in + range(_NPROCS)] return parray2d @@ -87,10 +80,11 @@ def distribute(self: 'PArray2D') -> 'PArray2D': col_size = self.__global_index[1][1] - self.__global_index[1][0] + 1 line_size = self.__global_index[0][1] - self.__global_index[0][0] + 1 - parray2d.__local_index = _local_index(Distribution.COLUMN, col_size, line_size) - - parray2d.__distribution = _COMM.allgather(parray2d.__local_index) + parray2d.__local_index = _local_index(Distribution.COLUMN, col_size, line_size, _PID) parray2d.__distribution_direction = Distribution.COLUMN + parray2d.__distribution = [ + _local_index(parray2d.__distribution_direction, col_size, line_size, i) for i in + range(_NPROCS)] # update content for each process for i in range(0, _NPROCS): From 724b88f09b40a535ed715a1c6ff8bf9ea9c991ce Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Mon, 21 Jun 2021 15:29:44 +0200 Subject: [PATCH 38/53] callable init function with line and column parameters --- pyske/core/array/parray2d.py | 9 +++++---- pyske/examples/array/hello_world.py | 4 +++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py index b4814b8..bc9e776 100644 --- a/pyske/core/array/parray2d.py +++ b/pyske/core/array/parray2d.py @@ -52,7 +52,7 @@ def __str__(self) -> str: " content: " + str(self.__content) + "\n" @staticmethod - def init(value_at: Callable[[int], int], col_size: int = _NPROCS, line_size: int = _NPROCS): + def init(value_at: Callable[[int, int], int], col_size: int = _NPROCS, line_size: int = _NPROCS): assert _NPROCS <= col_size assert _NPROCS <= line_size @@ -61,9 +61,10 @@ def init(value_at: Callable[[int], int], col_size: int = _NPROCS, line_size: int parray2d.__local_index = _local_index(Distribution.LINE, col_size, line_size, _PID) - parray2d.__content = [value_at(i) for i in range(parray2d.__local_index[0][0] * col_size, - (parray2d.__local_index[0][ - 1] + 1) * col_size)] + for line in range(parray2d.__local_index[0][0], parray2d.__local_index[0][1] + 1): + for column in range(parray2d.__local_index[1][0], parray2d.__local_index[1][1] + 1): + parray2d.__content.append(value_at(line, column)) + parray2d.__distribution = [ _local_index(parray2d.__distribution_direction, col_size, line_size, i) for i in range(_NPROCS)] diff --git a/pyske/examples/array/hello_world.py b/pyske/examples/array/hello_world.py index 66cabec..d599ce0 100644 --- a/pyske/examples/array/hello_world.py +++ b/pyske/examples/array/hello_world.py @@ -6,7 +6,9 @@ def __main(): - parray2d = PArray2D.init(lambda x: x, 6, 12) + col_size = 6 + line_size = 12 + parray2d = PArray2D.init(lambda line, column: line * col_size + column, col_size, line_size) #print(parray2d) parray2d = parray2d.distribute() print(parray2d) From 84cc587c9296a17844c45ac20e27ae55da034d90 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Mon, 21 Jun 2021 18:18:03 +0200 Subject: [PATCH 39/53] init column and line distribution --- pyske/core/array/parray2d.py | 40 +++++++++++++++++++++++++++-- pyske/examples/array/hello_world.py | 16 +++++++++--- 2 files changed, 50 insertions(+), 6 deletions(-) diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py index bc9e776..eee6a5c 100644 --- a/pyske/core/array/parray2d.py +++ b/pyske/core/array/parray2d.py @@ -17,7 +17,6 @@ class Distribution(Enum): LINE = 'LINE' COLUMN = 'COLUMN' - def _local_index(distribution: Enum, col_size: int, line_size: int, pid: int): local_sizes = SList([]) for i in range(_NPROCS): @@ -52,7 +51,15 @@ def __str__(self) -> str: " content: " + str(self.__content) + "\n" @staticmethod - def init(value_at: Callable[[int, int], int], col_size: int = _NPROCS, line_size: int = _NPROCS): + def init_line(value_at: Callable[[int, int], int], col_size: int = _NPROCS, + line_size: int = _NPROCS): + """ + Return an array built using a function per line on each processor + + :param value_at: binary function + :return: an 2d array of the given line and column size, where for all valid line column + i, j, the value at this index is value_at(i, j) + """ assert _NPROCS <= col_size assert _NPROCS <= line_size @@ -71,6 +78,35 @@ def init(value_at: Callable[[int, int], int], col_size: int = _NPROCS, line_size return parray2d + @staticmethod + def init_column(value_at: Callable[[int, int], int], col_size: int = _NPROCS, + line_size: int = _NPROCS): + """ + Return an array built using a function per column on each processor + + :param value_at: binary function + :return: an 2d array of the given line and column size, where for all valid line column + i, j, the value at this index is value_at(i, j) + """ + assert _NPROCS <= col_size + assert _NPROCS <= line_size + + parray2d = PArray2D() + parray2d.__global_index = ((0, line_size - 1), (0, col_size - 1)) + + parray2d.__local_index = _local_index(Distribution.COLUMN, col_size, line_size, _PID) + + for line in range(parray2d.__local_index[0][0], parray2d.__local_index[0][1] + 1): + for column in range(parray2d.__local_index[1][0], parray2d.__local_index[1][1] + 1): + parray2d.__content.append(value_at(line, column)) + + parray2d.__distribution_direction = Distribution.COLUMN + parray2d.__distribution = [ + _local_index(parray2d.__distribution_direction, col_size, line_size, i) for i in + range(_NPROCS)] + + return parray2d + def distribute(self: 'PArray2D') -> 'PArray2D': """ Distribute line to column diff --git a/pyske/examples/array/hello_world.py b/pyske/examples/array/hello_world.py index d599ce0..6064f00 100644 --- a/pyske/examples/array/hello_world.py +++ b/pyske/examples/array/hello_world.py @@ -8,10 +8,18 @@ def __main(): col_size = 6 line_size = 12 - parray2d = PArray2D.init(lambda line, column: line * col_size + column, col_size, line_size) - #print(parray2d) - parray2d = parray2d.distribute() - print(parray2d) + + print("Line initialization") + parray2d_line_init = PArray2D.init_line(lambda line, column: line * col_size + column, col_size, line_size) + print(parray2d_line_init) + + print("Line to column distribution") + parray2d_column = parray2d_line_init.distribute() + print(parray2d_column) + + print("Column initialization") + parray2d_column_init = PArray2D.init_column(lambda line, column: line * col_size + column, col_size, line_size) + print(parray2d_column_init) if __name__ == '__main__': From efcd451188a9fc2cffa4ed6c310c995cdb4a3797 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Tue, 22 Jun 2021 10:34:56 +0200 Subject: [PATCH 40/53] map function, merge of init function, generic type --- pyske/core/array/parray2d.py | 66 ++++++++++++----------------- pyske/examples/array/hello_world.py | 6 +-- 2 files changed, 30 insertions(+), 42 deletions(-) diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py index eee6a5c..9f91457 100644 --- a/pyske/core/array/parray2d.py +++ b/pyske/core/array/parray2d.py @@ -3,7 +3,7 @@ class PArray2D: parallel arrays. """ -from typing import Callable +from typing import Callable, TypeVar, Generic from enum import Enum from pyske.core import SList @@ -13,6 +13,9 @@ class PArray2D: parallel arrays. _NPROCS: int = parimpl.NPROCS _COMM = parimpl.COMM +T = TypeVar('T') # pylint: disable=invalid-name +V = TypeVar('V') # pylint: disable=invalid-name + class Distribution(Enum): LINE = 'LINE' COLUMN = 'COLUMN' @@ -30,20 +33,20 @@ def _local_index(distribution: Enum, col_size: int, line_size: int, pid: int): return (0, line_size - 1), (start_indexes[pid], start_indexes[pid] + local_sizes[pid] - 1) -class PArray2D: +class PArray2D(Generic[T]): # pylint: disable=protected-access """ Distributed arrays """ - def __init__(self): + def __init__(self: 'PArray2D[T]'): self.__global_index = ((-1, -1), (-1, -1)) self.__local_index = ((-1, -1), (-1, -1)) self.__content = [] - self.__distribution = [0 for _ in range(0, _NPROCS)] + self.__distribution = [((-1, -1), (-1, -1)) for _ in range(0, _NPROCS)] self.__distribution_direction = Distribution.LINE - def __str__(self) -> str: + def __str__(self: 'PArray2D[T]') -> str: return "PID[" + str(_PID) + "]:\n" + \ " global_index: " + str(self.__global_index) + "\n" + \ " local_index: " + str(self.__local_index) + "\n" + \ @@ -51,12 +54,16 @@ def __str__(self) -> str: " content: " + str(self.__content) + "\n" @staticmethod - def init_line(value_at: Callable[[int, int], int], col_size: int = _NPROCS, - line_size: int = _NPROCS): + def init(value_at: Callable[[int, int], V], distribution: Distribution, + col_size: int = _NPROCS, + line_size: int = _NPROCS) -> 'PArray2D[V]': """ Return an array built using a function per line on each processor :param value_at: binary function + :param distribution: the distribution direction (LINE, COLUMN) + :param col_size: number of columns + :param line_size: number of lines :return: an 2d array of the given line and column size, where for all valid line column i, j, the value at this index is value_at(i, j) """ @@ -66,48 +73,19 @@ def init_line(value_at: Callable[[int, int], int], col_size: int = _NPROCS, parray2d = PArray2D() parray2d.__global_index = ((0, line_size - 1), (0, col_size - 1)) - parray2d.__local_index = _local_index(Distribution.LINE, col_size, line_size, _PID) - - for line in range(parray2d.__local_index[0][0], parray2d.__local_index[0][1] + 1): - for column in range(parray2d.__local_index[1][0], parray2d.__local_index[1][1] + 1): - parray2d.__content.append(value_at(line, column)) - - parray2d.__distribution = [ - _local_index(parray2d.__distribution_direction, col_size, line_size, i) for i in - range(_NPROCS)] - - return parray2d - - @staticmethod - def init_column(value_at: Callable[[int, int], int], col_size: int = _NPROCS, - line_size: int = _NPROCS): - """ - Return an array built using a function per column on each processor - - :param value_at: binary function - :return: an 2d array of the given line and column size, where for all valid line column - i, j, the value at this index is value_at(i, j) - """ - assert _NPROCS <= col_size - assert _NPROCS <= line_size - - parray2d = PArray2D() - parray2d.__global_index = ((0, line_size - 1), (0, col_size - 1)) - - parray2d.__local_index = _local_index(Distribution.COLUMN, col_size, line_size, _PID) + parray2d.__local_index = _local_index(distribution, col_size, line_size, _PID) for line in range(parray2d.__local_index[0][0], parray2d.__local_index[0][1] + 1): for column in range(parray2d.__local_index[1][0], parray2d.__local_index[1][1] + 1): parray2d.__content.append(value_at(line, column)) - - parray2d.__distribution_direction = Distribution.COLUMN + parray2d.__distribution_direction = Distribution.LINE parray2d.__distribution = [ _local_index(parray2d.__distribution_direction, col_size, line_size, i) for i in range(_NPROCS)] return parray2d - def distribute(self: 'PArray2D') -> 'PArray2D': + def distribute(self: 'PArray2D[T]') -> 'PArray2D[T]': """ Distribute line to column """ @@ -138,3 +116,13 @@ def distribute(self: 'PArray2D') -> 'PArray2D': _COMM.gather(content_to_send, i) return parray2d + + def map(self: 'PArray2D[T]', unary_op: Callable[[T], V]) -> 'PArray2D[V]': + """ + Apply a function to all the elements. + + The returned array has the same shape (same size, same distribution) + than the initial array. + """ + self.__content = [unary_op(elem) for elem in self.__content] + return self diff --git a/pyske/examples/array/hello_world.py b/pyske/examples/array/hello_world.py index 6064f00..270c55a 100644 --- a/pyske/examples/array/hello_world.py +++ b/pyske/examples/array/hello_world.py @@ -2,7 +2,7 @@ Example: various manipulations on a parallel array """ -from pyske.core.array.parray2d import PArray2D +from pyske.core.array.parray2d import PArray2D, Distribution def __main(): @@ -10,7 +10,7 @@ def __main(): line_size = 12 print("Line initialization") - parray2d_line_init = PArray2D.init_line(lambda line, column: line * col_size + column, col_size, line_size) + parray2d_line_init = PArray2D.init(lambda line, column: line * col_size + column, Distribution.LINE, col_size, line_size) print(parray2d_line_init) print("Line to column distribution") @@ -18,7 +18,7 @@ def __main(): print(parray2d_column) print("Column initialization") - parray2d_column_init = PArray2D.init_column(lambda line, column: line * col_size + column, col_size, line_size) + parray2d_column_init = PArray2D.init(lambda line, column: line * col_size + column, Distribution.COLUMN, col_size, line_size) print(parray2d_column_init) From f318b51e9f46f2b442dcaeefd126178fc1d47c1a Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Tue, 22 Jun 2021 11:55:21 +0200 Subject: [PATCH 41/53] reduce function --- pyske/core/array/parray2d.py | 27 ++++++++++++++++++++++++++- pyske/examples/array/hello_world.py | 11 +++++++++-- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py index 9f91457..226b7ae 100644 --- a/pyske/core/array/parray2d.py +++ b/pyske/core/array/parray2d.py @@ -3,7 +3,8 @@ class PArray2D: parallel arrays. """ -from typing import Callable, TypeVar, Generic +import functools +from typing import Callable, TypeVar, Generic, Optional from enum import Enum from pyske.core import SList @@ -16,10 +17,12 @@ class PArray2D: parallel arrays. T = TypeVar('T') # pylint: disable=invalid-name V = TypeVar('V') # pylint: disable=invalid-name + class Distribution(Enum): LINE = 'LINE' COLUMN = 'COLUMN' + def _local_index(distribution: Enum, col_size: int, line_size: int, pid: int): local_sizes = SList([]) for i in range(_NPROCS): @@ -126,3 +129,25 @@ def map(self: 'PArray2D[T]', unary_op: Callable[[T], V]) -> 'PArray2D[V]': """ self.__content = [unary_op(elem) for elem in self.__content] return self + + def reduce(self: 'PArray2D[T]', binary_op: Callable[[T, T], T], + neutral: Optional[T] = None) -> T: + """ + Reduce an array of value to one value. + + :param binary_op: a binary associative and commutative operation + :param neutral: (optional): + a value that should be a neutral element for the operation, + i.e. for all element e, + ``binary_op(neutral, e) == binary_op(e, neutral) == e``. + If this argument is omitted the list should not be empty. + :return: a value + """ + if neutral is None: + assert self.__global_index != ((-1, -1), (-1, -1)) + partial = functools.reduce(binary_op, self.__content) + partials = _COMM.allgather(partial) + return functools.reduce(binary_op, partials) + partial = functools.reduce(binary_op, self.__content, neutral) + partials = _COMM.allgather(partial) + return functools.reduce(binary_op, partials, neutral) diff --git a/pyske/examples/array/hello_world.py b/pyske/examples/array/hello_world.py index 270c55a..ed38f4b 100644 --- a/pyske/examples/array/hello_world.py +++ b/pyske/examples/array/hello_world.py @@ -10,7 +10,8 @@ def __main(): line_size = 12 print("Line initialization") - parray2d_line_init = PArray2D.init(lambda line, column: line * col_size + column, Distribution.LINE, col_size, line_size) + parray2d_line_init = PArray2D.init(lambda line, column: line * col_size + column, + Distribution.LINE, col_size, line_size) print(parray2d_line_init) print("Line to column distribution") @@ -18,9 +19,15 @@ def __main(): print(parray2d_column) print("Column initialization") - parray2d_column_init = PArray2D.init(lambda line, column: line * col_size + column, Distribution.COLUMN, col_size, line_size) + parray2d_column_init = PArray2D.init(lambda line, column: line * col_size + column, + Distribution.COLUMN, col_size, line_size) print(parray2d_column_init) + print(parray2d_column_init.reduce(lambda x, y: x + y, 0)) + print(parray2d_line_init.reduce(lambda x, y: x + y, 0)) + print(parray2d_column.reduce(lambda x, y: x + y, 0)) + print(PArray2D().reduce(lambda x, y: x + y, 0)) + if __name__ == '__main__': __main() From 8282befe53fece4a9263c226b11cbfbe241803bd Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Wed, 23 Jun 2021 10:40:21 +0200 Subject: [PATCH 42/53] array interface --- pyske/core/array/array_interface.py | 100 ++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 pyske/core/array/array_interface.py diff --git a/pyske/core/array/array_interface.py b/pyske/core/array/array_interface.py new file mode 100644 index 0000000..1709262 --- /dev/null +++ b/pyske/core/array/array_interface.py @@ -0,0 +1,100 @@ +""" +Interface for PySke array. + +Interfaces: Array2D. +""" + +from abc import ABC, abstractmethod +from typing import Callable, Generic, TypeVar, Optional + +from pyske.core.array.parray2d import Distribution +# pylint: disable=unused-import +from pyske.core.interface import List +from pyske.core.support import parallel as parimpl + +T = TypeVar('T') # pylint: disable=invalid-name +V = TypeVar('V') # pylint: disable=invalid-name + +_PID: int = parimpl.PID +_NPROCS: int = parimpl.NPROCS +_COMM = parimpl.COMM + +class Array2D(ABC, Generic[T]): + """ + PySke array2d (interface) + + Static methods: + init. + + Methods: + map, reduce, distribute, + get_partition. + """ + + @abstractmethod + def __init__(self: 'Array2D[T]') -> None: + """ + Return an empty list. + """ + + @staticmethod + @abstractmethod + def init(value_at: Callable[[int, int], V], distribution: Distribution, + col_size: int = _NPROCS, + line_size: int = _NPROCS) -> 'Array2D[V]': + """ + Return an array built using a function per line on each processor + + :param value_at: binary function + :param distribution: the distribution direction (LINE, COLUMN) + :param col_size: number of columns + :param line_size: number of lines + :return: an 2d array of the given line and column size, where for all valid line column + i, j, the value at this index is value_at(i, j) + """ + + @abstractmethod + def distribute(self: 'Array2D[T]') -> 'Array2D[T]': + """ + Copy the array while changing its distribution. + + In sequential, it just returns ``self``. In parallel, communications + are performed to meet the new distribution. + + :return: an array containing the same elements. + """ + + @abstractmethod + def map(self: 'Array2D[T]', unary_op: Callable[[T], V]) -> 'Array2D[V]': + """ + Apply a function to all the elements. + + The returned array has the same shape (same size, same distribution) + than the initial array. + + :param unary_op: function to apply to elements + :return: a new array + """ + + @abstractmethod + def reduce(self: 'PArray2D[T]', binary_op: Callable[[T, T], T], + neutral: Optional[T] = None) -> T: + """ + Reduce an array of value to one value. + + :param binary_op: a binary associative and commutative operation + :param neutral: (optional): + a value that should be a neutral element for the operation, + i.e. for all element e, + ``binary_op(neutral, e) == binary_op(e, neutral) == e``. + If this argument is omitted the list should not be empty. + :return: a value + """ + + @abstractmethod + def get_partition(self: 'Array2D[T]') -> 'List[Array2D[T]]': + """ + Make the distribution visible. + + :return: a list of array. + """ From 0cec3c4c97c830c2b9bde7c3beaeeae84b4d71a9 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Wed, 23 Jun 2021 18:31:37 +0200 Subject: [PATCH 43/53] sarray2d class, changes parray2d content with sarray2d --- pyske/core/array/array_interface.py | 15 +++--- pyske/core/array/parray2d.py | 51 +++++++++++-------- pyske/core/array/sarray2d.py | 79 +++++++++++++++++++++++++++++ pyske/examples/array/hello_world.py | 15 +++++- 4 files changed, 132 insertions(+), 28 deletions(-) create mode 100644 pyske/core/array/sarray2d.py diff --git a/pyske/core/array/array_interface.py b/pyske/core/array/array_interface.py index 1709262..4b28a60 100644 --- a/pyske/core/array/array_interface.py +++ b/pyske/core/array/array_interface.py @@ -5,9 +5,9 @@ """ from abc import ABC, abstractmethod +from enum import Enum from typing import Callable, Generic, TypeVar, Optional -from pyske.core.array.parray2d import Distribution # pylint: disable=unused-import from pyske.core.interface import List from pyske.core.support import parallel as parimpl @@ -19,6 +19,10 @@ _NPROCS: int = parimpl.NPROCS _COMM = parimpl.COMM +class Distribution(Enum): + LINE = 'LINE' + COLUMN = 'COLUMN' + class Array2D(ABC, Generic[T]): """ PySke array2d (interface) @@ -39,14 +43,13 @@ def __init__(self: 'Array2D[T]') -> None: @staticmethod @abstractmethod - def init(value_at: Callable[[int, int], V], distribution: Distribution, - col_size: int = _NPROCS, - line_size: int = _NPROCS) -> 'Array2D[V]': + def init(value_at: Callable[[int, int], V], distribution: Distribution, col_size: int, + line_size: int) -> 'Array2D[V]': """ Return an array built using a function per line on each processor :param value_at: binary function - :param distribution: the distribution direction (LINE, COLUMN) + :param distribution: the distribution direction (LINE, COLUMN), leave empty for sequential array :param col_size: number of columns :param line_size: number of lines :return: an 2d array of the given line and column size, where for all valid line column @@ -77,7 +80,7 @@ def map(self: 'Array2D[T]', unary_op: Callable[[T], V]) -> 'Array2D[V]': """ @abstractmethod - def reduce(self: 'PArray2D[T]', binary_op: Callable[[T, T], T], + def reduce(self: 'Array2D[T]', binary_op: Callable[[T, T], T], neutral: Optional[T] = None) -> T: """ Reduce an array of value to one value. diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py index 226b7ae..73df1ff 100644 --- a/pyske/core/array/parray2d.py +++ b/pyske/core/array/parray2d.py @@ -3,11 +3,13 @@ class PArray2D: parallel arrays. """ -import functools from typing import Callable, TypeVar, Generic, Optional from enum import Enum from pyske.core import SList +from pyske.core.array import array_interface +from pyske.core.array.array_interface import Distribution +from pyske.core.array.sarray2d import SArray2D from pyske.core.support import parallel as parimpl _PID: int = parimpl.PID @@ -18,11 +20,6 @@ class PArray2D: parallel arrays. V = TypeVar('V') # pylint: disable=invalid-name -class Distribution(Enum): - LINE = 'LINE' - COLUMN = 'COLUMN' - - def _local_index(distribution: Enum, col_size: int, line_size: int, pid: int): local_sizes = SList([]) for i in range(_NPROCS): @@ -36,16 +33,17 @@ def _local_index(distribution: Enum, col_size: int, line_size: int, pid: int): return (0, line_size - 1), (start_indexes[pid], start_indexes[pid] + local_sizes[pid] - 1) -class PArray2D(Generic[T]): +class PArray2D(array_interface.Array2D, Generic[T]): # pylint: disable=protected-access """ Distributed arrays """ def __init__(self: 'PArray2D[T]'): + super().__init__() self.__global_index = ((-1, -1), (-1, -1)) self.__local_index = ((-1, -1), (-1, -1)) - self.__content = [] + self.__content = SArray2D([], -1, -1) self.__distribution = [((-1, -1), (-1, -1)) for _ in range(0, _NPROCS)] self.__distribution_direction = Distribution.LINE @@ -54,7 +52,7 @@ def __str__(self: 'PArray2D[T]') -> str: " global_index: " + str(self.__global_index) + "\n" + \ " local_index: " + str(self.__local_index) + "\n" + \ " distribution: " + str(self.__distribution) + "\n" + \ - " content: " + str(self.__content) + "\n" + " content: \n" + str(self.__content) + "\n" @staticmethod def init(value_at: Callable[[int, int], V], distribution: Distribution, @@ -78,10 +76,15 @@ def init(value_at: Callable[[int, int], V], distribution: Distribution, parray2d.__local_index = _local_index(distribution, col_size, line_size, _PID) + content = [] for line in range(parray2d.__local_index[0][0], parray2d.__local_index[0][1] + 1): for column in range(parray2d.__local_index[1][0], parray2d.__local_index[1][1] + 1): - parray2d.__content.append(value_at(line, column)) - parray2d.__distribution_direction = Distribution.LINE + content.append(value_at(line, column)) + local_line_size = parray2d.__local_index[0][1] - parray2d.__local_index[0][0] + 1 + local_col_size = parray2d.__local_index[1][1] - parray2d.__local_index[1][0] + 1 + parray2d.__content = SArray2D(content, local_line_size, local_col_size) + + parray2d.__distribution_direction = distribution parray2d.__distribution = [ _local_index(parray2d.__distribution_direction, col_size, line_size, i) for i in range(_NPROCS)] @@ -103,6 +106,8 @@ def distribute(self: 'PArray2D[T]') -> 'PArray2D[T]': parray2d.__distribution = [ _local_index(parray2d.__distribution_direction, col_size, line_size, i) for i in range(_NPROCS)] + local_line_size = parray2d.__local_index[0][1] - parray2d.__local_index[0][0] + 1 + local_col_size = parray2d.__local_index[1][1] - parray2d.__local_index[1][0] + 1 # update content for each process for i in range(0, _NPROCS): @@ -110,11 +115,12 @@ def distribute(self: 'PArray2D[T]') -> 'PArray2D[T]': for j in range(len(self.__content)): if j % col_size in range(parray2d.__distribution[i][1][0], parray2d.__distribution[i][1][1] + 1): - content_to_send.append(self.__content[j]) + content_to_send.append(self.__content.values[j]) if i == _PID: - parray2d.__content = _COMM.gather(content_to_send, i) + content = _COMM.gather(content_to_send, i) # flatten the list - parray2d.__content = [item for items in parray2d.__content for item in items] + content = [item for items in content for item in items] + parray2d.__content = SArray2D(content, local_line_size, local_col_size) else: _COMM.gather(content_to_send, i) @@ -127,7 +133,7 @@ def map(self: 'PArray2D[T]', unary_op: Callable[[T], V]) -> 'PArray2D[V]': The returned array has the same shape (same size, same distribution) than the initial array. """ - self.__content = [unary_op(elem) for elem in self.__content] + self.__content = self.__content.map(unary_op) return self def reduce(self: 'PArray2D[T]', binary_op: Callable[[T, T], T], @@ -145,9 +151,12 @@ def reduce(self: 'PArray2D[T]', binary_op: Callable[[T, T], T], """ if neutral is None: assert self.__global_index != ((-1, -1), (-1, -1)) - partial = functools.reduce(binary_op, self.__content) - partials = _COMM.allgather(partial) - return functools.reduce(binary_op, partials) - partial = functools.reduce(binary_op, self.__content, neutral) - partials = _COMM.allgather(partial) - return functools.reduce(binary_op, partials, neutral) + partial = self.__content.reduce(binary_op) + partials = SArray2D(_COMM.allgather(partial), self.__content.line_size, self.__content.column_size) + else: + partial = self.__content.reduce(binary_op, neutral) + partials = SArray2D(_COMM.allgather(partial), self.__content.line_size, self.__content.column_size) + return partials.reduce(binary_op, neutral) + + def get_partition(self: 'PArray2D[T]') -> 'SList[PArray2D[T]]': + pass diff --git a/pyske/core/array/sarray2d.py b/pyske/core/array/sarray2d.py new file mode 100644 index 0000000..5ea30be --- /dev/null +++ b/pyske/core/array/sarray2d.py @@ -0,0 +1,79 @@ +""" +A module of sequential arrays and associated skeletons + +class SArray2D: sequential arrays. +""" +import functools +from typing import TypeVar, Generic, Callable, Optional + +# pylint: disable=unused-import +from pyske.core import SList +from pyske.core.array.array_interface import Array2D, Distribution + +T = TypeVar('T') # pylint: disable=invalid-name +V = TypeVar('V') # pylint: disable=invalid-name + + +class SArray2D(Array2D, Generic[T]): + """ + Sequential arrays + """ + + def __init__(self, content: list, line_size, col_size): + super().__init__() + self.__line_size = line_size + self.__column_size = col_size + self.__values = content + + @property + def values(self): + return self.__values + + @property + def line_size(self): + return self.__line_size + + @property + def column_size(self): + return self.__column_size + + def __str__(self): + content = "" + for i in range(self.__line_size): + content += "[ " + for j in range(self.__column_size): + content += str(self.__values[i * self.__column_size + j]) + " " + content += "]" + content += "\n" + return content + + def __len__(self): + return self.__column_size * self.__line_size + + @staticmethod + def init(value_at: Callable[[int, int], V], _: Distribution, col_size: int, + line_size: int) -> 'SArray2D[V]': + assert col_size > 0 + assert line_size > 0 + content = [] + for line in range(line_size): + for column in range(col_size): + content.append(value_at(line, column)) + sarray2d = SArray2D(content, line_size, col_size) + return sarray2d + + def map(self: 'SArray2D[T]', unary_op: Callable[[T], V]) -> 'SArray2D[V]': + content = list(map(unary_op, self.__values)) + return SArray2D(content, self.__line_size, self.__column_size) + + def reduce(self: 'SArray2D[T]', binary_op: Callable[[T, T], T], + neutral: Optional[T] = None) -> T: + if neutral is None: + return functools.reduce(binary_op, self.__values) + return functools.reduce(binary_op, self.__values, neutral) + + def get_partition(self: 'SArray2D[T]') -> 'SList[SArray2D[T]]': + pass + + def distribute(self: 'SArray2D[T]') -> 'SArray2D[T]': + return self diff --git a/pyske/examples/array/hello_world.py b/pyske/examples/array/hello_world.py index ed38f4b..382dbb2 100644 --- a/pyske/examples/array/hello_world.py +++ b/pyske/examples/array/hello_world.py @@ -2,7 +2,9 @@ Example: various manipulations on a parallel array """ -from pyske.core.array.parray2d import PArray2D, Distribution +from pyske.core.array.parray2d import PArray2D +from pyske.core.array.array_interface import Distribution +from pyske.core.array.sarray2d import SArray2D def __main(): @@ -23,11 +25,22 @@ def __main(): Distribution.COLUMN, col_size, line_size) print(parray2d_column_init) + print("Reduce Test") print(parray2d_column_init.reduce(lambda x, y: x + y, 0)) + print(parray2d_column_init.reduce(lambda x, y: x + y)) print(parray2d_line_init.reduce(lambda x, y: x + y, 0)) print(parray2d_column.reduce(lambda x, y: x + y, 0)) print(PArray2D().reduce(lambda x, y: x + y, 0)) + print("Mapped array") + parray2d_map = parray2d_line_init.map(lambda x: x + 1) + print(parray2d_map) + + print("Sarray initialization") + sarray2d = SArray2D.init(lambda line, column: line * col_size + column, Distribution.LINE, + col_size, line_size) + print(sarray2d) + if __name__ == '__main__': __main() From 07fd6476e860c16a3dabaf695069470a90f86a8e Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Thu, 24 Jun 2021 12:09:20 +0200 Subject: [PATCH 44/53] array get_partition --- pyske/core/array/array_interface.py | 2 +- pyske/core/array/parray2d.py | 12 +++++++----- pyske/core/array/sarray2d.py | 5 ++++- pyske/examples/array/hello_world.py | 3 +++ 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/pyske/core/array/array_interface.py b/pyske/core/array/array_interface.py index 4b28a60..8e28e2d 100644 --- a/pyske/core/array/array_interface.py +++ b/pyske/core/array/array_interface.py @@ -38,7 +38,7 @@ class Array2D(ABC, Generic[T]): @abstractmethod def __init__(self: 'Array2D[T]') -> None: """ - Return an empty list. + Return an empty array. """ @staticmethod diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py index 73df1ff..4695c28 100644 --- a/pyske/core/array/parray2d.py +++ b/pyske/core/array/parray2d.py @@ -6,7 +6,7 @@ class PArray2D: parallel arrays. from typing import Callable, TypeVar, Generic, Optional from enum import Enum -from pyske.core import SList +from pyske.core import SList, PList from pyske.core.array import array_interface from pyske.core.array.array_interface import Distribution from pyske.core.array.sarray2d import SArray2D @@ -152,11 +152,13 @@ def reduce(self: 'PArray2D[T]', binary_op: Callable[[T, T], T], if neutral is None: assert self.__global_index != ((-1, -1), (-1, -1)) partial = self.__content.reduce(binary_op) - partials = SArray2D(_COMM.allgather(partial), self.__content.line_size, self.__content.column_size) else: partial = self.__content.reduce(binary_op, neutral) - partials = SArray2D(_COMM.allgather(partial), self.__content.line_size, self.__content.column_size) + partials = SArray2D(_COMM.allgather(partial), self.__content.line_size, + self.__content.column_size) return partials.reduce(binary_op, neutral) - def get_partition(self: 'PArray2D[T]') -> 'SList[PArray2D[T]]': - pass + def get_partition(self: 'PArray2D[T]') -> 'PList[SArray2D[T]]': + contents = _COMM.allgather(self.__content) + p_list = PList().init(lambda i: contents[i], _NPROCS) + return p_list diff --git a/pyske/core/array/sarray2d.py b/pyske/core/array/sarray2d.py index 5ea30be..71a6f22 100644 --- a/pyske/core/array/sarray2d.py +++ b/pyske/core/array/sarray2d.py @@ -47,6 +47,9 @@ def __str__(self): content += "\n" return content + def __repr__(self): + return str(self) + def __len__(self): return self.__column_size * self.__line_size @@ -73,7 +76,7 @@ def reduce(self: 'SArray2D[T]', binary_op: Callable[[T, T], T], return functools.reduce(binary_op, self.__values, neutral) def get_partition(self: 'SArray2D[T]') -> 'SList[SArray2D[T]]': - pass + return SList([self]) def distribute(self: 'SArray2D[T]') -> 'SArray2D[T]': return self diff --git a/pyske/examples/array/hello_world.py b/pyske/examples/array/hello_world.py index 382dbb2..2d79649 100644 --- a/pyske/examples/array/hello_world.py +++ b/pyske/examples/array/hello_world.py @@ -41,6 +41,9 @@ def __main(): col_size, line_size) print(sarray2d) + print("Get partition") + print(parray2d_column_init.get_partition()) + if __name__ == '__main__': __main() From aea60636517673f6465f037d78b9d8e38eef9853 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Thu, 24 Jun 2021 15:14:47 +0200 Subject: [PATCH 45/53] doctest, docstring array interface --- pyske/core/array/array_interface.py | 48 +++++++++++++++++++++++++++-- pyske/core/array/parray2d.py | 37 +++++----------------- pyske/core/array/sarray2d.py | 21 ++++++++++--- 3 files changed, 68 insertions(+), 38 deletions(-) diff --git a/pyske/core/array/array_interface.py b/pyske/core/array/array_interface.py index 8e28e2d..808edfb 100644 --- a/pyske/core/array/array_interface.py +++ b/pyske/core/array/array_interface.py @@ -46,10 +46,21 @@ def __init__(self: 'Array2D[T]') -> None: def init(value_at: Callable[[int, int], V], distribution: Distribution, col_size: int, line_size: int) -> 'Array2D[V]': """ - Return an array built using a function per line on each processor + Return an array built using a function + + Example:: + + >>> from pyske.core.array.sarray2d import SArray2D + >>> from pyske.core.array.array_interface import Distribution + >>> number_line = 2 + >>> number_column = 2 + >>> init_function = lambda line, column: line * number_column + column + >>> SArray2D.init(init_function, Distribution.LINE, number_column, number_line) + ( 0 1 ) + ( 2 3 ) :param value_at: binary function - :param distribution: the distribution direction (LINE, COLUMN), leave empty for sequential array + :param distribution: the distribution direction (LINE, COLUMN) :param col_size: number of columns :param line_size: number of lines :return: an 2d array of the given line and column size, where for all valid line column @@ -62,7 +73,7 @@ def distribute(self: 'Array2D[T]') -> 'Array2D[T]': Copy the array while changing its distribution. In sequential, it just returns ``self``. In parallel, communications - are performed to meet the new distribution. + are performed to meet line or column distribution. :return: an array containing the same elements. """ @@ -75,6 +86,16 @@ def map(self: 'Array2D[T]', unary_op: Callable[[T], V]) -> 'Array2D[V]': The returned array has the same shape (same size, same distribution) than the initial array. + Examples:: + + >>> from pyske.core.array.sarray2d import SArray2D + >>> from pyske.core.array.array_interface import Distribution + >>> col_size = 2 + >>> line_size = 2 + >>> SArray2D.init(lambda i, j: 1, Distribution.LINE, col_size, line_size).map(lambda x: x + 1) + ( 2 2 ) + ( 2 2 ) + :param unary_op: function to apply to elements :return: a new array """ @@ -85,6 +106,17 @@ def reduce(self: 'Array2D[T]', binary_op: Callable[[T, T], T], """ Reduce an array of value to one value. + Examples:: + + >>> from pyske.core.array.sarray2d import SArray2D + >>> from pyske.core.array.parray2d import PArray2D + >>> from pyske.core.array.array_interface import Distribution + >>> parray2d = PArray2D.init(lambda i, j: 1, Distribution.COLUMN, col_size=2, line_size=2) + >>> parray2d.reduce(lambda x, y: x + y) + 4 + >>> SArray2D().reduce(lambda x, y: x + y, 0) + 0 + :param binary_op: a binary associative and commutative operation :param neutral: (optional): a value that should be a neutral element for the operation, @@ -99,5 +131,15 @@ def get_partition(self: 'Array2D[T]') -> 'List[Array2D[T]]': """ Make the distribution visible. + Examples:: + + >>> from pyske.core.array.sarray2d import SArray2D + >>> from pyske.core.array.array_interface import Distribution + >>> col_size = 2 + >>> line_size = 2 + >>> SArray2D.init(lambda i, j: 1, Distribution.LINE, col_size, line_size).get_partition() + [( 1 1 ) + ( 1 1 )] + :return: a list of array. """ diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py index 4695c28..6410f66 100644 --- a/pyske/core/array/parray2d.py +++ b/pyske/core/array/parray2d.py @@ -37,6 +37,13 @@ class PArray2D(array_interface.Array2D, Generic[T]): # pylint: disable=protected-access """ Distributed arrays + + Static methods from interface IArray: + init. + + Methods from interface IArray: + map,reduce, + get_partition,distribute """ def __init__(self: 'PArray2D[T]'): @@ -58,16 +65,6 @@ def __str__(self: 'PArray2D[T]') -> str: def init(value_at: Callable[[int, int], V], distribution: Distribution, col_size: int = _NPROCS, line_size: int = _NPROCS) -> 'PArray2D[V]': - """ - Return an array built using a function per line on each processor - - :param value_at: binary function - :param distribution: the distribution direction (LINE, COLUMN) - :param col_size: number of columns - :param line_size: number of lines - :return: an 2d array of the given line and column size, where for all valid line column - i, j, the value at this index is value_at(i, j) - """ assert _NPROCS <= col_size assert _NPROCS <= line_size @@ -92,9 +89,6 @@ def init(value_at: Callable[[int, int], V], distribution: Distribution, return parray2d def distribute(self: 'PArray2D[T]') -> 'PArray2D[T]': - """ - Distribute line to column - """ parray2d = PArray2D() parray2d.__global_index = self.__global_index @@ -127,28 +121,11 @@ def distribute(self: 'PArray2D[T]') -> 'PArray2D[T]': return parray2d def map(self: 'PArray2D[T]', unary_op: Callable[[T], V]) -> 'PArray2D[V]': - """ - Apply a function to all the elements. - - The returned array has the same shape (same size, same distribution) - than the initial array. - """ self.__content = self.__content.map(unary_op) return self def reduce(self: 'PArray2D[T]', binary_op: Callable[[T, T], T], neutral: Optional[T] = None) -> T: - """ - Reduce an array of value to one value. - - :param binary_op: a binary associative and commutative operation - :param neutral: (optional): - a value that should be a neutral element for the operation, - i.e. for all element e, - ``binary_op(neutral, e) == binary_op(e, neutral) == e``. - If this argument is omitted the list should not be empty. - :return: a value - """ if neutral is None: assert self.__global_index != ((-1, -1), (-1, -1)) partial = self.__content.reduce(binary_op) diff --git a/pyske/core/array/sarray2d.py b/pyske/core/array/sarray2d.py index 71a6f22..edd8b96 100644 --- a/pyske/core/array/sarray2d.py +++ b/pyske/core/array/sarray2d.py @@ -17,10 +17,20 @@ class SArray2D: sequential arrays. class SArray2D(Array2D, Generic[T]): """ Sequential arrays + + Static methods from interface IArray: + init. + + Methods from interface IArray: + map,reduce, + get_partition,distribute + """ - def __init__(self, content: list, line_size, col_size): + def __init__(self, content=None, line_size=-1, col_size=-1): super().__init__() + if content is None: + content = [] self.__line_size = line_size self.__column_size = col_size self.__values = content @@ -40,11 +50,12 @@ def column_size(self): def __str__(self): content = "" for i in range(self.__line_size): - content += "[ " + content += "(" for j in range(self.__column_size): - content += str(self.__values[i * self.__column_size + j]) + " " - content += "]" - content += "\n" + content += '%4s' % self.__values[i * self.__column_size + j] + content += '%4s' % ')' + if i != self.line_size - 1: + content += '\n' return content def __repr__(self): From d2717a1048904345ffd69de12872f65c3aa8afa9 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Fri, 25 Jun 2021 11:20:51 +0200 Subject: [PATCH 46/53] map2 skeleton --- pyske/core/array/array_interface.py | 25 +++++++++++++++++++++++++ pyske/core/array/parray2d.py | 21 +++++++++++++++++++-- pyske/core/array/sarray2d.py | 8 ++++++++ pyske/examples/array/hello_world.py | 7 +++++++ 4 files changed, 59 insertions(+), 2 deletions(-) diff --git a/pyske/core/array/array_interface.py b/pyske/core/array/array_interface.py index 808edfb..c38b920 100644 --- a/pyske/core/array/array_interface.py +++ b/pyske/core/array/array_interface.py @@ -13,6 +13,7 @@ from pyske.core.support import parallel as parimpl T = TypeVar('T') # pylint: disable=invalid-name +U = TypeVar('U') # pylint: disable=invalid-name V = TypeVar('V') # pylint: disable=invalid-name _PID: int = parimpl.PID @@ -143,3 +144,27 @@ def get_partition(self: 'Array2D[T]') -> 'List[Array2D[T]]': :return: a list of array. """ + + @abstractmethod + def map2(self: 'Array2D[T]', binary_op: Callable[[T, U], V], + a_array: 'Array2D[U]') -> 'Array2D[V]': + """ + Apply a function to all the elements of ``self`` and an array. + + The returned array has the same shape (same size, same distribution) + than the initial arrays. + + Examples:: + + >>> from pyske.core.array.sarray2d import SArray2D + >>> from pyske.core.array.array_interface import Distribution + >>> sarray2d = SArray2D.init(lambda line, column: 1, Distribution.LINE, col_size = 2, line_size = 2) + >>> sarray2d.map2(lambda x, y: x + y, sarray2d) + ( 2 2 ) + ( 2 2 ) + + :param binary_op: function to apply to each pair of elements + :param a_array: the second array. + The second array must have same column and line size than `self`. + :return: a new array. + """ diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py index 6410f66..9d67a45 100644 --- a/pyske/core/array/parray2d.py +++ b/pyske/core/array/parray2d.py @@ -17,6 +17,7 @@ class PArray2D: parallel arrays. _COMM = parimpl.COMM T = TypeVar('T') # pylint: disable=invalid-name +U = TypeVar('U') # pylint: disable=invalid-name V = TypeVar('V') # pylint: disable=invalid-name @@ -54,6 +55,14 @@ def __init__(self: 'PArray2D[T]'): self.__distribution = [((-1, -1), (-1, -1)) for _ in range(0, _NPROCS)] self.__distribution_direction = Distribution.LINE + def __get_shape(self: 'PArray2D[T]') -> 'PArray2D': + p_array2d = PArray2D() + p_array2d.__global_index = self.__global_index + p_array2d.__local_index = self.__local_index + p_array2d.__distribution = self.__distribution + p_array2d.__distribution_direction = self.__distribution_direction + return p_array2d + def __str__(self: 'PArray2D[T]') -> str: return "PID[" + str(_PID) + "]:\n" + \ " global_index: " + str(self.__global_index) + "\n" + \ @@ -121,8 +130,9 @@ def distribute(self: 'PArray2D[T]') -> 'PArray2D[T]': return parray2d def map(self: 'PArray2D[T]', unary_op: Callable[[T], V]) -> 'PArray2D[V]': - self.__content = self.__content.map(unary_op) - return self + p_array2d = self.__get_shape() + p_array2d.__content = self.__content.map(unary_op) + return p_array2d def reduce(self: 'PArray2D[T]', binary_op: Callable[[T, T], T], neutral: Optional[T] = None) -> T: @@ -139,3 +149,10 @@ def get_partition(self: 'PArray2D[T]') -> 'PList[SArray2D[T]]': contents = _COMM.allgather(self.__content) p_list = PList().init(lambda i: contents[i], _NPROCS) return p_list + + def map2(self: 'PArray2D[T]', binary_op: Callable[[T, U], V], + a_array: 'PArray2D[U]') -> 'PArray2D[V]': + assert self.__distribution == a_array.__distribution + p_array2d = self.__get_shape() + p_array2d.__content = self.__content.map2(binary_op, a_array.__content) + return p_array2d diff --git a/pyske/core/array/sarray2d.py b/pyske/core/array/sarray2d.py index edd8b96..cda9ffd 100644 --- a/pyske/core/array/sarray2d.py +++ b/pyske/core/array/sarray2d.py @@ -11,6 +11,7 @@ class SArray2D: sequential arrays. from pyske.core.array.array_interface import Array2D, Distribution T = TypeVar('T') # pylint: disable=invalid-name +U = TypeVar('U') # pylint: disable=invalid-name V = TypeVar('V') # pylint: disable=invalid-name @@ -91,3 +92,10 @@ def get_partition(self: 'SArray2D[T]') -> 'SList[SArray2D[T]]': def distribute(self: 'SArray2D[T]') -> 'SArray2D[T]': return self + + def map2(self: 'SArray2D[T]', binary_op: Callable[[T, U], V], + a_array: 'SArray2D[U]') -> 'SArray2D[V]': + assert self.__line_size == a_array.line_size + assert self.__column_size == a_array.column_size + content = [binary_op(left, right) for (left, right) in zip(self.__values, a_array.values)] + return SArray2D(content, self.__line_size, self.__column_size) diff --git a/pyske/examples/array/hello_world.py b/pyske/examples/array/hello_world.py index 2d79649..3c9a55b 100644 --- a/pyske/examples/array/hello_world.py +++ b/pyske/examples/array/hello_world.py @@ -44,6 +44,13 @@ def __main(): print("Get partition") print(parray2d_column_init.get_partition()) + b_sarray2d = SArray2D.init(lambda line, column: 1, Distribution.LINE, col_size, line_size) + + print("Map2 array") + print(sarray2d.map2(lambda x, y: x + y, b_sarray2d)) + print(parray2d_line_init.map2(lambda x, y: x + y, parray2d_line_init)) + print(parray2d_column_init.map2(lambda x, y: x + y, parray2d_column)) + if __name__ == '__main__': __main() From 22481844af843f04292fbd106276c222c5ab0930 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Mon, 28 Jun 2021 11:53:10 +0200 Subject: [PATCH 47/53] adding to_seq skeleton --- pyske/core/array/array_interface.py | 30 +++++++++++++++++++++++++++++ pyske/core/array/parray2d.py | 9 +++++++++ pyske/core/array/sarray2d.py | 9 +++++++++ pyske/examples/array/hello_world.py | 3 +++ 4 files changed, 51 insertions(+) diff --git a/pyske/core/array/array_interface.py b/pyske/core/array/array_interface.py index c38b920..a4ba0e7 100644 --- a/pyske/core/array/array_interface.py +++ b/pyske/core/array/array_interface.py @@ -76,6 +76,14 @@ def distribute(self: 'Array2D[T]') -> 'Array2D[T]': In sequential, it just returns ``self``. In parallel, communications are performed to meet line or column distribution. + Examples:: + + >>> from pyske.core.array.sarray2d import SArray2D + >>> sarray2d = SArray2D.init(lambda i, j: 1, Distribution.LINE, col_size=2, line_size=2) + >>> sarray2d.distribute() + ( 1 1 ) + ( 1 1 ) + :return: an array containing the same elements. """ @@ -168,3 +176,25 @@ def map2(self: 'Array2D[T]', binary_op: Callable[[T, U], V], The second array must have same column and line size than `self`. :return: a new array. """ + + @abstractmethod + def to_seq(self: 'Array2D[T]') -> 'Array2D[T]': + """ + Return a sequential array with same content. + + The distribution must be per line. + + Examples:: + + >>> from pyske.core.array.sarray2d import SArray2D + >>> from pyske.core.array.parray2d import PArray2D + >>> from pyske.core.array.array_interface import Distribution + >>> PArray2D.init(lambda i, j: 1, Distribution.LINE, col_size=2, line_size=2).to_seq() + ( 1 1 ) + ( 1 1 ) + >>> SArray2D.init(lambda line, column: 1, Distribution.LINE, col_size = 2, line_size = 2).to_seq() + ( 1 1 ) + ( 1 1 ) + + :return: a sequential array. + """ diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py index 9d67a45..446afea 100644 --- a/pyske/core/array/parray2d.py +++ b/pyske/core/array/parray2d.py @@ -156,3 +156,12 @@ def map2(self: 'PArray2D[T]', binary_op: Callable[[T, U], V], p_array2d = self.__get_shape() p_array2d.__content = self.__content.map2(binary_op, a_array.__content) return p_array2d + + def to_seq(self: 'PArray2D[T]') -> 'SArray2D[T]': + assert self.__distribution_direction == Distribution.LINE + col_size = self.__global_index[1][1] - self.__global_index[1][0] + 1 + line_size = self.__global_index[0][1] - self.__global_index[0][0] + 1 + content = self.get_partition()\ + .reduce(lambda a_sarray, b_sarray: SArray2D.concat(a_sarray, b_sarray), + SArray2D([], 0, 0)).values + return SArray2D(content, line_size, col_size) diff --git a/pyske/core/array/sarray2d.py b/pyske/core/array/sarray2d.py index cda9ffd..e9f75f5 100644 --- a/pyske/core/array/sarray2d.py +++ b/pyske/core/array/sarray2d.py @@ -99,3 +99,12 @@ def map2(self: 'SArray2D[T]', binary_op: Callable[[T, U], V], assert self.__column_size == a_array.column_size content = [binary_op(left, right) for (left, right) in zip(self.__values, a_array.values)] return SArray2D(content, self.__line_size, self.__column_size) + + def to_seq(self: 'SArray2D[T]') -> 'SArray2D[T]': + return self + + @staticmethod + def concat(a_sarray: 'SArray2D[T]', b_sarray: 'SArray2D[T]') -> 'SArray2D[T]': + line_size = a_sarray.line_size + b_sarray.line_size + col_size = a_sarray.column_size + return SArray2D(a_sarray.values + b_sarray.values, line_size, col_size) diff --git a/pyske/examples/array/hello_world.py b/pyske/examples/array/hello_world.py index 3c9a55b..65e10f2 100644 --- a/pyske/examples/array/hello_world.py +++ b/pyske/examples/array/hello_world.py @@ -51,6 +51,9 @@ def __main(): print(parray2d_line_init.map2(lambda x, y: x + y, parray2d_line_init)) print(parray2d_column_init.map2(lambda x, y: x + y, parray2d_column)) + print("To seq") + print(parray2d_line_init.to_seq()) + if __name__ == '__main__': __main() From 91a8978bc559da37d603d031ab87b2b534df6749 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Mon, 28 Jun 2021 12:19:37 +0200 Subject: [PATCH 48/53] new doctests with to_seq --- pyske/core/array/array_interface.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/pyske/core/array/array_interface.py b/pyske/core/array/array_interface.py index a4ba0e7..c34d744 100644 --- a/pyske/core/array/array_interface.py +++ b/pyske/core/array/array_interface.py @@ -79,6 +79,7 @@ def distribute(self: 'Array2D[T]') -> 'Array2D[T]': Examples:: >>> from pyske.core.array.sarray2d import SArray2D + >>> from pyske.core.array.array_interface import Distribution >>> sarray2d = SArray2D.init(lambda i, j: 1, Distribution.LINE, col_size=2, line_size=2) >>> sarray2d.distribute() ( 1 1 ) @@ -98,12 +99,17 @@ def map(self: 'Array2D[T]', unary_op: Callable[[T], V]) -> 'Array2D[V]': Examples:: >>> from pyske.core.array.sarray2d import SArray2D + >>> from pyske.core.array.parray2d import PArray2D >>> from pyske.core.array.array_interface import Distribution >>> col_size = 2 >>> line_size = 2 >>> SArray2D.init(lambda i, j: 1, Distribution.LINE, col_size, line_size).map(lambda x: x + 1) ( 2 2 ) ( 2 2 ) + >>> parray2d = PArray2D.init(lambda i, j: 1, Distribution.LINE, col_size=2, line_size=2).map(lambda x: x + 1) + >>> parray2d.to_seq() + ( 2 2 ) + ( 2 2 ) :param unary_op: function to apply to elements :return: a new array @@ -143,12 +149,21 @@ def get_partition(self: 'Array2D[T]') -> 'List[Array2D[T]]': Examples:: >>> from pyske.core.array.sarray2d import SArray2D + >>> from pyske.core.array.parray2d import PArray2D >>> from pyske.core.array.array_interface import Distribution + >>> from pyske.core.util import par >>> col_size = 2 >>> line_size = 2 - >>> SArray2D.init(lambda i, j: 1, Distribution.LINE, col_size, line_size).get_partition() - [( 1 1 ) - ( 1 1 )] + >>> init_function = lambda line, column: line * col_size + column + >>> SArray2D.init(init_function, Distribution.LINE, col_size, line_size).get_partition() + [( 0 1 ) + ( 2 3 )] + >>> parray2d = PArray2D.init(init_function, Distribution.LINE, col_size=2, line_size=2) + >>> parray2d.get_partition().to_seq() if par.procs() == [0, 1] else [(0, 1), (2, 3)] + [(0, 1), (2, 3)] + + + :return: a list of array. """ From e0e02a0e3e52ed986ad05e15242665666fd179d1 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Mon, 28 Jun 2021 12:37:53 +0200 Subject: [PATCH 49/53] correction doctest get_partition --- pyske/core/array/array_interface.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pyske/core/array/array_interface.py b/pyske/core/array/array_interface.py index c34d744..acb03f8 100644 --- a/pyske/core/array/array_interface.py +++ b/pyske/core/array/array_interface.py @@ -159,11 +159,8 @@ def get_partition(self: 'Array2D[T]') -> 'List[Array2D[T]]': [( 0 1 ) ( 2 3 )] >>> parray2d = PArray2D.init(init_function, Distribution.LINE, col_size=2, line_size=2) - >>> parray2d.get_partition().to_seq() if par.procs() == [0, 1] else [(0, 1), (2, 3)] - [(0, 1), (2, 3)] - - - + >>> parray2d.get_partition().to_seq() if par.procs() == [0, 1] else '[( 0 1 ), ( 2 3 )]' + '[( 0 1 ), ( 2 3 )]' :return: a list of array. """ From b51b9d3c7a1e746e3ce39b6b8e467bca25a8b91d Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Mon, 28 Jun 2021 19:24:06 +0200 Subject: [PATCH 50/53] column to line distribution --- pyske/core/array/array_interface.py | 6 +-- pyske/core/array/parray2d.py | 74 +++++++++++++++++++++-------- pyske/examples/array/hello_world.py | 8 +++- 3 files changed, 61 insertions(+), 27 deletions(-) diff --git a/pyske/core/array/array_interface.py b/pyske/core/array/array_interface.py index acb03f8..a366011 100644 --- a/pyske/core/array/array_interface.py +++ b/pyske/core/array/array_interface.py @@ -69,7 +69,7 @@ def init(value_at: Callable[[int, int], V], distribution: Distribution, col_size """ @abstractmethod - def distribute(self: 'Array2D[T]') -> 'Array2D[T]': + def distribute(self: 'Array2D[T]', distribution_direction: Distribution) -> 'Array2D[T]': """ Copy the array while changing its distribution. @@ -194,14 +194,12 @@ def to_seq(self: 'Array2D[T]') -> 'Array2D[T]': """ Return a sequential array with same content. - The distribution must be per line. - Examples:: >>> from pyske.core.array.sarray2d import SArray2D >>> from pyske.core.array.parray2d import PArray2D >>> from pyske.core.array.array_interface import Distribution - >>> PArray2D.init(lambda i, j: 1, Distribution.LINE, col_size=2, line_size=2).to_seq() + >>> PArray2D.init(lambda i, j: 1, Distribution.COLUMN, col_size=2, line_size=2).to_seq() ( 1 1 ) ( 1 1 ) >>> SArray2D.init(lambda line, column: 1, Distribution.LINE, col_size = 2, line_size = 2).to_seq() diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py index 446afea..28e88c0 100644 --- a/pyske/core/array/parray2d.py +++ b/pyske/core/array/parray2d.py @@ -97,36 +97,66 @@ def init(value_at: Callable[[int, int], V], distribution: Distribution, return parray2d - def distribute(self: 'PArray2D[T]') -> 'PArray2D[T]': - parray2d = PArray2D() - parray2d.__global_index = self.__global_index - + def __distribute_column(self: 'PArray2D[T]', new_parray: 'PArray2D[T]', local_line_size, + local_col_size): col_size = self.__global_index[1][1] - self.__global_index[1][0] + 1 - line_size = self.__global_index[0][1] - self.__global_index[0][0] + 1 - - parray2d.__local_index = _local_index(Distribution.COLUMN, col_size, line_size, _PID) - parray2d.__distribution_direction = Distribution.COLUMN - parray2d.__distribution = [ - _local_index(parray2d.__distribution_direction, col_size, line_size, i) for i in - range(_NPROCS)] - local_line_size = parray2d.__local_index[0][1] - parray2d.__local_index[0][0] + 1 - local_col_size = parray2d.__local_index[1][1] - parray2d.__local_index[1][0] + 1 - # update content for each process for i in range(0, _NPROCS): content_to_send = [] for j in range(len(self.__content)): - if j % col_size in range(parray2d.__distribution[i][1][0], - parray2d.__distribution[i][1][1] + 1): + if j % col_size in range(new_parray.__distribution[i][1][0], + new_parray.__distribution[i][1][1] + 1): content_to_send.append(self.__content.values[j]) if i == _PID: content = _COMM.gather(content_to_send, i) # flatten the list content = [item for items in content for item in items] - parray2d.__content = SArray2D(content, local_line_size, local_col_size) + new_parray.__content = SArray2D(content, local_line_size, local_col_size) else: _COMM.gather(content_to_send, i) + return new_parray + + def __distribute_line(self: 'PArray2D[T]', new_parray: 'PArray2D[T]', local_line_size, + local_col_size, old_distribution): + # update content for each process + for i in range(0, _NPROCS): + content = [] + old_local_col_size = old_distribution[_PID][1][1] - old_distribution[_PID][1][0] + 1 + start_index = new_parray.__distribution[i][0][0] * old_local_col_size + stop_index = (new_parray.__distribution[i][0][1] + 1) * old_local_col_size + for j in range(start_index, stop_index, old_local_col_size): + content_to_send = self.__content.values[j:j+old_local_col_size] + content_to_send = _COMM.allgather(content_to_send) + content.extend([item for items in content_to_send for item in items]) + if i == _PID: + new_parray.__content = SArray2D(content, local_line_size, local_col_size) + + return new_parray + + def distribute(self: 'PArray2D[T]', distribution_direction: Distribution) -> 'PArray2D[T]': + if distribution_direction == self.__distribution_direction: + return self + parray2d = PArray2D() + parray2d.__global_index = self.__global_index + + line_size = self.__global_index[0][1] - self.__global_index[0][0] + 1 + col_size = self.__global_index[1][1] - self.__global_index[1][0] + 1 + + old_distribution = self.__distribution + + parray2d.__local_index = _local_index(distribution_direction, col_size, line_size, _PID) + parray2d.__distribution_direction = distribution_direction + parray2d.__distribution = [ + _local_index(parray2d.__distribution_direction, col_size, line_size, i) for i in + range(_NPROCS)] + local_line_size = parray2d.__local_index[0][1] - parray2d.__local_index[0][0] + 1 + local_col_size = parray2d.__local_index[1][1] - parray2d.__local_index[1][0] + 1 + if distribution_direction == Distribution.COLUMN: + parray2d = self.__distribute_column(parray2d, local_line_size, local_col_size) + else: + parray2d = self.__distribute_line(parray2d, local_line_size, local_col_size, old_distribution) + return parray2d def map(self: 'PArray2D[T]', unary_op: Callable[[T], V]) -> 'PArray2D[V]': @@ -158,10 +188,12 @@ def map2(self: 'PArray2D[T]', binary_op: Callable[[T, U], V], return p_array2d def to_seq(self: 'PArray2D[T]') -> 'SArray2D[T]': - assert self.__distribution_direction == Distribution.LINE - col_size = self.__global_index[1][1] - self.__global_index[1][0] + 1 - line_size = self.__global_index[0][1] - self.__global_index[0][0] + 1 - content = self.get_partition()\ + parray2d = self + if self.__distribution_direction == Distribution.COLUMN: + parray2d = self.distribute(Distribution.LINE) + col_size = parray2d.__global_index[1][1] - parray2d.__global_index[1][0] + 1 + line_size = parray2d.__global_index[0][1] - parray2d.__global_index[0][0] + 1 + content = parray2d.get_partition() \ .reduce(lambda a_sarray, b_sarray: SArray2D.concat(a_sarray, b_sarray), SArray2D([], 0, 0)).values return SArray2D(content, line_size, col_size) diff --git a/pyske/examples/array/hello_world.py b/pyske/examples/array/hello_world.py index 65e10f2..7f4f4eb 100644 --- a/pyske/examples/array/hello_world.py +++ b/pyske/examples/array/hello_world.py @@ -17,9 +17,13 @@ def __main(): print(parray2d_line_init) print("Line to column distribution") - parray2d_column = parray2d_line_init.distribute() + parray2d_column = parray2d_line_init.distribute(Distribution.COLUMN) print(parray2d_column) + print("Column to line distribution") + parray2d_line = parray2d_column.distribute(Distribution.LINE) + print(parray2d_line) + print("Column initialization") parray2d_column_init = PArray2D.init(lambda line, column: line * col_size + column, Distribution.COLUMN, col_size, line_size) @@ -52,7 +56,7 @@ def __main(): print(parray2d_column_init.map2(lambda x, y: x + y, parray2d_column)) print("To seq") - print(parray2d_line_init.to_seq()) + print(parray2d_column.to_seq()) if __name__ == '__main__': From afc28a0bfc35b0fd456f0b51cbc775e4805601b7 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Tue, 29 Jun 2021 13:50:52 +0200 Subject: [PATCH 51/53] bad signature distribute --- pyske/core/array/sarray2d.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyske/core/array/sarray2d.py b/pyske/core/array/sarray2d.py index e9f75f5..432df3f 100644 --- a/pyske/core/array/sarray2d.py +++ b/pyske/core/array/sarray2d.py @@ -90,7 +90,7 @@ def reduce(self: 'SArray2D[T]', binary_op: Callable[[T, T], T], def get_partition(self: 'SArray2D[T]') -> 'SList[SArray2D[T]]': return SList([self]) - def distribute(self: 'SArray2D[T]') -> 'SArray2D[T]': + def distribute(self: 'SArray2D[T]', distribution_direction: Distribution) -> 'SArray2D[T]': return self def map2(self: 'SArray2D[T]', binary_op: Callable[[T, U], V], From 543bf9cb95deb6d45bebd2e52c71279e043c5005 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Wed, 30 Jun 2021 10:27:09 +0200 Subject: [PATCH 52/53] distribute signature correction --- pyske/core/array/array_interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyske/core/array/array_interface.py b/pyske/core/array/array_interface.py index a366011..ecc6afb 100644 --- a/pyske/core/array/array_interface.py +++ b/pyske/core/array/array_interface.py @@ -84,7 +84,7 @@ def distribute(self: 'Array2D[T]', distribution_direction: Distribution) -> 'Arr >>> sarray2d.distribute() ( 1 1 ) ( 1 1 ) - + :param distribution_direction: the distribution direction (LINE, COLUMN) :return: an array containing the same elements. """ From c16c720ec1bfcaf22d1bb12cbeb480e8f7369f4f Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Wed, 30 Jun 2021 13:58:38 +0200 Subject: [PATCH 53/53] missing parameter doctest distribute --- pyske/core/array/array_interface.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyske/core/array/array_interface.py b/pyske/core/array/array_interface.py index ecc6afb..35d3a24 100644 --- a/pyske/core/array/array_interface.py +++ b/pyske/core/array/array_interface.py @@ -81,9 +81,10 @@ def distribute(self: 'Array2D[T]', distribution_direction: Distribution) -> 'Arr >>> from pyske.core.array.sarray2d import SArray2D >>> from pyske.core.array.array_interface import Distribution >>> sarray2d = SArray2D.init(lambda i, j: 1, Distribution.LINE, col_size=2, line_size=2) - >>> sarray2d.distribute() + >>> sarray2d.distribute(Distribution.COLUMN) ( 1 1 ) ( 1 1 ) + :param distribution_direction: the distribution direction (LINE, COLUMN) :return: an array containing the same elements. """