From ce230dba7ec01dc4bed42848035c4c8559909f70 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Wed, 5 May 2021 17:05:56 +0200 Subject: [PATCH 01/34] Class Point / K-means algorithm --- pyske/core/util/point.py | 51 ++++++++++++ pyske/examples/list/k_means.py | 121 ++++++++++++++++++++++++++++ pyske/examples/list/k_means_main.py | 25 ++++++ pyske/examples/list/util.py | 14 ++++ 4 files changed, 211 insertions(+) create mode 100644 pyske/core/util/point.py create mode 100644 pyske/examples/list/k_means.py create mode 100644 pyske/examples/list/k_means_main.py diff --git a/pyske/core/util/point.py b/pyske/core/util/point.py new file mode 100644 index 0000000..0a5d2fc --- /dev/null +++ b/pyske/core/util/point.py @@ -0,0 +1,51 @@ +""" +A module to represent a point +""" + +from math import sqrt + + +class Point(object): + """A class to represent a point""" + + def __init__(self, x, y): + self.__x = x + self.__y = y + + def __repr__(self): + return "(%s, %s)" % (self.__x, self.__y) + + def __eq__(self, other): + if isinstance(other, Point): + return self.__x == other.x and self.__y == other.__y + return False + + @property + def x(self): + """X getter""" + return self.__x + + @property + def y(self): + """Y getter""" + return self.__y + + def distance(self, other: 'Point'): + """ + Returns the distance from another point. + + Examples:: + + >>> from pyske.core.util.point import Point + >>> p1 = Point(5,5) + >>> p2 = Point(5,7) + >>> p1.distance(p2) + 2.0 + + :param other: a point + :return: distance from other point + + """ + dx = self.__x - other.x + dy = self.__y - other.y + return sqrt(dx ** 2 + dy ** 2) diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py new file mode 100644 index 0000000..69dbdbc --- /dev/null +++ b/pyske/examples/list/k_means.py @@ -0,0 +1,121 @@ +""" +K-Means +""" + +from pyske.core.interface import List +from pyske.core.list import SList +import random +import matplotlib.pyplot as plt +from pyske.core.util.point import Point + + +def cluster_index(p, centroids): + """ + Get the centroid index of the closest centroid + """ + min_dist = float("inf") + p_centroid = centroids[0] + for c in centroids: + if p.distance(c) < min_dist: + min_dist = p.distance(c) + p_centroid = c + return centroids.index(p_centroid) + + +def make_clusters(input_list, centroids): + """ + Append all points to the cluster with the minimal distance from its centroid + """ + clusters = [[] for c in centroids] + for p in input_list.to_seq(): + index = cluster_index(p, centroids) + clusters[index].append(p) + return clusters + + +def coords_average(cluster): + """ + Get the coordinates average of all points in one cluster + """ + x_average = sum([p.x for p in cluster]) / len(cluster) + y_average = sum([p.y for p in cluster]) / len(cluster) + return Point(x_average, y_average) + + +def get_new_centroid(cluster): + """ + Get closest point to average of point coordinates + """ + average_point = coords_average(cluster) + min_dist = float("inf") + new_centroid = cluster[0] + for p in cluster: + if p.distance(average_point) < min_dist: + min_dist = p.distance(average_point) + new_centroid = p + return new_centroid + + +def define_centroids(clusters): + """ + Redefine centroids of clusters + """ + centroids = [] + for cluster in clusters: + centroids.append(get_new_centroid(cluster)) + return centroids + + +def k_means_init(input_list: List, n_cluster: int): + """ + K-means++ initialisation + + :param input_list: a list of point + :param n_cluster: number of cluster + + :return: n_cluster centroids + """ + centroids = SList([]) + c1 = input_list.to_seq()[random.randint(0, input_list.length() - 1)] + centroids.append(c1) + + for c in range(n_cluster - 1): + dist = input_list.map(lambda x: x.distance(centroids[0])) + for i in range(1, len(centroids)): + temp_dist = input_list.map(lambda x: x.distance(centroids[i])) + dist = dist.map2(lambda x, y: min(x, y), temp_dist) + + index_max = [i for i, x in enumerate(dist.to_seq()) if x == max(dist.to_seq())] + next_centroid = input_list.to_seq()[index_max[0]] + centroids.append(next_centroid) + + return centroids + + +def k_means(input_list: List, n_cluster: int, max_iter: int = 10): + """ + K-means algorithm on a list of point + + :param input_list: a list of point + :param n_cluster: number of cluster + :param max_iter: number of iteration + + :return: a list of class + """ + centroids = k_means_init(input_list, n_cluster) + j = 0 + while j < max_iter: + clusters = make_clusters(input_list, centroids) + plt.scatter([point.x for point in input_list.to_seq()], [point.y for point in input_list.to_seq()], + c='yellow') + clusters_color = ['green', 'blue', 'black', 'purple', 'brown'] + for i in range(len(clusters)): + plt.scatter([point.x for point in clusters[i]], [point.y for point in clusters[i]], + c=clusters_color[i]) + + centroids = define_centroids(clusters) + plt.scatter([point.x for point in centroids], [point.y for point in centroids], c='red') + plt.show() + j = j + 1 + + return clusters diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py new file mode 100644 index 0000000..f2b8c0c --- /dev/null +++ b/pyske/examples/list/k_means_main.py @@ -0,0 +1,25 @@ +""" +Execution of k_means +""" +import gc + +from pyske.core import Timing +from pyske.examples.list.k_means import k_means +from pyske.examples.list import util + + +if __name__ == '__main__': + size, num_iter, choice = util.standard_parse_command_line() + pyske_list_class = util.select_pyske_list(choice) + input_list = util.rand_point_list(pyske_list_class, size) + timing = Timing() + execute = util.select_execute(choice) + example = k_means + execute(lambda: print('Version:\t', choice)) + gc.disable() + for iteration in range(1, 1 + num_iter): + timing.start() + result = example(input_list, 5) + timing.stop() + gc.collect() + util.print_experiment(result, timing.get(), execute, iteration) \ No newline at end of file diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index 57bed0d..26dbb69 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -89,6 +89,20 @@ def rand_list(cls, size): return cls.init(lambda _: float(random.randint(-100, 100)), size) +def rand_point_list(cls, size): + """ + Return a randomly generated list of points. + + :param cls: the class of the generated list. + :param size: a positive number + Precondition: size >= 0 + :return: a list of the given class + """ + from pyske.core.util.point import Point + import random + return cls.init(lambda _: Point(random.randint(0, size), random.randint(0, size)), size) + + def print_experiment(result, timing, execute, iteration=None): """ Print the result and timing of the experiment. From 0b33eacce3ceaa0dc1ed32ee8bad7b99b30d75e8 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Thu, 6 May 2021 16:33:41 +0200 Subject: [PATCH 02/34] number of clusters in parameters / test on datasets --- pyske/examples/list/k_means.py | 11 ++--------- pyske/examples/list/k_means_main.py | 27 +++++++++++++++++++++++---- pyske/examples/list/util.py | 11 ++++++++--- 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index 69dbdbc..d500fbb 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -100,22 +100,15 @@ def k_means(input_list: List, n_cluster: int, max_iter: int = 10): :param n_cluster: number of cluster :param max_iter: number of iteration - :return: a list of class + :return: 2 dimension list of points """ centroids = k_means_init(input_list, n_cluster) j = 0 while j < max_iter: clusters = make_clusters(input_list, centroids) - plt.scatter([point.x for point in input_list.to_seq()], [point.y for point in input_list.to_seq()], - c='yellow') - clusters_color = ['green', 'blue', 'black', 'purple', 'brown'] - for i in range(len(clusters)): - plt.scatter([point.x for point in clusters[i]], [point.y for point in clusters[i]], - c=clusters_color[i]) centroids = define_centroids(clusters) - plt.scatter([point.x for point in centroids], [point.y for point in centroids], c='red') - plt.show() + # plt.scatter([point.x for point in centroids], [point.y for point in centroids], c='red') j = j + 1 return clusters diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py index f2b8c0c..0367361 100644 --- a/pyske/examples/list/k_means_main.py +++ b/pyske/examples/list/k_means_main.py @@ -6,12 +6,28 @@ from pyske.core import Timing from pyske.examples.list.k_means import k_means from pyske.examples.list import util +import matplotlib.pyplot as plt +import argparse +PAR = 'parallel' +SEQ = 'sequential' if __name__ == '__main__': - size, num_iter, choice = util.standard_parse_command_line() + + parser = argparse.ArgumentParser() + parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000_000) + parser.add_argument("--iter", help="number of iterations", type=int, default=30) + parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ) + parser.add_argument("--clusters", help="number of clusters", type=int, default=3) + + args = parser.parse_args() + size = args.size + num_iter = args.iter + choice = args.data + clusters = args.clusters + pyske_list_class = util.select_pyske_list(choice) - input_list = util.rand_point_list(pyske_list_class, size) + input_list = util.rand_point_list(pyske_list_class, size, clusters) timing = Timing() execute = util.select_execute(choice) example = k_means @@ -19,7 +35,10 @@ gc.disable() for iteration in range(1, 1 + num_iter): timing.start() - result = example(input_list, 5) + result = example(input_list, clusters) timing.stop() gc.collect() - util.print_experiment(result, timing.get(), execute, iteration) \ No newline at end of file + util.print_experiment("", timing.get(), execute, iteration) + for i in range(len(result)): + plt.scatter([point.x for point in result[i]], [point.y for point in result[i]]) + plt.show() diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index 26dbb69..965067f 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -2,6 +2,8 @@ Utility functions for PySke examples """ +from sklearn.datasets import make_blobs + PAR = 'parallel' SEQ = 'sequential' @@ -89,18 +91,21 @@ def rand_list(cls, size): return cls.init(lambda _: float(random.randint(-100, 100)), size) -def rand_point_list(cls, size): +def rand_point_list(cls, size, clusters): """ Return a randomly generated list of points. :param cls: the class of the generated list. :param size: a positive number Precondition: size >= 0 + :param clusters: number of clusters :return: a list of the given class """ from pyske.core.util.point import Point - import random - return cls.init(lambda _: Point(random.randint(0, size), random.randint(0, size)), size) + x, y_true = make_blobs(n_samples=size, centers=clusters) + x = x.tolist() + x = list(map(lambda y: Point(y[0], y[1]), x)) + return cls.from_seq(x) def print_experiment(result, timing, execute, iteration=None): From 8b89af61f688b9b756ccfc0c29747c933ce106e3 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Tue, 11 May 2021 09:55:38 +0200 Subject: [PATCH 03/34] radon cc in k_means_init / pylinting --- pyske/examples/list/k_means.py | 29 +++++++++++++++++++---------- pyske/examples/list/k_means_main.py | 6 +++--- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index d500fbb..4b9ed26 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -1,11 +1,9 @@ """ K-Means """ - +import random from pyske.core.interface import List from pyske.core.list import SList -import random -import matplotlib.pyplot as plt from pyske.core.util.point import Point @@ -65,6 +63,17 @@ def define_centroids(clusters): centroids.append(get_new_centroid(cluster)) return centroids +def index_max_value(input_list: List): + """ + Return the index of the maximum value + """ + index_max = 0 + max_dist = 0 + for i in range(len(input_list.to_seq())): + if input_list.to_seq()[i] > max_dist: + max_dist = input_list.to_seq()[i] + index_max = i + return index_max def k_means_init(input_list: List, n_cluster: int): """ @@ -79,14 +88,14 @@ def k_means_init(input_list: List, n_cluster: int): c1 = input_list.to_seq()[random.randint(0, input_list.length() - 1)] centroids.append(c1) - for c in range(n_cluster - 1): + for _ in range(n_cluster - 1): dist = input_list.map(lambda x: x.distance(centroids[0])) for i in range(1, len(centroids)): - temp_dist = input_list.map(lambda x: x.distance(centroids[i])) - dist = dist.map2(lambda x, y: min(x, y), temp_dist) + temp_dist = input_list.map(lambda x, index=i: x.distance(centroids[index])) + dist = dist.map2(lambda x, y: y if y < x else x, temp_dist) - index_max = [i for i, x in enumerate(dist.to_seq()) if x == max(dist.to_seq())] - next_centroid = input_list.to_seq()[index_max[0]] + index_max = index_max_value(dist) + next_centroid = input_list.to_seq()[index_max] centroids.append(next_centroid) return centroids @@ -100,13 +109,13 @@ def k_means(input_list: List, n_cluster: int, max_iter: int = 10): :param n_cluster: number of cluster :param max_iter: number of iteration - :return: 2 dimension list of points + :return: 2 dimensions list of points """ + centroids = k_means_init(input_list, n_cluster) j = 0 while j < max_iter: clusters = make_clusters(input_list, centroids) - centroids = define_centroids(clusters) # plt.scatter([point.x for point in centroids], [point.y for point in centroids], c='red') j = j + 1 diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py index 0367361..f08a918 100644 --- a/pyske/examples/list/k_means_main.py +++ b/pyske/examples/list/k_means_main.py @@ -2,12 +2,12 @@ Execution of k_means """ import gc +import argparse +import matplotlib.pyplot as plt from pyske.core import Timing from pyske.examples.list.k_means import k_means from pyske.examples.list import util -import matplotlib.pyplot as plt -import argparse PAR = 'parallel' SEQ = 'sequential' @@ -15,7 +15,7 @@ if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000_000) + parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000) parser.add_argument("--iter", help="number of iterations", type=int, default=30) parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ) parser.add_argument("--clusters", help="number of clusters", type=int, default=3) From 4c7d8727d5ea23062681d352e0dc49987c64bf5a Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Tue, 25 May 2021 15:47:05 +0200 Subject: [PATCH 04/34] Default constructor, addition between two points, multiplication by scalar and by a point --- pyske/core/util/point.py | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/pyske/core/util/point.py b/pyske/core/util/point.py index 0a5d2fc..cb55684 100644 --- a/pyske/core/util/point.py +++ b/pyske/core/util/point.py @@ -8,7 +8,7 @@ class Point(object): """A class to represent a point""" - def __init__(self, x, y): + def __init__(self, x=0, y=0): self.__x = x self.__y = y @@ -20,6 +20,38 @@ def __eq__(self, other): return self.__x == other.x and self.__y == other.__y return False + def __add__(self, other): + """ + Addition of two points + + Examples:: + + >>> p1 = Point(5,5) + >>> p2 = Point(5,7) + >>> p1 + p2 + (10, 12) + """ + if isinstance(other, Point): + return Point(self.x + other.x, self.y + other.y) + + def __mul__(self, other): + """ + Multiplication by a point or a scalar + + Examples:: + + >>> p1 = Point(5,5) + >>> p2 = Point(5,7) + >>> p1 * 5 + (25, 25) + >>> p1 * p2 + (25, 35) + """ + if isinstance(other, Point): + return Point(self.x * other.x, self.y * other.y) + if isinstance(other, int) or isinstance(other, float): + return Point(self.x * other, self.y * other) + @property def x(self): """X getter""" From ee146229998184612db64bc0cb01ae1160fe0b80 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Tue, 25 May 2021 16:13:40 +0200 Subject: [PATCH 05/34] parrallel optimization in k_means_init --- pyske/examples/list/k_means.py | 41 +++++++++++++++++----------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index 4b9ed26..d7ab21f 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -2,20 +2,21 @@ K-Means """ import random +from typing import Callable, Tuple from pyske.core.interface import List from pyske.core.list import SList from pyske.core.util.point import Point -def cluster_index(p, centroids): +def cluster_index(point, centroids): """ Get the centroid index of the closest centroid """ min_dist = float("inf") p_centroid = centroids[0] for c in centroids: - if p.distance(c) < min_dist: - min_dist = p.distance(c) + if point.distance(c) < min_dist: + min_dist = point.distance(c) p_centroid = c return centroids.index(p_centroid) @@ -54,7 +55,7 @@ def get_new_centroid(cluster): return new_centroid -def define_centroids(clusters): +def define_centroids(clusters): # Pas utile car tuple ( num_cluster, point ) """ Redefine centroids of clusters """ @@ -63,17 +64,16 @@ def define_centroids(clusters): centroids.append(get_new_centroid(cluster)) return centroids -def index_max_value(input_list: List): + +def max_dist(pair_a: Tuple[Point, float], pair_b: Tuple[Point, float]): """ - Return the index of the maximum value + Return the tuple with the maximum distance """ - index_max = 0 - max_dist = 0 - for i in range(len(input_list.to_seq())): - if input_list.to_seq()[i] > max_dist: - max_dist = input_list.to_seq()[i] - index_max = i - return index_max + if pair_a[1] > pair_b[1]: + return pair_a + else: + return pair_b + def k_means_init(input_list: List, n_cluster: int): """ @@ -94,30 +94,31 @@ def k_means_init(input_list: List, n_cluster: int): temp_dist = input_list.map(lambda x, index=i: x.distance(centroids[index])) dist = dist.map2(lambda x, y: y if y < x else x, temp_dist) - index_max = index_max_value(dist) - next_centroid = input_list.to_seq()[index_max] + zip_list = input_list.zip(dist) + next_centroid = zip_list.reduce(max_dist)[0] centroids.append(next_centroid) return centroids -def k_means(input_list: List, n_cluster: int, max_iter: int = 10): +def k_means(input_list: List, init_function: Callable[[List, int], List], n_cluster: int, + max_iter: int = 10): """ K-means algorithm on a list of point :param input_list: a list of point :param n_cluster: number of cluster :param max_iter: number of iteration + :param init_function: a function that initialize centroids :return: 2 dimensions list of points """ - centroids = k_means_init(input_list, n_cluster) + centroids = init_function(input_list, n_cluster) j = 0 while j < max_iter: - clusters = make_clusters(input_list, centroids) - centroids = define_centroids(clusters) - # plt.scatter([point.x for point in centroids], [point.y for point in centroids], c='red') + clusters = make_clusters(input_list, centroids) # assign_cluster + centroids = define_centroids(clusters) # update_centroids j = j + 1 return clusters From 057457c98096184e89eac1af93d6ec433eba9428 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Wed, 26 May 2021 17:04:36 +0200 Subject: [PATCH 06/34] fix: init instead of from_seq --- pyske/examples/list/util.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index 965067f..d6517fa 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -102,10 +102,11 @@ def rand_point_list(cls, size, clusters): :return: a list of the given class """ from pyske.core.util.point import Point + print(clusters) x, y_true = make_blobs(n_samples=size, centers=clusters) x = x.tolist() x = list(map(lambda y: Point(y[0], y[1]), x)) - return cls.from_seq(x) + return cls.init(lambda i: x[i], size) def print_experiment(result, timing, execute, iteration=None): From 87000f9a8ec99b89a26c528e8ba3184838591f38 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Wed, 26 May 2021 17:05:11 +0200 Subject: [PATCH 07/34] Division of a point --- pyske/core/util/point.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyske/core/util/point.py b/pyske/core/util/point.py index cb55684..b4c43f0 100644 --- a/pyske/core/util/point.py +++ b/pyske/core/util/point.py @@ -52,6 +52,10 @@ def __mul__(self, other): if isinstance(other, int) or isinstance(other, float): return Point(self.x * other, self.y * other) + def __truediv__(self, other): + if isinstance(other, int): + return Point(self.x / other, self.y / other) + @property def x(self): """X getter""" From 0d9b023430548a3f91a804dd060c465f686e68df Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Wed, 26 May 2021 17:06:15 +0200 Subject: [PATCH 08/34] parallel optimization, assign_cluster and update_cluster --- pyske/examples/list/k_means.py | 62 +++++++++++------------------ pyske/examples/list/k_means_main.py | 15 ++++--- 2 files changed, 30 insertions(+), 47 deletions(-) diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index d7ab21f..89cf643 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -18,51 +18,32 @@ def cluster_index(point, centroids): if point.distance(c) < min_dist: min_dist = point.distance(c) p_centroid = c - return centroids.index(p_centroid) + return point, centroids.index(p_centroid) -def make_clusters(input_list, centroids): +def assign_clusters(input_list, centroids): """ - Append all points to the cluster with the minimal distance from its centroid + Assign to each point to a cluster """ - clusters = [[] for c in centroids] - for p in input_list.to_seq(): - index = cluster_index(p, centroids) - clusters[index].append(p) - return clusters - -def coords_average(cluster): - """ - Get the coordinates average of all points in one cluster - """ - x_average = sum([p.x for p in cluster]) / len(cluster) - y_average = sum([p.y for p in cluster]) / len(cluster) - return Point(x_average, y_average) + return input_list.map(lambda x: cluster_index(x, centroids)) -def get_new_centroid(cluster): +def update_centroids(clusters, centroids): """ - Get closest point to average of point coordinates + Update centroids of clusters """ - average_point = coords_average(cluster) - min_dist = float("inf") - new_centroid = cluster[0] - for p in cluster: - if p.distance(average_point) < min_dist: - min_dist = p.distance(average_point) - new_centroid = p - return new_centroid - - -def define_centroids(clusters): # Pas utile car tuple ( num_cluster, point ) - """ - Redefine centroids of clusters - """ - centroids = [] - for cluster in clusters: - centroids.append(get_new_centroid(cluster)) - return centroids + new_centroids = SList([]) + i = 0 + while i < len(centroids): + cluster = clusters.filter(lambda x: x[1] == i) + sum_cluster = cluster.map(lambda x: x[0]).reduce(lambda x, y: x + y) + average_point = sum_cluster / cluster.length() + centroid = clusters.reduce( + lambda x, y: x if average_point.distance(x[0]) < average_point.distance(y[0]) else y)[0] + new_centroids.append(centroid) + i += 1 + return new_centroids def max_dist(pair_a: Tuple[Point, float], pair_b: Tuple[Point, float]): @@ -113,12 +94,15 @@ def k_means(input_list: List, init_function: Callable[[List, int], List], n_clus :return: 2 dimensions list of points """ - centroids = init_function(input_list, n_cluster) + j = 0 + while j < max_iter: - clusters = make_clusters(input_list, centroids) # assign_cluster - centroids = define_centroids(clusters) # update_centroids + clusters = assign_clusters(input_list, centroids) + + centroids = update_centroids(clusters, centroids) + j = j + 1 return clusters diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py index f08a918..0a3b171 100644 --- a/pyske/examples/list/k_means_main.py +++ b/pyske/examples/list/k_means_main.py @@ -1,13 +1,13 @@ """ Execution of k_means """ -import gc import argparse import matplotlib.pyplot as plt from pyske.core import Timing -from pyske.examples.list.k_means import k_means +from pyske.examples.list.k_means import k_means, k_means_init from pyske.examples.list import util +from pyske.core.support import parallel PAR = 'parallel' SEQ = 'sequential' @@ -32,13 +32,12 @@ execute = util.select_execute(choice) example = k_means execute(lambda: print('Version:\t', choice)) - gc.disable() for iteration in range(1, 1 + num_iter): timing.start() - result = example(input_list, clusters) + result = example(input_list, k_means_init, clusters) timing.stop() - gc.collect() util.print_experiment("", timing.get(), execute, iteration) - for i in range(len(result)): - plt.scatter([point.x for point in result[i]], [point.y for point in result[i]]) - plt.show() + #if parallel.PID == 0: + # for i in range((len(result))): + # plt.scatter([point.x for point in result[i]], [point.y for point in result[i]]) + # plt.show() From da4a4d6a5a62b4de6e5434b72c9862e89be67d4d Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Thu, 27 May 2021 14:34:55 +0200 Subject: [PATCH 09/34] pylinting, typing --- pyske/examples/list/k_means.py | 39 ++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index 89cf643..f5da8a0 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -3,33 +3,33 @@ """ import random from typing import Callable, Tuple + from pyske.core.interface import List from pyske.core.list import SList from pyske.core.util.point import Point -def cluster_index(point, centroids): +def cluster_index(point: Point, centroids: SList[Point]) -> Tuple[Point, int]: """ Get the centroid index of the closest centroid """ min_dist = float("inf") p_centroid = centroids[0] - for c in centroids: - if point.distance(c) < min_dist: - min_dist = point.distance(c) - p_centroid = c + for centroid in centroids: + if point.distance(centroid) < min_dist: + min_dist = point.distance(centroid) + p_centroid = centroid return point, centroids.index(p_centroid) -def assign_clusters(input_list, centroids): +def assign_clusters(input_list: List[Point], centroids: SList[Point]) -> List[Tuple[Point, int]]: """ - Assign to each point to a cluster + Assign each point to a cluster """ - return input_list.map(lambda x: cluster_index(x, centroids)) -def update_centroids(clusters, centroids): +def update_centroids(clusters: List[Tuple[Point, int]], centroids: SList[Point]): """ Update centroids of clusters """ @@ -52,11 +52,10 @@ def max_dist(pair_a: Tuple[Point, float], pair_b: Tuple[Point, float]): """ if pair_a[1] > pair_b[1]: return pair_a - else: - return pair_b + return pair_b -def k_means_init(input_list: List, n_cluster: int): +def k_means_init(input_list: List[Point], n_cluster: int) -> SList[Point]: """ K-means++ initialisation @@ -66,8 +65,8 @@ def k_means_init(input_list: List, n_cluster: int): :return: n_cluster centroids """ centroids = SList([]) - c1 = input_list.to_seq()[random.randint(0, input_list.length() - 1)] - centroids.append(c1) + first_centroid = input_list.to_seq()[random.randint(0, input_list.length() - 1)] + centroids.append(first_centroid) for _ in range(n_cluster - 1): dist = input_list.map(lambda x: x.distance(centroids[0])) @@ -82,8 +81,8 @@ def k_means_init(input_list: List, n_cluster: int): return centroids -def k_means(input_list: List, init_function: Callable[[List, int], List], n_cluster: int, - max_iter: int = 10): +def k_means(input_list: List[Point], init_function: Callable[[List, int], List], n_cluster: int, + max_iter: int = 10) -> SList[SList[Point]]: """ K-means algorithm on a list of point @@ -97,7 +96,6 @@ def k_means(input_list: List, init_function: Callable[[List, int], List], n_clus centroids = init_function(input_list, n_cluster) j = 0 - while j < max_iter: clusters = assign_clusters(input_list, centroids) @@ -105,4 +103,9 @@ def k_means(input_list: List, init_function: Callable[[List, int], List], n_clus j = j + 1 - return clusters + clusters2d = SList([]) + for i in range(len(centroids)): + clusters2d.append(clusters.filter(lambda x, num_cluster=i: x[1] == num_cluster) + .map(lambda x: x[0]).to_seq() + ) + return clusters2d From 9f1e0fcead895f6aa292a0e497f56a99506e2306 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Thu, 27 May 2021 14:39:37 +0200 Subject: [PATCH 10/34] FIX: bad list initialization parallel list --- pyske/examples/list/util.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index d6517fa..703dfbb 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -102,11 +102,13 @@ def rand_point_list(cls, size, clusters): :return: a list of the given class """ from pyske.core.util.point import Point - print(clusters) - x, y_true = make_blobs(n_samples=size, centers=clusters) + from pyske.core import Distribution + + x, _ = make_blobs(n_samples=size, centers=clusters) x = x.tolist() x = list(map(lambda y: Point(y[0], y[1]), x)) - return cls.init(lambda i: x[i], size) + distr = Distribution().balanced(size) + return cls.from_seq(x).distribute(distr) def print_experiment(result, timing, execute, iteration=None): From a8d0385a486216141b46fb267616ca56fb1d4816 Mon Sep 17 00:00:00 2001 From: Evan MULUMBA Date: Thu, 27 May 2021 20:39:52 +0200 Subject: [PATCH 11/34] Changing sample type from custom type "Point" to Tuple --- pyske/examples/list/k_means.py | 43 ++++++++++++++++++++++------- pyske/examples/list/k_means_main.py | 3 +- pyske/examples/list/util.py | 17 ++++++++++++ 3 files changed, 52 insertions(+), 11 deletions(-) diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index 89cf643..dbddc8e 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -1,24 +1,47 @@ """ K-Means """ +import operator import random +from math import sqrt from typing import Callable, Tuple from pyske.core.interface import List from pyske.core.list import SList from pyske.core.util.point import Point -def cluster_index(point, centroids): +def distance2D(sample_1, sample_2): + """ + return distance between 2d sample. + + Examples:: + + >>> from pyske.core.util.point import Point + >>> p1 = Point(5,5) + >>> p2 = Point(5,7) + >>> p1.distance(p2) + 2.0 + + :param other: a point + :return: distance from other point + + """ + dx = sample_1[0] - sample_2[0] + dy = sample_1[1] - sample_2[1] + return sqrt(dx ** 2 + dy ** 2) + + +def cluster_index(sample, centroids): """ Get the centroid index of the closest centroid """ min_dist = float("inf") p_centroid = centroids[0] for c in centroids: - if point.distance(c) < min_dist: - min_dist = point.distance(c) + if distance2D(sample, c) < min_dist: + min_dist = distance2D(sample, c) p_centroid = c - return point, centroids.index(p_centroid) + return sample, centroids.index(p_centroid) def assign_clusters(input_list, centroids): @@ -37,16 +60,16 @@ def update_centroids(clusters, centroids): i = 0 while i < len(centroids): cluster = clusters.filter(lambda x: x[1] == i) - sum_cluster = cluster.map(lambda x: x[0]).reduce(lambda x, y: x + y) - average_point = sum_cluster / cluster.length() + sum_cluster = cluster.map(lambda x: x[0]).reduce(lambda a, b: tuple(map(operator.add, a, b))) + average_point = [x/clusters.length() for x in sum_cluster] centroid = clusters.reduce( - lambda x, y: x if average_point.distance(x[0]) < average_point.distance(y[0]) else y)[0] + lambda x, y: x if distance2D(average_point, x[0]) < distance2D(average_point, y[0]) else y)[0] new_centroids.append(centroid) i += 1 return new_centroids -def max_dist(pair_a: Tuple[Point, float], pair_b: Tuple[Point, float]): +def max_dist(pair_a, pair_b): """ Return the tuple with the maximum distance """ @@ -70,9 +93,9 @@ def k_means_init(input_list: List, n_cluster: int): centroids.append(c1) for _ in range(n_cluster - 1): - dist = input_list.map(lambda x: x.distance(centroids[0])) + dist = input_list.map(lambda sample: distance2D(sample, centroids[0])) for i in range(1, len(centroids)): - temp_dist = input_list.map(lambda x, index=i: x.distance(centroids[index])) + temp_dist = input_list.map(lambda sample, index=i: distance2D(sample, centroids[index])) dist = dist.map2(lambda x, y: y if y < x else x, temp_dist) zip_list = input_list.zip(dist) diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py index 0a3b171..65febba 100644 --- a/pyske/examples/list/k_means_main.py +++ b/pyske/examples/list/k_means_main.py @@ -27,7 +27,8 @@ clusters = args.clusters pyske_list_class = util.select_pyske_list(choice) - input_list = util.rand_point_list(pyske_list_class, size, clusters) + #input_list = util.rand_point_list(pyske_list_class, size, clusters) + input_list = util.rand_2D_sample_list(pyske_list_class, size, clusters) timing = Timing() execute = util.select_execute(choice) example = k_means diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index d6517fa..2a7327b 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -109,6 +109,23 @@ def rand_point_list(cls, size, clusters): return cls.init(lambda i: x[i], size) +def rand_2D_sample_list(cls, size , clusters): + """ + Return a randomly generated list of 2D sample. + + :param cls: the class of the generated list. + :param size: a positive number + Precondition: size >= 0 + :param clusters: number of clusters + :return: a list of the given class + """ + print(clusters) + x, y_true = make_blobs(n_samples=size, centers=clusters) + x = x.tolist() + x = list(map(lambda y: (y[0], y[1]), x)) + return cls.init(lambda i: x[i], size) + + def print_experiment(result, timing, execute, iteration=None): """ Print the result and timing of the experiment. From c617aade25a15c196258ad3464930c6ebf67b37d Mon Sep 17 00:00:00 2001 From: Evan MULUMBA Date: Fri, 28 May 2021 14:16:39 +0200 Subject: [PATCH 12/34] Add point_interface and changing the class Point to Point_2D --- pyske/core/util/{point.py => point_2D.py} | 35 ++++++++++++----------- pyske/core/util/point_Interface.py | 25 ++++++++++++++++ pyske/examples/list/k_means.py | 16 +++++------ pyske/examples/list/util.py | 4 +-- 4 files changed, 53 insertions(+), 27 deletions(-) rename pyske/core/util/{point.py => point_2D.py} (61%) create mode 100644 pyske/core/util/point_Interface.py diff --git a/pyske/core/util/point.py b/pyske/core/util/point_2D.py similarity index 61% rename from pyske/core/util/point.py rename to pyske/core/util/point_2D.py index b4c43f0..6a0dfd5 100644 --- a/pyske/core/util/point.py +++ b/pyske/core/util/point_2D.py @@ -3,10 +3,11 @@ """ from math import sqrt +from pyske.core.util.point_Interface import Point_Interface -class Point(object): - """A class to represent a point""" +class Point_2D(Point_Interface): + """A class to represent a 2D point""" def __init__(self, x=0, y=0): self.__x = x @@ -16,7 +17,7 @@ def __repr__(self): return "(%s, %s)" % (self.__x, self.__y) def __eq__(self, other): - if isinstance(other, Point): + if isinstance(other, Point_2D): return self.__x == other.x and self.__y == other.__y return False @@ -26,13 +27,13 @@ def __add__(self, other): Examples:: - >>> p1 = Point(5,5) - >>> p2 = Point(5,7) + >>> p1 = Point_2D(5,5) + >>> p2 = Point_2D(5,7) >>> p1 + p2 (10, 12) """ - if isinstance(other, Point): - return Point(self.x + other.x, self.y + other.y) + if isinstance(other, Point_2D): + return Point_2D(self.x + other.x, self.y + other.y) def __mul__(self, other): """ @@ -40,21 +41,21 @@ def __mul__(self, other): Examples:: - >>> p1 = Point(5,5) - >>> p2 = Point(5,7) + >>> p1 = Point_2D(5,5) + >>> p2 = Point_2D(5,7) >>> p1 * 5 (25, 25) >>> p1 * p2 (25, 35) """ - if isinstance(other, Point): - return Point(self.x * other.x, self.y * other.y) + if isinstance(other, Point_2D): + return Point_2D(self.x * other.x, self.y * other.y) if isinstance(other, int) or isinstance(other, float): - return Point(self.x * other, self.y * other) + return Point_2D(self.x * other, self.y * other) def __truediv__(self, other): if isinstance(other, int): - return Point(self.x / other, self.y / other) + return Point_2D(self.x / other, self.y / other) @property def x(self): @@ -66,15 +67,15 @@ def y(self): """Y getter""" return self.__y - def distance(self, other: 'Point'): + def distance(self, other: 'Point_2D'): """ Returns the distance from another point. Examples:: - >>> from pyske.core.util.point import Point - >>> p1 = Point(5,5) - >>> p2 = Point(5,7) + >>> from pyske.core.util.point_2D import Point_2D + >>> p1 = Point_2D(5,5) + >>> p2 = Point_2D(5,7) >>> p1.distance(p2) 2.0 diff --git a/pyske/core/util/point_Interface.py b/pyske/core/util/point_Interface.py new file mode 100644 index 0000000..aa3be5d --- /dev/null +++ b/pyske/core/util/point_Interface.py @@ -0,0 +1,25 @@ +""" +A module to represent a point +""" + + +class Point_Interface: + """Point interface to represent point of n dimensions""" + + def __repr__(self): + pass + + def __eq__(self, other): + pass + + def __add__(self, other): + pass + + def __mul__(self, other): + pass + + def __truediv__(self, other): + pass + + def distance(self, other): + pass diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index f5da8a0..42af105 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -6,10 +6,10 @@ from pyske.core.interface import List from pyske.core.list import SList -from pyske.core.util.point import Point +from pyske.core.util.point_2D import Point_2D -def cluster_index(point: Point, centroids: SList[Point]) -> Tuple[Point, int]: +def cluster_index(point: Point_2D, centroids: SList[Point_2D]) -> Tuple[Point_2D, int]: """ Get the centroid index of the closest centroid """ @@ -22,14 +22,14 @@ def cluster_index(point: Point, centroids: SList[Point]) -> Tuple[Point, int]: return point, centroids.index(p_centroid) -def assign_clusters(input_list: List[Point], centroids: SList[Point]) -> List[Tuple[Point, int]]: +def assign_clusters(input_list: List[Point_2D], centroids: SList[Point_2D]) -> List[Tuple[Point_2D, int]]: """ Assign each point to a cluster """ return input_list.map(lambda x: cluster_index(x, centroids)) -def update_centroids(clusters: List[Tuple[Point, int]], centroids: SList[Point]): +def update_centroids(clusters: List[Tuple[Point_2D, int]], centroids: SList[Point_2D]): """ Update centroids of clusters """ @@ -46,7 +46,7 @@ def update_centroids(clusters: List[Tuple[Point, int]], centroids: SList[Point] return new_centroids -def max_dist(pair_a: Tuple[Point, float], pair_b: Tuple[Point, float]): +def max_dist(pair_a: Tuple[Point_2D, float], pair_b: Tuple[Point_2D, float]): """ Return the tuple with the maximum distance """ @@ -55,7 +55,7 @@ def max_dist(pair_a: Tuple[Point, float], pair_b: Tuple[Point, float]): return pair_b -def k_means_init(input_list: List[Point], n_cluster: int) -> SList[Point]: +def k_means_init(input_list: List[Point_2D], n_cluster: int) -> SList[Point_2D]: """ K-means++ initialisation @@ -81,8 +81,8 @@ def k_means_init(input_list: List[Point], n_cluster: int) -> SList[Point]: return centroids -def k_means(input_list: List[Point], init_function: Callable[[List, int], List], n_cluster: int, - max_iter: int = 10) -> SList[SList[Point]]: +def k_means(input_list: List[Point_2D], init_function: Callable[[List, int], List], n_cluster: int, + max_iter: int = 10) -> SList[SList[Point_2D]]: """ K-means algorithm on a list of point diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index 703dfbb..e9e9e3c 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -101,12 +101,12 @@ def rand_point_list(cls, size, clusters): :param clusters: number of clusters :return: a list of the given class """ - from pyske.core.util.point import Point + from pyske.core.util.point_2D import Point_2D from pyske.core import Distribution x, _ = make_blobs(n_samples=size, centers=clusters) x = x.tolist() - x = list(map(lambda y: Point(y[0], y[1]), x)) + x = list(map(lambda y: Point_2D(y[0], y[1]), x)) distr = Distribution().balanced(size) return cls.from_seq(x).distribute(distr) From ff39d0b502325d502a00fb9dbd26b2c33bbfdf82 Mon Sep 17 00:00:00 2001 From: Evan MULUMBA Date: Fri, 28 May 2021 14:22:12 +0200 Subject: [PATCH 13/34] FIX: input_list type form Tuple to Point_2D --- pyske/examples/list/k_means_main.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py index 65febba..0a3b171 100644 --- a/pyske/examples/list/k_means_main.py +++ b/pyske/examples/list/k_means_main.py @@ -27,8 +27,7 @@ clusters = args.clusters pyske_list_class = util.select_pyske_list(choice) - #input_list = util.rand_point_list(pyske_list_class, size, clusters) - input_list = util.rand_2D_sample_list(pyske_list_class, size, clusters) + input_list = util.rand_point_list(pyske_list_class, size, clusters) timing = Timing() execute = util.select_execute(choice) example = k_means From 32f557ec6cb0fa29af29e5e097b620498d8448ee Mon Sep 17 00:00:00 2001 From: Evan MULUMBA Date: Fri, 28 May 2021 14:38:04 +0200 Subject: [PATCH 14/34] Add class point_3D.py --- pyske/core/util/point_2D.py | 4 +- pyske/core/util/point_3D.py | 79 +++++++++++++++++++++++++++++ pyske/examples/list/k_means_main.py | 2 +- pyske/examples/list/util.py | 26 ++++------ 4 files changed, 92 insertions(+), 19 deletions(-) create mode 100644 pyske/core/util/point_3D.py diff --git a/pyske/core/util/point_2D.py b/pyske/core/util/point_2D.py index 6a0dfd5..f6f5f7f 100644 --- a/pyske/core/util/point_2D.py +++ b/pyske/core/util/point_2D.py @@ -1,5 +1,5 @@ """ -A module to represent a point +A module to represent a 2D point """ from math import sqrt @@ -18,7 +18,7 @@ def __repr__(self): def __eq__(self, other): if isinstance(other, Point_2D): - return self.__x == other.x and self.__y == other.__y + return self.__x == other.__x and self.__y == other.__y return False def __add__(self, other): diff --git a/pyske/core/util/point_3D.py b/pyske/core/util/point_3D.py new file mode 100644 index 0000000..9cde20f --- /dev/null +++ b/pyske/core/util/point_3D.py @@ -0,0 +1,79 @@ +""" +A module to represent a 3D point +""" + +from math import sqrt +from pyske.core.util.point_Interface import Point_Interface + + +class Point_3D(Point_Interface): + """A class to represent a 3D point""" + + def __init__(self, x=0, y=0, z=0): + self.__x = x + self.__y = y + self.__z = z + + def __repr__(self): + return "(%s, %s, %s)" % (self.__x, self.__y, self.__z) + + def __eq__(self, other): + if isinstance(other, Point_3D): + return self.__x == other.__x and self.__y == other.__y and self.__z == other.__z + return False + + def __add__(self, other): + """ + Addition of two points + + Examples:: + + >>> p1 = Point_3D(5,5,2) + >>> p2 = Point_3D(5,7,1) + >>> p1 + p2 + (10, 12, 3) + """ + if isinstance(other, Point_3D): + return Point_3D(self.x + other.x, self.y + other.y, self.z + other.z) + + def __mul__(self, other): + pass + + def __truediv__(self, other): + pass + + @property + def x(self): + """X getter""" + return self.__x + + @property + def y(self): + """Y getter""" + return self.__y + + @property + def z(self): + """Z getter""" + return self.z + + def distance(self, other): + """ + Returns the distance from another 3D point. + + Examples:: + + >>> from pyske.core.util.point_2D import Point_2D + >>> p1 = Point_3D(5,5,2) + >>> p2 = Point_3D(5,7,1) + >>> p1.distance(p2) + 2.24 + + :param other: a point + :return: distance from other point + + """ + dx = self.__x - other.x + dy = self.__y - other.y + dz = self.__x - other.z + return sqrt(dx ** 2 + dy ** 2 + dz ** 2) diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py index 0a3b171..b6b20fa 100644 --- a/pyske/examples/list/k_means_main.py +++ b/pyske/examples/list/k_means_main.py @@ -27,7 +27,7 @@ clusters = args.clusters pyske_list_class = util.select_pyske_list(choice) - input_list = util.rand_point_list(pyske_list_class, size, clusters) + input_list = util.rand_point_2D_list(pyske_list_class, size, clusters) timing = Timing() execute = util.select_execute(choice) example = k_means diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index bdb68a7..648fe9d 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -91,9 +91,9 @@ def rand_list(cls, size): return cls.init(lambda _: float(random.randint(-100, 100)), size) -def rand_point_list(cls, size, clusters): +def rand_point_2D_list(cls, size, clusters): """ - Return a randomly generated list of points. + Return a randomly generated list of 2D points. :param cls: the class of the generated list. :param size: a positive number @@ -110,22 +110,16 @@ def rand_point_list(cls, size, clusters): distr = Distribution().balanced(size) return cls.from_seq(x).distribute(distr) - -def rand_2D_sample_list(cls, size , clusters): +def rand_point_3D_list(cls, size, clusters): """ - Return a randomly generated list of 2D sample. + Return a randomly generated list of 3D points. - :param cls: the class of the generated list. - :param size: a positive number - Precondition: size >= 0 - :param clusters: number of clusters - :return: a list of the given class - """ - print(clusters) - x, y_true = make_blobs(n_samples=size, centers=clusters) - x = x.tolist() - x = list(map(lambda y: (y[0], y[1]), x)) - return cls.init(lambda i: x[i], size) + :param cls: the class of the generated list. + :param size: a positive number + Precondition: size >= 0 + :param clusters: number of clusters + :return: a list of the given class + """ def print_experiment(result, timing, execute, iteration=None): From 7e6966d8534c82393966b83456aa3974822c4c61 Mon Sep 17 00:00:00 2001 From: Evan MULUMBA Date: Fri, 28 May 2021 14:42:05 +0200 Subject: [PATCH 15/34] rand_point_2D_list / rand_point_3D_list --- pyske/examples/list/util.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index 648fe9d..8124598 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -110,16 +110,25 @@ def rand_point_2D_list(cls, size, clusters): distr = Distribution().balanced(size) return cls.from_seq(x).distribute(distr) + def rand_point_3D_list(cls, size, clusters): """ - Return a randomly generated list of 3D points. + Return a randomly generated list of 2D points. + + :param cls: the class of the generated list. + :param size: a positive number + Precondition: size >= 0 + :param clusters: number of clusters + :return: a list of the given class + """ + from pyske.core.util.point_3D import Point_3D + from pyske.core import Distribution - :param cls: the class of the generated list. - :param size: a positive number - Precondition: size >= 0 - :param clusters: number of clusters - :return: a list of the given class - """ + x, _ = make_blobs(n_samples=size, centers=clusters) + x = x.tolist() + x = list(map(lambda y: Point_3D(y[0], y[1], y[2]), x)) + distr = Distribution().balanced(size) + return cls.from_seq(x).distribute(distr) def print_experiment(result, timing, execute, iteration=None): From 9b147eba58a6549ee59202f2ad5060918133674c Mon Sep 17 00:00:00 2001 From: Evan MULUMBA Date: Fri, 28 May 2021 15:30:48 +0200 Subject: [PATCH 16/34] Point_3D update --- pyske/core/util/point_3D.py | 22 +++++++++++++++++++--- pyske/examples/list/k_means_main.py | 7 +++++-- pyske/examples/list/util.py | 2 +- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/pyske/core/util/point_3D.py b/pyske/core/util/point_3D.py index 9cde20f..c6e1aa5 100644 --- a/pyske/core/util/point_3D.py +++ b/pyske/core/util/point_3D.py @@ -37,10 +37,26 @@ def __add__(self, other): return Point_3D(self.x + other.x, self.y + other.y, self.z + other.z) def __mul__(self, other): - pass + """ + Multiplication by a point or a scalar + + Examples:: + + >>> p1 = Point_3D(5,5,2) + >>> p2 = Point_3D(5,7,1) + >>> p1 * 5 + (25, 25, 10) + >>> p1 * p2 + (25, 35, 2) + """ + if isinstance(other, Point_3D): + return Point_3D(self.x * other.x, self.y * other.y, self.z * other.z) + if isinstance(other, int) or isinstance(other, float): + return Point_3D(self.x * other, self.y * other, self.z * other.z) def __truediv__(self, other): - pass + if isinstance(other, int): + return Point_3D(self.x / other, self.y / other, self.z / other) @property def x(self): @@ -55,7 +71,7 @@ def y(self): @property def z(self): """Z getter""" - return self.z + return self.__z def distance(self, other): """ diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py index b6b20fa..29eb81d 100644 --- a/pyske/examples/list/k_means_main.py +++ b/pyske/examples/list/k_means_main.py @@ -27,7 +27,10 @@ clusters = args.clusters pyske_list_class = util.select_pyske_list(choice) - input_list = util.rand_point_2D_list(pyske_list_class, size, clusters) + + # input_list = util.rand_point_2D_list(pyske_list_class, size, clusters) + input_list = util.rand_point_3D_list(pyske_list_class, size, clusters) + timing = Timing() execute = util.select_execute(choice) example = k_means @@ -37,7 +40,7 @@ result = example(input_list, k_means_init, clusters) timing.stop() util.print_experiment("", timing.get(), execute, iteration) - #if parallel.PID == 0: + # if parallel.PID == 0: # for i in range((len(result))): # plt.scatter([point.x for point in result[i]], [point.y for point in result[i]]) # plt.show() diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index 8124598..5279386 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -124,7 +124,7 @@ def rand_point_3D_list(cls, size, clusters): from pyske.core.util.point_3D import Point_3D from pyske.core import Distribution - x, _ = make_blobs(n_samples=size, centers=clusters) + x, _ = make_blobs(n_samples=size, centers=clusters, n_features=3) x = x.tolist() x = list(map(lambda y: Point_3D(y[0], y[1], y[2]), x)) distr = Distribution().balanced(size) From 5b89f49ec00fb655c9dd5794c5cd75a731b7644f Mon Sep 17 00:00:00 2001 From: Evan MULUMBA Date: Fri, 28 May 2021 15:43:55 +0200 Subject: [PATCH 17/34] Typing Point_2D -> Point_Interface --- pyske/examples/list/k_means.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index 42af105..52b4e2b 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -6,10 +6,10 @@ from pyske.core.interface import List from pyske.core.list import SList -from pyske.core.util.point_2D import Point_2D +from pyske.core.util.point_Interface import Point_Interface -def cluster_index(point: Point_2D, centroids: SList[Point_2D]) -> Tuple[Point_2D, int]: +def cluster_index(point: Point_Interface, centroids: SList[Point_Interface]) -> Tuple[Point_Interface, int]: """ Get the centroid index of the closest centroid """ @@ -22,14 +22,14 @@ def cluster_index(point: Point_2D, centroids: SList[Point_2D]) -> Tuple[Point_2D return point, centroids.index(p_centroid) -def assign_clusters(input_list: List[Point_2D], centroids: SList[Point_2D]) -> List[Tuple[Point_2D, int]]: +def assign_clusters(input_list: List[Point_Interface], centroids: SList[Point_Interface]) -> List[Tuple[Point_Interface, int]]: """ Assign each point to a cluster """ return input_list.map(lambda x: cluster_index(x, centroids)) -def update_centroids(clusters: List[Tuple[Point_2D, int]], centroids: SList[Point_2D]): +def update_centroids(clusters: List[Tuple[Point_Interface, int]], centroids: SList[Point_Interface]): """ Update centroids of clusters """ @@ -46,7 +46,7 @@ def update_centroids(clusters: List[Tuple[Point_2D, int]], centroids: SList[Poin return new_centroids -def max_dist(pair_a: Tuple[Point_2D, float], pair_b: Tuple[Point_2D, float]): +def max_dist(pair_a: Tuple[Point_Interface, float], pair_b: Tuple[Point_Interface, float]): """ Return the tuple with the maximum distance """ @@ -55,7 +55,7 @@ def max_dist(pair_a: Tuple[Point_2D, float], pair_b: Tuple[Point_2D, float]): return pair_b -def k_means_init(input_list: List[Point_2D], n_cluster: int) -> SList[Point_2D]: +def k_means_init(input_list: List[Point_Interface], n_cluster: int) -> SList[Point_Interface]: """ K-means++ initialisation @@ -81,8 +81,8 @@ def k_means_init(input_list: List[Point_2D], n_cluster: int) -> SList[Point_2D]: return centroids -def k_means(input_list: List[Point_2D], init_function: Callable[[List, int], List], n_cluster: int, - max_iter: int = 10) -> SList[SList[Point_2D]]: +def k_means(input_list: List[Point_Interface], init_function: Callable[[List, int], List], n_cluster: int, + max_iter: int = 10) -> SList[SList[Point_Interface]]: """ K-means algorithm on a list of point From eb792d573bd6ccd067c78620e2236a0972c83fc7 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Fri, 28 May 2021 17:11:19 +0200 Subject: [PATCH 18/34] optimization update_centroids --- pyske/examples/list/k_means.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index f5da8a0..cc28b11 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -29,20 +29,18 @@ def assign_clusters(input_list: List[Point], centroids: SList[Point]) -> List[Tu return input_list.map(lambda x: cluster_index(x, centroids)) -def update_centroids(clusters: List[Tuple[Point, int]], centroids: SList[Point]): +def update_centroids(clusters: List[Tuple[Point, int]], centroids: SList[Point]): """ Update centroids of clusters """ - new_centroids = SList([]) - i = 0 - while i < len(centroids): - cluster = clusters.filter(lambda x: x[1] == i) - sum_cluster = cluster.map(lambda x: x[0]).reduce(lambda x, y: x + y) - average_point = sum_cluster / cluster.length() - centroid = clusters.reduce( - lambda x, y: x if average_point.distance(x[0]) < average_point.distance(y[0]) else y)[0] - new_centroids.append(centroid) - i += 1 + + new_centroids = SList.init(lambda _: (Point(), _, _), len(centroids)) + + new_centroids = new_centroids.mapi(lambda i, x: clusters.map_reduce(lambda w: (w[0], w[1], 1), + lambda y, z: (y[0] + z[0], y[1], y[2] + z[2]) if y[1] == i and z[1] == i else ( + z if y[1] != i else y))) + new_centroids = new_centroids.map(lambda x: x[0] / x[2]) + return new_centroids From 467b33bf8134354324827be18c3c9a96d707935c Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Fri, 28 May 2021 17:37:05 +0200 Subject: [PATCH 19/34] refactoring because of new point implementation --- pyske/examples/list/k_means.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index 7a47e0b..6d1ac4e 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -34,7 +34,7 @@ def update_centroids(clusters: List[Tuple[Point_Interface, int]], centroids: SLi Update centroids of clusters """ - new_centroids = SList.init(lambda _: (Point(), _, _), len(centroids)) + new_centroids = SList.init(lambda _: (_, _, _), len(centroids)) new_centroids = new_centroids.mapi(lambda i, x: clusters.map_reduce(lambda w: (w[0], w[1], 1), lambda y, z: (y[0] + z[0], y[1], y[2] + z[2]) if y[1] == i and z[1] == i else ( From 8c2cf82d97f2f2462b60b3b6502326dd72958f25 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Mon, 31 May 2021 11:16:25 +0200 Subject: [PATCH 20/34] use of parallelism random choice first centroid --- pyske/examples/list/k_means.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index 6d1ac4e..1b224f8 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -7,6 +7,7 @@ from pyske.core.interface import List from pyske.core.list import SList from pyske.core.util.point_Interface import Point_Interface +from pyske.core.util.par import procs def cluster_index(point: Point_Interface, centroids: SList[Point_Interface]) -> Tuple[Point_Interface, int]: @@ -63,7 +64,9 @@ def k_means_init(input_list: List[Point_Interface], n_cluster: int) -> SList[Poi :return: n_cluster centroids """ centroids = SList([]) - first_centroid = input_list.to_seq()[random.randint(0, input_list.length() - 1)] + first_centroid = input_list.get_partition()\ + .map(lambda l: l[random.randint(0, l.length() - 1)])\ + .to_seq()[random.randint(0, list(procs())[len(list(procs())) - 1])] centroids.append(first_centroid) for _ in range(n_cluster - 1): From 27a5039d749c16598b4fb8aa8dbbfb5191cc1ddd Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Tue, 1 Jun 2021 17:16:47 +0200 Subject: [PATCH 21/34] add point dimensions in k-means-main's options --- pyske/examples/list/k_means_main.py | 6 ++-- pyske/examples/list/util.py | 45 ++++++++++++++--------------- 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py index 29eb81d..163f761 100644 --- a/pyske/examples/list/k_means_main.py +++ b/pyske/examples/list/k_means_main.py @@ -19,17 +19,17 @@ parser.add_argument("--iter", help="number of iterations", type=int, default=30) parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ) parser.add_argument("--clusters", help="number of clusters", type=int, default=3) + parser.add_argument("--dimensions", help="point dimensions", type=int, default=2) args = parser.parse_args() size = args.size num_iter = args.iter choice = args.data clusters = args.clusters + dimensions = args.dimensions pyske_list_class = util.select_pyske_list(choice) - - # input_list = util.rand_point_2D_list(pyske_list_class, size, clusters) - input_list = util.rand_point_3D_list(pyske_list_class, size, clusters) + input_list = util.rand_point_list(pyske_list_class, size, clusters, dimensions) timing = Timing() execute = util.select_execute(choice) diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index 5279386..ef79d51 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -3,6 +3,7 @@ """ from sklearn.datasets import make_blobs +from pyske.core import Distribution PAR = 'parallel' SEQ = 'sequential' @@ -90,43 +91,39 @@ def rand_list(cls, size): import random return cls.init(lambda _: float(random.randint(-100, 100)), size) - -def rand_point_2D_list(cls, size, clusters): +def select_point_dimensions(dimensions): """ - Return a randomly generated list of 2D points. + Return a PySke list class. - :param cls: the class of the generated list. - :param size: a positive number - Precondition: size >= 0 - :param clusters: number of clusters - :return: a list of the given class + :param dimensions: point dimensions + Precondition: dimensions >= 2 + :return: a Point """ - from pyske.core.util.point_2D import Point_2D - from pyske.core import Distribution - - x, _ = make_blobs(n_samples=size, centers=clusters) - x = x.tolist() - x = list(map(lambda y: Point_2D(y[0], y[1]), x)) - distr = Distribution().balanced(size) - return cls.from_seq(x).distribute(distr) - + # pylint: disable=import-outside-toplevel + if dimensions == 2: + from pyske.core.util.point_2D import Point_2D as PointClass + elif dimensions == 3: + from pyske.core.util.point_3D import Point_3D as PointClass + else: + from pyske.core.util.point_2D import Point_2D as PointClass + return PointClass -def rand_point_3D_list(cls, size, clusters): +def rand_point_list(cls, size, clusters, dimensions): """ - Return a randomly generated list of 2D points. + Return a randomly generated list of points. :param cls: the class of the generated list. :param size: a positive number Precondition: size >= 0 :param clusters: number of clusters + :param dimensions: point dimensions + Precondition: dimensions >= 2 :return: a list of the given class """ - from pyske.core.util.point_3D import Point_3D - from pyske.core import Distribution - - x, _ = make_blobs(n_samples=size, centers=clusters, n_features=3) + x, _ = make_blobs(n_samples=size, centers=clusters, n_features=dimensions) x = x.tolist() - x = list(map(lambda y: Point_3D(y[0], y[1], y[2]), x)) + pointclass = select_point_dimensions(dimensions) + x = list(map(lambda y: pointclass(*y), x)) distr = Distribution().balanced(size) return cls.from_seq(x).distribute(distr) From 84f2daaa554b88110374334e5c98ce5c9421bd15 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Wed, 2 Jun 2021 11:42:41 +0200 Subject: [PATCH 22/34] interface convention --- pyske/core/util/point_Interface.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyske/core/util/point_Interface.py b/pyske/core/util/point_Interface.py index aa3be5d..6196c47 100644 --- a/pyske/core/util/point_Interface.py +++ b/pyske/core/util/point_Interface.py @@ -1,9 +1,9 @@ """ A module to represent a point """ +from abc import ABC - -class Point_Interface: +class Point_Interface(ABC): """Point interface to represent point of n dimensions""" def __repr__(self): From 82b7a7decbd6dcfffa6ca5a6709940948c173a08 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Fri, 4 Jun 2021 14:55:56 +0200 Subject: [PATCH 23/34] parallel optimization update_centroids --- pyske/examples/list/k_means.py | 66 +++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index 1b224f8..f238ffa 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -10,7 +10,8 @@ from pyske.core.util.par import procs -def cluster_index(point: Point_Interface, centroids: SList[Point_Interface]) -> Tuple[Point_Interface, int]: +def cluster_index(point: Point_Interface, centroids: SList[Point_Interface]) -> \ + Tuple[Point_Interface, int]: """ Get the centroid index of the closest centroid """ @@ -23,24 +24,37 @@ def cluster_index(point: Point_Interface, centroids: SList[Point_Interface]) -> return point, centroids.index(p_centroid) -def assign_clusters(input_list: List[Point_Interface], centroids: SList[Point_Interface]) -> List[Tuple[Point_Interface, int]]: +def assign_clusters(input_list: List[Point_Interface], centroids: SList[Point_Interface]) -> \ + List[Tuple[Point_Interface, int]]: """ Assign each point to a cluster """ return input_list.map(lambda x: cluster_index(x, centroids)) -def update_centroids(clusters: List[Tuple[Point_Interface, int]], centroids: SList[Point_Interface]): +def update_centroids(clusters: List[Tuple[Point_Interface, int]], + centroids: SList[Point_Interface]): """ Update centroids of clusters """ - new_centroids = SList.init(lambda _: (_, _, _), len(centroids)) - - new_centroids = new_centroids.mapi(lambda i, x: clusters.map_reduce(lambda w: (w[0], w[1], 1), - lambda y, z: (y[0] + z[0], y[1], y[2] + z[2]) if y[1] == i and z[1] == i else ( - z if y[1] != i else y))) - new_centroids = new_centroids.map(lambda x: x[0] / x[2]) + def centroids_list_update(list_to_update, item): + if isinstance(item, SList): + list_to_update = list_to_update.map2(lambda a_pair, b_pair: (a_pair[0] + b_pair[0], + a_pair[1] + b_pair[1]), + item) + else: + index = item[1] + point = item[0] + list_to_update[index] = (list_to_update[index][0] + point, + list_to_update[index][1] + 1) + return list_to_update + + point_class = type(centroids[0]) + neutral_list = SList.init(lambda _: (point_class(), 0), len(centroids)) + new_centroids = clusters.reduce(lambda a_item, b_item: + centroids_list_update(a_item, b_item), neutral_list) + new_centroids = new_centroids.map(lambda x: x[0] / x[1]) return new_centroids @@ -58,15 +72,15 @@ def k_means_init(input_list: List[Point_Interface], n_cluster: int) -> SList[Poi """ K-means++ initialisation - :param input_list: a list of point - :param n_cluster: number of cluster + :param input_list: a list of points + :param n_cluster: number of clusters - :return: n_cluster centroids + :return: list of centroids """ centroids = SList([]) - first_centroid = input_list.get_partition()\ - .map(lambda l: l[random.randint(0, l.length() - 1)])\ - .to_seq()[random.randint(0, list(procs())[len(list(procs())) - 1])] + first_centroid = input_list.get_partition() \ + .map(lambda l: l[random.randint(0, l.length() - 1)]) \ + .to_seq()[random.randint(0, list(procs())[len(list(procs())) - 1])] centroids.append(first_centroid) for _ in range(n_cluster - 1): @@ -82,17 +96,18 @@ def k_means_init(input_list: List[Point_Interface], n_cluster: int) -> SList[Poi return centroids -def k_means(input_list: List[Point_Interface], init_function: Callable[[List, int], List], n_cluster: int, - max_iter: int = 10) -> SList[SList[Point_Interface]]: +def k_means(input_list: List[Point_Interface], init_function: Callable[[List, int], List], + n_cluster: int, + max_iter: int = 10) -> List[Tuple[Point_Interface, int]]: """ - K-means algorithm on a list of point + K-means algorithm on a list of points - :param input_list: a list of point - :param n_cluster: number of cluster - :param max_iter: number of iteration + :param input_list: a list of points + :param n_cluster: number of clusters + :param max_iter: number of iterations :param init_function: a function that initialize centroids - :return: 2 dimensions list of points + :return: a list of tuples with the point and his cluster index """ centroids = init_function(input_list, n_cluster) @@ -104,9 +119,4 @@ def k_means(input_list: List[Point_Interface], init_function: Callable[[List, in j = j + 1 - clusters2d = SList([]) - for i in range(len(centroids)): - clusters2d.append(clusters.filter(lambda x, num_cluster=i: x[1] == num_cluster) - .map(lambda x: x[0]).to_seq() - ) - return clusters2d + return clusters From 08a4dd6bde88d90e1329ae6ce4387f5fb35c9bdd Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Fri, 4 Jun 2021 15:11:08 +0200 Subject: [PATCH 24/34] adding option to show clusters graph of 2D points --- pyske/examples/list/k_means_main.py | 11 +++++------ pyske/examples/list/util.py | 21 +++++++++++++++++---- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py index 163f761..41fddc4 100644 --- a/pyske/examples/list/k_means_main.py +++ b/pyske/examples/list/k_means_main.py @@ -2,12 +2,10 @@ Execution of k_means """ import argparse -import matplotlib.pyplot as plt from pyske.core import Timing from pyske.examples.list.k_means import k_means, k_means_init from pyske.examples.list import util -from pyske.core.support import parallel PAR = 'parallel' SEQ = 'sequential' @@ -20,6 +18,8 @@ parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ) parser.add_argument("--clusters", help="number of clusters", type=int, default=3) parser.add_argument("--dimensions", help="point dimensions", type=int, default=2) + parser.add_argument("--show-clusters", help="display the clusters graph of 2D points", + action="store_true") args = parser.parse_args() size = args.size @@ -27,6 +27,7 @@ choice = args.data clusters = args.clusters dimensions = args.dimensions + show_clusters = args.show_clusters pyske_list_class = util.select_pyske_list(choice) input_list = util.rand_point_list(pyske_list_class, size, clusters, dimensions) @@ -40,7 +41,5 @@ result = example(input_list, k_means_init, clusters) timing.stop() util.print_experiment("", timing.get(), execute, iteration) - # if parallel.PID == 0: - # for i in range((len(result))): - # plt.scatter([point.x for point in result[i]], [point.y for point in result[i]]) - # plt.show() + if show_clusters and dimensions == 2: + util.print_2D_result(result.to_seq()) diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index ef79d51..e560a16 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -1,9 +1,13 @@ """ Utility functions for PySke examples """ +from typing import Tuple +import matplotlib.pyplot as plt from sklearn.datasets import make_blobs -from pyske.core import Distribution +from pyske.core import Distribution, SList +from pyske.core.support import parallel +from pyske.core.util.point_2D import Point_2D PAR = 'parallel' SEQ = 'sequential' @@ -100,9 +104,7 @@ def select_point_dimensions(dimensions): :return: a Point """ # pylint: disable=import-outside-toplevel - if dimensions == 2: - from pyske.core.util.point_2D import Point_2D as PointClass - elif dimensions == 3: + if dimensions == 3: from pyske.core.util.point_3D import Point_3D as PointClass else: from pyske.core.util.point_2D import Point_2D as PointClass @@ -127,6 +129,17 @@ def rand_point_list(cls, size, clusters, dimensions): distr = Distribution().balanced(size) return cls.from_seq(x).distribute(distr) +def print_2D_result(clusters_list: SList[Tuple[Point_2D, int]]): + """ + Print experiment of 2 dimension points k-means clustering + """ + if parallel.PID == 0: + x = clusters_list.map(lambda pair: pair[0].x) + y = clusters_list.map(lambda pair: pair[0].y) + colors = clusters_list.map(lambda pair: pair[1]) + plt.scatter(x, y, c=colors) + plt.show() + def print_experiment(result, timing, execute, iteration=None): """ From f6f46cd4b3e85fa943a627d868330eef03f5eb90 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Mon, 7 Jun 2021 16:05:27 +0200 Subject: [PATCH 25/34] k-means clustering documentation --- docs/api.rst | 60 ++++++++++++++++++++++++++++- docs/conf.py | 10 +++-- pyske/examples/list/k_means.py | 4 +- pyske/examples/list/k_means_main.py | 11 +----- pyske/examples/list/util.py | 17 +++++++- 5 files changed, 84 insertions(+), 18 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 2a9f345..19a69e2 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,2 +1,60 @@ PySke API -========= \ No newline at end of file +========= + +Pyske API offer applications implemented with list and tree skeletons. +The user can use the sequential or parallel version. +The parallel version allows a faster execution time when its launched on several processors or computers. + +Dot Product +----------- + +Discrete Fast Fourier Transform +------------------------------- + +K-means Clustering +------------------ + +K-means clustering is an unsupervised algorithm that aims to partition group of points in k clusters. + +K-means function +^^^^^^^^^^^^^^^^ + +.. py:module:: pyske.examples.list.k_means + +.. autofunction:: k_means + +Here the implementation of the 2 dimensions point class. + +.. autoclass:: pyske.core.util.point_2D.Point_2D + :members: + :special-members: + :show-inheritance: + :private-members: + :member-order: bysource + +Initialization functions +^^^^^^^^^^^^^^^^^^^^^^^^ + +.. autofunction:: k_means_init + +Running Example +^^^^^^^^^^^^^^^^^^^^ + +.. argparse:: + :module: pyske.examples.list.util + :func: k_means_parser + :prog: python3 k_means_main.py + + +Maximum Prefix Sum +------------------ + +Maximum Segment Sum +------------------- + +Parallel Regular Sampling Sort +------------------------------ + +Variance Example +---------------- + diff --git a/docs/conf.py b/docs/conf.py index 8e83820..fe8f596 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -10,9 +10,9 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # -# import os -# import sys -# sys.path.insert(0, os.path.abspath('.')) +import os +import sys +sys.path.insert(0, os.path.abspath('../.')) # -- Project information ----------------------------------------------------- @@ -31,6 +31,8 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ + "sphinx.ext.autodoc", + "sphinxarg.ext" ] # Add any paths that contain templates here, relative to this directory. @@ -52,4 +54,4 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] \ No newline at end of file +html_static_path = ['_static'] diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py index f238ffa..a1eab32 100644 --- a/pyske/examples/list/k_means.py +++ b/pyske/examples/list/k_means.py @@ -70,7 +70,7 @@ def max_dist(pair_a: Tuple[Point_Interface, float], pair_b: Tuple[Point_Interfac def k_means_init(input_list: List[Point_Interface], n_cluster: int) -> SList[Point_Interface]: """ - K-means++ initialisation + K-means++ initialization :param input_list: a list of points :param n_cluster: number of clusters @@ -103,9 +103,9 @@ def k_means(input_list: List[Point_Interface], init_function: Callable[[List, in K-means algorithm on a list of points :param input_list: a list of points + :param init_function: a function that initialize centroids :param n_cluster: number of clusters :param max_iter: number of iterations - :param init_function: a function that initialize centroids :return: a list of tuples with the point and his cluster index """ diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py index 41fddc4..3687f0c 100644 --- a/pyske/examples/list/k_means_main.py +++ b/pyske/examples/list/k_means_main.py @@ -1,7 +1,6 @@ """ Execution of k_means """ -import argparse from pyske.core import Timing from pyske.examples.list.k_means import k_means, k_means_init @@ -10,16 +9,10 @@ PAR = 'parallel' SEQ = 'sequential' + if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000) - parser.add_argument("--iter", help="number of iterations", type=int, default=30) - parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ) - parser.add_argument("--clusters", help="number of clusters", type=int, default=3) - parser.add_argument("--dimensions", help="point dimensions", type=int, default=2) - parser.add_argument("--show-clusters", help="display the clusters graph of 2D points", - action="store_true") + parser = util. k_means_parser() args = parser.parse_args() size = args.size diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index e560a16..2e3da87 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -3,6 +3,7 @@ """ from typing import Tuple import matplotlib.pyplot as plt +import argparse from sklearn.datasets import make_blobs from pyske.core import Distribution, SList @@ -26,8 +27,6 @@ def standard_parse_command_line(size_arg=True, iter_arg=True, data_arg=True): :param data_arg: (default True) flag to select argument --data :return: (size, iter, ['parallel' | 'sequential']) """ - # pylint: disable=import-outside-toplevel - import argparse parser = argparse.ArgumentParser() if size_arg: parser.add_argument("--size", help="size of the list to generate", @@ -50,6 +49,20 @@ def standard_parse_command_line(size_arg=True, iter_arg=True, data_arg=True): return size, num_iter, data_type +def k_means_parser(): + """ + Parse command line for k-means example. + """ + parser = argparse.ArgumentParser() + parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000) + parser.add_argument("--iter", help="number of iterations", type=int, default=30) + parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ) + parser.add_argument("--clusters", help="number of clusters", type=int, default=3) + parser.add_argument("--dimensions", help="point dimensions", type=int, default=2) + parser.add_argument("--show-clusters", help="display the clusters graph of 2D points", + action="store_true") + return parser + def select_pyske_list(choice): """ Return a PySke list class. From 42c750632199f47d51a286655363ba8ce929a3ca Mon Sep 17 00:00:00 2001 From: Evan MULUMBA Date: Tue, 8 Jun 2021 12:18:44 +0200 Subject: [PATCH 26/34] 3d representation for Point_3D clusters --- pyske/examples/list/k_means_main.py | 4 ++++ pyske/examples/list/util.py | 22 ++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py index 3687f0c..93c885a 100644 --- a/pyske/examples/list/k_means_main.py +++ b/pyske/examples/list/k_means_main.py @@ -36,3 +36,7 @@ util.print_experiment("", timing.get(), execute, iteration) if show_clusters and dimensions == 2: util.print_2D_result(result.to_seq()) + elif show_clusters and dimensions == 3: + util.print_3D_result(result.to_seq()) + + diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index 2e3da87..721f855 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -3,12 +3,14 @@ """ from typing import Tuple import matplotlib.pyplot as plt +from mpl_toolkits.mplot3d import axes3d import argparse from sklearn.datasets import make_blobs from pyske.core import Distribution, SList from pyske.core.support import parallel from pyske.core.util.point_2D import Point_2D +from pyske.core.util.point_3D import Point_3D PAR = 'parallel' SEQ = 'sequential' @@ -153,6 +155,26 @@ def print_2D_result(clusters_list: SList[Tuple[Point_2D, int]]): plt.scatter(x, y, c=colors) plt.show() +def print_3D_result(clusters_list: SList[Tuple[Point_3D, int]]): + """ + Print experiment of 3 dimension points k-means clustering + """ + if parallel.PID == 0: + x = clusters_list.map(lambda pair: pair[0].x) + y = clusters_list.map(lambda pair: pair[0].y) + z = clusters_list.map(lambda pair: pair[0].z) + colors = clusters_list.map(lambda pair: pair[1]) + + # Tracé du résultat en 3D + fig = plt.figure() + ax = fig.gca(projection='3d') # Affichage en 3D + ax.scatter(x, y, z, label='Courbe', marker='d') # Tracé des points 3D + plt.title("Points 3D") + ax.set_xlabel('X') + ax.set_ylabel('Y') + ax.set_zlabel('Z') + plt.tight_layout() + plt.show() def print_experiment(result, timing, execute, iteration=None): """ From 810c54b30394e297e2c62ef2a5987b086bbd2d8f Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Tue, 8 Jun 2021 12:57:39 +0200 Subject: [PATCH 27/34] error subtraction in distance --- pyske/core/util/point_3D.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyske/core/util/point_3D.py b/pyske/core/util/point_3D.py index c6e1aa5..678710d 100644 --- a/pyske/core/util/point_3D.py +++ b/pyske/core/util/point_3D.py @@ -91,5 +91,5 @@ def distance(self, other): """ dx = self.__x - other.x dy = self.__y - other.y - dz = self.__x - other.z + dz = self.__z - other.z return sqrt(dx ** 2 + dy ** 2 + dz ** 2) From 5cba1e966b00bf01344537f1fe4c9e5fc82babd6 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Tue, 8 Jun 2021 14:31:27 +0200 Subject: [PATCH 28/34] adding colors 3D graph result, fix warning matplotlib --- pyske/examples/list/k_means_main.py | 12 ++++++------ pyske/examples/list/util.py | 13 +++++++------ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py index 93c885a..c8b7782 100644 --- a/pyske/examples/list/k_means_main.py +++ b/pyske/examples/list/k_means_main.py @@ -12,7 +12,7 @@ if __name__ == '__main__': - parser = util. k_means_parser() + parser = util.k_means_parser() args = parser.parse_args() size = args.size @@ -34,9 +34,9 @@ result = example(input_list, k_means_init, clusters) timing.stop() util.print_experiment("", timing.get(), execute, iteration) - if show_clusters and dimensions == 2: - util.print_2D_result(result.to_seq()) - elif show_clusters and dimensions == 3: - util.print_3D_result(result.to_seq()) - + if show_clusters: + if dimensions == 2: + util.print_2D_result(result.to_seq()) + if dimensions == 3: + util.print_3D_result(result.to_seq()) diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index 721f855..1ddca0f 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -1,10 +1,11 @@ """ Utility functions for PySke examples """ + from typing import Tuple -import matplotlib.pyplot as plt -from mpl_toolkits.mplot3d import axes3d + import argparse +import matplotlib.pyplot as plt from sklearn.datasets import make_blobs from pyske.core import Distribution, SList @@ -157,8 +158,8 @@ def print_2D_result(clusters_list: SList[Tuple[Point_2D, int]]): def print_3D_result(clusters_list: SList[Tuple[Point_3D, int]]): """ - Print experiment of 3 dimension points k-means clustering - """ + Print experiment of 3 dimension points k-means clustering + """ if parallel.PID == 0: x = clusters_list.map(lambda pair: pair[0].x) y = clusters_list.map(lambda pair: pair[0].y) @@ -167,8 +168,8 @@ def print_3D_result(clusters_list: SList[Tuple[Point_3D, int]]): # Tracé du résultat en 3D fig = plt.figure() - ax = fig.gca(projection='3d') # Affichage en 3D - ax.scatter(x, y, z, label='Courbe', marker='d') # Tracé des points 3D + ax = fig.add_subplot(projection='3d') # Affichage en 3D + ax.scatter(x, y, z, label='Courbe', marker='d', c=colors) # Tracé des points 3D plt.title("Points 3D") ax.set_xlabel('X') ax.set_ylabel('Y') From eb16d4c72ed89bec88826fa8125774fb60807dfb Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Tue, 8 Jun 2021 15:05:23 +0200 Subject: [PATCH 29/34] adding Point Interface section --- docs/api.rst | 24 +++++++++++++++--------- pyske/core/util/point_2D.py | 3 +++ 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 19a69e2..864486e 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -3,7 +3,7 @@ PySke API Pyske API offer applications implemented with list and tree skeletons. The user can use the sequential or parallel version. -The parallel version allows a faster execution time when its launched on several processors or computers. +The parallel version allows a faster execution time when its launched on several processors, cores or computers. Dot Product ----------- @@ -23,20 +23,26 @@ K-means function .. autofunction:: k_means -Here the implementation of the 2 dimensions point class. +Initialization functions +^^^^^^^^^^^^^^^^^^^^^^^^ + +This is the standard method that initializes the centroids. This method chooses the centroids in order that each point is as far as possible from the other. + +.. autofunction:: k_means_init + + +Point Interface +^^^^^^^^^^^^^^^ + +K-means algorithm takes a list of points in parameters. For now two versions implement this class, one for 2 dimension points and another for 3 dimension points. + +Point 2D class implementation: .. autoclass:: pyske.core.util.point_2D.Point_2D :members: :special-members: - :show-inheritance: - :private-members: :member-order: bysource -Initialization functions -^^^^^^^^^^^^^^^^^^^^^^^^ - -.. autofunction:: k_means_init - Running Example ^^^^^^^^^^^^^^^^^^^^ diff --git a/pyske/core/util/point_2D.py b/pyske/core/util/point_2D.py index f6f5f7f..d0bfeca 100644 --- a/pyske/core/util/point_2D.py +++ b/pyske/core/util/point_2D.py @@ -17,6 +17,9 @@ def __repr__(self): return "(%s, %s)" % (self.__x, self.__y) def __eq__(self, other): + """ + Equality between two points + """ if isinstance(other, Point_2D): return self.__x == other.__x and self.__y == other.__y return False From 529498ef384ae91989a9d88ff4817518f58e4be9 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Tue, 8 Jun 2021 15:15:33 +0200 Subject: [PATCH 30/34] change show-clusters display message --- pyske/examples/list/util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index 1ddca0f..4964c93 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -57,12 +57,12 @@ def k_means_parser(): Parse command line for k-means example. """ parser = argparse.ArgumentParser() - parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000) + parser.add_argument("--size", help="size of the list to generate", type=int, default=5_000) parser.add_argument("--iter", help="number of iterations", type=int, default=30) parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ) parser.add_argument("--clusters", help="number of clusters", type=int, default=3) parser.add_argument("--dimensions", help="point dimensions", type=int, default=2) - parser.add_argument("--show-clusters", help="display the clusters graph of 2D points", + parser.add_argument("--show-clusters", help="display the clusters graph of 2D or 3D points", action="store_true") return parser From 04d1037c4672406742a2bb37e66cff9c0d237093 Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Wed, 9 Jun 2021 15:25:03 +0200 Subject: [PATCH 31/34] dot_product documentation --- docs/api.rst | 26 +++++++++++++++++++++++-- pyske/examples/list/dot_product_main.py | 10 ++-------- pyske/examples/list/util.py | 17 ++++++++++++++++ 3 files changed, 43 insertions(+), 10 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 864486e..a347320 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,13 +1,35 @@ PySke API ========= -Pyske API offer applications implemented with list and tree skeletons. +PySke API offer applications implemented with list and tree skeletons. The user can use the sequential or parallel version. The parallel version allows a faster execution time when its launched on several processors, cores or computers. Dot Product ----------- +.. py:module:: pyske.examples.list.dot_product + + +Dot Product function +^^^^^^^^^^^^^^^^^^^^ + +.. autofunction:: opt_dot_product + +Dot Product Variant +^^^^^^^^^^^^^^^^^^^ + +.. autofunction:: dot_product + +Running Example +^^^^^^^^^^^^^^^ + +.. argparse:: + :module: pyske.examples.list.util + :func: dot_product_parser + :prog: python3 dot_product_main.py + + Discrete Fast Fourier Transform ------------------------------- @@ -44,7 +66,7 @@ Point 2D class implementation: :member-order: bysource Running Example -^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^ .. argparse:: :module: pyske.examples.list.util diff --git a/pyske/examples/list/dot_product_main.py b/pyske/examples/list/dot_product_main.py index e357322..782c146 100644 --- a/pyske/examples/list/dot_product_main.py +++ b/pyske/examples/list/dot_product_main.py @@ -2,14 +2,13 @@ Execution of dot_product.py """ -import argparse import gc import random from pyske.examples.list.dot_product import opt_dot_product, dot_product from pyske.core import par, Timing, PList as DPList from pyske.core.opt import fun as opt from pyske.core.opt.list import PList -from pyske.examples.list.util import rand_list, print_experiment +from pyske.examples.list.util import rand_list, print_experiment, dot_product_parser # -------------- Execution ----------------- @@ -26,12 +25,7 @@ def __compute(): return opt_dot_product(PList.raw(pl1), PList.raw(pl2), uncurry=opt.uncurry).run() # Command-line arguments parsing - parser = argparse.ArgumentParser() - parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000_000) - parser.add_argument("--iter", help="number of iterations", type=int, default=30) - parser.add_argument("--test", help="choice of the test", - choices=[_DIRECT, _HAND, _EVAL, _OPT], - default=_DIRECT) + parser = dot_product_parser() args = parser.parse_args() size = args.size test = args.test diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index 4964c93..075321d 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -15,6 +15,10 @@ PAR = 'parallel' SEQ = 'sequential' +_DIRECT = '_DIRECT' +_HAND = 'hand_optimized' +_OPT = 'optimized' +_EVAL = 'evaluated' def standard_parse_command_line(size_arg=True, iter_arg=True, data_arg=True): @@ -66,6 +70,19 @@ def k_means_parser(): action="store_true") return parser +def dot_product_parser(): + """ + Parse command line for dot-product example. + """ + + parser = argparse.ArgumentParser() + parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000_000) + parser.add_argument("--iter", help="number of iterations", type=int, default=30) + parser.add_argument("--test", help="choice of the test", + choices=[_DIRECT, _HAND, _EVAL, _OPT], + default=_DIRECT) + return parser + def select_pyske_list(choice): """ Return a PySke list class. From 6c8eed974153b26da1c71831a16e9dcd9597fa0e Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Wed, 9 Jun 2021 18:03:03 +0200 Subject: [PATCH 32/34] adding for each example how to run it --- docs/api.rst | 52 ++++++++++++++++++++++++++++++------- docs/conf.py | 4 +-- docs/index.rst | 2 +- pyske/examples/list/util.py | 32 ++++++++++++++--------- 4 files changed, 66 insertions(+), 24 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index a347320..10d40ef 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,10 +1,13 @@ PySke API -========= +######### PySke API offer applications implemented with list and tree skeletons. The user can use the sequential or parallel version. The parallel version allows a faster execution time when its launched on several processors, cores or computers. +List Examples +============= + Dot Product ----------- @@ -24,15 +27,20 @@ Dot Product Variant Running Example ^^^^^^^^^^^^^^^ -.. argparse:: - :module: pyske.examples.list.util - :func: dot_product_parser - :prog: python3 dot_product_main.py +.. autoprogram:: pyske.examples.list.util:dot_product_parser() + :prog: dot_product_main.py Discrete Fast Fourier Transform ------------------------------- +Running Example +^^^^^^^^^^^^^^^ + +.. autoprogram:: pyske.examples.list.util:standard_parser(data_arg=False) + :prog: fft_main.py + + K-means Clustering ------------------ @@ -68,21 +76,47 @@ Point 2D class implementation: Running Example ^^^^^^^^^^^^^^^ -.. argparse:: - :module: pyske.examples.list.util - :func: k_means_parser - :prog: python3 k_means_main.py +.. autoprogram:: pyske.examples.list.util:k_means_parser() + :prog: k_means_main.py Maximum Prefix Sum ------------------ +Running Example +^^^^^^^^^^^^^^^ + +.. autoprogram:: pyske.examples.list.util:standard_parser() + :prog: maximum_prefix_sum_main.py + Maximum Segment Sum ------------------- +Running Example +^^^^^^^^^^^^^^^ + +.. autoprogram:: pyske.examples.list.util:standard_parser() + :prog: maximum_segment_sum_main.py + Parallel Regular Sampling Sort ------------------------------ +Running Example +^^^^^^^^^^^^^^^ + +.. autoprogram:: pyske.examples.list.util:standard_parser() + :prog: regular_sampling_sort_main.py + Variance Example ---------------- +Running Example +^^^^^^^^^^^^^^^ + +.. autoprogram:: pyske.examples.list.util:standard_parser() + :prog: variance_main.py + + +Tree Examples +============= + diff --git a/docs/conf.py b/docs/conf.py index fe8f596..b6fa59b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,7 +12,7 @@ # import os import sys -sys.path.insert(0, os.path.abspath('../.')) +sys.path.insert(0, os.path.abspath('../')) # -- Project information ----------------------------------------------------- @@ -32,7 +32,7 @@ # ones. extensions = [ "sphinx.ext.autodoc", - "sphinxarg.ext" + "sphinxcontrib.autoprogram" ] # Add any paths that contain templates here, relative to this directory. diff --git a/docs/index.rst b/docs/index.rst index 8115248..552c990 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -7,7 +7,7 @@ Welcome to PySke's documentation! ================================= .. toctree:: - :maxdepth: 2 + :maxdepth: 3 :caption: Contents: intro diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py index 075321d..0ae417f 100644 --- a/pyske/examples/list/util.py +++ b/pyske/examples/list/util.py @@ -34,16 +34,7 @@ def standard_parse_command_line(size_arg=True, iter_arg=True, data_arg=True): :param data_arg: (default True) flag to select argument --data :return: (size, iter, ['parallel' | 'sequential']) """ - parser = argparse.ArgumentParser() - if size_arg: - parser.add_argument("--size", help="size of the list to generate", - type=int, default=1_000_000) - if iter_arg: - parser.add_argument("--iter", help="number of iterations", - type=int, default=30) - if data_arg: - parser.add_argument("--data", help="type of data structure", - choices=[PAR, SEQ], default=SEQ) + parser = standard_parser(size_arg, iter_arg, data_arg) size = num_iter = 0 data_type = PAR args = parser.parse_args() @@ -56,9 +47,25 @@ def standard_parse_command_line(size_arg=True, iter_arg=True, data_arg=True): return size, num_iter, data_type +def standard_parser(size_arg=True, iter_arg=True, data_arg=True): + """ + Parser for standard example. + """ + parser = argparse.ArgumentParser() + if size_arg: + parser.add_argument("--size", help="size of the list to generate", + type=int, default=1_000_000) + if iter_arg: + parser.add_argument("--iter", help="number of iterations", + type=int, default=30) + if data_arg: + parser.add_argument("--data", help="type of data structure", + choices=[PAR, SEQ], default=SEQ) + return parser + def k_means_parser(): """ - Parse command line for k-means example. + Parser for k-means example. """ parser = argparse.ArgumentParser() parser.add_argument("--size", help="size of the list to generate", type=int, default=5_000) @@ -68,11 +75,12 @@ def k_means_parser(): parser.add_argument("--dimensions", help="point dimensions", type=int, default=2) parser.add_argument("--show-clusters", help="display the clusters graph of 2D or 3D points", action="store_true") + return parser def dot_product_parser(): """ - Parse command line for dot-product example. + Parse for dot-product example. """ parser = argparse.ArgumentParser() From c73d1f3f28214c5c35a1ffd06b2002299953c2ba Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Thu, 10 Jun 2021 15:38:06 +0200 Subject: [PATCH 33/34] fft documentation, how to run in parallel --- docs/api.rst | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 10d40ef..80a2276 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -5,6 +5,14 @@ PySke API offer applications implemented with list and tree skeletons. The user can use the sequential or parallel version. The parallel version allows a faster execution time when its launched on several processors, cores or computers. +Run examples with parallel computing: + + .. code-block:: console + + mpirun -np NB_CORES python3 PROGRAM_NAME [OPTIONS] + +Examples without :code:`--data` option are only runnable in parallel. + List Examples ============= @@ -14,14 +22,11 @@ Dot Product .. py:module:: pyske.examples.list.dot_product -Dot Product function -^^^^^^^^^^^^^^^^^^^^ +Dot Product functions +^^^^^^^^^^^^^^^^^^^^^ .. autofunction:: opt_dot_product -Dot Product Variant -^^^^^^^^^^^^^^^^^^^ - .. autofunction:: dot_product Running Example @@ -33,6 +38,12 @@ Running Example Discrete Fast Fourier Transform ------------------------------- +.. py:module:: pyske.examples.list.fft + +Fast Fourier Transform function +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. autofunction:: fft Running Example ^^^^^^^^^^^^^^^ From fad0ab3b77b53990012118916d491c7c0d0102bc Mon Sep 17 00:00:00 2001 From: Besnard Clement Date: Thu, 10 Jun 2021 16:00:34 +0200 Subject: [PATCH 34/34] last examples documentation functions --- docs/api.rst | 28 ++++++++++++++++++++ pyske/examples/list/regular_sampling_sort.py | 3 +-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 80a2276..1b6eaa9 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -94,6 +94,13 @@ Running Example Maximum Prefix Sum ------------------ +.. py:module:: pyske.examples.list.maximum_prefix_sum + +Maximum Prefix Sum function +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. autofunction:: mps + Running Example ^^^^^^^^^^^^^^^ @@ -112,6 +119,20 @@ Running Example Parallel Regular Sampling Sort ------------------------------ +.. py:module:: pyske.examples.list.regular_sampling_sort + + +Broadcast function +^^^^^^^^^^^^^^^^^^ + +.. autofunction:: bcast + +Sort function +^^^^^^^^^^^^^ + +.. autofunction:: pssr + + Running Example ^^^^^^^^^^^^^^^ @@ -121,6 +142,13 @@ Running Example Variance Example ---------------- +.. py:module:: pyske.examples.list.variance + +Variance function +^^^^^^^^^^^^^^^^^ + +.. autofunction:: variance + Running Example ^^^^^^^^^^^^^^^ diff --git a/pyske/examples/list/regular_sampling_sort.py b/pyske/examples/list/regular_sampling_sort.py index 69feb8b..f816de0 100644 --- a/pyske/examples/list/regular_sampling_sort.py +++ b/pyske/examples/list/regular_sampling_sort.py @@ -18,8 +18,7 @@ def bcast(input_list: PList, src_pid: int) -> PList: Example:: >>> from pyske.core import PList, par - >>> bcast(PList.from_seq([42]), 0).to_seq() == \ - list(map(lambda _: 42, par.procs())) + >>> bcast(PList.from_seq([42]), 0).to_seq() == list(map(lambda _: 42, par.procs())) True :param input_list: a parallel list.