From ce230dba7ec01dc4bed42848035c4c8559909f70 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Wed, 5 May 2021 17:05:56 +0200
Subject: [PATCH 01/34] Class Point / K-means algorithm

---
 pyske/core/util/point.py            |  51 ++++++++++++
 pyske/examples/list/k_means.py      | 121 ++++++++++++++++++++++++++++
 pyske/examples/list/k_means_main.py |  25 ++++++
 pyske/examples/list/util.py         |  14 ++++
 4 files changed, 211 insertions(+)
 create mode 100644 pyske/core/util/point.py
 create mode 100644 pyske/examples/list/k_means.py
 create mode 100644 pyske/examples/list/k_means_main.py

diff --git a/pyske/core/util/point.py b/pyske/core/util/point.py
new file mode 100644
index 0000000..0a5d2fc
--- /dev/null
+++ b/pyske/core/util/point.py
@@ -0,0 +1,51 @@
+"""
+A module to represent a point
+"""
+
+from math import sqrt
+
+
+class Point(object):
+    """A class to represent a point"""
+
+    def __init__(self, x, y):
+        self.__x = x
+        self.__y = y
+
+    def __repr__(self):
+        return "(%s, %s)" % (self.__x, self.__y)
+
+    def __eq__(self, other):
+        if isinstance(other, Point):
+            return self.__x == other.x and self.__y == other.__y
+        return False
+
+    @property
+    def x(self):
+        """X getter"""
+        return self.__x
+
+    @property
+    def y(self):
+        """Y getter"""
+        return self.__y
+
+    def distance(self, other: 'Point'):
+        """
+        Returns the distance from another point.
+
+        Examples::
+
+            >>> from pyske.core.util.point import Point
+            >>> p1 = Point(5,5)
+            >>> p2 = Point(5,7)
+            >>> p1.distance(p2)
+            2.0
+
+        :param other: a point
+        :return: distance from other point
+
+        """
+        dx = self.__x - other.x
+        dy = self.__y - other.y
+        return sqrt(dx ** 2 + dy ** 2)
diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
new file mode 100644
index 0000000..69dbdbc
--- /dev/null
+++ b/pyske/examples/list/k_means.py
@@ -0,0 +1,121 @@
+"""
+K-Means
+"""
+
+from pyske.core.interface import List
+from pyske.core.list import SList
+import random
+import matplotlib.pyplot as plt
+from pyske.core.util.point import Point
+
+
+def cluster_index(p, centroids):
+    """
+    Get the centroid index of the closest centroid
+    """
+    min_dist = float("inf")
+    p_centroid = centroids[0]
+    for c in centroids:
+        if p.distance(c) < min_dist:
+            min_dist = p.distance(c)
+            p_centroid = c
+    return centroids.index(p_centroid)
+
+
+def make_clusters(input_list, centroids):
+    """
+    Append all points to the cluster with the minimal distance from its centroid
+    """
+    clusters = [[] for c in centroids]
+    for p in input_list.to_seq():
+        index = cluster_index(p, centroids)
+        clusters[index].append(p)
+    return clusters
+
+
+def coords_average(cluster):
+    """
+    Get the coordinates average of all points in one cluster
+    """
+    x_average = sum([p.x for p in cluster]) / len(cluster)
+    y_average = sum([p.y for p in cluster]) / len(cluster)
+    return Point(x_average, y_average)
+
+
+def get_new_centroid(cluster):
+    """
+    Get closest point to average of point coordinates
+    """
+    average_point = coords_average(cluster)
+    min_dist = float("inf")
+    new_centroid = cluster[0]
+    for p in cluster:
+        if p.distance(average_point) < min_dist:
+            min_dist = p.distance(average_point)
+            new_centroid = p
+    return new_centroid
+
+
+def define_centroids(clusters):
+    """
+    Redefine centroids of clusters
+    """
+    centroids = []
+    for cluster in clusters:
+        centroids.append(get_new_centroid(cluster))
+    return centroids
+
+
+def k_means_init(input_list: List, n_cluster: int):
+    """
+    K-means++ initialisation
+
+    :param input_list: a list of point
+    :param n_cluster: number of cluster
+
+    :return: n_cluster centroids
+    """
+    centroids = SList([])
+    c1 = input_list.to_seq()[random.randint(0, input_list.length() - 1)]
+    centroids.append(c1)
+
+    for c in range(n_cluster - 1):
+        dist = input_list.map(lambda x: x.distance(centroids[0]))
+        for i in range(1, len(centroids)):
+            temp_dist = input_list.map(lambda x: x.distance(centroids[i]))
+            dist = dist.map2(lambda x, y: min(x, y), temp_dist)
+
+        index_max = [i for i, x in enumerate(dist.to_seq()) if x == max(dist.to_seq())]
+        next_centroid = input_list.to_seq()[index_max[0]]
+        centroids.append(next_centroid)
+
+    return centroids
+
+
+def k_means(input_list: List, n_cluster: int, max_iter: int = 10):
+    """
+    K-means algorithm on a list of point
+
+    :param input_list: a list of point
+    :param n_cluster: number of cluster
+    :param max_iter: number of iteration
+
+    :return: a list of class
+    """
+    centroids = k_means_init(input_list, n_cluster)
+    j = 0
+    while j < max_iter:
+        clusters = make_clusters(input_list, centroids)
+        plt.scatter([point.x for point in input_list.to_seq()], [point.y for point in input_list.to_seq()],
+                    c='yellow')
+        clusters_color = ['green', 'blue', 'black', 'purple', 'brown']
+        for i in range(len(clusters)):
+            plt.scatter([point.x for point in clusters[i]], [point.y for point in clusters[i]],
+                        c=clusters_color[i])
+
+        centroids = define_centroids(clusters)
+        plt.scatter([point.x for point in centroids], [point.y for point in centroids], c='red')
+        plt.show()
+        j = j + 1
+
+    return clusters
diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
new file mode 100644
index 0000000..f2b8c0c
--- /dev/null
+++ b/pyske/examples/list/k_means_main.py
@@ -0,0 +1,25 @@
+"""
+Execution of k_means
+"""
+import gc
+
+from pyske.core import Timing
+from pyske.examples.list.k_means import k_means
+from pyske.examples.list import util
+
+
+if __name__ == '__main__':
+    size, num_iter, choice = util.standard_parse_command_line()
+    pyske_list_class = util.select_pyske_list(choice)
+    input_list = util.rand_point_list(pyske_list_class, size)
+    timing = Timing()
+    execute = util.select_execute(choice)
+    example = k_means
+    execute(lambda: print('Version:\t', choice))
+    gc.disable()
+    for iteration in range(1, 1 + num_iter):
+        timing.start()
+        result = example(input_list, 5)
+        timing.stop()
+        gc.collect()
+        util.print_experiment(result, timing.get(), execute, iteration)
\ No newline at end of file
diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index 57bed0d..26dbb69 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -89,6 +89,20 @@ def rand_list(cls, size):
     return cls.init(lambda _: float(random.randint(-100, 100)), size)
 
 
+def rand_point_list(cls, size):
+    """
+    Return a randomly generated list of points.
+
+    :param cls: the class of the generated list.
+    :param size: a positive number
+        Precondition: size >= 0
+    :return: a list of the given class
+    """
+    from pyske.core.util.point import Point
+    import random
+    return cls.init(lambda _: Point(random.randint(0, size), random.randint(0, size)), size)
+
+
 def print_experiment(result, timing, execute, iteration=None):
     """
     Print the result and timing of the experiment.

From 0b33eacce3ceaa0dc1ed32ee8bad7b99b30d75e8 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Thu, 6 May 2021 16:33:41 +0200
Subject: [PATCH 02/34] number of clusters in parameters / test on datasets

---
 pyske/examples/list/k_means.py      | 11 ++---------
 pyske/examples/list/k_means_main.py | 27 +++++++++++++++++++++++----
 pyske/examples/list/util.py         | 11 ++++++++---
 3 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index 69dbdbc..d500fbb 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -100,22 +100,15 @@ def k_means(input_list: List, n_cluster: int, max_iter: int = 10):
     :param n_cluster: number of cluster
     :param max_iter: number of iteration
 
-    :return: a list of class
+    :return: 2 dimension list of points
     """
     centroids = k_means_init(input_list, n_cluster)
     j = 0
     while j < max_iter:
         clusters = make_clusters(input_list, centroids)
-        plt.scatter([point.x for point in input_list.to_seq()], [point.y for point in input_list.to_seq()],
-                    c='yellow')
-        clusters_color = ['green', 'blue', 'black', 'purple', 'brown']
-        for i in range(len(clusters)):
-            plt.scatter([point.x for point in clusters[i]], [point.y for point in clusters[i]],
-                        c=clusters_color[i])
 
         centroids = define_centroids(clusters)
-        plt.scatter([point.x for point in centroids], [point.y for point in centroids], c='red')
-        plt.show()
+        # plt.scatter([point.x for point in centroids], [point.y for point in centroids], c='red')
         j = j + 1
 
     return clusters
diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
index f2b8c0c..0367361 100644
--- a/pyske/examples/list/k_means_main.py
+++ b/pyske/examples/list/k_means_main.py
@@ -6,12 +6,28 @@
 from pyske.core import Timing
 from pyske.examples.list.k_means import k_means
 from pyske.examples.list import util
+import matplotlib.pyplot as plt
+import argparse
 
+PAR = 'parallel'
+SEQ = 'sequential'
 
 if __name__ == '__main__':
-    size, num_iter, choice = util.standard_parse_command_line()
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000_000)
+    parser.add_argument("--iter", help="number of iterations", type=int, default=30)
+    parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ)
+    parser.add_argument("--clusters", help="number of clusters", type=int, default=3)
+
+    args = parser.parse_args()
+    size = args.size
+    num_iter = args.iter
+    choice = args.data
+    clusters = args.clusters
+
     pyske_list_class = util.select_pyske_list(choice)
-    input_list = util.rand_point_list(pyske_list_class, size)
+    input_list = util.rand_point_list(pyske_list_class, size, clusters)
     timing = Timing()
     execute = util.select_execute(choice)
     example = k_means
@@ -19,7 +35,10 @@
     gc.disable()
     for iteration in range(1, 1 + num_iter):
         timing.start()
-        result = example(input_list, 5)
+        result = example(input_list, clusters)
         timing.stop()
         gc.collect()
-        util.print_experiment(result, timing.get(), execute, iteration)
\ No newline at end of file
+        util.print_experiment("", timing.get(), execute, iteration)
+        for i in range(len(result)):
+            plt.scatter([point.x for point in result[i]], [point.y for point in result[i]])
+        plt.show()
diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index 26dbb69..965067f 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -2,6 +2,8 @@
 Utility functions for PySke examples
 """
 
+from sklearn.datasets import make_blobs
+
 PAR = 'parallel'
 SEQ = 'sequential'
 
@@ -89,18 +91,21 @@ def rand_list(cls, size):
     return cls.init(lambda _: float(random.randint(-100, 100)), size)
 
 
-def rand_point_list(cls, size):
+def rand_point_list(cls, size, clusters):
     """
     Return a randomly generated list of points.
 
     :param cls: the class of the generated list.
     :param size: a positive number
         Precondition: size >= 0
+    :param clusters: number of clusters
     :return: a list of the given class
     """
     from pyske.core.util.point import Point
-    import random
-    return cls.init(lambda _: Point(random.randint(0, size), random.randint(0, size)), size)
+    x, y_true = make_blobs(n_samples=size, centers=clusters)
+    x = x.tolist()
+    x = list(map(lambda y: Point(y[0], y[1]), x))
+    return cls.from_seq(x)
 
 
 def print_experiment(result, timing, execute, iteration=None):

From 8b89af61f688b9b756ccfc0c29747c933ce106e3 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Tue, 11 May 2021 09:55:38 +0200
Subject: [PATCH 03/34] radon cc in k_means_init / pylinting

---
 pyske/examples/list/k_means.py      | 29 +++++++++++++++++++----------
 pyske/examples/list/k_means_main.py |  6 +++---
 2 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index d500fbb..4b9ed26 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -1,11 +1,9 @@
 """
 K-Means
 """
-
+import random
 from pyske.core.interface import List
 from pyske.core.list import SList
-import random
-import matplotlib.pyplot as plt
 from pyske.core.util.point import Point
 
 
@@ -65,6 +63,17 @@ def define_centroids(clusters):
         centroids.append(get_new_centroid(cluster))
     return centroids
 
+def index_max_value(input_list: List):
+    """
+    Return the index of the maximum value
+    """
+    index_max = 0
+    max_dist = 0
+    for i in range(len(input_list.to_seq())):
+        if input_list.to_seq()[i] > max_dist:
+            max_dist = input_list.to_seq()[i]
+            index_max = i
+    return index_max
 
 def k_means_init(input_list: List, n_cluster: int):
     """
@@ -79,14 +88,14 @@ def k_means_init(input_list: List, n_cluster: int):
     c1 = input_list.to_seq()[random.randint(0, input_list.length() - 1)]
     centroids.append(c1)
 
-    for c in range(n_cluster - 1):
+    for _ in range(n_cluster - 1):
         dist = input_list.map(lambda x: x.distance(centroids[0]))
         for i in range(1, len(centroids)):
-            temp_dist = input_list.map(lambda x: x.distance(centroids[i]))
-            dist = dist.map2(lambda x, y: min(x, y), temp_dist)
+            temp_dist = input_list.map(lambda x, index=i: x.distance(centroids[index]))
+            dist = dist.map2(lambda x, y: y if y < x else x, temp_dist)
 
-        index_max = [i for i, x in enumerate(dist.to_seq()) if x == max(dist.to_seq())]
-        next_centroid = input_list.to_seq()[index_max[0]]
+        index_max = index_max_value(dist)
+        next_centroid = input_list.to_seq()[index_max]
         centroids.append(next_centroid)
 
     return centroids
@@ -100,13 +109,13 @@ def k_means(input_list: List, n_cluster: int, max_iter: int = 10):
     :param n_cluster: number of cluster
     :param max_iter: number of iteration
 
-    :return: 2 dimension list of points
+    :return: 2 dimensions list of points
     """
+
     centroids = k_means_init(input_list, n_cluster)
     j = 0
     while j < max_iter:
         clusters = make_clusters(input_list, centroids)
-
         centroids = define_centroids(clusters)
         # plt.scatter([point.x for point in centroids], [point.y for point in centroids], c='red')
         j = j + 1
diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
index 0367361..f08a918 100644
--- a/pyske/examples/list/k_means_main.py
+++ b/pyske/examples/list/k_means_main.py
@@ -2,12 +2,12 @@
 Execution of k_means
 """
 import gc
+import argparse
+import matplotlib.pyplot as plt
 
 from pyske.core import Timing
 from pyske.examples.list.k_means import k_means
 from pyske.examples.list import util
-import matplotlib.pyplot as plt
-import argparse
 
 PAR = 'parallel'
 SEQ = 'sequential'
@@ -15,7 +15,7 @@
 if __name__ == '__main__':
 
     parser = argparse.ArgumentParser()
-    parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000_000)
+    parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000)
     parser.add_argument("--iter", help="number of iterations", type=int, default=30)
     parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ)
     parser.add_argument("--clusters", help="number of clusters", type=int, default=3)

From 4c7d8727d5ea23062681d352e0dc49987c64bf5a Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Tue, 25 May 2021 15:47:05 +0200
Subject: [PATCH 04/34] Default constructor, addition between two points,
 multiplication by scalar and by a point

---
 pyske/core/util/point.py | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/pyske/core/util/point.py b/pyske/core/util/point.py
index 0a5d2fc..cb55684 100644
--- a/pyske/core/util/point.py
+++ b/pyske/core/util/point.py
@@ -8,7 +8,7 @@
 class Point(object):
     """A class to represent a point"""
 
-    def __init__(self, x, y):
+    def __init__(self, x=0, y=0):
         self.__x = x
         self.__y = y
 
@@ -20,6 +20,38 @@ def __eq__(self, other):
             return self.__x == other.x and self.__y == other.__y
         return False
 
+    def __add__(self, other):
+        """
+        Addition of two points
+
+        Examples::
+
+            >>> p1 = Point(5,5)
+            >>> p2 = Point(5,7)
+            >>> p1 + p2
+            (10, 12)
+        """
+        if isinstance(other, Point):
+            return Point(self.x + other.x, self.y + other.y)
+
+    def __mul__(self, other):
+        """
+        Multiplication by a point or a scalar
+
+        Examples::
+
+            >>> p1 = Point(5,5)
+            >>> p2 = Point(5,7)
+            >>> p1 * 5
+            (25, 25)
+            >>> p1 * p2
+            (25, 35)
+        """
+        if isinstance(other, Point):
+            return Point(self.x * other.x, self.y * other.y)
+        if isinstance(other, int) or isinstance(other, float):
+            return Point(self.x * other, self.y * other)
+
     @property
     def x(self):
         """X getter"""

From ee146229998184612db64bc0cb01ae1160fe0b80 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Tue, 25 May 2021 16:13:40 +0200
Subject: [PATCH 05/34] parallel optimization in k_means_init

---
 pyske/examples/list/k_means.py | 41 +++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index 4b9ed26..d7ab21f 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -2,20 +2,21 @@
 K-Means
 """
 import random
+from typing import Callable, Tuple
 from pyske.core.interface import List
 from pyske.core.list import SList
 from pyske.core.util.point import Point
 
 
-def cluster_index(p, centroids):
+def cluster_index(point, centroids):
     """
     Get the centroid index of the closest centroid
     """
     min_dist = float("inf")
     p_centroid = centroids[0]
     for c in centroids:
-        if p.distance(c) < min_dist:
-            min_dist = p.distance(c)
+        if point.distance(c) < min_dist:
+            min_dist = point.distance(c)
             p_centroid = c
     return centroids.index(p_centroid)
 
@@ -54,7 +55,7 @@ def get_new_centroid(cluster):
     return new_centroid
 
 
-def define_centroids(clusters):
+def define_centroids(clusters):  # Pas utile car tuple ( num_cluster, point )
     """
     Redefine centroids of clusters
     """
@@ -63,17 +64,16 @@ def define_centroids(clusters):
         centroids.append(get_new_centroid(cluster))
     return centroids
 
-def index_max_value(input_list: List):
+
+def max_dist(pair_a: Tuple[Point, float], pair_b: Tuple[Point, float]):
     """
-    Return the index of the maximum value
+    Return the tuple with the maximum distance
     """
-    index_max = 0
-    max_dist = 0
-    for i in range(len(input_list.to_seq())):
-        if input_list.to_seq()[i] > max_dist:
-            max_dist = input_list.to_seq()[i]
-            index_max = i
-    return index_max
+    if pair_a[1] > pair_b[1]:
+        return pair_a
+    else:
+        return pair_b
+
 
 def k_means_init(input_list: List, n_cluster: int):
     """
@@ -94,30 +94,31 @@ def k_means_init(input_list: List, n_cluster: int):
             temp_dist = input_list.map(lambda x, index=i: x.distance(centroids[index]))
             dist = dist.map2(lambda x, y: y if y < x else x, temp_dist)
 
-        index_max = index_max_value(dist)
-        next_centroid = input_list.to_seq()[index_max]
+        zip_list = input_list.zip(dist)
+        next_centroid = zip_list.reduce(max_dist)[0]
         centroids.append(next_centroid)
 
     return centroids
 
 
-def k_means(input_list: List, n_cluster: int, max_iter: int = 10):
+def k_means(input_list: List, init_function: Callable[[List, int], List], n_cluster: int,
+            max_iter: int = 10):
     """
     K-means algorithm on a list of point
 
     :param input_list: a list of point
     :param n_cluster: number of cluster
     :param max_iter: number of iteration
+    :param init_function: a function that initialize centroids
 
     :return: 2 dimensions list of points
     """
 
-    centroids = k_means_init(input_list, n_cluster)
+    centroids = init_function(input_list, n_cluster)
     j = 0
     while j < max_iter:
-        clusters = make_clusters(input_list, centroids)
-        centroids = define_centroids(clusters)
-        # plt.scatter([point.x for point in centroids], [point.y for point in centroids], c='red')
+        clusters = make_clusters(input_list, centroids)  # assign_cluster
+        centroids = define_centroids(clusters)  # update_centroids
         j = j + 1
 
     return clusters

From 057457c98096184e89eac1af93d6ec433eba9428 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Wed, 26 May 2021 17:04:36 +0200
Subject: [PATCH 06/34] fix: init instead of from_seq

---
 pyske/examples/list/util.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index 965067f..d6517fa 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -102,10 +102,11 @@ def rand_point_list(cls, size, clusters):
     :return: a list of the given class
     """
     from pyske.core.util.point import Point
+    print(clusters)
     x, y_true = make_blobs(n_samples=size, centers=clusters)
     x = x.tolist()
     x = list(map(lambda y: Point(y[0], y[1]), x))
-    return cls.from_seq(x)
+    return cls.init(lambda i: x[i], size)
 
 
 def print_experiment(result, timing, execute, iteration=None):

From 87000f9a8ec99b89a26c528e8ba3184838591f38 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Wed, 26 May 2021 17:05:11 +0200
Subject: [PATCH 07/34] Division of a point

---
 pyske/core/util/point.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pyske/core/util/point.py b/pyske/core/util/point.py
index cb55684..b4c43f0 100644
--- a/pyske/core/util/point.py
+++ b/pyske/core/util/point.py
@@ -52,6 +52,10 @@ def __mul__(self, other):
         if isinstance(other, int) or isinstance(other, float):
             return Point(self.x * other, self.y * other)
 
+    def __truediv__(self, other):
+        if isinstance(other, int):
+            return Point(self.x / other, self.y / other)
+
     @property
     def x(self):
         """X getter"""

From 0d9b023430548a3f91a804dd060c465f686e68df Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Wed, 26 May 2021 17:06:15 +0200
Subject: [PATCH 08/34] parallel optimization, assign_cluster and
 update_cluster

---
 pyske/examples/list/k_means.py      | 62 +++++++++++------------------
 pyske/examples/list/k_means_main.py | 15 ++++---
 2 files changed, 30 insertions(+), 47 deletions(-)

diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index d7ab21f..89cf643 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -18,51 +18,32 @@ def cluster_index(point, centroids):
         if point.distance(c) < min_dist:
             min_dist = point.distance(c)
             p_centroid = c
-    return centroids.index(p_centroid)
+    return point, centroids.index(p_centroid)
 
 
-def make_clusters(input_list, centroids):
+def assign_clusters(input_list, centroids):
     """
-    Append all points to the cluster with the minimal distance from its centroid
+    Assign to each point to a cluster
     """
-    clusters = [[] for c in centroids]
-    for p in input_list.to_seq():
-        index = cluster_index(p, centroids)
-        clusters[index].append(p)
-    return clusters
-
 
-def coords_average(cluster):
-    """
-    Get the coordinates average of all points in one cluster
-    """
-    x_average = sum([p.x for p in cluster]) / len(cluster)
-    y_average = sum([p.y for p in cluster]) / len(cluster)
-    return Point(x_average, y_average)
+    return input_list.map(lambda x: cluster_index(x, centroids))
 
 
-def get_new_centroid(cluster):
+def update_centroids(clusters, centroids):
     """
-    Get closest point to average of point coordinates
+    Update centroids of clusters
     """
-    average_point = coords_average(cluster)
-    min_dist = float("inf")
-    new_centroid = cluster[0]
-    for p in cluster:
-        if p.distance(average_point) < min_dist:
-            min_dist = p.distance(average_point)
-            new_centroid = p
-    return new_centroid
-
-
-def define_centroids(clusters):  # Pas utile car tuple ( num_cluster, point )
-    """
-    Redefine centroids of clusters
-    """
-    centroids = []
-    for cluster in clusters:
-        centroids.append(get_new_centroid(cluster))
-    return centroids
+    new_centroids = SList([])
+    i = 0
+    while i < len(centroids):
+        cluster = clusters.filter(lambda x: x[1] == i)
+        sum_cluster = cluster.map(lambda x: x[0]).reduce(lambda x, y: x + y)
+        average_point = sum_cluster / cluster.length()
+        centroid = clusters.reduce(
+            lambda x, y: x if average_point.distance(x[0]) < average_point.distance(y[0]) else y)[0]
+        new_centroids.append(centroid)
+        i += 1
+    return new_centroids
 
 
 def max_dist(pair_a: Tuple[Point, float], pair_b: Tuple[Point, float]):
@@ -113,12 +94,15 @@ def k_means(input_list: List, init_function: Callable[[List, int], List], n_clus
 
     :return: 2 dimensions list of points
     """
-
     centroids = init_function(input_list, n_cluster)
+
     j = 0
+
     while j < max_iter:
-        clusters = make_clusters(input_list, centroids)  # assign_cluster
-        centroids = define_centroids(clusters)  # update_centroids
+        clusters = assign_clusters(input_list, centroids)
+
+        centroids = update_centroids(clusters, centroids)
+
         j = j + 1
 
     return clusters
diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
index f08a918..0a3b171 100644
--- a/pyske/examples/list/k_means_main.py
+++ b/pyske/examples/list/k_means_main.py
@@ -1,13 +1,13 @@
 """
 Execution of k_means
 """
-import gc
 import argparse
 import matplotlib.pyplot as plt
 
 from pyske.core import Timing
-from pyske.examples.list.k_means import k_means
+from pyske.examples.list.k_means import k_means, k_means_init
 from pyske.examples.list import util
+from pyske.core.support import parallel
 
 PAR = 'parallel'
 SEQ = 'sequential'
@@ -32,13 +32,12 @@
     execute = util.select_execute(choice)
     example = k_means
     execute(lambda: print('Version:\t', choice))
-    gc.disable()
     for iteration in range(1, 1 + num_iter):
         timing.start()
-        result = example(input_list, clusters)
+        result = example(input_list, k_means_init, clusters)
         timing.stop()
-        gc.collect()
         util.print_experiment("", timing.get(), execute, iteration)
-        for i in range(len(result)):
-            plt.scatter([point.x for point in result[i]], [point.y for point in result[i]])
-        plt.show()
+        #if parallel.PID == 0:
+        #    for i in range((len(result))):
+        #        plt.scatter([point.x for point in result[i]], [point.y for point in result[i]])
+        #    plt.show()

From da4a4d6a5a62b4de6e5434b72c9862e89be67d4d Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Thu, 27 May 2021 14:34:55 +0200
Subject: [PATCH 09/34] pylinting, typing

---
 pyske/examples/list/k_means.py | 39 ++++++++++++++++++----------------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index 89cf643..f5da8a0 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -3,33 +3,33 @@
 """
 import random
 from typing import Callable, Tuple
+
 from pyske.core.interface import List
 from pyske.core.list import SList
 from pyske.core.util.point import Point
 
 
-def cluster_index(point, centroids):
+def cluster_index(point: Point, centroids: SList[Point]) -> Tuple[Point, int]:
     """
     Get the centroid index of the closest centroid
     """
     min_dist = float("inf")
     p_centroid = centroids[0]
-    for c in centroids:
-        if point.distance(c) < min_dist:
-            min_dist = point.distance(c)
-            p_centroid = c
+    for centroid in centroids:
+        if point.distance(centroid) < min_dist:
+            min_dist = point.distance(centroid)
+            p_centroid = centroid
     return point, centroids.index(p_centroid)
 
 
-def assign_clusters(input_list, centroids):
+def assign_clusters(input_list: List[Point], centroids: SList[Point]) -> List[Tuple[Point, int]]:
     """
-    Assign to each point to a cluster
+    Assign each point to a cluster
     """
-
     return input_list.map(lambda x: cluster_index(x, centroids))
 
 
-def update_centroids(clusters, centroids):
+def update_centroids(clusters: List[Tuple[Point, int]],  centroids: SList[Point]):
     """
     Update centroids of clusters
     """
@@ -52,11 +52,10 @@ def max_dist(pair_a: Tuple[Point, float], pair_b: Tuple[Point, float]):
     """
     if pair_a[1] > pair_b[1]:
         return pair_a
-    else:
-        return pair_b
+    return pair_b
 
 
-def k_means_init(input_list: List, n_cluster: int):
+def k_means_init(input_list: List[Point], n_cluster: int) -> SList[Point]:
     """
     K-means++ initialisation
 
@@ -66,8 +65,8 @@ def k_means_init(input_list: List, n_cluster: int):
     :return: n_cluster centroids
     """
     centroids = SList([])
-    c1 = input_list.to_seq()[random.randint(0, input_list.length() - 1)]
-    centroids.append(c1)
+    first_centroid = input_list.to_seq()[random.randint(0, input_list.length() - 1)]
+    centroids.append(first_centroid)
 
     for _ in range(n_cluster - 1):
         dist = input_list.map(lambda x: x.distance(centroids[0]))
@@ -82,8 +81,8 @@ def k_means_init(input_list: List, n_cluster: int):
     return centroids
 
 
-def k_means(input_list: List, init_function: Callable[[List, int], List], n_cluster: int,
-            max_iter: int = 10):
+def k_means(input_list: List[Point], init_function: Callable[[List, int], List], n_cluster: int,
+            max_iter: int = 10) -> SList[SList[Point]]:
     """
     K-means algorithm on a list of point
 
@@ -97,7 +96,6 @@ def k_means(input_list: List, init_function: Callable[[List, int], List], n_clus
     centroids = init_function(input_list, n_cluster)
 
     j = 0
-
     while j < max_iter:
         clusters = assign_clusters(input_list, centroids)
 
@@ -105,4 +103,9 @@ def k_means(input_list: List, init_function: Callable[[List, int], List], n_clus
 
         j = j + 1
 
-    return clusters
+    clusters2d = SList([])
+    for i in range(len(centroids)):
+        clusters2d.append(clusters.filter(lambda x, num_cluster=i: x[1] == num_cluster)
+                          .map(lambda x: x[0]).to_seq()
+                          )
+    return clusters2d

From 9f1e0fcead895f6aa292a0e497f56a99506e2306 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Thu, 27 May 2021 14:39:37 +0200
Subject: [PATCH 10/34] FIX: bad initialization of parallel list

---
 pyske/examples/list/util.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index d6517fa..703dfbb 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -102,11 +102,13 @@ def rand_point_list(cls, size, clusters):
     :return: a list of the given class
     """
     from pyske.core.util.point import Point
-    print(clusters)
-    x, y_true = make_blobs(n_samples=size, centers=clusters)
+    from pyske.core import Distribution
+
+    x, _ = make_blobs(n_samples=size, centers=clusters)
     x = x.tolist()
     x = list(map(lambda y: Point(y[0], y[1]), x))
-    return cls.init(lambda i: x[i], size)
+    distr = Distribution().balanced(size)
+    return cls.from_seq(x).distribute(distr)
 
 
 def print_experiment(result, timing, execute, iteration=None):

From a8d0385a486216141b46fb267616ca56fb1d4816 Mon Sep 17 00:00:00 2001
From: Evan MULUMBA <evans.mllb@gmail.com>
Date: Thu, 27 May 2021 20:39:52 +0200
Subject: [PATCH 11/34] Changing sample type from custom type "Point" to Tuple

---
 pyske/examples/list/k_means.py      | 43 ++++++++++++++++++++++-------
 pyske/examples/list/k_means_main.py |  3 +-
 pyske/examples/list/util.py         | 17 ++++++++++++
 3 files changed, 52 insertions(+), 11 deletions(-)

diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index 89cf643..dbddc8e 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -1,24 +1,47 @@
 """
 K-Means
 """
+import operator
 import random
+from math import sqrt
 from typing import Callable, Tuple
 from pyske.core.interface import List
 from pyske.core.list import SList
 from pyske.core.util.point import Point
 
 
-def cluster_index(point, centroids):
+def distance2D(sample_1, sample_2):
+    """
+            return distance between 2d sample.
+
+            Examples::
+
+                >>> from pyske.core.util.point import Point
+                >>> p1 = Point(5,5)
+                >>> p2 = Point(5,7)
+                >>> p1.distance(p2)
+                2.0
+
+            :param other: a point
+            :return: distance from other point
+
+            """
+    dx = sample_1[0] - sample_2[0]
+    dy = sample_1[1] - sample_2[1]
+    return sqrt(dx ** 2 + dy ** 2)
+
+
+def cluster_index(sample, centroids):
     """
     Get the centroid index of the closest centroid
     """
     min_dist = float("inf")
     p_centroid = centroids[0]
     for c in centroids:
-        if point.distance(c) < min_dist:
-            min_dist = point.distance(c)
+        if distance2D(sample, c) < min_dist:
+            min_dist = distance2D(sample, c)
             p_centroid = c
-    return point, centroids.index(p_centroid)
+    return sample, centroids.index(p_centroid)
 
 
 def assign_clusters(input_list, centroids):
@@ -37,16 +60,16 @@ def update_centroids(clusters, centroids):
     i = 0
     while i < len(centroids):
         cluster = clusters.filter(lambda x: x[1] == i)
-        sum_cluster = cluster.map(lambda x: x[0]).reduce(lambda x, y: x + y)
-        average_point = sum_cluster / cluster.length()
+        sum_cluster = cluster.map(lambda x: x[0]).reduce(lambda a, b: tuple(map(operator.add, a, b)))
+        average_point = [x/clusters.length() for x in sum_cluster]
         centroid = clusters.reduce(
-            lambda x, y: x if average_point.distance(x[0]) < average_point.distance(y[0]) else y)[0]
+            lambda x, y: x if distance2D(average_point, x[0]) < distance2D(average_point, y[0]) else y)[0]
         new_centroids.append(centroid)
         i += 1
     return new_centroids
 
 
-def max_dist(pair_a: Tuple[Point, float], pair_b: Tuple[Point, float]):
+def max_dist(pair_a, pair_b):
     """
     Return the tuple with the maximum distance
     """
@@ -70,9 +93,9 @@ def k_means_init(input_list: List, n_cluster: int):
     centroids.append(c1)
 
     for _ in range(n_cluster - 1):
-        dist = input_list.map(lambda x: x.distance(centroids[0]))
+        dist = input_list.map(lambda sample: distance2D(sample, centroids[0]))
         for i in range(1, len(centroids)):
-            temp_dist = input_list.map(lambda x, index=i: x.distance(centroids[index]))
+            temp_dist = input_list.map(lambda sample, index=i: distance2D(sample, centroids[index]))
             dist = dist.map2(lambda x, y: y if y < x else x, temp_dist)
 
         zip_list = input_list.zip(dist)
diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
index 0a3b171..65febba 100644
--- a/pyske/examples/list/k_means_main.py
+++ b/pyske/examples/list/k_means_main.py
@@ -27,7 +27,8 @@
     clusters = args.clusters
 
     pyske_list_class = util.select_pyske_list(choice)
-    input_list = util.rand_point_list(pyske_list_class, size, clusters)
+    #input_list = util.rand_point_list(pyske_list_class, size, clusters)
+    input_list = util.rand_2D_sample_list(pyske_list_class, size, clusters)
     timing = Timing()
     execute = util.select_execute(choice)
     example = k_means
diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index d6517fa..2a7327b 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -109,6 +109,23 @@ def rand_point_list(cls, size, clusters):
     return cls.init(lambda i: x[i], size)
 
 
+def rand_2D_sample_list(cls, size , clusters):
+    """
+    Return a randomly generated list of 2D sample.
+
+    :param cls: the class of the generated list.
+    :param size: a positive number
+        Precondition: size >= 0
+    :param clusters: number of clusters
+    :return: a list of the given class
+    """
+    print(clusters)
+    x, y_true = make_blobs(n_samples=size, centers=clusters)
+    x = x.tolist()
+    x = list(map(lambda y: (y[0], y[1]), x))
+    return cls.init(lambda i: x[i], size)
+
+
 def print_experiment(result, timing, execute, iteration=None):
     """
     Print the result and timing of the experiment.

From c617aade25a15c196258ad3464930c6ebf67b37d Mon Sep 17 00:00:00 2001
From: Evan MULUMBA <evans.mllb@gmail.com>
Date: Fri, 28 May 2021 14:16:39 +0200
Subject: [PATCH 12/34] Add point_interface and change the class Point to
 Point_2D

---
 pyske/core/util/{point.py => point_2D.py} | 35 ++++++++++++-----------
 pyske/core/util/point_Interface.py        | 25 ++++++++++++++++
 pyske/examples/list/k_means.py            | 16 +++++------
 pyske/examples/list/util.py               |  4 +--
 4 files changed, 53 insertions(+), 27 deletions(-)
 rename pyske/core/util/{point.py => point_2D.py} (61%)
 create mode 100644 pyske/core/util/point_Interface.py

diff --git a/pyske/core/util/point.py b/pyske/core/util/point_2D.py
similarity index 61%
rename from pyske/core/util/point.py
rename to pyske/core/util/point_2D.py
index b4c43f0..6a0dfd5 100644
--- a/pyske/core/util/point.py
+++ b/pyske/core/util/point_2D.py
@@ -3,10 +3,11 @@
 """
 
 from math import sqrt
+from pyske.core.util.point_Interface import Point_Interface
 
 
-class Point(object):
-    """A class to represent a point"""
+class Point_2D(Point_Interface):
+    """A class to represent a 2D point"""
 
     def __init__(self, x=0, y=0):
         self.__x = x
@@ -16,7 +17,7 @@ def __repr__(self):
         return "(%s, %s)" % (self.__x, self.__y)
 
     def __eq__(self, other):
-        if isinstance(other, Point):
+        if isinstance(other, Point_2D):
             return self.__x == other.x and self.__y == other.__y
         return False
 
@@ -26,13 +27,13 @@ def __add__(self, other):
 
         Examples::
 
-            >>> p1 = Point(5,5)
-            >>> p2 = Point(5,7)
+            >>> p1 = Point_2D(5,5)
+            >>> p2 = Point_2D(5,7)
             >>> p1 + p2
             (10, 12)
         """
-        if isinstance(other, Point):
-            return Point(self.x + other.x, self.y + other.y)
+        if isinstance(other, Point_2D):
+            return Point_2D(self.x + other.x, self.y + other.y)
 
     def __mul__(self, other):
         """
@@ -40,21 +41,21 @@ def __mul__(self, other):
 
         Examples::
 
-            >>> p1 = Point(5,5)
-            >>> p2 = Point(5,7)
+            >>> p1 = Point_2D(5,5)
+            >>> p2 = Point_2D(5,7)
             >>> p1 * 5
             (25, 25)
             >>> p1 * p2
             (25, 35)
         """
-        if isinstance(other, Point):
-            return Point(self.x * other.x, self.y * other.y)
+        if isinstance(other, Point_2D):
+            return Point_2D(self.x * other.x, self.y * other.y)
         if isinstance(other, int) or isinstance(other, float):
-            return Point(self.x * other, self.y * other)
+            return Point_2D(self.x * other, self.y * other)
 
     def __truediv__(self, other):
         if isinstance(other, int):
-            return Point(self.x / other, self.y / other)
+            return Point_2D(self.x / other, self.y / other)
 
     @property
     def x(self):
@@ -66,15 +67,15 @@ def y(self):
         """Y getter"""
         return self.__y
 
-    def distance(self, other: 'Point'):
+    def distance(self, other: 'Point_2D'):
         """
         Returns the distance from another point.
 
         Examples::
 
-            >>> from pyske.core.util.point import Point
-            >>> p1 = Point(5,5)
-            >>> p2 = Point(5,7)
+            >>> from pyske.core.util.point_2D import Point_2D
+            >>> p1 = Point_2D(5,5)
+            >>> p2 = Point_2D(5,7)
             >>> p1.distance(p2)
             2.0
 
diff --git a/pyske/core/util/point_Interface.py b/pyske/core/util/point_Interface.py
new file mode 100644
index 0000000..aa3be5d
--- /dev/null
+++ b/pyske/core/util/point_Interface.py
@@ -0,0 +1,25 @@
+"""
+A module to represent a point
+"""
+
+
+class Point_Interface:
+    """Point interface to represent point of n dimensions"""
+
+    def __repr__(self):
+        pass
+
+    def __eq__(self, other):
+        pass
+
+    def __add__(self, other):
+        pass
+
+    def __mul__(self, other):
+        pass
+
+    def __truediv__(self, other):
+        pass
+
+    def distance(self, other):
+        pass
diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index f5da8a0..42af105 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -6,10 +6,10 @@
 
 from pyske.core.interface import List
 from pyske.core.list import SList
-from pyske.core.util.point import Point
+from pyske.core.util.point_2D import Point_2D
 
 
-def cluster_index(point: Point, centroids: SList[Point]) -> Tuple[Point, int]:
+def cluster_index(point: Point_2D, centroids: SList[Point_2D]) -> Tuple[Point_2D, int]:
     """
     Get the centroid index of the closest centroid
     """
@@ -22,14 +22,14 @@ def cluster_index(point: Point, centroids: SList[Point]) -> Tuple[Point, int]:
     return point, centroids.index(p_centroid)
 
 
-def assign_clusters(input_list: List[Point], centroids: SList[Point]) -> List[Tuple[Point, int]]:
+def assign_clusters(input_list: List[Point_2D], centroids: SList[Point_2D]) -> List[Tuple[Point_2D, int]]:
     """
     Assign each point to a cluster
     """
     return input_list.map(lambda x: cluster_index(x, centroids))
 
 
-def update_centroids(clusters: List[Tuple[Point, int]],  centroids: SList[Point]):
+def update_centroids(clusters: List[Tuple[Point_2D, int]], centroids: SList[Point_2D]):
     """
     Update centroids of clusters
     """
@@ -46,7 +46,7 @@ def update_centroids(clusters: List[Tuple[Point, int]],  centroids: SList[Point]
     return new_centroids
 
 
-def max_dist(pair_a: Tuple[Point, float], pair_b: Tuple[Point, float]):
+def max_dist(pair_a: Tuple[Point_2D, float], pair_b: Tuple[Point_2D, float]):
     """
     Return the tuple with the maximum distance
     """
@@ -55,7 +55,7 @@ def max_dist(pair_a: Tuple[Point, float], pair_b: Tuple[Point, float]):
     return pair_b
 
 
-def k_means_init(input_list: List[Point], n_cluster: int) -> SList[Point]:
+def k_means_init(input_list: List[Point_2D], n_cluster: int) -> SList[Point_2D]:
     """
     K-means++ initialisation
 
@@ -81,8 +81,8 @@ def k_means_init(input_list: List[Point], n_cluster: int) -> SList[Point]:
     return centroids
 
 
-def k_means(input_list: List[Point], init_function: Callable[[List, int], List], n_cluster: int,
-            max_iter: int = 10) -> SList[SList[Point]]:
+def k_means(input_list: List[Point_2D], init_function: Callable[[List, int], List], n_cluster: int,
+            max_iter: int = 10) -> SList[SList[Point_2D]]:
     """
     K-means algorithm on a list of point
 
diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index 703dfbb..e9e9e3c 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -101,12 +101,12 @@ def rand_point_list(cls, size, clusters):
     :param clusters: number of clusters
     :return: a list of the given class
     """
-    from pyske.core.util.point import Point
+    from pyske.core.util.point_2D import Point_2D
     from pyske.core import Distribution
 
     x, _ = make_blobs(n_samples=size, centers=clusters)
     x = x.tolist()
-    x = list(map(lambda y: Point(y[0], y[1]), x))
+    x = list(map(lambda y: Point_2D(y[0], y[1]), x))
     distr = Distribution().balanced(size)
     return cls.from_seq(x).distribute(distr)
 

From ff39d0b502325d502a00fb9dbd26b2c33bbfdf82 Mon Sep 17 00:00:00 2001
From: Evan MULUMBA <evans.mllb@gmail.com>
Date: Fri, 28 May 2021 14:22:12 +0200
Subject: [PATCH 13/34] FIX: input_list type from Tuple to Point_2D

---
 pyske/examples/list/k_means_main.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
index 65febba..0a3b171 100644
--- a/pyske/examples/list/k_means_main.py
+++ b/pyske/examples/list/k_means_main.py
@@ -27,8 +27,7 @@
     clusters = args.clusters
 
     pyske_list_class = util.select_pyske_list(choice)
-    #input_list = util.rand_point_list(pyske_list_class, size, clusters)
-    input_list = util.rand_2D_sample_list(pyske_list_class, size, clusters)
+    input_list = util.rand_point_list(pyske_list_class, size, clusters)
     timing = Timing()
     execute = util.select_execute(choice)
     example = k_means

From 32f557ec6cb0fa29af29e5e097b620498d8448ee Mon Sep 17 00:00:00 2001
From: Evan MULUMBA <evans.mllb@gmail.com>
Date: Fri, 28 May 2021 14:38:04 +0200
Subject: [PATCH 14/34] Add class point_3D.py

---
 pyske/core/util/point_2D.py         |  4 +-
 pyske/core/util/point_3D.py         | 79 +++++++++++++++++++++++++++++
 pyske/examples/list/k_means_main.py |  2 +-
 pyske/examples/list/util.py         | 26 ++++------
 4 files changed, 92 insertions(+), 19 deletions(-)
 create mode 100644 pyske/core/util/point_3D.py

diff --git a/pyske/core/util/point_2D.py b/pyske/core/util/point_2D.py
index 6a0dfd5..f6f5f7f 100644
--- a/pyske/core/util/point_2D.py
+++ b/pyske/core/util/point_2D.py
@@ -1,5 +1,5 @@
 """
-A module to represent a point
+A module to represent a 2D point
 """
 
 from math import sqrt
@@ -18,7 +18,7 @@ def __repr__(self):
 
     def __eq__(self, other):
         if isinstance(other, Point_2D):
-            return self.__x == other.x and self.__y == other.__y
+            return self.__x == other.__x and self.__y == other.__y
         return False
 
     def __add__(self, other):
diff --git a/pyske/core/util/point_3D.py b/pyske/core/util/point_3D.py
new file mode 100644
index 0000000..9cde20f
--- /dev/null
+++ b/pyske/core/util/point_3D.py
@@ -0,0 +1,79 @@
+"""
+A module to represent a 3D point
+"""
+
+from math import sqrt
+from pyske.core.util.point_Interface import Point_Interface
+
+
+class Point_3D(Point_Interface):
+    """A class to represent a 3D point"""
+
+    def __init__(self, x=0, y=0, z=0):
+        self.__x = x
+        self.__y = y
+        self.__z = z
+
+    def __repr__(self):
+        return "(%s, %s, %s)" % (self.__x, self.__y, self.__z)
+
+    def __eq__(self, other):
+        if isinstance(other, Point_3D):
+            return self.__x == other.__x and self.__y == other.__y and self.__z == other.__z
+        return False
+
+    def __add__(self, other):
+        """
+        Addition of two points
+
+        Examples::
+
+            >>> p1 = Point_3D(5,5,2)
+            >>> p2 = Point_3D(5,7,1)
+            >>> p1 + p2
+            (10, 12, 3)
+        """
+        if isinstance(other, Point_3D):
+            return Point_3D(self.x + other.x, self.y + other.y, self.z + other.z)
+
+    def __mul__(self, other):
+        pass
+
+    def __truediv__(self, other):
+        pass
+
+    @property
+    def x(self):
+        """X getter"""
+        return self.__x
+
+    @property
+    def y(self):
+        """Y getter"""
+        return self.__y
+
+    @property
+    def z(self):
+        """Z getter"""
+        return self.z
+
+    def distance(self, other):
+        """
+        Returns the distance from another 3D point.
+
+        Examples::
+
+            >>> from pyske.core.util.point_2D import Point_2D
+            >>> p1 = Point_3D(5,5,2)
+            >>> p2 = Point_3D(5,7,1)
+            >>> p1.distance(p2)
+            2.24
+
+        :param other: a point
+        :return: distance from other point
+
+        """
+        dx = self.__x - other.x
+        dy = self.__y - other.y
+        dz = self.__x - other.z
+        return sqrt(dx ** 2 + dy ** 2 + dz ** 2)
diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
index 0a3b171..b6b20fa 100644
--- a/pyske/examples/list/k_means_main.py
+++ b/pyske/examples/list/k_means_main.py
@@ -27,7 +27,7 @@
     clusters = args.clusters
 
     pyske_list_class = util.select_pyske_list(choice)
-    input_list = util.rand_point_list(pyske_list_class, size, clusters)
+    input_list = util.rand_point_2D_list(pyske_list_class, size, clusters)
     timing = Timing()
     execute = util.select_execute(choice)
     example = k_means
diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index bdb68a7..648fe9d 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -91,9 +91,9 @@ def rand_list(cls, size):
     return cls.init(lambda _: float(random.randint(-100, 100)), size)
 
 
-def rand_point_list(cls, size, clusters):
+def rand_point_2D_list(cls, size, clusters):
     """
-    Return a randomly generated list of points.
+    Return a randomly generated list of 2D points.
 
     :param cls: the class of the generated list.
     :param size: a positive number
@@ -110,22 +110,16 @@ def rand_point_list(cls, size, clusters):
     distr = Distribution().balanced(size)
     return cls.from_seq(x).distribute(distr)
 
-
-def rand_2D_sample_list(cls, size , clusters):
+def rand_point_3D_list(cls, size, clusters):
     """
-    Return a randomly generated list of 2D sample.
+        Return a randomly generated list of 3D points.
 
-    :param cls: the class of the generated list.
-    :param size: a positive number
-        Precondition: size >= 0
-    :param clusters: number of clusters
-    :return: a list of the given class
-    """
-    print(clusters)
-    x, y_true = make_blobs(n_samples=size, centers=clusters)
-    x = x.tolist()
-    x = list(map(lambda y: (y[0], y[1]), x))
-    return cls.init(lambda i: x[i], size)
+        :param cls: the class of the generated list.
+        :param size: a positive number
+            Precondition: size >= 0
+        :param clusters: number of clusters
+        :return: a list of the given class
+        """
 
 
 def print_experiment(result, timing, execute, iteration=None):

From 7e6966d8534c82393966b83456aa3974822c4c61 Mon Sep 17 00:00:00 2001
From: Evan MULUMBA <evans.mllb@gmail.com>
Date: Fri, 28 May 2021 14:42:05 +0200
Subject: [PATCH 15/34] rand_point_2D_list / rand_point_3D_list

---
 pyske/examples/list/util.py | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index 648fe9d..8124598 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -110,16 +110,25 @@ def rand_point_2D_list(cls, size, clusters):
     distr = Distribution().balanced(size)
     return cls.from_seq(x).distribute(distr)
 
+
 def rand_point_3D_list(cls, size, clusters):
     """
-        Return a randomly generated list of 3D points.
+    Return a randomly generated list of 2D points.
+
+    :param cls: the class of the generated list.
+    :param size: a positive number
+        Precondition: size >= 0
+    :param clusters: number of clusters
+    :return: a list of the given class
+    """
+    from pyske.core.util.point_3D import Point_3D
+    from pyske.core import Distribution
 
-        :param cls: the class of the generated list.
-        :param size: a positive number
-            Precondition: size >= 0
-        :param clusters: number of clusters
-        :return: a list of the given class
-        """
+    x, _ = make_blobs(n_samples=size, centers=clusters)
+    x = x.tolist()
+    x = list(map(lambda y: Point_3D(y[0], y[1], y[2]), x))
+    distr = Distribution().balanced(size)
+    return cls.from_seq(x).distribute(distr)
 
 
 def print_experiment(result, timing, execute, iteration=None):

From 9b147eba58a6549ee59202f2ad5060918133674c Mon Sep 17 00:00:00 2001
From: Evan MULUMBA <evans.mllb@gmail.com>
Date: Fri, 28 May 2021 15:30:48 +0200
Subject: [PATCH 16/34] Point_3D update

---
 pyske/core/util/point_3D.py         | 22 +++++++++++++++++++---
 pyske/examples/list/k_means_main.py |  7 +++++--
 pyske/examples/list/util.py         |  2 +-
 3 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/pyske/core/util/point_3D.py b/pyske/core/util/point_3D.py
index 9cde20f..c6e1aa5 100644
--- a/pyske/core/util/point_3D.py
+++ b/pyske/core/util/point_3D.py
@@ -37,10 +37,26 @@ def __add__(self, other):
             return Point_3D(self.x + other.x, self.y + other.y, self.z + other.z)
 
     def __mul__(self, other):
-        pass
+        """
+        Multiplication by a point or a scalar
+
+        Examples::
+
+            >>> p1 = Point_3D(5,5,2)
+            >>> p2 = Point_3D(5,7,1)
+            >>> p1 * 5
+            (25, 25, 10)
+            >>> p1 * p2
+            (25, 35, 2)
+        """
+        if isinstance(other, Point_3D):
+            return Point_3D(self.x * other.x, self.y * other.y, self.z * other.z)
+        if isinstance(other, int) or isinstance(other, float):
+            return Point_3D(self.x * other, self.y * other, self.z * other.z)
 
     def __truediv__(self, other):
-        pass
+        if isinstance(other, int):
+            return Point_3D(self.x / other, self.y / other, self.z / other)
 
     @property
     def x(self):
@@ -55,7 +71,7 @@ def y(self):
     @property
     def z(self):
         """Z getter"""
-        return self.z
+        return self.__z
 
     def distance(self, other):
         """
diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
index b6b20fa..29eb81d 100644
--- a/pyske/examples/list/k_means_main.py
+++ b/pyske/examples/list/k_means_main.py
@@ -27,7 +27,10 @@
     clusters = args.clusters
 
     pyske_list_class = util.select_pyske_list(choice)
-    input_list = util.rand_point_2D_list(pyske_list_class, size, clusters)
+
+    # input_list = util.rand_point_2D_list(pyske_list_class, size, clusters)
+    input_list = util.rand_point_3D_list(pyske_list_class, size, clusters)
+
     timing = Timing()
     execute = util.select_execute(choice)
     example = k_means
@@ -37,7 +40,7 @@
         result = example(input_list, k_means_init, clusters)
         timing.stop()
         util.print_experiment("", timing.get(), execute, iteration)
-        #if parallel.PID == 0:
+        # if parallel.PID == 0:
         #    for i in range((len(result))):
         #        plt.scatter([point.x for point in result[i]], [point.y for point in result[i]])
         #    plt.show()
diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index 8124598..5279386 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -124,7 +124,7 @@ def rand_point_3D_list(cls, size, clusters):
     from pyske.core.util.point_3D import Point_3D
     from pyske.core import Distribution
 
-    x, _ = make_blobs(n_samples=size, centers=clusters)
+    x, _ = make_blobs(n_samples=size, centers=clusters, n_features=3)
     x = x.tolist()
     x = list(map(lambda y: Point_3D(y[0], y[1], y[2]), x))
     distr = Distribution().balanced(size)

From 5b89f49ec00fb655c9dd5794c5cd75a731b7644f Mon Sep 17 00:00:00 2001
From: Evan MULUMBA <evans.mllb@gmail.com>
Date: Fri, 28 May 2021 15:43:55 +0200
Subject: [PATCH 17/34] Typing Point_2D -> Point_Interface

---
 pyske/examples/list/k_means.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index 42af105..52b4e2b 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -6,10 +6,10 @@
 
 from pyske.core.interface import List
 from pyske.core.list import SList
-from pyske.core.util.point_2D import Point_2D
+from pyske.core.util.point_Interface import Point_Interface
 
 
-def cluster_index(point: Point_2D, centroids: SList[Point_2D]) -> Tuple[Point_2D, int]:
+def cluster_index(point: Point_Interface, centroids: SList[Point_Interface]) -> Tuple[Point_Interface, int]:
     """
     Get the centroid index of the closest centroid
     """
@@ -22,14 +22,14 @@ def cluster_index(point: Point_2D, centroids: SList[Point_2D]) -> Tuple[Point_2D
     return point, centroids.index(p_centroid)
 
 
-def assign_clusters(input_list: List[Point_2D], centroids: SList[Point_2D]) -> List[Tuple[Point_2D, int]]:
+def assign_clusters(input_list: List[Point_Interface], centroids: SList[Point_Interface]) -> List[Tuple[Point_Interface, int]]:
     """
     Assign each point to a cluster
     """
     return input_list.map(lambda x: cluster_index(x, centroids))
 
 
-def update_centroids(clusters: List[Tuple[Point_2D, int]], centroids: SList[Point_2D]):
+def update_centroids(clusters: List[Tuple[Point_Interface, int]], centroids: SList[Point_Interface]):
     """
     Update centroids of clusters
     """
@@ -46,7 +46,7 @@ def update_centroids(clusters: List[Tuple[Point_2D, int]], centroids: SList[Poin
     return new_centroids
 
 
-def max_dist(pair_a: Tuple[Point_2D, float], pair_b: Tuple[Point_2D, float]):
+def max_dist(pair_a: Tuple[Point_Interface, float], pair_b: Tuple[Point_Interface, float]):
     """
     Return the tuple with the maximum distance
     """
@@ -55,7 +55,7 @@ def max_dist(pair_a: Tuple[Point_2D, float], pair_b: Tuple[Point_2D, float]):
     return pair_b
 
 
-def k_means_init(input_list: List[Point_2D], n_cluster: int) -> SList[Point_2D]:
+def k_means_init(input_list: List[Point_Interface], n_cluster: int) -> SList[Point_Interface]:
     """
     K-means++ initialisation
 
@@ -81,8 +81,8 @@ def k_means_init(input_list: List[Point_2D], n_cluster: int) -> SList[Point_2D]:
     return centroids
 
 
-def k_means(input_list: List[Point_2D], init_function: Callable[[List, int], List], n_cluster: int,
-            max_iter: int = 10) -> SList[SList[Point_2D]]:
+def k_means(input_list: List[Point_Interface], init_function: Callable[[List, int], List], n_cluster: int,
+            max_iter: int = 10) -> SList[SList[Point_Interface]]:
     """
     K-means algorithm on a list of point
 

From eb792d573bd6ccd067c78620e2236a0972c83fc7 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Fri, 28 May 2021 17:11:19 +0200
Subject: [PATCH 18/34] optimization update_centroids

---
 pyske/examples/list/k_means.py | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index f5da8a0..cc28b11 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -29,20 +29,18 @@ def assign_clusters(input_list: List[Point], centroids: SList[Point]) -> List[Tu
     return input_list.map(lambda x: cluster_index(x, centroids))
 
 
-def update_centroids(clusters: List[Tuple[Point, int]],  centroids: SList[Point]):
+def update_centroids(clusters: List[Tuple[Point, int]], centroids: SList[Point]):
     """
     Update centroids of clusters
     """
-    new_centroids = SList([])
-    i = 0
-    while i < len(centroids):
-        cluster = clusters.filter(lambda x: x[1] == i)
-        sum_cluster = cluster.map(lambda x: x[0]).reduce(lambda x, y: x + y)
-        average_point = sum_cluster / cluster.length()
-        centroid = clusters.reduce(
-            lambda x, y: x if average_point.distance(x[0]) < average_point.distance(y[0]) else y)[0]
-        new_centroids.append(centroid)
-        i += 1
+
+    new_centroids = SList.init(lambda _: (Point(), _, _), len(centroids))
+
+    new_centroids = new_centroids.mapi(lambda i, x: clusters.map_reduce(lambda w: (w[0], w[1], 1),
+        lambda y, z: (y[0] + z[0], y[1], y[2] + z[2]) if y[1] == i and z[1] == i else (
+            z if y[1] != i else y)))
+    new_centroids = new_centroids.map(lambda x: x[0] / x[2])
+
     return new_centroids
 
 

From 467b33bf8134354324827be18c3c9a96d707935c Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Fri, 28 May 2021 17:37:05 +0200
Subject: [PATCH 19/34] refactoring because of new point implementation

---
 pyske/examples/list/k_means.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index 7a47e0b..6d1ac4e 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -34,7 +34,7 @@ def update_centroids(clusters: List[Tuple[Point_Interface, int]], centroids: SLi
     Update centroids of clusters
     """
 
-    new_centroids = SList.init(lambda _: (Point(), _, _), len(centroids))
+    new_centroids = SList.init(lambda _: (_, _, _), len(centroids))
 
     new_centroids = new_centroids.mapi(lambda i, x: clusters.map_reduce(lambda w: (w[0], w[1], 1),
         lambda y, z: (y[0] + z[0], y[1], y[2] + z[2]) if y[1] == i and z[1] == i else (

From 8c2cf82d97f2f2462b60b3b6502326dd72958f25 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Mon, 31 May 2021 11:16:25 +0200
Subject: [PATCH 20/34] use parallelism for the random choice of the first centroid

---
 pyske/examples/list/k_means.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index 6d1ac4e..1b224f8 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -7,6 +7,7 @@
 from pyske.core.interface import List
 from pyske.core.list import SList
 from pyske.core.util.point_Interface import Point_Interface
+from pyske.core.util.par import procs
 
 
 def cluster_index(point: Point_Interface, centroids: SList[Point_Interface]) -> Tuple[Point_Interface, int]:
@@ -63,7 +64,9 @@ def k_means_init(input_list: List[Point_Interface], n_cluster: int) -> SList[Poi
     :return: n_cluster centroids
     """
     centroids = SList([])
-    first_centroid = input_list.to_seq()[random.randint(0, input_list.length() - 1)]
+    first_centroid = input_list.get_partition()\
+                               .map(lambda l: l[random.randint(0, l.length() - 1)])\
+                               .to_seq()[random.randint(0, list(procs())[len(list(procs())) - 1])]
     centroids.append(first_centroid)
 
     for _ in range(n_cluster - 1):

From 27a5039d749c16598b4fb8aa8dbbfb5191cc1ddd Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Tue, 1 Jun 2021 17:16:47 +0200
Subject: [PATCH 21/34] add point dimensions in k-means-main's options

---
 pyske/examples/list/k_means_main.py |  6 ++--
 pyske/examples/list/util.py         | 45 ++++++++++++++---------------
 2 files changed, 24 insertions(+), 27 deletions(-)

diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
index 29eb81d..163f761 100644
--- a/pyske/examples/list/k_means_main.py
+++ b/pyske/examples/list/k_means_main.py
@@ -19,17 +19,17 @@
     parser.add_argument("--iter", help="number of iterations", type=int, default=30)
     parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ)
     parser.add_argument("--clusters", help="number of clusters", type=int, default=3)
+    parser.add_argument("--dimensions", help="point dimensions", type=int, default=2)
 
     args = parser.parse_args()
     size = args.size
     num_iter = args.iter
     choice = args.data
     clusters = args.clusters
+    dimensions = args.dimensions
 
     pyske_list_class = util.select_pyske_list(choice)
-
-    # input_list = util.rand_point_2D_list(pyske_list_class, size, clusters)
-    input_list = util.rand_point_3D_list(pyske_list_class, size, clusters)
+    input_list = util.rand_point_list(pyske_list_class, size, clusters, dimensions)
 
     timing = Timing()
     execute = util.select_execute(choice)
diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index 5279386..ef79d51 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -3,6 +3,7 @@
 """
 
 from sklearn.datasets import make_blobs
+from pyske.core import Distribution
 
 PAR = 'parallel'
 SEQ = 'sequential'
@@ -90,43 +91,39 @@ def rand_list(cls, size):
     import random
     return cls.init(lambda _: float(random.randint(-100, 100)), size)
 
-
-def rand_point_2D_list(cls, size, clusters):
+def select_point_dimensions(dimensions):
     """
-    Return a randomly generated list of 2D points.
+    Return the point class for the given number of dimensions.
 
-    :param cls: the class of the generated list.
-    :param size: a positive number
-        Precondition: size >= 0
-    :param clusters: number of clusters
-    :return: a list of the given class
+    :param dimensions: point dimensions
+            Precondition: dimensions >= 2
+    :return: a Point
     """
-    from pyske.core.util.point_2D import Point_2D
-    from pyske.core import Distribution
-
-    x, _ = make_blobs(n_samples=size, centers=clusters)
-    x = x.tolist()
-    x = list(map(lambda y: Point_2D(y[0], y[1]), x))
-    distr = Distribution().balanced(size)
-    return cls.from_seq(x).distribute(distr)
-
+    # pylint: disable=import-outside-toplevel
+    if dimensions == 2:
+        from pyske.core.util.point_2D import Point_2D as PointClass
+    elif dimensions == 3:
+        from pyske.core.util.point_3D import Point_3D as PointClass
+    else:
+        from pyske.core.util.point_2D import Point_2D as PointClass
+    return PointClass
 
-def rand_point_3D_list(cls, size, clusters):
+def rand_point_list(cls, size, clusters, dimensions):
     """
-    Return a randomly generated list of 2D points.
+    Return a randomly generated list of points.
 
     :param cls: the class of the generated list.
     :param size: a positive number
         Precondition: size >= 0
     :param clusters: number of clusters
+    :param dimensions: point dimensions
+            Precondition: dimensions >= 2
     :return: a list of the given class
     """
-    from pyske.core.util.point_3D import Point_3D
-    from pyske.core import Distribution
-
-    x, _ = make_blobs(n_samples=size, centers=clusters, n_features=3)
+    x, _ = make_blobs(n_samples=size, centers=clusters, n_features=dimensions)
     x = x.tolist()
-    x = list(map(lambda y: Point_3D(y[0], y[1], y[2]), x))
+    pointclass = select_point_dimensions(dimensions)
+    x = list(map(lambda y: pointclass(*y), x))
     distr = Distribution().balanced(size)
     return cls.from_seq(x).distribute(distr)
 

From 84f2daaa554b88110374334e5c98ce5c9421bd15 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Wed, 2 Jun 2021 11:42:41 +0200
Subject: [PATCH 22/34] interface convention

---
 pyske/core/util/point_Interface.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyske/core/util/point_Interface.py b/pyske/core/util/point_Interface.py
index aa3be5d..6196c47 100644
--- a/pyske/core/util/point_Interface.py
+++ b/pyske/core/util/point_Interface.py
@@ -1,9 +1,9 @@
 """
 A module to represent a point
 """
+from abc import ABC
 
-
-class Point_Interface:
+class Point_Interface(ABC):
     """Point interface to represent point of n dimensions"""
 
     def __repr__(self):

From 82b7a7decbd6dcfffa6ca5a6709940948c173a08 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Fri, 4 Jun 2021 14:55:56 +0200
Subject: [PATCH 23/34] parallel optimization update_centroids

---
 pyske/examples/list/k_means.py | 66 +++++++++++++++++++---------------
 1 file changed, 38 insertions(+), 28 deletions(-)

diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index 1b224f8..f238ffa 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -10,7 +10,8 @@
 from pyske.core.util.par import procs
 
 
-def cluster_index(point: Point_Interface, centroids: SList[Point_Interface]) -> Tuple[Point_Interface, int]:
+def cluster_index(point: Point_Interface, centroids: SList[Point_Interface]) -> \
+        Tuple[Point_Interface, int]:
     """
     Get the centroid index of the closest centroid
     """
@@ -23,24 +24,37 @@ def cluster_index(point: Point_Interface, centroids: SList[Point_Interface]) ->
     return point, centroids.index(p_centroid)
 
 
-def assign_clusters(input_list: List[Point_Interface], centroids: SList[Point_Interface]) -> List[Tuple[Point_Interface, int]]:
+def assign_clusters(input_list: List[Point_Interface], centroids: SList[Point_Interface]) -> \
+        List[Tuple[Point_Interface, int]]:
     """
     Assign each point to a cluster
     """
     return input_list.map(lambda x: cluster_index(x, centroids))
 
 
-def update_centroids(clusters: List[Tuple[Point_Interface, int]], centroids: SList[Point_Interface]):
+def update_centroids(clusters: List[Tuple[Point_Interface, int]],
+                     centroids: SList[Point_Interface]):
     """
     Update centroids of clusters
     """
 
-    new_centroids = SList.init(lambda _: (_, _, _), len(centroids))
-
-    new_centroids = new_centroids.mapi(lambda i, x: clusters.map_reduce(lambda w: (w[0], w[1], 1),
-        lambda y, z: (y[0] + z[0], y[1], y[2] + z[2]) if y[1] == i and z[1] == i else (
-            z if y[1] != i else y)))
-    new_centroids = new_centroids.map(lambda x: x[0] / x[2])
+    def centroids_list_update(list_to_update, item):
+        if isinstance(item, SList):
+            list_to_update = list_to_update.map2(lambda a_pair, b_pair: (a_pair[0] + b_pair[0],
+                                                                         a_pair[1] + b_pair[1]),
+                                                 item)
+        else:
+            index = item[1]
+            point = item[0]
+            list_to_update[index] = (list_to_update[index][0] + point,
+                                     list_to_update[index][1] + 1)
+        return list_to_update
+
+    point_class = type(centroids[0])
+    neutral_list = SList.init(lambda _: (point_class(), 0), len(centroids))
+    new_centroids = clusters.reduce(lambda a_item, b_item:
+                                    centroids_list_update(a_item, b_item), neutral_list)
+    new_centroids = new_centroids.map(lambda x: x[0] / x[1])
 
     return new_centroids
 
@@ -58,15 +72,15 @@ def k_means_init(input_list: List[Point_Interface], n_cluster: int) -> SList[Poi
     """
     K-means++ initialisation
 
-    :param input_list: a list of point
-    :param n_cluster: number of cluster
+    :param input_list: a list of points
+    :param n_cluster: number of clusters
 
-    :return: n_cluster centroids
+    :return: list of centroids
     """
     centroids = SList([])
-    first_centroid = input_list.get_partition()\
-                               .map(lambda l: l[random.randint(0, l.length() - 1)])\
-                               .to_seq()[random.randint(0, list(procs())[len(list(procs())) - 1])]
+    first_centroid = input_list.get_partition() \
+        .map(lambda l: l[random.randint(0, l.length() - 1)]) \
+        .to_seq()[random.randint(0, list(procs())[len(list(procs())) - 1])]
     centroids.append(first_centroid)
 
     for _ in range(n_cluster - 1):
@@ -82,17 +96,18 @@ def k_means_init(input_list: List[Point_Interface], n_cluster: int) -> SList[Poi
     return centroids
 
 
-def k_means(input_list: List[Point_Interface], init_function: Callable[[List, int], List], n_cluster: int,
-            max_iter: int = 10) -> SList[SList[Point_Interface]]:
+def k_means(input_list: List[Point_Interface], init_function: Callable[[List, int], List],
+            n_cluster: int,
+            max_iter: int = 10) -> List[Tuple[Point_Interface, int]]:
     """
-    K-means algorithm on a list of point
+    K-means algorithm on a list of points
 
-    :param input_list: a list of point
-    :param n_cluster: number of cluster
-    :param max_iter: number of iteration
+    :param input_list: a list of points
+    :param n_cluster: number of clusters
+    :param max_iter: number of iterations
     :param init_function: a function that initialize centroids
 
-    :return: 2 dimensions list of points
+    :return: a list of tuples with the point and his cluster index
     """
     centroids = init_function(input_list, n_cluster)
 
@@ -104,9 +119,4 @@ def k_means(input_list: List[Point_Interface], init_function: Callable[[List, in
 
         j = j + 1
 
-    clusters2d = SList([])
-    for i in range(len(centroids)):
-        clusters2d.append(clusters.filter(lambda x, num_cluster=i: x[1] == num_cluster)
-                          .map(lambda x: x[0]).to_seq()
-                          )
-    return clusters2d
+    return clusters

From 08a4dd6bde88d90e1329ae6ce4387f5fb35c9bdd Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Fri, 4 Jun 2021 15:11:08 +0200
Subject: [PATCH 24/34] adding option to show clusters graph of 2D points

---
 pyske/examples/list/k_means_main.py | 11 +++++------
 pyske/examples/list/util.py         | 21 +++++++++++++++++----
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
index 163f761..41fddc4 100644
--- a/pyske/examples/list/k_means_main.py
+++ b/pyske/examples/list/k_means_main.py
@@ -2,12 +2,10 @@
 Execution of k_means
 """
 import argparse
-import matplotlib.pyplot as plt
 
 from pyske.core import Timing
 from pyske.examples.list.k_means import k_means, k_means_init
 from pyske.examples.list import util
-from pyske.core.support import parallel
 
 PAR = 'parallel'
 SEQ = 'sequential'
@@ -20,6 +18,8 @@
     parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ)
     parser.add_argument("--clusters", help="number of clusters", type=int, default=3)
     parser.add_argument("--dimensions", help="point dimensions", type=int, default=2)
+    parser.add_argument("--show-clusters", help="display the clusters graph of 2D points",
+                        action="store_true")
 
     args = parser.parse_args()
     size = args.size
@@ -27,6 +27,7 @@
     choice = args.data
     clusters = args.clusters
     dimensions = args.dimensions
+    show_clusters = args.show_clusters
 
     pyske_list_class = util.select_pyske_list(choice)
     input_list = util.rand_point_list(pyske_list_class, size, clusters, dimensions)
@@ -40,7 +41,5 @@
         result = example(input_list, k_means_init, clusters)
         timing.stop()
         util.print_experiment("", timing.get(), execute, iteration)
-        # if parallel.PID == 0:
-        #    for i in range((len(result))):
-        #        plt.scatter([point.x for point in result[i]], [point.y for point in result[i]])
-        #    plt.show()
+        if show_clusters and dimensions == 2:
+            util.print_2D_result(result.to_seq())
diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index ef79d51..e560a16 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -1,9 +1,13 @@
 """
 Utility functions for PySke examples
 """
+from typing import Tuple
+import matplotlib.pyplot as plt
 
 from sklearn.datasets import make_blobs
-from pyske.core import Distribution
+from pyske.core import Distribution, SList
+from pyske.core.support import parallel
+from pyske.core.util.point_2D import Point_2D
 
 PAR = 'parallel'
 SEQ = 'sequential'
@@ -100,9 +104,7 @@ def select_point_dimensions(dimensions):
     :return: a Point
     """
     # pylint: disable=import-outside-toplevel
-    if dimensions == 2:
-        from pyske.core.util.point_2D import Point_2D as PointClass
-    elif dimensions == 3:
+    if dimensions == 3:
         from pyske.core.util.point_3D import Point_3D as PointClass
     else:
         from pyske.core.util.point_2D import Point_2D as PointClass
@@ -127,6 +129,17 @@ def rand_point_list(cls, size, clusters, dimensions):
     distr = Distribution().balanced(size)
     return cls.from_seq(x).distribute(distr)
 
+def print_2D_result(clusters_list: SList[Tuple[Point_2D, int]]):
+    """
+    Print experiment of 2 dimension points k-means clustering
+    """
+    if parallel.PID == 0:
+        x = clusters_list.map(lambda pair: pair[0].x)
+        y = clusters_list.map(lambda pair: pair[0].y)
+        colors = clusters_list.map(lambda pair: pair[1])
+        plt.scatter(x, y, c=colors)
+        plt.show()
+
 
 def print_experiment(result, timing, execute, iteration=None):
     """

From f6f46cd4b3e85fa943a627d868330eef03f5eb90 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Mon, 7 Jun 2021 16:05:27 +0200
Subject: [PATCH 25/34] k-means clustering documentation

---
 docs/api.rst                        | 60 ++++++++++++++++++++++++++++-
 docs/conf.py                        | 10 +++--
 pyske/examples/list/k_means.py      |  4 +-
 pyske/examples/list/k_means_main.py | 11 +-----
 pyske/examples/list/util.py         | 17 +++++++-
 5 files changed, 84 insertions(+), 18 deletions(-)

diff --git a/docs/api.rst b/docs/api.rst
index 2a9f345..19a69e2 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -1,2 +1,60 @@
 PySke API
-=========
\ No newline at end of file
+=========
+
+Pyske API offer applications implemented with list and tree skeletons.
+The user can use the sequential or parallel version.
+The parallel version allows a faster execution time when its launched on several processors or computers.
+
+Dot Product
+-----------
+
+Discrete Fast Fourier Transform
+-------------------------------
+
+K-means Clustering
+------------------
+
+K-means clustering is an unsupervised algorithm that aims to partition a group of points into k clusters.
+
+K-means function
+^^^^^^^^^^^^^^^^
+
+.. py:module:: pyske.examples.list.k_means
+
+.. autofunction:: k_means
+
+Here the implementation of the 2 dimensions point class.
+
+.. autoclass:: pyske.core.util.point_2D.Point_2D
+    :members:
+    :special-members:
+    :show-inheritance:
+    :private-members:
+    :member-order: bysource
+
+Initialization functions
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autofunction:: k_means_init
+
+Running Example
+^^^^^^^^^^^^^^^^^^^^
+
+.. argparse::
+    :module: pyske.examples.list.util
+    :func: k_means_parser
+    :prog: python3 k_means_main.py
+
+
+Maximum Prefix Sum
+------------------
+
+Maximum Segment Sum
+-------------------
+
+Parallel Regular Sampling Sort
+------------------------------
+
+Variance Example
+----------------
+
diff --git a/docs/conf.py b/docs/conf.py
index 8e83820..fe8f596 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -10,9 +10,9 @@
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
-# import os
-# import sys
-# sys.path.insert(0, os.path.abspath('.'))
+import os
+import sys
+sys.path.insert(0, os.path.abspath('../.'))
 
 
 # -- Project information -----------------------------------------------------
@@ -31,6 +31,8 @@
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
+    "sphinx.ext.autodoc",
+    "sphinxarg.ext"
 ]
 
 # Add any paths that contain templates here, relative to this directory.
@@ -52,4 +54,4 @@
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
\ No newline at end of file
+html_static_path = ['_static']
diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index f238ffa..a1eab32 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -70,7 +70,7 @@ def max_dist(pair_a: Tuple[Point_Interface, float], pair_b: Tuple[Point_Interfac
 
 def k_means_init(input_list: List[Point_Interface], n_cluster: int) -> SList[Point_Interface]:
     """
-    K-means++ initialisation
+    K-means++ initialization
 
     :param input_list: a list of points
     :param n_cluster: number of clusters
@@ -103,9 +103,9 @@ def k_means(input_list: List[Point_Interface], init_function: Callable[[List, in
     K-means algorithm on a list of points
 
     :param input_list: a list of points
+    :param init_function: a function that initializes the centroids
     :param n_cluster: number of clusters
     :param max_iter: number of iterations
-    :param init_function: a function that initialize centroids
 
     :return: a list of tuples with the point and his cluster index
     """
diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
index 41fddc4..3687f0c 100644
--- a/pyske/examples/list/k_means_main.py
+++ b/pyske/examples/list/k_means_main.py
@@ -1,7 +1,6 @@
 """
 Execution of k_means
 """
-import argparse
 
 from pyske.core import Timing
 from pyske.examples.list.k_means import k_means, k_means_init
@@ -10,16 +9,10 @@
 PAR = 'parallel'
 SEQ = 'sequential'
 
+
 if __name__ == '__main__':
 
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000)
-    parser.add_argument("--iter", help="number of iterations", type=int, default=30)
-    parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ)
-    parser.add_argument("--clusters", help="number of clusters", type=int, default=3)
-    parser.add_argument("--dimensions", help="point dimensions", type=int, default=2)
-    parser.add_argument("--show-clusters", help="display the clusters graph of 2D points",
-                        action="store_true")
+    parser = util. k_means_parser()
 
     args = parser.parse_args()
     size = args.size
diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index e560a16..2e3da87 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -3,6 +3,7 @@
 """
 from typing import Tuple
 import matplotlib.pyplot as plt
+import argparse
 
 from sklearn.datasets import make_blobs
 from pyske.core import Distribution, SList
@@ -26,8 +27,6 @@ def standard_parse_command_line(size_arg=True, iter_arg=True, data_arg=True):
     :param data_arg: (default True) flag to select argument --data
     :return:  (size, iter, ['parallel' | 'sequential'])
     """
-    # pylint: disable=import-outside-toplevel
-    import argparse
     parser = argparse.ArgumentParser()
     if size_arg:
         parser.add_argument("--size", help="size of the list to generate",
@@ -50,6 +49,20 @@ def standard_parse_command_line(size_arg=True, iter_arg=True, data_arg=True):
     return size, num_iter, data_type
 
 
+def k_means_parser():
+    """
+    Parse command line for k-means example.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000)
+    parser.add_argument("--iter", help="number of iterations", type=int, default=30)
+    parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ)
+    parser.add_argument("--clusters", help="number of clusters", type=int, default=3)
+    parser.add_argument("--dimensions", help="point dimensions", type=int, default=2)
+    parser.add_argument("--show-clusters", help="display the clusters graph of 2D points",
+                        action="store_true")
+    return parser
+
 def select_pyske_list(choice):
     """
     Return a PySke list class.

From 42c750632199f47d51a286655363ba8ce929a3ca Mon Sep 17 00:00:00 2001
From: Evan MULUMBA <evans.mllb@gmail.com>
Date: Tue, 8 Jun 2021 12:18:44 +0200
Subject: [PATCH 26/34] 3d representation for Point_3D clusters

---
 pyske/examples/list/k_means_main.py |  4 ++++
 pyske/examples/list/util.py         | 22 ++++++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
index 3687f0c..93c885a 100644
--- a/pyske/examples/list/k_means_main.py
+++ b/pyske/examples/list/k_means_main.py
@@ -36,3 +36,7 @@
         util.print_experiment("", timing.get(), execute, iteration)
         if show_clusters and dimensions == 2:
             util.print_2D_result(result.to_seq())
+        elif show_clusters and dimensions == 3:
+            util.print_3D_result(result.to_seq())
+
+
diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index 2e3da87..721f855 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -3,12 +3,14 @@
 """
 from typing import Tuple
 import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import axes3d
 import argparse
 
 from sklearn.datasets import make_blobs
 from pyske.core import Distribution, SList
 from pyske.core.support import parallel
 from pyske.core.util.point_2D import Point_2D
+from pyske.core.util.point_3D import Point_3D
 
 PAR = 'parallel'
 SEQ = 'sequential'
@@ -153,6 +155,26 @@ def print_2D_result(clusters_list: SList[Tuple[Point_2D, int]]):
         plt.scatter(x, y, c=colors)
         plt.show()
 
+def print_3D_result(clusters_list: SList[Tuple[Point_3D, int]]):
+    """
+        Print experiment of 3 dimension points k-means clustering
+        """
+    if parallel.PID == 0:
+        x = clusters_list.map(lambda pair: pair[0].x)
+        y = clusters_list.map(lambda pair: pair[0].y)
+        z = clusters_list.map(lambda pair: pair[0].z)
+        colors = clusters_list.map(lambda pair: pair[1])
+
+        # Tracé du résultat en 3D
+        fig = plt.figure()
+        ax = fig.gca(projection='3d')  # Affichage en 3D
+        ax.scatter(x, y, z, label='Courbe', marker='d')  # Tracé des points 3D
+        plt.title("Points 3D")
+        ax.set_xlabel('X')
+        ax.set_ylabel('Y')
+        ax.set_zlabel('Z')
+        plt.tight_layout()
+        plt.show()
 
 def print_experiment(result, timing, execute, iteration=None):
     """

From 810c54b30394e297e2c62ef2a5987b086bbd2d8f Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Tue, 8 Jun 2021 12:57:39 +0200
Subject: [PATCH 27/34] error subtraction in distance

---
 pyske/core/util/point_3D.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyske/core/util/point_3D.py b/pyske/core/util/point_3D.py
index c6e1aa5..678710d 100644
--- a/pyske/core/util/point_3D.py
+++ b/pyske/core/util/point_3D.py
@@ -91,5 +91,5 @@ def distance(self, other):
         """
         dx = self.__x - other.x
         dy = self.__y - other.y
-        dz = self.__x - other.z
+        dz = self.__z - other.z
         return sqrt(dx ** 2 + dy ** 2 + dz ** 2)

From 5cba1e966b00bf01344537f1fe4c9e5fc82babd6 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Tue, 8 Jun 2021 14:31:27 +0200
Subject: [PATCH 28/34] adding colors 3D graph result, fix warning matplotlib

---
 pyske/examples/list/k_means_main.py | 12 ++++++------
 pyske/examples/list/util.py         | 13 +++++++------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
index 93c885a..c8b7782 100644
--- a/pyske/examples/list/k_means_main.py
+++ b/pyske/examples/list/k_means_main.py
@@ -12,7 +12,7 @@
 
 if __name__ == '__main__':
 
-    parser = util. k_means_parser()
+    parser = util.k_means_parser()
 
     args = parser.parse_args()
     size = args.size
@@ -34,9 +34,9 @@
         result = example(input_list, k_means_init, clusters)
         timing.stop()
         util.print_experiment("", timing.get(), execute, iteration)
-        if show_clusters and dimensions == 2:
-            util.print_2D_result(result.to_seq())
-        elif show_clusters and dimensions == 3:
-            util.print_3D_result(result.to_seq())
-
+        if show_clusters:
+            if dimensions == 2:
+                util.print_2D_result(result.to_seq())
+            if dimensions == 3:
+                util.print_3D_result(result.to_seq())
 
diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index 721f855..1ddca0f 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -1,10 +1,11 @@
 """
 Utility functions for PySke examples
 """
+
 from typing import Tuple
-import matplotlib.pyplot as plt
-from mpl_toolkits.mplot3d import axes3d
+
 import argparse
+import matplotlib.pyplot as plt
 
 from sklearn.datasets import make_blobs
 from pyske.core import Distribution, SList
@@ -157,8 +158,8 @@ def print_2D_result(clusters_list: SList[Tuple[Point_2D, int]]):
 
 def print_3D_result(clusters_list: SList[Tuple[Point_3D, int]]):
     """
-        Print experiment of 3 dimension points k-means clustering
-        """
+    Print experiment of 3 dimension points k-means clustering
+    """
     if parallel.PID == 0:
         x = clusters_list.map(lambda pair: pair[0].x)
         y = clusters_list.map(lambda pair: pair[0].y)
@@ -167,8 +168,8 @@ def print_3D_result(clusters_list: SList[Tuple[Point_3D, int]]):
 
         # Tracé du résultat en 3D
         fig = plt.figure()
-        ax = fig.gca(projection='3d')  # Affichage en 3D
-        ax.scatter(x, y, z, label='Courbe', marker='d')  # Tracé des points 3D
+        ax = fig.add_subplot(projection='3d')  # Affichage en 3D
+        ax.scatter(x, y, z, label='Courbe', marker='d', c=colors)  # Tracé des points 3D
         plt.title("Points 3D")
         ax.set_xlabel('X')
         ax.set_ylabel('Y')

From eb16d4c72ed89bec88826fa8125774fb60807dfb Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Tue, 8 Jun 2021 15:05:23 +0200
Subject: [PATCH 29/34] adding Point Interface section

---
 docs/api.rst                | 24 +++++++++++++++---------
 pyske/core/util/point_2D.py |  3 +++
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/docs/api.rst b/docs/api.rst
index 19a69e2..864486e 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -3,7 +3,7 @@ PySke API
 
 Pyske API offer applications implemented with list and tree skeletons.
 The user can use the sequential or parallel version.
-The parallel version allows a faster execution time when its launched on several processors or computers.
+The parallel version allows a faster execution time when its launched on several processors, cores or computers.
 
 Dot Product
 -----------
@@ -23,20 +23,26 @@ K-means function
 
 .. autofunction:: k_means
 
-Here the implementation of the 2 dimensions point class.
+Initialization functions
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+This is the standard method for initializing the centroids. It chooses the centroids so that each one is as far as possible from the others.
+
+.. autofunction:: k_means_init
+
+
+Point Interface
+^^^^^^^^^^^^^^^
+
+The k-means algorithm takes a list of points as a parameter. For now, two classes implement the point interface: one for 2-dimensional points and another for 3-dimensional points.
+
+Point 2D class implementation:
 
 .. autoclass:: pyske.core.util.point_2D.Point_2D
     :members:
     :special-members:
-    :show-inheritance:
-    :private-members:
     :member-order: bysource
 
-Initialization functions
-^^^^^^^^^^^^^^^^^^^^^^^^
-
-.. autofunction:: k_means_init
-
 Running Example
 ^^^^^^^^^^^^^^^^^^^^
 
diff --git a/pyske/core/util/point_2D.py b/pyske/core/util/point_2D.py
index f6f5f7f..d0bfeca 100644
--- a/pyske/core/util/point_2D.py
+++ b/pyske/core/util/point_2D.py
@@ -17,6 +17,9 @@ def __repr__(self):
         return "(%s, %s)" % (self.__x, self.__y)
 
     def __eq__(self, other):
+        """
+        Equality between two points
+        """
         if isinstance(other, Point_2D):
             return self.__x == other.__x and self.__y == other.__y
         return False

From 529498ef384ae91989a9d88ff4817518f58e4be9 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Tue, 8 Jun 2021 15:15:33 +0200
Subject: [PATCH 30/34] change show-clusters display message

---
 pyske/examples/list/util.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index 1ddca0f..4964c93 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -57,12 +57,12 @@ def k_means_parser():
     Parse command line for k-means example.
     """
     parser = argparse.ArgumentParser()
-    parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000)
+    parser.add_argument("--size", help="size of the list to generate", type=int, default=5_000)
     parser.add_argument("--iter", help="number of iterations", type=int, default=30)
     parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ)
     parser.add_argument("--clusters", help="number of clusters", type=int, default=3)
     parser.add_argument("--dimensions", help="point dimensions", type=int, default=2)
-    parser.add_argument("--show-clusters", help="display the clusters graph of 2D points",
+    parser.add_argument("--show-clusters", help="display the clusters graph of 2D or 3D points",
                         action="store_true")
     return parser
 

From 04d1037c4672406742a2bb37e66cff9c0d237093 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Wed, 9 Jun 2021 15:25:03 +0200
Subject: [PATCH 31/34] dot_product documentation

---
 docs/api.rst                            | 26 +++++++++++++++++++++++--
 pyske/examples/list/dot_product_main.py | 10 ++--------
 pyske/examples/list/util.py             | 17 ++++++++++++++++
 3 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/docs/api.rst b/docs/api.rst
index 864486e..a347320 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -1,13 +1,35 @@
 PySke API
 =========
 
-Pyske API offer applications implemented with list and tree skeletons.
+PySke API offer applications implemented with list and tree skeletons.
 The user can use the sequential or parallel version.
 The parallel version allows a faster execution time when its launched on several processors, cores or computers.
 
 Dot Product
 -----------
 
+.. py:module:: pyske.examples.list.dot_product
+
+
+Dot Product function
+^^^^^^^^^^^^^^^^^^^^
+
+.. autofunction:: opt_dot_product
+
+Dot Product Variant
+^^^^^^^^^^^^^^^^^^^
+
+.. autofunction:: dot_product
+
+Running Example
+^^^^^^^^^^^^^^^
+
+.. argparse::
+    :module: pyske.examples.list.util
+    :func: dot_product_parser
+    :prog: python3 dot_product_main.py
+
+
 Discrete Fast Fourier Transform
 -------------------------------
 
@@ -44,7 +66,7 @@ Point 2D class implementation:
     :member-order: bysource
 
 Running Example
-^^^^^^^^^^^^^^^^^^^^
+^^^^^^^^^^^^^^^
 
 .. argparse::
     :module: pyske.examples.list.util
diff --git a/pyske/examples/list/dot_product_main.py b/pyske/examples/list/dot_product_main.py
index e357322..782c146 100644
--- a/pyske/examples/list/dot_product_main.py
+++ b/pyske/examples/list/dot_product_main.py
@@ -2,14 +2,13 @@
 Execution of dot_product.py
 """
 
-import argparse
 import gc
 import random
 from pyske.examples.list.dot_product import opt_dot_product, dot_product
 from pyske.core import par, Timing, PList as DPList
 from pyske.core.opt import fun as opt
 from pyske.core.opt.list import PList
-from pyske.examples.list.util import rand_list, print_experiment
+from pyske.examples.list.util import rand_list, print_experiment, dot_product_parser
 
 
 # -------------- Execution -----------------
@@ -26,12 +25,7 @@ def __compute():
         return opt_dot_product(PList.raw(pl1), PList.raw(pl2), uncurry=opt.uncurry).run()
 
     # Command-line arguments parsing
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000_000)
-    parser.add_argument("--iter", help="number of iterations", type=int, default=30)
-    parser.add_argument("--test", help="choice of the test",
-                        choices=[_DIRECT, _HAND, _EVAL, _OPT],
-                        default=_DIRECT)
+    parser = dot_product_parser()
     args = parser.parse_args()
     size = args.size
     test = args.test
diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index 4964c93..075321d 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -15,6 +15,10 @@
 
 PAR = 'parallel'
 SEQ = 'sequential'
+_DIRECT = '_DIRECT'
+_HAND = 'hand_optimized'
+_OPT = 'optimized'
+_EVAL = 'evaluated'
 
 
 def standard_parse_command_line(size_arg=True, iter_arg=True, data_arg=True):
@@ -66,6 +70,19 @@ def k_means_parser():
                         action="store_true")
     return parser
 
+def dot_product_parser():
+    """
+    Parse command line for dot-product example.
+    """
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000_000)
+    parser.add_argument("--iter", help="number of iterations", type=int, default=30)
+    parser.add_argument("--test", help="choice of the test",
+                        choices=[_DIRECT, _HAND, _EVAL, _OPT],
+                        default=_DIRECT)
+    return parser
+
 def select_pyske_list(choice):
     """
     Return a PySke list class.

From 6c8eed974153b26da1c71831a16e9dcd9597fa0e Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Wed, 9 Jun 2021 18:03:03 +0200
Subject: [PATCH 32/34] adding for each example how to run it

---
 docs/api.rst                | 52 ++++++++++++++++++++++++++++++-------
 docs/conf.py                |  4 +--
 docs/index.rst              |  2 +-
 pyske/examples/list/util.py | 32 ++++++++++++++---------
 4 files changed, 66 insertions(+), 24 deletions(-)

diff --git a/docs/api.rst b/docs/api.rst
index a347320..10d40ef 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -1,10 +1,13 @@
 PySke API
-=========
+#########
 
 PySke API offer applications implemented with list and tree skeletons.
 The user can use the sequential or parallel version.
 The parallel version allows a faster execution time when its launched on several processors, cores or computers.
 
+List Examples
+=============
+
 Dot Product
 -----------
 
@@ -24,15 +27,20 @@ Dot Product Variant
 Running Example
 ^^^^^^^^^^^^^^^
 
-.. argparse::
-    :module: pyske.examples.list.util
-    :func: dot_product_parser
-    :prog: python3 dot_product_main.py
+.. autoprogram:: pyske.examples.list.util:dot_product_parser()
+    :prog: dot_product_main.py
 
 
 Discrete Fast Fourier Transform
 -------------------------------
 
+Running Example
+^^^^^^^^^^^^^^^
+
+.. autoprogram:: pyske.examples.list.util:standard_parser(data_arg=False)
+    :prog: fft_main.py
+
+
 K-means Clustering
 ------------------
 
@@ -68,21 +76,47 @@ Point 2D class implementation:
 Running Example
 ^^^^^^^^^^^^^^^
 
-.. argparse::
-    :module: pyske.examples.list.util
-    :func: k_means_parser
-    :prog: python3 k_means_main.py
+.. autoprogram:: pyske.examples.list.util:k_means_parser()
+    :prog: k_means_main.py
 
 
 Maximum Prefix Sum
 ------------------
 
+Running Example
+^^^^^^^^^^^^^^^
+
+.. autoprogram:: pyske.examples.list.util:standard_parser()
+    :prog: maximum_prefix_sum_main.py
+
 Maximum Segment Sum
 -------------------
 
+Running Example
+^^^^^^^^^^^^^^^
+
+.. autoprogram:: pyske.examples.list.util:standard_parser()
+    :prog: maximum_segment_sum_main.py
+
 Parallel Regular Sampling Sort
 ------------------------------
 
+Running Example
+^^^^^^^^^^^^^^^
+
+.. autoprogram:: pyske.examples.list.util:standard_parser()
+    :prog: regular_sampling_sort_main.py
+
 Variance Example
 ----------------
 
+Running Example
+^^^^^^^^^^^^^^^
+
+.. autoprogram:: pyske.examples.list.util:standard_parser()
+    :prog: variance_main.py
+
+
+Tree Examples
+=============
+
diff --git a/docs/conf.py b/docs/conf.py
index fe8f596..b6fa59b 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -12,7 +12,7 @@
 #
 import os
 import sys
-sys.path.insert(0, os.path.abspath('../.'))
+sys.path.insert(0, os.path.abspath('../'))
 
 
 # -- Project information -----------------------------------------------------
@@ -32,7 +32,7 @@
 # ones.
 extensions = [
     "sphinx.ext.autodoc",
-    "sphinxarg.ext"
+    "sphinxcontrib.autoprogram"
 ]
 
 # Add any paths that contain templates here, relative to this directory.
diff --git a/docs/index.rst b/docs/index.rst
index 8115248..552c990 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -7,7 +7,7 @@ Welcome to PySke's documentation!
 =================================
 
 .. toctree::
-   :maxdepth: 2
+   :maxdepth: 3
    :caption: Contents:
 
    intro
diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index 075321d..0ae417f 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -34,16 +34,7 @@ def standard_parse_command_line(size_arg=True, iter_arg=True, data_arg=True):
     :param data_arg: (default True) flag to select argument --data
     :return:  (size, iter, ['parallel' | 'sequential'])
     """
-    parser = argparse.ArgumentParser()
-    if size_arg:
-        parser.add_argument("--size", help="size of the list to generate",
-                            type=int, default=1_000_000)
-    if iter_arg:
-        parser.add_argument("--iter", help="number of iterations",
-                            type=int, default=30)
-    if data_arg:
-        parser.add_argument("--data", help="type of data structure",
-                            choices=[PAR, SEQ], default=SEQ)
+    parser = standard_parser(size_arg, iter_arg, data_arg)
     size = num_iter = 0
     data_type = PAR
     args = parser.parse_args()
@@ -56,9 +47,25 @@ def standard_parse_command_line(size_arg=True, iter_arg=True, data_arg=True):
     return size, num_iter, data_type
 
 
+def standard_parser(size_arg=True, iter_arg=True, data_arg=True):
+    """
+    Parser for standard example.
+    """
+    parser = argparse.ArgumentParser()
+    if size_arg:
+        parser.add_argument("--size", help="size of the list to generate",
+                            type=int, default=1_000_000)
+    if iter_arg:
+        parser.add_argument("--iter", help="number of iterations",
+                            type=int, default=30)
+    if data_arg:
+        parser.add_argument("--data", help="type of data structure",
+                            choices=[PAR, SEQ], default=SEQ)
+    return parser
+
 def k_means_parser():
     """
-    Parse command line for k-means example.
+    Parser for k-means example.
     """
     parser = argparse.ArgumentParser()
     parser.add_argument("--size", help="size of the list to generate", type=int, default=5_000)
@@ -68,11 +75,12 @@ def k_means_parser():
     parser.add_argument("--dimensions", help="point dimensions", type=int, default=2)
     parser.add_argument("--show-clusters", help="display the clusters graph of 2D or 3D points",
                         action="store_true")
+
     return parser
 
 def dot_product_parser():
     """
-    Parse command line for dot-product example.
+    Parser for dot-product example.
     """
 
     parser = argparse.ArgumentParser()

From c73d1f3f28214c5c35a1ffd06b2002299953c2ba Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Thu, 10 Jun 2021 15:38:06 +0200
Subject: [PATCH 33/34] fft documentation, how to run in parallel

---
 docs/api.rst | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/docs/api.rst b/docs/api.rst
index 10d40ef..80a2276 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -5,6 +5,14 @@ PySke API offer applications implemented with list and tree skeletons.
 The user can use the sequential or parallel version.
 The parallel version allows a faster execution time when its launched on several processors, cores or computers.
 
+Run examples with parallel computing:
+
+    .. code-block:: console
+
+        mpirun -np NB_CORES python3 PROGRAM_NAME [OPTIONS]
+
+Examples without the :code:`--data` option can only be run in parallel.
+
 List Examples
 =============
 
@@ -14,14 +22,11 @@ Dot Product
 .. py:module:: pyske.examples.list.dot_product
 
 
-Dot Product function
-^^^^^^^^^^^^^^^^^^^^
+Dot Product functions
+^^^^^^^^^^^^^^^^^^^^^
 
 .. autofunction:: opt_dot_product
 
-Dot Product Variant
-^^^^^^^^^^^^^^^^^^^
-
 .. autofunction:: dot_product
 
 Running Example
@@ -33,6 +38,12 @@ Running Example
 
 Discrete Fast Fourier Transform
 -------------------------------
+.. py:module:: pyske.examples.list.fft
+
+Fast Fourier Transform function
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autofunction:: fft
 
 Running Example
 ^^^^^^^^^^^^^^^

From fad0ab3b77b53990012118916d491c7c0d0102bc Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Thu, 10 Jun 2021 16:00:34 +0200
Subject: [PATCH 34/34] last examples documentation functions

---
 docs/api.rst                                 | 28 ++++++++++++++++++++
 pyske/examples/list/regular_sampling_sort.py |  3 +--
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/docs/api.rst b/docs/api.rst
index 80a2276..1b6eaa9 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -94,6 +94,13 @@ Running Example
 Maximum Prefix Sum
 ------------------
 
+.. py:module:: pyske.examples.list.maximum_prefix_sum
+
+Maximum Prefix Sum function
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autofunction:: mps
+
 Running Example
 ^^^^^^^^^^^^^^^
 
@@ -112,6 +119,20 @@ Running Example
 Parallel Regular Sampling Sort
 ------------------------------
 
+.. py:module:: pyske.examples.list.regular_sampling_sort
+
+
+Broadcast function
+^^^^^^^^^^^^^^^^^^
+
+.. autofunction:: bcast
+
+Sort function
+^^^^^^^^^^^^^
+
+.. autofunction:: pssr
+
+
 Running Example
 ^^^^^^^^^^^^^^^
 
@@ -121,6 +142,13 @@ Running Example
 Variance Example
 ----------------
 
+.. py:module:: pyske.examples.list.variance
+
+Variance function
+^^^^^^^^^^^^^^^^^
+
+.. autofunction:: variance
+
 Running Example
 ^^^^^^^^^^^^^^^
 
diff --git a/pyske/examples/list/regular_sampling_sort.py b/pyske/examples/list/regular_sampling_sort.py
index 69feb8b..f816de0 100644
--- a/pyske/examples/list/regular_sampling_sort.py
+++ b/pyske/examples/list/regular_sampling_sort.py
@@ -18,8 +18,7 @@ def bcast(input_list: PList, src_pid: int) -> PList:
     Example::
 
         >>> from pyske.core import PList, par
-        >>> bcast(PList.from_seq([42]), 0).to_seq() == \
-                list(map(lambda _: 42, par.procs()))
+        >>> bcast(PList.from_seq([42]), 0).to_seq() == list(map(lambda _: 42, par.procs()))
         True
 
     :param input_list: a parallel list.