From ce230dba7ec01dc4bed42848035c4c8559909f70 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Wed, 5 May 2021 17:05:56 +0200
Subject: [PATCH 01/53] Class Point / K-means algorithm

---
 pyske/core/util/point.py            |  51 ++++++++++++
 pyske/examples/list/k_means.py      | 121 ++++++++++++++++++++++++++++
 pyske/examples/list/k_means_main.py |  25 ++++++
 pyske/examples/list/util.py         |  14 ++++
 4 files changed, 211 insertions(+)
 create mode 100644 pyske/core/util/point.py
 create mode 100644 pyske/examples/list/k_means.py
 create mode 100644 pyske/examples/list/k_means_main.py

diff --git a/pyske/core/util/point.py b/pyske/core/util/point.py
new file mode 100644
index 0000000..0a5d2fc
--- /dev/null
+++ b/pyske/core/util/point.py
@@ -0,0 +1,51 @@
+"""
+A module to represent a point
+"""
+
+from math import sqrt
+
+
+class Point(object):
+    """A class to represent a point"""
+
+    def __init__(self, x, y):
+        self.__x = x
+        self.__y = y
+
+    def __repr__(self):
+        return "(%s, %s)" % (self.__x, self.__y)
+
+    def __eq__(self, other):
+        if isinstance(other, Point):
+            return self.__x == other.x and self.__y == other.__y
+        return False
+
+    @property
+    def x(self):
+        """X getter"""
+        return self.__x
+
+    @property
+    def y(self):
+        """Y getter"""
+        return self.__y
+
+    def distance(self, other: 'Point'):
+        """
+        Returns the distance from another point.
+
+        Examples::
+
+            >>> from pyske.core.util.point import Point
+            >>> p1 = Point(5,5)
+            >>> p2 = Point(5,7)
+            >>> p1.distance(p2)
+            2.0
+
+        :param other: a point
+        :return: distance from other point
+
+        """
+        dx = self.__x - other.x
+        dy = self.__y - other.y
+        return sqrt(dx ** 2 + dy ** 2)
diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
new file mode 100644
index 0000000..69dbdbc
--- /dev/null
+++ b/pyske/examples/list/k_means.py
@@ -0,0 +1,121 @@
+"""
+K-Means
+"""
+
+from pyske.core.interface import List
+from pyske.core.list import SList
+import random
+import matplotlib.pyplot as plt
+from pyske.core.util.point import Point
+
+
+def cluster_index(p, centroids):
+    """
+    Get the centroid index of the closest centroid
+    """
+    min_dist = float("inf")
+    p_centroid = centroids[0]
+    for c in centroids:
+        if p.distance(c) < min_dist:
+            min_dist = p.distance(c)
+            p_centroid = c
+    return centroids.index(p_centroid)
+
+
+def make_clusters(input_list, centroids):
+    """
+    Append all points to the cluster with the minimal distance from its centroid
+    """
+    clusters = [[] for c in centroids]
+    for p in input_list.to_seq():
+        index = cluster_index(p, centroids)
+        clusters[index].append(p)
+    return clusters
+
+
+def coords_average(cluster):
+    """
+    Get the coordinates average of all points in one cluster
+    """
+    x_average = sum([p.x for p in cluster]) / len(cluster)
+    y_average = sum([p.y for p in cluster]) / len(cluster)
+    return Point(x_average, y_average)
+
+
+def get_new_centroid(cluster):
+    """
+    Get closest point to average of point coordinates
+    """
+    average_point = coords_average(cluster)
+    min_dist = float("inf")
+    new_centroid = cluster[0]
+    for p in cluster:
+        if p.distance(average_point) < min_dist:
+            min_dist = p.distance(average_point)
+            new_centroid = p
+    return new_centroid
+
+
+def define_centroids(clusters):
+    """
+    Redefine centroids of clusters
+    """
+    centroids = []
+    for cluster in clusters:
+        centroids.append(get_new_centroid(cluster))
+    return centroids
+
+
+def k_means_init(input_list: List, n_cluster: int):
+    """
+    K-means++ initialisation
+
+    :param input_list: a list of point
+    :param n_cluster: number of cluster
+
+    :return: n_cluster centroids
+    """
+    centroids = SList([])
+    c1 = input_list.to_seq()[random.randint(0, input_list.length() - 1)]
+    centroids.append(c1)
+
+    for c in range(n_cluster - 1):
+        dist = input_list.map(lambda x: x.distance(centroids[0]))
+        for i in range(1, len(centroids)):
+            temp_dist = input_list.map(lambda x: x.distance(centroids[i]))
+            dist = dist.map2(lambda x, y: min(x, y), temp_dist)
+
+        index_max = [i for i, x in enumerate(dist.to_seq()) if x == max(dist.to_seq())]
+        next_centroid = input_list.to_seq()[index_max[0]]
+        centroids.append(next_centroid)
+
+    return centroids
+
+
+def k_means(input_list: List, n_cluster: int, max_iter: int = 10):
+    """
+    K-means algorithm on a list of point
+
+    :param input_list: a list of point
+    :param n_cluster: number of cluster
+    :param max_iter: number of iteration
+
+    :return: a list of class
+    """
+    centroids = k_means_init(input_list, n_cluster)
+    j = 0
+    while j < max_iter:
+        clusters = make_clusters(input_list, centroids)
+        plt.scatter([point.x for point in input_list.to_seq()], [point.y for point in input_list.to_seq()],
+                    c='yellow')
+        clusters_color = ['green', 'blue', 'black', 'purple', 'brown']
+        for i in range(len(clusters)):
+            plt.scatter([point.x for point in clusters[i]], [point.y for point in clusters[i]],
+                        c=clusters_color[i])
+
+        centroids = define_centroids(clusters)
+        plt.scatter([point.x for point in centroids], [point.y for point in centroids], c='red')
+        plt.show()
+        j = j + 1
+
+    return clusters
diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
new file mode 100644
index 0000000..f2b8c0c
--- /dev/null
+++ b/pyske/examples/list/k_means_main.py
@@ -0,0 +1,25 @@
+"""
+Execution of k_means
+"""
+import gc
+
+from pyske.core import Timing
+from pyske.examples.list.k_means import k_means
+from pyske.examples.list import util
+
+
+if __name__ == '__main__':
+    size, num_iter, choice = util.standard_parse_command_line()
+    pyske_list_class = util.select_pyske_list(choice)
+    input_list = util.rand_point_list(pyske_list_class, size)
+    timing = Timing()
+    execute = util.select_execute(choice)
+    example = k_means
+    execute(lambda: print('Version:\t', choice))
+    gc.disable()
+    for iteration in range(1, 1 + num_iter):
+        timing.start()
+        result = example(input_list, 5)
+        timing.stop()
+        gc.collect()
+        util.print_experiment(result, timing.get(), execute, iteration)
\ No newline at end of file
diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index 57bed0d..26dbb69 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -89,6 +89,20 @@ def rand_list(cls, size):
     return cls.init(lambda _: float(random.randint(-100, 100)), size)
 
 
+def rand_point_list(cls, size):
+    """
+    Return a randomly generated list of points.
+
+    :param cls: the class of the generated list.
+    :param size: a positive number
+        Precondition: size >= 0
+    :return: a list of the given class
+    """
+    from pyske.core.util.point import Point
+    import random
+    return cls.init(lambda _: Point(random.randint(0, size), random.randint(0, size)), size)
+
+
 def print_experiment(result, timing, execute, iteration=None):
     """
     Print the result and timing of the experiment.

From 0b33eacce3ceaa0dc1ed32ee8bad7b99b30d75e8 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Thu, 6 May 2021 16:33:41 +0200
Subject: [PATCH 02/53] number of clusters in parameters / test on datasets

---
 pyske/examples/list/k_means.py      | 11 ++---------
 pyske/examples/list/k_means_main.py | 27 +++++++++++++++++++++++----
 pyske/examples/list/util.py         | 11 ++++++++---
 3 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index 69dbdbc..d500fbb 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -100,22 +100,15 @@ def k_means(input_list: List, n_cluster: int, max_iter: int = 10):
     :param n_cluster: number of cluster
     :param max_iter: number of iteration
 
-    :return: a list of class
+    :return: 2 dimension list of points
     """
     centroids = k_means_init(input_list, n_cluster)
     j = 0
     while j < max_iter:
         clusters = make_clusters(input_list, centroids)
-        plt.scatter([point.x for point in input_list.to_seq()], [point.y for point in input_list.to_seq()],
-                    c='yellow')
-        clusters_color = ['green', 'blue', 'black', 'purple', 'brown']
-        for i in range(len(clusters)):
-            plt.scatter([point.x for point in clusters[i]], [point.y for point in clusters[i]],
-                        c=clusters_color[i])
 
         centroids = define_centroids(clusters)
-        plt.scatter([point.x for point in centroids], [point.y for point in centroids], c='red')
-        plt.show()
+        # plt.scatter([point.x for point in centroids], [point.y for point in centroids], c='red')
         j = j + 1
 
     return clusters
diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
index f2b8c0c..0367361 100644
--- a/pyske/examples/list/k_means_main.py
+++ b/pyske/examples/list/k_means_main.py
@@ -6,12 +6,28 @@
 from pyske.core import Timing
 from pyske.examples.list.k_means import k_means
 from pyske.examples.list import util
+import matplotlib.pyplot as plt
+import argparse
 
+PAR = 'parallel'
+SEQ = 'sequential'
 
 if __name__ == '__main__':
-    size, num_iter, choice = util.standard_parse_command_line()
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000_000)
+    parser.add_argument("--iter", help="number of iterations", type=int, default=30)
+    parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ)
+    parser.add_argument("--clusters", help="number of clusters", type=int, default=3)
+
+    args = parser.parse_args()
+    size = args.size
+    num_iter = args.iter
+    choice = args.data
+    clusters = args.clusters
+
     pyske_list_class = util.select_pyske_list(choice)
-    input_list = util.rand_point_list(pyske_list_class, size)
+    input_list = util.rand_point_list(pyske_list_class, size, clusters)
     timing = Timing()
     execute = util.select_execute(choice)
     example = k_means
@@ -19,7 +35,10 @@
     gc.disable()
     for iteration in range(1, 1 + num_iter):
         timing.start()
-        result = example(input_list, 5)
+        result = example(input_list, clusters)
         timing.stop()
         gc.collect()
-        util.print_experiment(result, timing.get(), execute, iteration)
\ No newline at end of file
+        util.print_experiment("", timing.get(), execute, iteration)
+        for i in range(len(result)):
+            plt.scatter([point.x for point in result[i]], [point.y for point in result[i]])
+        plt.show()
diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index 26dbb69..965067f 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -2,6 +2,8 @@
 Utility functions for PySke examples
 """
 
+from sklearn.datasets import make_blobs
+
 PAR = 'parallel'
 SEQ = 'sequential'
 
@@ -89,18 +91,21 @@ def rand_list(cls, size):
     return cls.init(lambda _: float(random.randint(-100, 100)), size)
 
 
-def rand_point_list(cls, size):
+def rand_point_list(cls, size, clusters):
     """
     Return a randomly generated list of points.
 
     :param cls: the class of the generated list.
     :param size: a positive number
         Precondition: size >= 0
+    :param clusters: number of clusters
     :return: a list of the given class
     """
     from pyske.core.util.point import Point
-    import random
-    return cls.init(lambda _: Point(random.randint(0, size), random.randint(0, size)), size)
+    x, y_true = make_blobs(n_samples=size, centers=clusters)
+    x = x.tolist()
+    x = list(map(lambda y: Point(y[0], y[1]), x))
+    return cls.from_seq(x)
 
 
 def print_experiment(result, timing, execute, iteration=None):

From 8b89af61f688b9b756ccfc0c29747c933ce106e3 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Tue, 11 May 2021 09:55:38 +0200
Subject: [PATCH 03/53] radon cc in k_means_init / pylinting

---
 pyske/examples/list/k_means.py      | 29 +++++++++++++++++++----------
 pyske/examples/list/k_means_main.py |  6 +++---
 2 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index d500fbb..4b9ed26 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -1,11 +1,9 @@
 """
 K-Means
 """
-
+import random
 from pyske.core.interface import List
 from pyske.core.list import SList
-import random
-import matplotlib.pyplot as plt
 from pyske.core.util.point import Point
 
 
@@ -65,6 +63,17 @@ def define_centroids(clusters):
         centroids.append(get_new_centroid(cluster))
     return centroids
 
+def index_max_value(input_list: List):
+    """
+    Return the index of the maximum value
+    """
+    index_max = 0
+    max_dist = 0
+    for i in range(len(input_list.to_seq())):
+        if input_list.to_seq()[i] > max_dist:
+            max_dist = input_list.to_seq()[i]
+            index_max = i
+    return index_max
 
 def k_means_init(input_list: List, n_cluster: int):
     """
@@ -79,14 +88,14 @@ def k_means_init(input_list: List, n_cluster: int):
     c1 = input_list.to_seq()[random.randint(0, input_list.length() - 1)]
     centroids.append(c1)
 
-    for c in range(n_cluster - 1):
+    for _ in range(n_cluster - 1):
         dist = input_list.map(lambda x: x.distance(centroids[0]))
         for i in range(1, len(centroids)):
-            temp_dist = input_list.map(lambda x: x.distance(centroids[i]))
-            dist = dist.map2(lambda x, y: min(x, y), temp_dist)
+            temp_dist = input_list.map(lambda x, index=i: x.distance(centroids[index]))
+            dist = dist.map2(lambda x, y: y if y < x else x, temp_dist)
 
-        index_max = [i for i, x in enumerate(dist.to_seq()) if x == max(dist.to_seq())]
-        next_centroid = input_list.to_seq()[index_max[0]]
+        index_max = index_max_value(dist)
+        next_centroid = input_list.to_seq()[index_max]
         centroids.append(next_centroid)
 
     return centroids
@@ -100,13 +109,13 @@ def k_means(input_list: List, n_cluster: int, max_iter: int = 10):
     :param n_cluster: number of cluster
     :param max_iter: number of iteration
 
-    :return: 2 dimension list of points
+    :return: 2 dimensions list of points
     """
+
     centroids = k_means_init(input_list, n_cluster)
     j = 0
     while j < max_iter:
         clusters = make_clusters(input_list, centroids)
-
         centroids = define_centroids(clusters)
         # plt.scatter([point.x for point in centroids], [point.y for point in centroids], c='red')
         j = j + 1
diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
index 0367361..f08a918 100644
--- a/pyske/examples/list/k_means_main.py
+++ b/pyske/examples/list/k_means_main.py
@@ -2,12 +2,12 @@
 Execution of k_means
 """
 import gc
+import argparse
+import matplotlib.pyplot as plt
 
 from pyske.core import Timing
 from pyske.examples.list.k_means import k_means
 from pyske.examples.list import util
-import matplotlib.pyplot as plt
-import argparse
 
 PAR = 'parallel'
 SEQ = 'sequential'
@@ -15,7 +15,7 @@
 if __name__ == '__main__':
 
     parser = argparse.ArgumentParser()
-    parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000_000)
+    parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000)
     parser.add_argument("--iter", help="number of iterations", type=int, default=30)
     parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ)
     parser.add_argument("--clusters", help="number of clusters", type=int, default=3)

From 4c7d8727d5ea23062681d352e0dc49987c64bf5a Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Tue, 25 May 2021 15:47:05 +0200
Subject: [PATCH 04/53] Default constructor, addition between two points,
 multiplication by scalar and by a point

---
 pyske/core/util/point.py | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/pyske/core/util/point.py b/pyske/core/util/point.py
index 0a5d2fc..cb55684 100644
--- a/pyske/core/util/point.py
+++ b/pyske/core/util/point.py
@@ -8,7 +8,7 @@
 class Point(object):
     """A class to represent a point"""
 
-    def __init__(self, x, y):
+    def __init__(self, x=0, y=0):
         self.__x = x
         self.__y = y
 
@@ -20,6 +20,38 @@ def __eq__(self, other):
             return self.__x == other.x and self.__y == other.__y
         return False
 
+    def __add__(self, other):
+        """
+        Addition of two points
+
+        Examples::
+
+            >>> p1 = Point(5,5)
+            >>> p2 = Point(5,7)
+            >>> p1 + p2
+            (10, 12)
+        """
+        if isinstance(other, Point):
+            return Point(self.x + other.x, self.y + other.y)
+
+    def __mul__(self, other):
+        """
+        Multiplication by a point or a scalar
+
+        Examples::
+
+            >>> p1 = Point(5,5)
+            >>> p2 = Point(5,7)
+            >>> p1 * 5
+            (25, 25)
+            >>> p1 * p2
+            (25, 35)
+        """
+        if isinstance(other, Point):
+            return Point(self.x * other.x, self.y * other.y)
+        if isinstance(other, int) or isinstance(other, float):
+            return Point(self.x * other, self.y * other)
+
     @property
     def x(self):
         """X getter"""

From ee146229998184612db64bc0cb01ae1160fe0b80 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Tue, 25 May 2021 16:13:40 +0200
Subject: [PATCH 05/53] parallel optimization in k_means_init

---
 pyske/examples/list/k_means.py | 41 +++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index 4b9ed26..d7ab21f 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -2,20 +2,21 @@
 K-Means
 """
 import random
+from typing import Callable, Tuple
 from pyske.core.interface import List
 from pyske.core.list import SList
 from pyske.core.util.point import Point
 
 
-def cluster_index(p, centroids):
+def cluster_index(point, centroids):
     """
     Get the centroid index of the closest centroid
     """
     min_dist = float("inf")
     p_centroid = centroids[0]
     for c in centroids:
-        if p.distance(c) < min_dist:
-            min_dist = p.distance(c)
+        if point.distance(c) < min_dist:
+            min_dist = point.distance(c)
             p_centroid = c
     return centroids.index(p_centroid)
 
@@ -54,7 +55,7 @@ def get_new_centroid(cluster):
     return new_centroid
 
 
-def define_centroids(clusters):
+def define_centroids(clusters):  # Pas utile car tuple ( num_cluster, point )
     """
     Redefine centroids of clusters
     """
@@ -63,17 +64,16 @@ def define_centroids(clusters):
         centroids.append(get_new_centroid(cluster))
     return centroids
 
-def index_max_value(input_list: List):
+
+def max_dist(pair_a: Tuple[Point, float], pair_b: Tuple[Point, float]):
     """
-    Return the index of the maximum value
+    Return the tuple with the maximum distance
     """
-    index_max = 0
-    max_dist = 0
-    for i in range(len(input_list.to_seq())):
-        if input_list.to_seq()[i] > max_dist:
-            max_dist = input_list.to_seq()[i]
-            index_max = i
-    return index_max
+    if pair_a[1] > pair_b[1]:
+        return pair_a
+    else:
+        return pair_b
+
 
 def k_means_init(input_list: List, n_cluster: int):
     """
@@ -94,30 +94,31 @@ def k_means_init(input_list: List, n_cluster: int):
             temp_dist = input_list.map(lambda x, index=i: x.distance(centroids[index]))
             dist = dist.map2(lambda x, y: y if y < x else x, temp_dist)
 
-        index_max = index_max_value(dist)
-        next_centroid = input_list.to_seq()[index_max]
+        zip_list = input_list.zip(dist)
+        next_centroid = zip_list.reduce(max_dist)[0]
         centroids.append(next_centroid)
 
     return centroids
 
 
-def k_means(input_list: List, n_cluster: int, max_iter: int = 10):
+def k_means(input_list: List, init_function: Callable[[List, int], List], n_cluster: int,
+            max_iter: int = 10):
     """
     K-means algorithm on a list of point
 
     :param input_list: a list of point
     :param n_cluster: number of cluster
     :param max_iter: number of iteration
+    :param init_function: a function that initialize centroids
 
     :return: 2 dimensions list of points
     """
 
-    centroids = k_means_init(input_list, n_cluster)
+    centroids = init_function(input_list, n_cluster)
     j = 0
     while j < max_iter:
-        clusters = make_clusters(input_list, centroids)
-        centroids = define_centroids(clusters)
-        # plt.scatter([point.x for point in centroids], [point.y for point in centroids], c='red')
+        clusters = make_clusters(input_list, centroids)  # assign_cluster
+        centroids = define_centroids(clusters)  # update_centroids
         j = j + 1
 
     return clusters

From 057457c98096184e89eac1af93d6ec433eba9428 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Wed, 26 May 2021 17:04:36 +0200
Subject: [PATCH 06/53] fix: init instead of from_seq

---
 pyske/examples/list/util.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index 965067f..d6517fa 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -102,10 +102,11 @@ def rand_point_list(cls, size, clusters):
     :return: a list of the given class
     """
     from pyske.core.util.point import Point
+    print(clusters)
     x, y_true = make_blobs(n_samples=size, centers=clusters)
     x = x.tolist()
     x = list(map(lambda y: Point(y[0], y[1]), x))
-    return cls.from_seq(x)
+    return cls.init(lambda i: x[i], size)
 
 
 def print_experiment(result, timing, execute, iteration=None):

From 87000f9a8ec99b89a26c528e8ba3184838591f38 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Wed, 26 May 2021 17:05:11 +0200
Subject: [PATCH 07/53] Division of a point

---
 pyske/core/util/point.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pyske/core/util/point.py b/pyske/core/util/point.py
index cb55684..b4c43f0 100644
--- a/pyske/core/util/point.py
+++ b/pyske/core/util/point.py
@@ -52,6 +52,10 @@ def __mul__(self, other):
         if isinstance(other, int) or isinstance(other, float):
             return Point(self.x * other, self.y * other)
 
+    def __truediv__(self, other):
+        if isinstance(other, int):
+            return Point(self.x / other, self.y / other)
+
     @property
     def x(self):
         """X getter"""

From 0d9b023430548a3f91a804dd060c465f686e68df Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Wed, 26 May 2021 17:06:15 +0200
Subject: [PATCH 08/53] parallel optimization, assign_clusters and
 update_centroids

---
 pyske/examples/list/k_means.py      | 62 +++++++++++------------------
 pyske/examples/list/k_means_main.py | 15 ++++---
 2 files changed, 30 insertions(+), 47 deletions(-)

diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index d7ab21f..89cf643 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -18,51 +18,32 @@ def cluster_index(point, centroids):
         if point.distance(c) < min_dist:
             min_dist = point.distance(c)
             p_centroid = c
-    return centroids.index(p_centroid)
+    return point, centroids.index(p_centroid)
 
 
-def make_clusters(input_list, centroids):
+def assign_clusters(input_list, centroids):
     """
-    Append all points to the cluster with the minimal distance from its centroid
+    Assign to each point to a cluster
     """
-    clusters = [[] for c in centroids]
-    for p in input_list.to_seq():
-        index = cluster_index(p, centroids)
-        clusters[index].append(p)
-    return clusters
-
 
-def coords_average(cluster):
-    """
-    Get the coordinates average of all points in one cluster
-    """
-    x_average = sum([p.x for p in cluster]) / len(cluster)
-    y_average = sum([p.y for p in cluster]) / len(cluster)
-    return Point(x_average, y_average)
+    return input_list.map(lambda x: cluster_index(x, centroids))
 
 
-def get_new_centroid(cluster):
+def update_centroids(clusters, centroids):
     """
-    Get closest point to average of point coordinates
+    Update centroids of clusters
     """
-    average_point = coords_average(cluster)
-    min_dist = float("inf")
-    new_centroid = cluster[0]
-    for p in cluster:
-        if p.distance(average_point) < min_dist:
-            min_dist = p.distance(average_point)
-            new_centroid = p
-    return new_centroid
-
-
-def define_centroids(clusters):  # Pas utile car tuple ( num_cluster, point )
-    """
-    Redefine centroids of clusters
-    """
-    centroids = []
-    for cluster in clusters:
-        centroids.append(get_new_centroid(cluster))
-    return centroids
+    new_centroids = SList([])
+    i = 0
+    while i < len(centroids):
+        cluster = clusters.filter(lambda x: x[1] == i)
+        sum_cluster = cluster.map(lambda x: x[0]).reduce(lambda x, y: x + y)
+        average_point = sum_cluster / cluster.length()
+        centroid = clusters.reduce(
+            lambda x, y: x if average_point.distance(x[0]) < average_point.distance(y[0]) else y)[0]
+        new_centroids.append(centroid)
+        i += 1
+    return new_centroids
 
 
 def max_dist(pair_a: Tuple[Point, float], pair_b: Tuple[Point, float]):
@@ -113,12 +94,15 @@ def k_means(input_list: List, init_function: Callable[[List, int], List], n_clus
 
     :return: 2 dimensions list of points
     """
-
     centroids = init_function(input_list, n_cluster)
+
     j = 0
+
     while j < max_iter:
-        clusters = make_clusters(input_list, centroids)  # assign_cluster
-        centroids = define_centroids(clusters)  # update_centroids
+        clusters = assign_clusters(input_list, centroids)
+
+        centroids = update_centroids(clusters, centroids)
+
         j = j + 1
 
     return clusters
diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
index f08a918..0a3b171 100644
--- a/pyske/examples/list/k_means_main.py
+++ b/pyske/examples/list/k_means_main.py
@@ -1,13 +1,13 @@
 """
 Execution of k_means
 """
-import gc
 import argparse
 import matplotlib.pyplot as plt
 
 from pyske.core import Timing
-from pyske.examples.list.k_means import k_means
+from pyske.examples.list.k_means import k_means, k_means_init
 from pyske.examples.list import util
+from pyske.core.support import parallel
 
 PAR = 'parallel'
 SEQ = 'sequential'
@@ -32,13 +32,12 @@
     execute = util.select_execute(choice)
     example = k_means
     execute(lambda: print('Version:\t', choice))
-    gc.disable()
     for iteration in range(1, 1 + num_iter):
         timing.start()
-        result = example(input_list, clusters)
+        result = example(input_list, k_means_init, clusters)
         timing.stop()
-        gc.collect()
         util.print_experiment("", timing.get(), execute, iteration)
-        for i in range(len(result)):
-            plt.scatter([point.x for point in result[i]], [point.y for point in result[i]])
-        plt.show()
+        #if parallel.PID == 0:
+        #    for i in range((len(result))):
+        #        plt.scatter([point.x for point in result[i]], [point.y for point in result[i]])
+        #    plt.show()

From da4a4d6a5a62b4de6e5434b72c9862e89be67d4d Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Thu, 27 May 2021 14:34:55 +0200
Subject: [PATCH 09/53] pylinting, typing

---
 pyske/examples/list/k_means.py | 39 ++++++++++++++++++----------------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index 89cf643..f5da8a0 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -3,33 +3,33 @@
 """
 import random
 from typing import Callable, Tuple
+
 from pyske.core.interface import List
 from pyske.core.list import SList
 from pyske.core.util.point import Point
 
 
-def cluster_index(point, centroids):
+def cluster_index(point: Point, centroids: SList[Point]) -> Tuple[Point, int]:
     """
     Get the centroid index of the closest centroid
     """
     min_dist = float("inf")
     p_centroid = centroids[0]
-    for c in centroids:
-        if point.distance(c) < min_dist:
-            min_dist = point.distance(c)
-            p_centroid = c
+    for centroid in centroids:
+        if point.distance(centroid) < min_dist:
+            min_dist = point.distance(centroid)
+            p_centroid = centroid
     return point, centroids.index(p_centroid)
 
 
-def assign_clusters(input_list, centroids):
+def assign_clusters(input_list: List[Point], centroids: SList[Point]) -> List[Tuple[Point, int]]:
     """
-    Assign to each point to a cluster
+    Assign each point to a cluster
     """
-
     return input_list.map(lambda x: cluster_index(x, centroids))
 
 
-def update_centroids(clusters, centroids):
+def update_centroids(clusters: List[Tuple[Point, int]],  centroids: SList[Point]):
     """
     Update centroids of clusters
     """
@@ -52,11 +52,10 @@ def max_dist(pair_a: Tuple[Point, float], pair_b: Tuple[Point, float]):
     """
     if pair_a[1] > pair_b[1]:
         return pair_a
-    else:
-        return pair_b
+    return pair_b
 
 
-def k_means_init(input_list: List, n_cluster: int):
+def k_means_init(input_list: List[Point], n_cluster: int) -> SList[Point]:
     """
     K-means++ initialisation
 
@@ -66,8 +65,8 @@ def k_means_init(input_list: List, n_cluster: int):
     :return: n_cluster centroids
     """
     centroids = SList([])
-    c1 = input_list.to_seq()[random.randint(0, input_list.length() - 1)]
-    centroids.append(c1)
+    first_centroid = input_list.to_seq()[random.randint(0, input_list.length() - 1)]
+    centroids.append(first_centroid)
 
     for _ in range(n_cluster - 1):
         dist = input_list.map(lambda x: x.distance(centroids[0]))
@@ -82,8 +81,8 @@ def k_means_init(input_list: List, n_cluster: int):
     return centroids
 
 
-def k_means(input_list: List, init_function: Callable[[List, int], List], n_cluster: int,
-            max_iter: int = 10):
+def k_means(input_list: List[Point], init_function: Callable[[List, int], List], n_cluster: int,
+            max_iter: int = 10) -> SList[SList[Point]]:
     """
     K-means algorithm on a list of point
 
@@ -97,7 +96,6 @@ def k_means(input_list: List, init_function: Callable[[List, int], List], n_clus
     centroids = init_function(input_list, n_cluster)
 
     j = 0
-
     while j < max_iter:
         clusters = assign_clusters(input_list, centroids)
 
@@ -105,4 +103,9 @@ def k_means(input_list: List, init_function: Callable[[List, int], List], n_clus
 
         j = j + 1
 
-    return clusters
+    clusters2d = SList([])
+    for i in range(len(centroids)):
+        clusters2d.append(clusters.filter(lambda x, num_cluster=i: x[1] == num_cluster)
+                          .map(lambda x: x[0]).to_seq()
+                          )
+    return clusters2d

From 9f1e0fcead895f6aa292a0e497f56a99506e2306 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Thu, 27 May 2021 14:39:37 +0200
Subject: [PATCH 10/53] FIX: bad list initialization parallel list

---
 pyske/examples/list/util.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index d6517fa..703dfbb 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -102,11 +102,13 @@ def rand_point_list(cls, size, clusters):
     :return: a list of the given class
     """
     from pyske.core.util.point import Point
-    print(clusters)
-    x, y_true = make_blobs(n_samples=size, centers=clusters)
+    from pyske.core import Distribution
+
+    x, _ = make_blobs(n_samples=size, centers=clusters)
     x = x.tolist()
     x = list(map(lambda y: Point(y[0], y[1]), x))
-    return cls.init(lambda i: x[i], size)
+    distr = Distribution().balanced(size)
+    return cls.from_seq(x).distribute(distr)
 
 
 def print_experiment(result, timing, execute, iteration=None):

From a8d0385a486216141b46fb267616ca56fb1d4816 Mon Sep 17 00:00:00 2001
From: Evan MULUMBA <evans.mllb@gmail.com>
Date: Thu, 27 May 2021 20:39:52 +0200
Subject: [PATCH 11/53] Changing sample type from custom type "Point" to Tuple

---
 pyske/examples/list/k_means.py      | 43 ++++++++++++++++++++++-------
 pyske/examples/list/k_means_main.py |  3 +-
 pyske/examples/list/util.py         | 17 ++++++++++++
 3 files changed, 52 insertions(+), 11 deletions(-)

diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index 89cf643..dbddc8e 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -1,24 +1,47 @@
 """
 K-Means
 """
+import operator
 import random
+from math import sqrt
 from typing import Callable, Tuple
 from pyske.core.interface import List
 from pyske.core.list import SList
 from pyske.core.util.point import Point
 
 
-def cluster_index(point, centroids):
+def distance2D(sample_1, sample_2):
+    """
+            return distance between 2d sample.
+
+            Examples::
+
+                >>> from pyske.core.util.point import Point
+                >>> p1 = Point(5,5)
+                >>> p2 = Point(5,7)
+                >>> p1.distance(p2)
+                2.0
+
+            :param other: a point
+            :return: distance from other point
+
+            """
+    dx = sample_1[0] - sample_2[0]
+    dy = sample_1[1] - sample_2[1]
+    return sqrt(dx ** 2 + dy ** 2)
+
+
+def cluster_index(sample, centroids):
     """
     Get the centroid index of the closest centroid
     """
     min_dist = float("inf")
     p_centroid = centroids[0]
     for c in centroids:
-        if point.distance(c) < min_dist:
-            min_dist = point.distance(c)
+        if distance2D(sample, c) < min_dist:
+            min_dist = distance2D(sample, c)
             p_centroid = c
-    return point, centroids.index(p_centroid)
+    return sample, centroids.index(p_centroid)
 
 
 def assign_clusters(input_list, centroids):
@@ -37,16 +60,16 @@ def update_centroids(clusters, centroids):
     i = 0
     while i < len(centroids):
         cluster = clusters.filter(lambda x: x[1] == i)
-        sum_cluster = cluster.map(lambda x: x[0]).reduce(lambda x, y: x + y)
-        average_point = sum_cluster / cluster.length()
+        sum_cluster = cluster.map(lambda x: x[0]).reduce(lambda a, b: tuple(map(operator.add, a, b)))
+        average_point = [x/clusters.length() for x in sum_cluster]
         centroid = clusters.reduce(
-            lambda x, y: x if average_point.distance(x[0]) < average_point.distance(y[0]) else y)[0]
+            lambda x, y: x if distance2D(average_point, x[0]) < distance2D(average_point, y[0]) else y)[0]
         new_centroids.append(centroid)
         i += 1
     return new_centroids
 
 
-def max_dist(pair_a: Tuple[Point, float], pair_b: Tuple[Point, float]):
+def max_dist(pair_a, pair_b):
     """
     Return the tuple with the maximum distance
     """
@@ -70,9 +93,9 @@ def k_means_init(input_list: List, n_cluster: int):
     centroids.append(c1)
 
     for _ in range(n_cluster - 1):
-        dist = input_list.map(lambda x: x.distance(centroids[0]))
+        dist = input_list.map(lambda sample: distance2D(sample, centroids[0]))
         for i in range(1, len(centroids)):
-            temp_dist = input_list.map(lambda x, index=i: x.distance(centroids[index]))
+            temp_dist = input_list.map(lambda sample, index=i: distance2D(sample, centroids[index]))
             dist = dist.map2(lambda x, y: y if y < x else x, temp_dist)
 
         zip_list = input_list.zip(dist)
diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
index 0a3b171..65febba 100644
--- a/pyske/examples/list/k_means_main.py
+++ b/pyske/examples/list/k_means_main.py
@@ -27,7 +27,8 @@
     clusters = args.clusters
 
     pyske_list_class = util.select_pyske_list(choice)
-    input_list = util.rand_point_list(pyske_list_class, size, clusters)
+    #input_list = util.rand_point_list(pyske_list_class, size, clusters)
+    input_list = util.rand_2D_sample_list(pyske_list_class, size, clusters)
     timing = Timing()
     execute = util.select_execute(choice)
     example = k_means
diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index d6517fa..2a7327b 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -109,6 +109,23 @@ def rand_point_list(cls, size, clusters):
     return cls.init(lambda i: x[i], size)
 
 
+def rand_2D_sample_list(cls, size , clusters):
+    """
+    Return a randomly generated list of 2D sample.
+
+    :param cls: the class of the generated list.
+    :param size: a positive number
+        Precondition: size >= 0
+    :param clusters: number of clusters
+    :return: a list of the given class
+    """
+    print(clusters)
+    x, y_true = make_blobs(n_samples=size, centers=clusters)
+    x = x.tolist()
+    x = list(map(lambda y: (y[0], y[1]), x))
+    return cls.init(lambda i: x[i], size)
+
+
 def print_experiment(result, timing, execute, iteration=None):
     """
     Print the result and timing of the experiment.

From c617aade25a15c196258ad3464930c6ebf67b37d Mon Sep 17 00:00:00 2001
From: Evan MULUMBA <evans.mllb@gmail.com>
Date: Fri, 28 May 2021 14:16:39 +0200
Subject: [PATCH 12/53] Add point_interface and change the class Point to
 Point_2D

---
 pyske/core/util/{point.py => point_2D.py} | 35 ++++++++++++-----------
 pyske/core/util/point_Interface.py        | 25 ++++++++++++++++
 pyske/examples/list/k_means.py            | 16 +++++------
 pyske/examples/list/util.py               |  4 +--
 4 files changed, 53 insertions(+), 27 deletions(-)
 rename pyske/core/util/{point.py => point_2D.py} (61%)
 create mode 100644 pyske/core/util/point_Interface.py

diff --git a/pyske/core/util/point.py b/pyske/core/util/point_2D.py
similarity index 61%
rename from pyske/core/util/point.py
rename to pyske/core/util/point_2D.py
index b4c43f0..6a0dfd5 100644
--- a/pyske/core/util/point.py
+++ b/pyske/core/util/point_2D.py
@@ -3,10 +3,11 @@
 """
 
 from math import sqrt
+from pyske.core.util.point_Interface import Point_Interface
 
 
-class Point(object):
-    """A class to represent a point"""
+class Point_2D(Point_Interface):
+    """A class to represent a 2D point"""
 
     def __init__(self, x=0, y=0):
         self.__x = x
@@ -16,7 +17,7 @@ def __repr__(self):
         return "(%s, %s)" % (self.__x, self.__y)
 
     def __eq__(self, other):
-        if isinstance(other, Point):
+        if isinstance(other, Point_2D):
             return self.__x == other.x and self.__y == other.__y
         return False
 
@@ -26,13 +27,13 @@ def __add__(self, other):
 
         Examples::
 
-            >>> p1 = Point(5,5)
-            >>> p2 = Point(5,7)
+            >>> p1 = Point_2D(5,5)
+            >>> p2 = Point_2D(5,7)
             >>> p1 + p2
             (10, 12)
         """
-        if isinstance(other, Point):
-            return Point(self.x + other.x, self.y + other.y)
+        if isinstance(other, Point_2D):
+            return Point_2D(self.x + other.x, self.y + other.y)
 
     def __mul__(self, other):
         """
@@ -40,21 +41,21 @@ def __mul__(self, other):
 
         Examples::
 
-            >>> p1 = Point(5,5)
-            >>> p2 = Point(5,7)
+            >>> p1 = Point_2D(5,5)
+            >>> p2 = Point_2D(5,7)
             >>> p1 * 5
             (25, 25)
             >>> p1 * p2
             (25, 35)
         """
-        if isinstance(other, Point):
-            return Point(self.x * other.x, self.y * other.y)
+        if isinstance(other, Point_2D):
+            return Point_2D(self.x * other.x, self.y * other.y)
         if isinstance(other, int) or isinstance(other, float):
-            return Point(self.x * other, self.y * other)
+            return Point_2D(self.x * other, self.y * other)
 
     def __truediv__(self, other):
         if isinstance(other, int):
-            return Point(self.x / other, self.y / other)
+            return Point_2D(self.x / other, self.y / other)
 
     @property
     def x(self):
@@ -66,15 +67,15 @@ def y(self):
         """Y getter"""
         return self.__y
 
-    def distance(self, other: 'Point'):
+    def distance(self, other: 'Point_2D'):
         """
         Returns the distance from another point.
 
         Examples::
 
-            >>> from pyske.core.util.point import Point
-            >>> p1 = Point(5,5)
-            >>> p2 = Point(5,7)
+            >>> from pyske.core.util.point_2D import Point_2D
+            >>> p1 = Point_2D(5,5)
+            >>> p2 = Point_2D(5,7)
             >>> p1.distance(p2)
             2.0
 
diff --git a/pyske/core/util/point_Interface.py b/pyske/core/util/point_Interface.py
new file mode 100644
index 0000000..aa3be5d
--- /dev/null
+++ b/pyske/core/util/point_Interface.py
@@ -0,0 +1,25 @@
+"""
+A module to represent a point
+"""
+
+
+class Point_Interface:
+    """Point interface to represent point of n dimensions"""
+
+    def __repr__(self):
+        pass
+
+    def __eq__(self, other):
+        pass
+
+    def __add__(self, other):
+        pass
+
+    def __mul__(self, other):
+        pass
+
+    def __truediv__(self, other):
+        pass
+
+    def distance(self, other):
+        pass
diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index f5da8a0..42af105 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -6,10 +6,10 @@
 
 from pyske.core.interface import List
 from pyske.core.list import SList
-from pyske.core.util.point import Point
+from pyske.core.util.point_2D import Point_2D
 
 
-def cluster_index(point: Point, centroids: SList[Point]) -> Tuple[Point, int]:
+def cluster_index(point: Point_2D, centroids: SList[Point_2D]) -> Tuple[Point_2D, int]:
     """
     Get the centroid index of the closest centroid
     """
@@ -22,14 +22,14 @@ def cluster_index(point: Point, centroids: SList[Point]) -> Tuple[Point, int]:
     return point, centroids.index(p_centroid)
 
 
-def assign_clusters(input_list: List[Point], centroids: SList[Point]) -> List[Tuple[Point, int]]:
+def assign_clusters(input_list: List[Point_2D], centroids: SList[Point_2D]) -> List[Tuple[Point_2D, int]]:
     """
     Assign each point to a cluster
     """
     return input_list.map(lambda x: cluster_index(x, centroids))
 
 
-def update_centroids(clusters: List[Tuple[Point, int]],  centroids: SList[Point]):
+def update_centroids(clusters: List[Tuple[Point_2D, int]], centroids: SList[Point_2D]):
     """
     Update centroids of clusters
     """
@@ -46,7 +46,7 @@ def update_centroids(clusters: List[Tuple[Point, int]],  centroids: SList[Point]
     return new_centroids
 
 
-def max_dist(pair_a: Tuple[Point, float], pair_b: Tuple[Point, float]):
+def max_dist(pair_a: Tuple[Point_2D, float], pair_b: Tuple[Point_2D, float]):
     """
     Return the tuple with the maximum distance
     """
@@ -55,7 +55,7 @@ def max_dist(pair_a: Tuple[Point, float], pair_b: Tuple[Point, float]):
     return pair_b
 
 
-def k_means_init(input_list: List[Point], n_cluster: int) -> SList[Point]:
+def k_means_init(input_list: List[Point_2D], n_cluster: int) -> SList[Point_2D]:
     """
     K-means++ initialisation
 
@@ -81,8 +81,8 @@ def k_means_init(input_list: List[Point], n_cluster: int) -> SList[Point]:
     return centroids
 
 
-def k_means(input_list: List[Point], init_function: Callable[[List, int], List], n_cluster: int,
-            max_iter: int = 10) -> SList[SList[Point]]:
+def k_means(input_list: List[Point_2D], init_function: Callable[[List, int], List], n_cluster: int,
+            max_iter: int = 10) -> SList[SList[Point_2D]]:
     """
     K-means algorithm on a list of point
 
diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index 703dfbb..e9e9e3c 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -101,12 +101,12 @@ def rand_point_list(cls, size, clusters):
     :param clusters: number of clusters
     :return: a list of the given class
     """
-    from pyske.core.util.point import Point
+    from pyske.core.util.point_2D import Point_2D
     from pyske.core import Distribution
 
     x, _ = make_blobs(n_samples=size, centers=clusters)
     x = x.tolist()
-    x = list(map(lambda y: Point(y[0], y[1]), x))
+    x = list(map(lambda y: Point_2D(y[0], y[1]), x))
     distr = Distribution().balanced(size)
     return cls.from_seq(x).distribute(distr)
 

From ff39d0b502325d502a00fb9dbd26b2c33bbfdf82 Mon Sep 17 00:00:00 2001
From: Evan MULUMBA <evans.mllb@gmail.com>
Date: Fri, 28 May 2021 14:22:12 +0200
Subject: [PATCH 13/53] FIX: input_list type from Tuple to Point_2D

---
 pyske/examples/list/k_means_main.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
index 65febba..0a3b171 100644
--- a/pyske/examples/list/k_means_main.py
+++ b/pyske/examples/list/k_means_main.py
@@ -27,8 +27,7 @@
     clusters = args.clusters
 
     pyske_list_class = util.select_pyske_list(choice)
-    #input_list = util.rand_point_list(pyske_list_class, size, clusters)
-    input_list = util.rand_2D_sample_list(pyske_list_class, size, clusters)
+    input_list = util.rand_point_list(pyske_list_class, size, clusters)
     timing = Timing()
     execute = util.select_execute(choice)
     example = k_means

From 32f557ec6cb0fa29af29e5e097b620498d8448ee Mon Sep 17 00:00:00 2001
From: Evan MULUMBA <evans.mllb@gmail.com>
Date: Fri, 28 May 2021 14:38:04 +0200
Subject: [PATCH 14/53] Add class point_3D.py

---
 pyske/core/util/point_2D.py         |  4 +-
 pyske/core/util/point_3D.py         | 79 +++++++++++++++++++++++++++++
 pyske/examples/list/k_means_main.py |  2 +-
 pyske/examples/list/util.py         | 26 ++++------
 4 files changed, 92 insertions(+), 19 deletions(-)
 create mode 100644 pyske/core/util/point_3D.py

diff --git a/pyske/core/util/point_2D.py b/pyske/core/util/point_2D.py
index 6a0dfd5..f6f5f7f 100644
--- a/pyske/core/util/point_2D.py
+++ b/pyske/core/util/point_2D.py
@@ -1,5 +1,5 @@
 """
-A module to represent a point
+A module to represent a 2D point
 """
 
 from math import sqrt
@@ -18,7 +18,7 @@ def __repr__(self):
 
     def __eq__(self, other):
         if isinstance(other, Point_2D):
-            return self.__x == other.x and self.__y == other.__y
+            return self.__x == other.__x and self.__y == other.__y
         return False
 
     def __add__(self, other):
diff --git a/pyske/core/util/point_3D.py b/pyske/core/util/point_3D.py
new file mode 100644
index 0000000..9cde20f
--- /dev/null
+++ b/pyske/core/util/point_3D.py
@@ -0,0 +1,79 @@
+"""
+A module to represent a 3D point
+"""
+
+from math import sqrt
+from pyske.core.util.point_Interface import Point_Interface
+
+
+class Point_3D(Point_Interface):
+    """A class to represent a 3D point"""
+
+    def __init__(self, x=0, y=0, z=0):
+        self.__x = x
+        self.__y = y
+        self.__z = z
+
+    def __repr__(self):
+        return "(%s, %s, %s)" % (self.__x, self.__y, self.__z)
+
+    def __eq__(self, other):
+        if isinstance(other, Point_3D):
+            return self.__x == other.__x and self.__y == other.__y and self.__z == other.__z
+        return False
+
+    def __add__(self, other):
+        """
+        Addition of two points
+
+        Examples::
+
+            >>> p1 = Point_3D(5,5,2)
+            >>> p2 = Point_3D(5,7,1)
+            >>> p1 + p2
+            (10, 12, 3)
+        """
+        if isinstance(other, Point_3D):
+            return Point_3D(self.x + other.x, self.y + other.y, self.z + other.z)
+
+    def __mul__(self, other):
+        pass
+
+    def __truediv__(self, other):
+        pass
+
+    @property
+    def x(self):
+        """X getter"""
+        return self.__x
+
+    @property
+    def y(self):
+        """Y getter"""
+        return self.__y
+
+    @property
+    def z(self):
+        """Z getter"""
+        return self.z
+
+    def distance(self, other):
+        """
+        Returns the distance from another 3D point.
+
+        Examples::
+
+            >>> from pyske.core.util.point_2D import Point_2D
+            >>> p1 = Point_3D(5,5,2)
+            >>> p2 = Point_3D(5,7,1)
+            >>> p1.distance(p2)
+            2.24
+
+        :param other: a point
+        :return: distance from other point
+
+        """
+        dx = self.__x - other.x
+        dy = self.__y - other.y
+        dz = self.__x - other.z
+        return sqrt(dx ** 2 + dy ** 2 + dz ** 2)
diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
index 0a3b171..b6b20fa 100644
--- a/pyske/examples/list/k_means_main.py
+++ b/pyske/examples/list/k_means_main.py
@@ -27,7 +27,7 @@
     clusters = args.clusters
 
     pyske_list_class = util.select_pyske_list(choice)
-    input_list = util.rand_point_list(pyske_list_class, size, clusters)
+    input_list = util.rand_point_2D_list(pyske_list_class, size, clusters)
     timing = Timing()
     execute = util.select_execute(choice)
     example = k_means
diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index bdb68a7..648fe9d 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -91,9 +91,9 @@ def rand_list(cls, size):
     return cls.init(lambda _: float(random.randint(-100, 100)), size)
 
 
-def rand_point_list(cls, size, clusters):
+def rand_point_2D_list(cls, size, clusters):
     """
-    Return a randomly generated list of points.
+    Return a randomly generated list of 2D points.
 
     :param cls: the class of the generated list.
     :param size: a positive number
@@ -110,22 +110,16 @@ def rand_point_list(cls, size, clusters):
     distr = Distribution().balanced(size)
     return cls.from_seq(x).distribute(distr)
 
-
-def rand_2D_sample_list(cls, size , clusters):
+def rand_point_3D_list(cls, size, clusters):
     """
-    Return a randomly generated list of 2D sample.
+        Return a randomly generated list of 3D points.
 
-    :param cls: the class of the generated list.
-    :param size: a positive number
-        Precondition: size >= 0
-    :param clusters: number of clusters
-    :return: a list of the given class
-    """
-    print(clusters)
-    x, y_true = make_blobs(n_samples=size, centers=clusters)
-    x = x.tolist()
-    x = list(map(lambda y: (y[0], y[1]), x))
-    return cls.init(lambda i: x[i], size)
+        :param cls: the class of the generated list.
+        :param size: a positive number
+            Precondition: size >= 0
+        :param clusters: number of clusters
+        :return: a list of the given class
+        """
 
 
 def print_experiment(result, timing, execute, iteration=None):

From 7e6966d8534c82393966b83456aa3974822c4c61 Mon Sep 17 00:00:00 2001
From: Evan MULUMBA <evans.mllb@gmail.com>
Date: Fri, 28 May 2021 14:42:05 +0200
Subject: [PATCH 15/53] rand_point_2D_list / rand_point_3D_list

---
 pyske/examples/list/util.py | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index 648fe9d..8124598 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -110,16 +110,25 @@ def rand_point_2D_list(cls, size, clusters):
     distr = Distribution().balanced(size)
     return cls.from_seq(x).distribute(distr)
 
+
 def rand_point_3D_list(cls, size, clusters):
     """
-        Return a randomly generated list of 3D points.
+    Return a randomly generated list of 2D points.
+
+    :param cls: the class of the generated list.
+    :param size: a positive number
+        Precondition: size >= 0
+    :param clusters: number of clusters
+    :return: a list of the given class
+    """
+    from pyske.core.util.point_3D import Point_3D
+    from pyske.core import Distribution
 
-        :param cls: the class of the generated list.
-        :param size: a positive number
-            Precondition: size >= 0
-        :param clusters: number of clusters
-        :return: a list of the given class
-        """
+    x, _ = make_blobs(n_samples=size, centers=clusters)
+    x = x.tolist()
+    x = list(map(lambda y: Point_3D(y[0], y[1], y[2]), x))
+    distr = Distribution().balanced(size)
+    return cls.from_seq(x).distribute(distr)
 
 
 def print_experiment(result, timing, execute, iteration=None):

From 9b147eba58a6549ee59202f2ad5060918133674c Mon Sep 17 00:00:00 2001
From: Evan MULUMBA <evans.mllb@gmail.com>
Date: Fri, 28 May 2021 15:30:48 +0200
Subject: [PATCH 16/53] Point_3D update

---
 pyske/core/util/point_3D.py         | 22 +++++++++++++++++++---
 pyske/examples/list/k_means_main.py |  7 +++++--
 pyske/examples/list/util.py         |  2 +-
 3 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/pyske/core/util/point_3D.py b/pyske/core/util/point_3D.py
index 9cde20f..c6e1aa5 100644
--- a/pyske/core/util/point_3D.py
+++ b/pyske/core/util/point_3D.py
@@ -37,10 +37,26 @@ def __add__(self, other):
             return Point_3D(self.x + other.x, self.y + other.y, self.z + other.z)
 
     def __mul__(self, other):
-        pass
+        """
+        Multiplication by a point or a scalar
+
+        Examples::
+
+            >>> p1 = Point_3D(5,5,2)
+            >>> p2 = Point_3D(5,7,1)
+            >>> p1 * 5
+            (25, 25, 10)
+            >>> p1 * p2
+            (25, 35, 2)
+        """
+        if isinstance(other, Point_3D):
+            return Point_3D(self.x * other.x, self.y * other.y, self.z * other.z)
+        if isinstance(other, int) or isinstance(other, float):
+            return Point_3D(self.x * other, self.y * other, self.z * other.z)
 
     def __truediv__(self, other):
-        pass
+        if isinstance(other, int):
+            return Point_3D(self.x / other, self.y / other, self.z / other)
 
     @property
     def x(self):
@@ -55,7 +71,7 @@ def y(self):
     @property
     def z(self):
         """Z getter"""
-        return self.z
+        return self.__z
 
     def distance(self, other):
         """
diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
index b6b20fa..29eb81d 100644
--- a/pyske/examples/list/k_means_main.py
+++ b/pyske/examples/list/k_means_main.py
@@ -27,7 +27,10 @@
     clusters = args.clusters
 
     pyske_list_class = util.select_pyske_list(choice)
-    input_list = util.rand_point_2D_list(pyske_list_class, size, clusters)
+
+    # input_list = util.rand_point_2D_list(pyske_list_class, size, clusters)
+    input_list = util.rand_point_3D_list(pyske_list_class, size, clusters)
+
     timing = Timing()
     execute = util.select_execute(choice)
     example = k_means
@@ -37,7 +40,7 @@
         result = example(input_list, k_means_init, clusters)
         timing.stop()
         util.print_experiment("", timing.get(), execute, iteration)
-        #if parallel.PID == 0:
+        # if parallel.PID == 0:
         #    for i in range((len(result))):
         #        plt.scatter([point.x for point in result[i]], [point.y for point in result[i]])
         #    plt.show()
diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index 8124598..5279386 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -124,7 +124,7 @@ def rand_point_3D_list(cls, size, clusters):
     from pyske.core.util.point_3D import Point_3D
     from pyske.core import Distribution
 
-    x, _ = make_blobs(n_samples=size, centers=clusters)
+    x, _ = make_blobs(n_samples=size, centers=clusters, n_features=3)
     x = x.tolist()
     x = list(map(lambda y: Point_3D(y[0], y[1], y[2]), x))
     distr = Distribution().balanced(size)

From 5b89f49ec00fb655c9dd5794c5cd75a731b7644f Mon Sep 17 00:00:00 2001
From: Evan MULUMBA <evans.mllb@gmail.com>
Date: Fri, 28 May 2021 15:43:55 +0200
Subject: [PATCH 17/53] Typing Point_2D -> Point_Interface

---
 pyske/examples/list/k_means.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index 42af105..52b4e2b 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -6,10 +6,10 @@
 
 from pyske.core.interface import List
 from pyske.core.list import SList
-from pyske.core.util.point_2D import Point_2D
+from pyske.core.util.point_Interface import Point_Interface
 
 
-def cluster_index(point: Point_2D, centroids: SList[Point_2D]) -> Tuple[Point_2D, int]:
+def cluster_index(point: Point_Interface, centroids: SList[Point_Interface]) -> Tuple[Point_Interface, int]:
     """
     Get the centroid index of the closest centroid
     """
@@ -22,14 +22,14 @@ def cluster_index(point: Point_2D, centroids: SList[Point_2D]) -> Tuple[Point_2D
     return point, centroids.index(p_centroid)
 
 
-def assign_clusters(input_list: List[Point_2D], centroids: SList[Point_2D]) -> List[Tuple[Point_2D, int]]:
+def assign_clusters(input_list: List[Point_Interface], centroids: SList[Point_Interface]) -> List[Tuple[Point_Interface, int]]:
     """
     Assign each point to a cluster
     """
     return input_list.map(lambda x: cluster_index(x, centroids))
 
 
-def update_centroids(clusters: List[Tuple[Point_2D, int]], centroids: SList[Point_2D]):
+def update_centroids(clusters: List[Tuple[Point_Interface, int]], centroids: SList[Point_Interface]):
     """
     Update centroids of clusters
     """
@@ -46,7 +46,7 @@ def update_centroids(clusters: List[Tuple[Point_2D, int]], centroids: SList[Poin
     return new_centroids
 
 
-def max_dist(pair_a: Tuple[Point_2D, float], pair_b: Tuple[Point_2D, float]):
+def max_dist(pair_a: Tuple[Point_Interface, float], pair_b: Tuple[Point_Interface, float]):
     """
     Return the tuple with the maximum distance
     """
@@ -55,7 +55,7 @@ def max_dist(pair_a: Tuple[Point_2D, float], pair_b: Tuple[Point_2D, float]):
     return pair_b
 
 
-def k_means_init(input_list: List[Point_2D], n_cluster: int) -> SList[Point_2D]:
+def k_means_init(input_list: List[Point_Interface], n_cluster: int) -> SList[Point_Interface]:
     """
     K-means++ initialisation
 
@@ -81,8 +81,8 @@ def k_means_init(input_list: List[Point_2D], n_cluster: int) -> SList[Point_2D]:
     return centroids
 
 
-def k_means(input_list: List[Point_2D], init_function: Callable[[List, int], List], n_cluster: int,
-            max_iter: int = 10) -> SList[SList[Point_2D]]:
+def k_means(input_list: List[Point_Interface], init_function: Callable[[List, int], List], n_cluster: int,
+            max_iter: int = 10) -> SList[SList[Point_Interface]]:
     """
     K-means algorithm on a list of point
 

From eb792d573bd6ccd067c78620e2236a0972c83fc7 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Fri, 28 May 2021 17:11:19 +0200
Subject: [PATCH 18/53] optimization update_centroids

---
 pyske/examples/list/k_means.py | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index f5da8a0..cc28b11 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -29,20 +29,18 @@ def assign_clusters(input_list: List[Point], centroids: SList[Point]) -> List[Tu
     return input_list.map(lambda x: cluster_index(x, centroids))
 
 
-def update_centroids(clusters: List[Tuple[Point, int]],  centroids: SList[Point]):
+def update_centroids(clusters: List[Tuple[Point, int]], centroids: SList[Point]):
     """
     Update centroids of clusters
     """
-    new_centroids = SList([])
-    i = 0
-    while i < len(centroids):
-        cluster = clusters.filter(lambda x: x[1] == i)
-        sum_cluster = cluster.map(lambda x: x[0]).reduce(lambda x, y: x + y)
-        average_point = sum_cluster / cluster.length()
-        centroid = clusters.reduce(
-            lambda x, y: x if average_point.distance(x[0]) < average_point.distance(y[0]) else y)[0]
-        new_centroids.append(centroid)
-        i += 1
+
+    new_centroids = SList.init(lambda _: (Point(), _, _), len(centroids))
+
+    new_centroids = new_centroids.mapi(lambda i, x: clusters.map_reduce(lambda w: (w[0], w[1], 1),
+        lambda y, z: (y[0] + z[0], y[1], y[2] + z[2]) if y[1] == i and z[1] == i else (
+            z if y[1] != i else y)))
+    new_centroids = new_centroids.map(lambda x: x[0] / x[2])
+
     return new_centroids
 
 

From 467b33bf8134354324827be18c3c9a96d707935c Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Fri, 28 May 2021 17:37:05 +0200
Subject: [PATCH 19/53] refactoring because of new point implementation

---
 pyske/examples/list/k_means.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index 7a47e0b..6d1ac4e 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -34,7 +34,7 @@ def update_centroids(clusters: List[Tuple[Point_Interface, int]], centroids: SLi
     Update centroids of clusters
     """
 
-    new_centroids = SList.init(lambda _: (Point(), _, _), len(centroids))
+    new_centroids = SList.init(lambda _: (_, _, _), len(centroids))
 
     new_centroids = new_centroids.mapi(lambda i, x: clusters.map_reduce(lambda w: (w[0], w[1], 1),
         lambda y, z: (y[0] + z[0], y[1], y[2] + z[2]) if y[1] == i and z[1] == i else (

From 8c2cf82d97f2f2462b60b3b6502326dd72958f25 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Mon, 31 May 2021 11:16:25 +0200
Subject: [PATCH 20/53] use parallelism for the random choice of the first centroid

---
 pyske/examples/list/k_means.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index 6d1ac4e..1b224f8 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -7,6 +7,7 @@
 from pyske.core.interface import List
 from pyske.core.list import SList
 from pyske.core.util.point_Interface import Point_Interface
+from pyske.core.util.par import procs
 
 
 def cluster_index(point: Point_Interface, centroids: SList[Point_Interface]) -> Tuple[Point_Interface, int]:
@@ -63,7 +64,9 @@ def k_means_init(input_list: List[Point_Interface], n_cluster: int) -> SList[Poi
     :return: n_cluster centroids
     """
     centroids = SList([])
-    first_centroid = input_list.to_seq()[random.randint(0, input_list.length() - 1)]
+    first_centroid = input_list.get_partition()\
+                               .map(lambda l: l[random.randint(0, l.length() - 1)])\
+                               .to_seq()[random.randint(0, list(procs())[len(list(procs())) - 1])]
     centroids.append(first_centroid)
 
     for _ in range(n_cluster - 1):

From 27a5039d749c16598b4fb8aa8dbbfb5191cc1ddd Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Tue, 1 Jun 2021 17:16:47 +0200
Subject: [PATCH 21/53] add point dimensions in k-means-main's options

---
 pyske/examples/list/k_means_main.py |  6 ++--
 pyske/examples/list/util.py         | 45 ++++++++++++++---------------
 2 files changed, 24 insertions(+), 27 deletions(-)

diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
index 29eb81d..163f761 100644
--- a/pyske/examples/list/k_means_main.py
+++ b/pyske/examples/list/k_means_main.py
@@ -19,17 +19,17 @@
     parser.add_argument("--iter", help="number of iterations", type=int, default=30)
     parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ)
     parser.add_argument("--clusters", help="number of clusters", type=int, default=3)
+    parser.add_argument("--dimensions", help="point dimensions", type=int, default=2)
 
     args = parser.parse_args()
     size = args.size
     num_iter = args.iter
     choice = args.data
     clusters = args.clusters
+    dimensions = args.dimensions
 
     pyske_list_class = util.select_pyske_list(choice)
-
-    # input_list = util.rand_point_2D_list(pyske_list_class, size, clusters)
-    input_list = util.rand_point_3D_list(pyske_list_class, size, clusters)
+    input_list = util.rand_point_list(pyske_list_class, size, clusters, dimensions)
 
     timing = Timing()
     execute = util.select_execute(choice)
diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index 5279386..ef79d51 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -3,6 +3,7 @@
 """
 
 from sklearn.datasets import make_blobs
+from pyske.core import Distribution
 
 PAR = 'parallel'
 SEQ = 'sequential'
@@ -90,43 +91,39 @@ def rand_list(cls, size):
     import random
     return cls.init(lambda _: float(random.randint(-100, 100)), size)
 
-
-def rand_point_2D_list(cls, size, clusters):
+def select_point_dimensions(dimensions):
     """
-    Return a randomly generated list of 2D points.
+    Return a PySke list class.
 
-    :param cls: the class of the generated list.
-    :param size: a positive number
-        Precondition: size >= 0
-    :param clusters: number of clusters
-    :return: a list of the given class
+    :param dimensions: point dimensions
+            Precondition: dimensions >= 2
+    :return: a Point
     """
-    from pyske.core.util.point_2D import Point_2D
-    from pyske.core import Distribution
-
-    x, _ = make_blobs(n_samples=size, centers=clusters)
-    x = x.tolist()
-    x = list(map(lambda y: Point_2D(y[0], y[1]), x))
-    distr = Distribution().balanced(size)
-    return cls.from_seq(x).distribute(distr)
-
+    # pylint: disable=import-outside-toplevel
+    if dimensions == 2:
+        from pyske.core.util.point_2D import Point_2D as PointClass
+    elif dimensions == 3:
+        from pyske.core.util.point_3D import Point_3D as PointClass
+    else:
+        from pyske.core.util.point_2D import Point_2D as PointClass
+    return PointClass
 
-def rand_point_3D_list(cls, size, clusters):
+def rand_point_list(cls, size, clusters, dimensions):
     """
-    Return a randomly generated list of 2D points.
+    Return a randomly generated list of points.
 
     :param cls: the class of the generated list.
     :param size: a positive number
         Precondition: size >= 0
     :param clusters: number of clusters
+    :param dimensions: point dimensions
+            Precondition: dimensions >= 2
     :return: a list of the given class
     """
-    from pyske.core.util.point_3D import Point_3D
-    from pyske.core import Distribution
-
-    x, _ = make_blobs(n_samples=size, centers=clusters, n_features=3)
+    x, _ = make_blobs(n_samples=size, centers=clusters, n_features=dimensions)
     x = x.tolist()
-    x = list(map(lambda y: Point_3D(y[0], y[1], y[2]), x))
+    pointclass = select_point_dimensions(dimensions)
+    x = list(map(lambda y: pointclass(*y), x))
     distr = Distribution().balanced(size)
     return cls.from_seq(x).distribute(distr)
 

From 84f2daaa554b88110374334e5c98ce5c9421bd15 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Wed, 2 Jun 2021 11:42:41 +0200
Subject: [PATCH 22/53] interface convention: make Point_Interface inherit from ABC

---
 pyske/core/util/point_Interface.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyske/core/util/point_Interface.py b/pyske/core/util/point_Interface.py
index aa3be5d..6196c47 100644
--- a/pyske/core/util/point_Interface.py
+++ b/pyske/core/util/point_Interface.py
@@ -1,9 +1,9 @@
 """
 A module to represent a point
 """
+from abc import ABC
 
-
-class Point_Interface:
+class Point_Interface(ABC):
     """Point interface to represent point of n dimensions"""
 
     def __repr__(self):

From 82b7a7decbd6dcfffa6ca5a6709940948c173a08 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Fri, 4 Jun 2021 14:55:56 +0200
Subject: [PATCH 23/53] parallel optimization update_centroids

---
 pyske/examples/list/k_means.py | 66 +++++++++++++++++++---------------
 1 file changed, 38 insertions(+), 28 deletions(-)

diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index 1b224f8..f238ffa 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -10,7 +10,8 @@
 from pyske.core.util.par import procs
 
 
-def cluster_index(point: Point_Interface, centroids: SList[Point_Interface]) -> Tuple[Point_Interface, int]:
+def cluster_index(point: Point_Interface, centroids: SList[Point_Interface]) -> \
+        Tuple[Point_Interface, int]:
     """
     Get the centroid index of the closest centroid
     """
@@ -23,24 +24,37 @@ def cluster_index(point: Point_Interface, centroids: SList[Point_Interface]) ->
     return point, centroids.index(p_centroid)
 
 
-def assign_clusters(input_list: List[Point_Interface], centroids: SList[Point_Interface]) -> List[Tuple[Point_Interface, int]]:
+def assign_clusters(input_list: List[Point_Interface], centroids: SList[Point_Interface]) -> \
+        List[Tuple[Point_Interface, int]]:
     """
     Assign each point to a cluster
     """
     return input_list.map(lambda x: cluster_index(x, centroids))
 
 
-def update_centroids(clusters: List[Tuple[Point_Interface, int]], centroids: SList[Point_Interface]):
+def update_centroids(clusters: List[Tuple[Point_Interface, int]],
+                     centroids: SList[Point_Interface]):
     """
     Update centroids of clusters
     """
 
-    new_centroids = SList.init(lambda _: (_, _, _), len(centroids))
-
-    new_centroids = new_centroids.mapi(lambda i, x: clusters.map_reduce(lambda w: (w[0], w[1], 1),
-        lambda y, z: (y[0] + z[0], y[1], y[2] + z[2]) if y[1] == i and z[1] == i else (
-            z if y[1] != i else y)))
-    new_centroids = new_centroids.map(lambda x: x[0] / x[2])
+    def centroids_list_update(list_to_update, item):
+        if isinstance(item, SList):
+            list_to_update = list_to_update.map2(lambda a_pair, b_pair: (a_pair[0] + b_pair[0],
+                                                                         a_pair[1] + b_pair[1]),
+                                                 item)
+        else:
+            index = item[1]
+            point = item[0]
+            list_to_update[index] = (list_to_update[index][0] + point,
+                                     list_to_update[index][1] + 1)
+        return list_to_update
+
+    point_class = type(centroids[0])
+    neutral_list = SList.init(lambda _: (point_class(), 0), len(centroids))
+    new_centroids = clusters.reduce(lambda a_item, b_item:
+                                    centroids_list_update(a_item, b_item), neutral_list)
+    new_centroids = new_centroids.map(lambda x: x[0] / x[1])
 
     return new_centroids
 
@@ -58,15 +72,15 @@ def k_means_init(input_list: List[Point_Interface], n_cluster: int) -> SList[Poi
     """
     K-means++ initialisation
 
-    :param input_list: a list of point
-    :param n_cluster: number of cluster
+    :param input_list: a list of points
+    :param n_cluster: number of clusters
 
-    :return: n_cluster centroids
+    :return: list of centroids
     """
     centroids = SList([])
-    first_centroid = input_list.get_partition()\
-                               .map(lambda l: l[random.randint(0, l.length() - 1)])\
-                               .to_seq()[random.randint(0, list(procs())[len(list(procs())) - 1])]
+    first_centroid = input_list.get_partition() \
+        .map(lambda l: l[random.randint(0, l.length() - 1)]) \
+        .to_seq()[random.randint(0, list(procs())[len(list(procs())) - 1])]
     centroids.append(first_centroid)
 
     for _ in range(n_cluster - 1):
@@ -82,17 +96,18 @@ def k_means_init(input_list: List[Point_Interface], n_cluster: int) -> SList[Poi
     return centroids
 
 
-def k_means(input_list: List[Point_Interface], init_function: Callable[[List, int], List], n_cluster: int,
-            max_iter: int = 10) -> SList[SList[Point_Interface]]:
+def k_means(input_list: List[Point_Interface], init_function: Callable[[List, int], List],
+            n_cluster: int,
+            max_iter: int = 10) -> List[Tuple[Point_Interface, int]]:
     """
-    K-means algorithm on a list of point
+    K-means algorithm on a list of points
 
-    :param input_list: a list of point
-    :param n_cluster: number of cluster
-    :param max_iter: number of iteration
+    :param input_list: a list of points
+    :param n_cluster: number of clusters
+    :param max_iter: number of iterations
     :param init_function: a function that initialize centroids
 
-    :return: 2 dimensions list of points
+    :return: a list of tuples with the point and his cluster index
     """
     centroids = init_function(input_list, n_cluster)
 
@@ -104,9 +119,4 @@ def k_means(input_list: List[Point_Interface], init_function: Callable[[List, in
 
         j = j + 1
 
-    clusters2d = SList([])
-    for i in range(len(centroids)):
-        clusters2d.append(clusters.filter(lambda x, num_cluster=i: x[1] == num_cluster)
-                          .map(lambda x: x[0]).to_seq()
-                          )
-    return clusters2d
+    return clusters

From 08a4dd6bde88d90e1329ae6ce4387f5fb35c9bdd Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Fri, 4 Jun 2021 15:11:08 +0200
Subject: [PATCH 24/53] adding option to show clusters graph of 2D points

---
 pyske/examples/list/k_means_main.py | 11 +++++------
 pyske/examples/list/util.py         | 21 +++++++++++++++++----
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
index 163f761..41fddc4 100644
--- a/pyske/examples/list/k_means_main.py
+++ b/pyske/examples/list/k_means_main.py
@@ -2,12 +2,10 @@
 Execution of k_means
 """
 import argparse
-import matplotlib.pyplot as plt
 
 from pyske.core import Timing
 from pyske.examples.list.k_means import k_means, k_means_init
 from pyske.examples.list import util
-from pyske.core.support import parallel
 
 PAR = 'parallel'
 SEQ = 'sequential'
@@ -20,6 +18,8 @@
     parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ)
     parser.add_argument("--clusters", help="number of clusters", type=int, default=3)
     parser.add_argument("--dimensions", help="point dimensions", type=int, default=2)
+    parser.add_argument("--show-clusters", help="display the clusters graph of 2D points",
+                        action="store_true")
 
     args = parser.parse_args()
     size = args.size
@@ -27,6 +27,7 @@
     choice = args.data
     clusters = args.clusters
     dimensions = args.dimensions
+    show_clusters = args.show_clusters
 
     pyske_list_class = util.select_pyske_list(choice)
     input_list = util.rand_point_list(pyske_list_class, size, clusters, dimensions)
@@ -40,7 +41,5 @@
         result = example(input_list, k_means_init, clusters)
         timing.stop()
         util.print_experiment("", timing.get(), execute, iteration)
-        # if parallel.PID == 0:
-        #    for i in range((len(result))):
-        #        plt.scatter([point.x for point in result[i]], [point.y for point in result[i]])
-        #    plt.show()
+        if show_clusters and dimensions == 2:
+            util.print_2D_result(result.to_seq())
diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index ef79d51..e560a16 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -1,9 +1,13 @@
 """
 Utility functions for PySke examples
 """
+from typing import Tuple
+import matplotlib.pyplot as plt
 
 from sklearn.datasets import make_blobs
-from pyske.core import Distribution
+from pyske.core import Distribution, SList
+from pyske.core.support import parallel
+from pyske.core.util.point_2D import Point_2D
 
 PAR = 'parallel'
 SEQ = 'sequential'
@@ -100,9 +104,7 @@ def select_point_dimensions(dimensions):
     :return: a Point
     """
     # pylint: disable=import-outside-toplevel
-    if dimensions == 2:
-        from pyske.core.util.point_2D import Point_2D as PointClass
-    elif dimensions == 3:
+    if dimensions == 3:
         from pyske.core.util.point_3D import Point_3D as PointClass
     else:
         from pyske.core.util.point_2D import Point_2D as PointClass
@@ -127,6 +129,17 @@ def rand_point_list(cls, size, clusters, dimensions):
     distr = Distribution().balanced(size)
     return cls.from_seq(x).distribute(distr)
 
+def print_2D_result(clusters_list: SList[Tuple[Point_2D, int]]):
+    """
+    Print experiment of 2 dimension points k-means clustering
+    """
+    if parallel.PID == 0:
+        x = clusters_list.map(lambda pair: pair[0].x)
+        y = clusters_list.map(lambda pair: pair[0].y)
+        colors = clusters_list.map(lambda pair: pair[1])
+        plt.scatter(x, y, c=colors)
+        plt.show()
+
 
 def print_experiment(result, timing, execute, iteration=None):
     """

From f6f46cd4b3e85fa943a627d868330eef03f5eb90 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Mon, 7 Jun 2021 16:05:27 +0200
Subject: [PATCH 25/53] k-means clustering documentation

---
 docs/api.rst                        | 60 ++++++++++++++++++++++++++++-
 docs/conf.py                        | 10 +++--
 pyske/examples/list/k_means.py      |  4 +-
 pyske/examples/list/k_means_main.py | 11 +-----
 pyske/examples/list/util.py         | 17 +++++++-
 5 files changed, 84 insertions(+), 18 deletions(-)

diff --git a/docs/api.rst b/docs/api.rst
index 2a9f345..19a69e2 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -1,2 +1,60 @@
 PySke API
-=========
\ No newline at end of file
+=========
+
+Pyske API offer applications implemented with list and tree skeletons.
+The user can use the sequential or parallel version.
+The parallel version allows a faster execution time when its launched on several processors or computers.
+
+Dot Product
+-----------
+
+Discrete Fast Fourier Transform
+-------------------------------
+
+K-means Clustering
+------------------
+
+K-means clustering is an unsupervised algorithm that aims to partition group of points in k clusters.
+
+K-means function
+^^^^^^^^^^^^^^^^
+
+.. py:module:: pyske.examples.list.k_means
+
+.. autofunction:: k_means
+
+Here the implementation of the 2 dimensions point class.
+
+.. autoclass:: pyske.core.util.point_2D.Point_2D
+    :members:
+    :special-members:
+    :show-inheritance:
+    :private-members:
+    :member-order: bysource
+
+Initialization functions
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autofunction:: k_means_init
+
+Running Example
+^^^^^^^^^^^^^^^^^^^^
+
+.. argparse::
+    :module: pyske.examples.list.util
+    :func: k_means_parser
+    :prog: python3 k_means_main.py
+
+
+Maximum Prefix Sum
+------------------
+
+Maximum Segment Sum
+-------------------
+
+Parallel Regular Sampling Sort
+------------------------------
+
+Variance Example
+----------------
+
diff --git a/docs/conf.py b/docs/conf.py
index 8e83820..fe8f596 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -10,9 +10,9 @@
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
-# import os
-# import sys
-# sys.path.insert(0, os.path.abspath('.'))
+import os
+import sys
+sys.path.insert(0, os.path.abspath('../.'))
 
 
 # -- Project information -----------------------------------------------------
@@ -31,6 +31,8 @@
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
+    "sphinx.ext.autodoc",
+    "sphinxarg.ext"
 ]
 
 # Add any paths that contain templates here, relative to this directory.
@@ -52,4 +54,4 @@
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
\ No newline at end of file
+html_static_path = ['_static']
diff --git a/pyske/examples/list/k_means.py b/pyske/examples/list/k_means.py
index f238ffa..a1eab32 100644
--- a/pyske/examples/list/k_means.py
+++ b/pyske/examples/list/k_means.py
@@ -70,7 +70,7 @@ def max_dist(pair_a: Tuple[Point_Interface, float], pair_b: Tuple[Point_Interfac
 
 def k_means_init(input_list: List[Point_Interface], n_cluster: int) -> SList[Point_Interface]:
     """
-    K-means++ initialisation
+    K-means++ initialization
 
     :param input_list: a list of points
     :param n_cluster: number of clusters
@@ -103,9 +103,9 @@ def k_means(input_list: List[Point_Interface], init_function: Callable[[List, in
     K-means algorithm on a list of points
 
     :param input_list: a list of points
+    :param init_function: a function that initialize centroids
     :param n_cluster: number of clusters
     :param max_iter: number of iterations
-    :param init_function: a function that initialize centroids
 
     :return: a list of tuples with the point and his cluster index
     """
diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
index 41fddc4..3687f0c 100644
--- a/pyske/examples/list/k_means_main.py
+++ b/pyske/examples/list/k_means_main.py
@@ -1,7 +1,6 @@
 """
 Execution of k_means
 """
-import argparse
 
 from pyske.core import Timing
 from pyske.examples.list.k_means import k_means, k_means_init
@@ -10,16 +9,10 @@
 PAR = 'parallel'
 SEQ = 'sequential'
 
+
 if __name__ == '__main__':
 
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000)
-    parser.add_argument("--iter", help="number of iterations", type=int, default=30)
-    parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ)
-    parser.add_argument("--clusters", help="number of clusters", type=int, default=3)
-    parser.add_argument("--dimensions", help="point dimensions", type=int, default=2)
-    parser.add_argument("--show-clusters", help="display the clusters graph of 2D points",
-                        action="store_true")
+    parser = util. k_means_parser()
 
     args = parser.parse_args()
     size = args.size
diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index e560a16..2e3da87 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -3,6 +3,7 @@
 """
 from typing import Tuple
 import matplotlib.pyplot as plt
+import argparse
 
 from sklearn.datasets import make_blobs
 from pyske.core import Distribution, SList
@@ -26,8 +27,6 @@ def standard_parse_command_line(size_arg=True, iter_arg=True, data_arg=True):
     :param data_arg: (default True) flag to select argument --data
     :return:  (size, iter, ['parallel' | 'sequential'])
     """
-    # pylint: disable=import-outside-toplevel
-    import argparse
     parser = argparse.ArgumentParser()
     if size_arg:
         parser.add_argument("--size", help="size of the list to generate",
@@ -50,6 +49,20 @@ def standard_parse_command_line(size_arg=True, iter_arg=True, data_arg=True):
     return size, num_iter, data_type
 
 
+def k_means_parser():
+    """
+    Parse command line for k-means example.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000)
+    parser.add_argument("--iter", help="number of iterations", type=int, default=30)
+    parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ)
+    parser.add_argument("--clusters", help="number of clusters", type=int, default=3)
+    parser.add_argument("--dimensions", help="point dimensions", type=int, default=2)
+    parser.add_argument("--show-clusters", help="display the clusters graph of 2D points",
+                        action="store_true")
+    return parser
+
 def select_pyske_list(choice):
     """
     Return a PySke list class.

From 42c750632199f47d51a286655363ba8ce929a3ca Mon Sep 17 00:00:00 2001
From: Evan MULUMBA <evans.mllb@gmail.com>
Date: Tue, 8 Jun 2021 12:18:44 +0200
Subject: [PATCH 26/53] 3d representation for Point_3D clusters

---
 pyske/examples/list/k_means_main.py |  4 ++++
 pyske/examples/list/util.py         | 22 ++++++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
index 3687f0c..93c885a 100644
--- a/pyske/examples/list/k_means_main.py
+++ b/pyske/examples/list/k_means_main.py
@@ -36,3 +36,7 @@
         util.print_experiment("", timing.get(), execute, iteration)
         if show_clusters and dimensions == 2:
             util.print_2D_result(result.to_seq())
+        elif show_clusters and dimensions == 3:
+            util.print_3D_result(result.to_seq())
+
+
diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index 2e3da87..721f855 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -3,12 +3,14 @@
 """
 from typing import Tuple
 import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import axes3d
 import argparse
 
 from sklearn.datasets import make_blobs
 from pyske.core import Distribution, SList
 from pyske.core.support import parallel
 from pyske.core.util.point_2D import Point_2D
+from pyske.core.util.point_3D import Point_3D
 
 PAR = 'parallel'
 SEQ = 'sequential'
@@ -153,6 +155,26 @@ def print_2D_result(clusters_list: SList[Tuple[Point_2D, int]]):
         plt.scatter(x, y, c=colors)
         plt.show()
 
+def print_3D_result(clusters_list: SList[Tuple[Point_3D, int]]):
+    """
+        Print experiment of 3 dimension points k-means clustering
+        """
+    if parallel.PID == 0:
+        x = clusters_list.map(lambda pair: pair[0].x)
+        y = clusters_list.map(lambda pair: pair[0].y)
+        z = clusters_list.map(lambda pair: pair[0].z)
+        colors = clusters_list.map(lambda pair: pair[1])
+
+        # Tracé du résultat en 3D
+        fig = plt.figure()
+        ax = fig.gca(projection='3d')  # Affichage en 3D
+        ax.scatter(x, y, z, label='Courbe', marker='d')  # Tracé des points 3D
+        plt.title("Points 3D")
+        ax.set_xlabel('X')
+        ax.set_ylabel('Y')
+        ax.set_zlabel('Z')
+        plt.tight_layout()
+        plt.show()
 
 def print_experiment(result, timing, execute, iteration=None):
     """

From 810c54b30394e297e2c62ef2a5987b086bbd2d8f Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Tue, 8 Jun 2021 12:57:39 +0200
Subject: [PATCH 27/53] fix subtraction error in Point_3D distance (dz used x instead of z)

---
 pyske/core/util/point_3D.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyske/core/util/point_3D.py b/pyske/core/util/point_3D.py
index c6e1aa5..678710d 100644
--- a/pyske/core/util/point_3D.py
+++ b/pyske/core/util/point_3D.py
@@ -91,5 +91,5 @@ def distance(self, other):
         """
         dx = self.__x - other.x
         dy = self.__y - other.y
-        dz = self.__x - other.z
+        dz = self.__z - other.z
         return sqrt(dx ** 2 + dy ** 2 + dz ** 2)

From 5cba1e966b00bf01344537f1fe4c9e5fc82babd6 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Tue, 8 Jun 2021 14:31:27 +0200
Subject: [PATCH 28/53] adding colors 3D graph result, fix warning matplotlib

---
 pyske/examples/list/k_means_main.py | 12 ++++++------
 pyske/examples/list/util.py         | 13 +++++++------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/pyske/examples/list/k_means_main.py b/pyske/examples/list/k_means_main.py
index 93c885a..c8b7782 100644
--- a/pyske/examples/list/k_means_main.py
+++ b/pyske/examples/list/k_means_main.py
@@ -12,7 +12,7 @@
 
 if __name__ == '__main__':
 
-    parser = util. k_means_parser()
+    parser = util.k_means_parser()
 
     args = parser.parse_args()
     size = args.size
@@ -34,9 +34,9 @@
         result = example(input_list, k_means_init, clusters)
         timing.stop()
         util.print_experiment("", timing.get(), execute, iteration)
-        if show_clusters and dimensions == 2:
-            util.print_2D_result(result.to_seq())
-        elif show_clusters and dimensions == 3:
-            util.print_3D_result(result.to_seq())
-
+        if show_clusters:
+            if dimensions == 2:
+                util.print_2D_result(result.to_seq())
+            if dimensions == 3:
+                util.print_3D_result(result.to_seq())
 
diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index 721f855..1ddca0f 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -1,10 +1,11 @@
 """
 Utility functions for PySke examples
 """
+
 from typing import Tuple
-import matplotlib.pyplot as plt
-from mpl_toolkits.mplot3d import axes3d
+
 import argparse
+import matplotlib.pyplot as plt
 
 from sklearn.datasets import make_blobs
 from pyske.core import Distribution, SList
@@ -157,8 +158,8 @@ def print_2D_result(clusters_list: SList[Tuple[Point_2D, int]]):
 
 def print_3D_result(clusters_list: SList[Tuple[Point_3D, int]]):
     """
-        Print experiment of 3 dimension points k-means clustering
-        """
+    Print experiment of 3 dimension points k-means clustering
+    """
     if parallel.PID == 0:
         x = clusters_list.map(lambda pair: pair[0].x)
         y = clusters_list.map(lambda pair: pair[0].y)
@@ -167,8 +168,8 @@ def print_3D_result(clusters_list: SList[Tuple[Point_3D, int]]):
 
         # Tracé du résultat en 3D
         fig = plt.figure()
-        ax = fig.gca(projection='3d')  # Affichage en 3D
-        ax.scatter(x, y, z, label='Courbe', marker='d')  # Tracé des points 3D
+        ax = fig.add_subplot(projection='3d')  # Affichage en 3D
+        ax.scatter(x, y, z, label='Courbe', marker='d', c=colors)  # Tracé des points 3D
         plt.title("Points 3D")
         ax.set_xlabel('X')
         ax.set_ylabel('Y')

From eb16d4c72ed89bec88826fa8125774fb60807dfb Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Tue, 8 Jun 2021 15:05:23 +0200
Subject: [PATCH 29/53] adding Point Interface section

---
 docs/api.rst                | 24 +++++++++++++++---------
 pyske/core/util/point_2D.py |  3 +++
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/docs/api.rst b/docs/api.rst
index 19a69e2..864486e 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -3,7 +3,7 @@ PySke API
 
 Pyske API offer applications implemented with list and tree skeletons.
 The user can use the sequential or parallel version.
-The parallel version allows a faster execution time when its launched on several processors or computers.
+The parallel version allows a faster execution time when its launched on several processors, cores or computers.
 
 Dot Product
 -----------
@@ -23,20 +23,26 @@ K-means function
 
 .. autofunction:: k_means
 
-Here the implementation of the 2 dimensions point class.
+Initialization functions
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+This is the standard method that initializes the centroids. This method chooses the centroids in order that each point is as far as possible from the other.
+
+.. autofunction:: k_means_init
+
+
+Point Interface
+^^^^^^^^^^^^^^^
+
+K-means algorithm takes a list of points in parameters. For now two versions implement this class, one for 2 dimension points and another for 3 dimension points.
+
+Point 2D class implementation:
 
 .. autoclass:: pyske.core.util.point_2D.Point_2D
     :members:
     :special-members:
-    :show-inheritance:
-    :private-members:
     :member-order: bysource
 
-Initialization functions
-^^^^^^^^^^^^^^^^^^^^^^^^
-
-.. autofunction:: k_means_init
-
 Running Example
 ^^^^^^^^^^^^^^^^^^^^
 
diff --git a/pyske/core/util/point_2D.py b/pyske/core/util/point_2D.py
index f6f5f7f..d0bfeca 100644
--- a/pyske/core/util/point_2D.py
+++ b/pyske/core/util/point_2D.py
@@ -17,6 +17,9 @@ def __repr__(self):
         return "(%s, %s)" % (self.__x, self.__y)
 
     def __eq__(self, other):
+        """
+        Equality between two points
+        """
         if isinstance(other, Point_2D):
             return self.__x == other.__x and self.__y == other.__y
         return False

From 529498ef384ae91989a9d88ff4817518f58e4be9 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Tue, 8 Jun 2021 15:15:33 +0200
Subject: [PATCH 30/53] change show-clusters display message

---
 pyske/examples/list/util.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyske/examples/list/util.py b/pyske/examples/list/util.py
index 1ddca0f..4964c93 100644
--- a/pyske/examples/list/util.py
+++ b/pyske/examples/list/util.py
@@ -57,12 +57,12 @@ def k_means_parser():
     Parse command line for k-means example.
     """
     parser = argparse.ArgumentParser()
-    parser.add_argument("--size", help="size of the list to generate", type=int, default=1_000)
+    parser.add_argument("--size", help="size of the list to generate", type=int, default=5_000)
     parser.add_argument("--iter", help="number of iterations", type=int, default=30)
     parser.add_argument("--data", help="type of data structure", choices=[PAR, SEQ], default=SEQ)
     parser.add_argument("--clusters", help="number of clusters", type=int, default=3)
     parser.add_argument("--dimensions", help="point dimensions", type=int, default=2)
-    parser.add_argument("--show-clusters", help="display the clusters graph of 2D points",
+    parser.add_argument("--show-clusters", help="display the clusters graph of 2D or 3D points",
                         action="store_true")
     return parser
 

From 34ff5ceaff07ecd98292ed0d75ef9e63df368e01 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Tue, 15 Jun 2021 15:31:43 +0200
Subject: [PATCH 31/53] array module

---
 pyske/core/array/__init__.py |  0
 pyske/core/array/parray2d.py | 10 ++++++++++
 2 files changed, 10 insertions(+)
 create mode 100644 pyske/core/array/__init__.py
 create mode 100644 pyske/core/array/parray2d.py

diff --git a/pyske/core/array/__init__.py b/pyske/core/array/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py
new file mode 100644
index 0000000..dcb1c5d
--- /dev/null
+++ b/pyske/core/array/parray2d.py
@@ -0,0 +1,10 @@
+"""
+A module of parallel arrays and associated skeletons
+
+class PArray2D: parallel arrays.
+"""
+
+class PArray2D:
+    """
+    Distributed arrays
+    """

From d072ac2c40afae7c2cb6a132db1ec00ba6d183d3 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Wed, 16 Jun 2021 17:01:54 +0200
Subject: [PATCH 32/53] init, str method

---
 pyske/core/array/parray2d.py | 46 ++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py
index dcb1c5d..4d0366b 100644
--- a/pyske/core/array/parray2d.py
+++ b/pyske/core/array/parray2d.py
@@ -3,8 +3,54 @@
 
 class PArray2D: parallel arrays.
 """
+from typing import Callable
+
+from pyske.core.support import parallel as parimpl
+
+_PID: int = parimpl.PID
+_NPROCS: int = parimpl.NPROCS
+_COMM = parimpl.COMM
+
 
 class PArray2D:
+    # pylint: disable=protected-access
     """
     Distributed arrays
     """
+
+    def __init__(self):
+        self.__global_index = 0
+        self.__local_index = 0
+        self.__content = []
+        self.__distribution = [0 for _ in range(0, _NPROCS)]
+
+    def __str__(self) -> str:
+        return "PID[" + str(_PID) + "]:\n" + \
+               "  global_index: " + str(self.__global_index) + "\n" + \
+               "  local_index: " + str(self.__local_index) + "\n" + \
+               "  distribution: " + str(self.__distribution) + "\n" + \
+               "  content: " + str(self.__content) + "\n"
+
+    @staticmethod
+    def init(value_at: Callable[[int], int], col_size: int = _NPROCS, line_size: int = _NPROCS):
+        assert _NPROCS <= col_size
+        assert _NPROCS <= line_size
+
+        parray2d = PArray2D()
+        parray2d.__global_index = ((0, line_size - 1), (0, col_size - 1))
+
+        local_line_size = parimpl.local_size(_PID, line_size)
+
+        lines_start_index = local_line_size * _PID
+        lines_stop_index = lines_start_index + local_line_size - 1
+        colums_start_index = 0
+        colums_stop_index = col_size - 1
+
+        parray2d.__local_index = ((lines_start_index, lines_stop_index),
+                                  (colums_start_index, colums_stop_index))
+
+        parray2d.__content = [value_at(i) for i in range(lines_start_index * col_size,
+                                                         (lines_stop_index + 1) * col_size)]
+        parray2d.__distribution = [parimpl.local_size(_PID, line_size) * col_size for _ in
+                                   range(0, _NPROCS)]
+        return parray2d

From c0203ad99c4f698386d175ff92e4f7be9e7c1370 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Wed, 16 Jun 2021 17:02:48 +0200
Subject: [PATCH 33/53] array hello_world

---
 pyske/examples/array/__init__.py    |  0
 pyske/examples/array/hello_world.py | 14 ++++++++++++++
 2 files changed, 14 insertions(+)
 create mode 100644 pyske/examples/array/__init__.py
 create mode 100644 pyske/examples/array/hello_world.py

diff --git a/pyske/examples/array/__init__.py b/pyske/examples/array/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/pyske/examples/array/hello_world.py b/pyske/examples/array/hello_world.py
new file mode 100644
index 0000000..886e553
--- /dev/null
+++ b/pyske/examples/array/hello_world.py
@@ -0,0 +1,14 @@
+"""
+Example: various manipulations on a parallel array
+"""
+
+from pyske.core.array.parray2d import PArray2D
+
+
+def __main():
+    parray2d = PArray2D.init(lambda x: x, 6, 12)
+    print(parray2d)
+
+
+if __name__ == '__main__':
+    __main()

From 1779397eecc585e0dc6a484f62ea5d11aac7eb16 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Thu, 17 Jun 2021 15:12:01 +0200
Subject: [PATCH 34/53] allgather for distribution

---
 pyske/core/array/parray2d.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py
index 4d0366b..d7042dc 100644
--- a/pyske/core/array/parray2d.py
+++ b/pyske/core/array/parray2d.py
@@ -51,6 +51,6 @@ def init(value_at: Callable[[int], int], col_size: int = _NPROCS, line_size: int
 
         parray2d.__content = [value_at(i) for i in range(lines_start_index * col_size,
                                                          (lines_stop_index + 1) * col_size)]
-        parray2d.__distribution = [parimpl.local_size(_PID, line_size) * col_size for _ in
-                                   range(0, _NPROCS)]
+        parray2d.__distribution = _COMM.allgather(parray2d.__local_index)
+
         return parray2d

From 42fc4fc45261048b5163442db167592adf8171fd Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Thu, 17 Jun 2021 18:56:01 +0200
Subject: [PATCH 35/53] distribution lines to columns

---
 pyske/core/array/parray2d.py        | 49 +++++++++++++++++++++++++++--
 pyske/examples/array/hello_world.py |  2 ++
 2 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py
index d7042dc..2a48299 100644
--- a/pyske/core/array/parray2d.py
+++ b/pyske/core/array/parray2d.py
@@ -40,9 +40,14 @@ def init(value_at: Callable[[int], int], col_size: int = _NPROCS, line_size: int
         parray2d.__global_index = ((0, line_size - 1), (0, col_size - 1))
 
         local_line_size = parimpl.local_size(_PID, line_size)
-
-        lines_start_index = local_line_size * _PID
+        if _PID == 0:
+            lines_start_index = local_line_size * _PID
+        else:
+            lines_start_index = _COMM.recv(source=_PID - 1) + 1
         lines_stop_index = lines_start_index + local_line_size - 1
+        if _PID != _NPROCS - 1:
+            _COMM.send(lines_stop_index, _PID + 1)
+
         colums_start_index = 0
         colums_stop_index = col_size - 1
 
@@ -54,3 +59,43 @@ def init(value_at: Callable[[int], int], col_size: int = _NPROCS, line_size: int
         parray2d.__distribution = _COMM.allgather(parray2d.__local_index)
 
         return parray2d
+
+    def distribute(self: 'PArray2D') -> 'PArray2D':
+        parray2d = PArray2D()
+        parray2d.__global_index = self.__global_index
+
+        col_size = self.__global_index[1][1] - self.__global_index[1][0] + 1
+        line_size = self.__global_index[0][1] - self.__global_index[0][0] + 1
+
+        local_col_size = parimpl.local_size(_PID, col_size)
+        if _PID == 0:
+            colums_start_index = local_col_size * _PID
+        else:
+            colums_start_index = _COMM.recv(source=_PID - 1) + 1
+        colums_stop_index = colums_start_index + local_col_size - 1
+        if _PID != _NPROCS - 1:
+            _COMM.send(colums_stop_index, _PID + 1)
+
+        lines_start_index = 0
+        lines_stop_index = line_size - 1
+
+        parray2d.__local_index = ((lines_start_index, lines_stop_index),
+                                  (colums_start_index, colums_stop_index))
+
+        parray2d.__distribution = _COMM.allgather(parray2d.__local_index)
+
+        # update content for each process
+        for i in range(0, _NPROCS):
+            content_to_send = []
+            for j in range(len(self.__content)):
+                if j % col_size in range(parray2d.__distribution[i][1][0],
+                                         parray2d.__distribution[i][1][1] + 1):
+                    content_to_send.append(self.__content[j])
+            if i == _PID:
+                parray2d.__content = _COMM.gather(content_to_send, i)
+                # flatten the list
+                parray2d.__content = [item for items in parray2d.__content for item in items]
+            else:
+                _COMM.gather(content_to_send, i)
+
+        return parray2d
diff --git a/pyske/examples/array/hello_world.py b/pyske/examples/array/hello_world.py
index 886e553..66cabec 100644
--- a/pyske/examples/array/hello_world.py
+++ b/pyske/examples/array/hello_world.py
@@ -7,6 +7,8 @@
 
 def __main():
     parray2d = PArray2D.init(lambda x: x, 6, 12)
+    #print(parray2d)
+    parray2d = parray2d.distribute()
     print(parray2d)
 
 

From 2acc26a07d4250d65023d911866ab13776b98197 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Mon, 21 Jun 2021 11:27:13 +0200
Subject: [PATCH 36/53] use of enum for distribution choice

---
 pyske/core/array/parray2d.py | 72 ++++++++++++++++++++----------------
 1 file changed, 40 insertions(+), 32 deletions(-)

diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py
index 2a48299..0e8e517 100644
--- a/pyske/core/array/parray2d.py
+++ b/pyske/core/array/parray2d.py
@@ -4,6 +4,7 @@
 class PArray2D: parallel arrays.
 """
 from typing import Callable
+from enum import Enum
 
 from pyske.core.support import parallel as parimpl
 
@@ -11,6 +12,34 @@ class PArray2D: parallel arrays.
 _NPROCS: int = parimpl.NPROCS
 _COMM = parimpl.COMM
 
+class Distribution(Enum):
+    LINE = 'LINE'
+    COLUMN = 'COLUMN'
+
+
+def _local_index(distribution: str, col_size: int, line_size: int):
+    if distribution == Distribution.LINE:
+        local_size = parimpl.local_size(_PID, line_size)
+        b_start_index = 0
+        b_stop_index = col_size - 1
+    else:
+        local_size = parimpl.local_size(_PID, col_size)
+        b_start_index = 0
+        b_stop_index = line_size - 1
+
+    if _PID == 0:
+        a_start_index = local_size * _PID
+    else:
+        a_start_index = _COMM.recv(source=_PID - 1) + 1
+    a_stop_index = a_start_index + local_size - 1
+    if _PID != _NPROCS - 1:
+        _COMM.send(a_stop_index, _PID + 1)
+
+    if distribution == Distribution.LINE:
+        return (a_start_index, a_stop_index), (b_start_index, b_stop_index)
+    else:
+        return (b_start_index, b_stop_index), (a_start_index, a_stop_index)
+
 
 class PArray2D:
     # pylint: disable=protected-access
@@ -19,10 +48,11 @@ class PArray2D:
     """
 
     def __init__(self):
-        self.__global_index = 0
-        self.__local_index = 0
+        self.__global_index = ((-1, -1), (-1, -1))
+        self.__local_index = ((-1, -1), (-1, -1))
         self.__content = []
         self.__distribution = [0 for _ in range(0, _NPROCS)]
+        self.__distribution_direction = Distribution.LINE
 
     def __str__(self) -> str:
         return "PID[" + str(_PID) + "]:\n" + \
@@ -39,50 +69,28 @@ def init(value_at: Callable[[int], int], col_size: int = _NPROCS, line_size: int
         parray2d = PArray2D()
         parray2d.__global_index = ((0, line_size - 1), (0, col_size - 1))
 
-        local_line_size = parimpl.local_size(_PID, line_size)
-        if _PID == 0:
-            lines_start_index = local_line_size * _PID
-        else:
-            lines_start_index = _COMM.recv(source=_PID - 1) + 1
-        lines_stop_index = lines_start_index + local_line_size - 1
-        if _PID != _NPROCS - 1:
-            _COMM.send(lines_stop_index, _PID + 1)
+        parray2d.__local_index = _local_index(Distribution.LINE, col_size, line_size)
 
-        colums_start_index = 0
-        colums_stop_index = col_size - 1
-
-        parray2d.__local_index = ((lines_start_index, lines_stop_index),
-                                  (colums_start_index, colums_stop_index))
-
-        parray2d.__content = [value_at(i) for i in range(lines_start_index * col_size,
-                                                         (lines_stop_index + 1) * col_size)]
+        parray2d.__content = [value_at(i) for i in range(parray2d.__local_index[0][0] * col_size,
+                                                         (parray2d.__local_index[0][1] + 1) * col_size)]
         parray2d.__distribution = _COMM.allgather(parray2d.__local_index)
 
         return parray2d
 
     def distribute(self: 'PArray2D') -> 'PArray2D':
+        """
+        Distribute line to column
+        """
         parray2d = PArray2D()
         parray2d.__global_index = self.__global_index
 
         col_size = self.__global_index[1][1] - self.__global_index[1][0] + 1
         line_size = self.__global_index[0][1] - self.__global_index[0][0] + 1
 
-        local_col_size = parimpl.local_size(_PID, col_size)
-        if _PID == 0:
-            colums_start_index = local_col_size * _PID
-        else:
-            colums_start_index = _COMM.recv(source=_PID - 1) + 1
-        colums_stop_index = colums_start_index + local_col_size - 1
-        if _PID != _NPROCS - 1:
-            _COMM.send(colums_stop_index, _PID + 1)
-
-        lines_start_index = 0
-        lines_stop_index = line_size - 1
-
-        parray2d.__local_index = ((lines_start_index, lines_stop_index),
-                                  (colums_start_index, colums_stop_index))
+        parray2d.__local_index = _local_index(Distribution.COLUMN, col_size, line_size)
 
         parray2d.__distribution = _COMM.allgather(parray2d.__local_index)
+        parray2d.__distribution_direction = Distribution.COLUMN
 
         # update content for each process
         for i in range(0, _NPROCS):

From 3d219178a50c47e57fb2214cdf801bd3227f1233 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Mon, 21 Jun 2021 14:27:26 +0200
Subject: [PATCH 37/53] changes in local_index

---
 pyske/core/array/parray2d.py | 48 ++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 27 deletions(-)

diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py
index 0e8e517..b4814b8 100644
--- a/pyske/core/array/parray2d.py
+++ b/pyske/core/array/parray2d.py
@@ -6,6 +6,7 @@ class PArray2D: parallel arrays.
 from typing import Callable
 from enum import Enum
 
+from pyske.core import SList
 from pyske.core.support import parallel as parimpl
 
 _PID: int = parimpl.PID
@@ -17,28 +18,17 @@ class Distribution(Enum):
     COLUMN = 'COLUMN'
 
 
-def _local_index(distribution: str, col_size: int, line_size: int):
+def _local_index(distribution: Enum, col_size: int, line_size: int, pid: int):
+    local_sizes = SList([])
+    for i in range(_NPROCS):
+        if distribution == Distribution.LINE:
+            local_sizes.append(parimpl.local_size(i, line_size))
+        else:
+            local_sizes.append(parimpl.local_size(i, col_size))
+    start_indexes = local_sizes.scanl(lambda x, y: x + y, 0)
     if distribution == Distribution.LINE:
-        local_size = parimpl.local_size(_PID, line_size)
-        b_start_index = 0
-        b_stop_index = col_size - 1
-    else:
-        local_size = parimpl.local_size(_PID, col_size)
-        b_start_index = 0
-        b_stop_index = line_size - 1
-
-    if _PID == 0:
-        a_start_index = local_size * _PID
-    else:
-        a_start_index = _COMM.recv(source=_PID - 1) + 1
-    a_stop_index = a_start_index + local_size - 1
-    if _PID != _NPROCS - 1:
-        _COMM.send(a_stop_index, _PID + 1)
-
-    if distribution == Distribution.LINE:
-        return (a_start_index, a_stop_index), (b_start_index, b_stop_index)
-    else:
-        return (b_start_index, b_stop_index), (a_start_index, a_stop_index)
+        return (start_indexes[pid], start_indexes[pid] + local_sizes[pid] - 1), (0, col_size - 1)
+    return (0, line_size - 1), (start_indexes[pid], start_indexes[pid] + local_sizes[pid] - 1)
 
 
 class PArray2D:
@@ -69,11 +59,14 @@ def init(value_at: Callable[[int], int], col_size: int = _NPROCS, line_size: int
         parray2d = PArray2D()
         parray2d.__global_index = ((0, line_size - 1), (0, col_size - 1))
 
-        parray2d.__local_index = _local_index(Distribution.LINE, col_size, line_size)
+        parray2d.__local_index = _local_index(Distribution.LINE, col_size, line_size, _PID)
 
         parray2d.__content = [value_at(i) for i in range(parray2d.__local_index[0][0] * col_size,
-                                                         (parray2d.__local_index[0][1] + 1) * col_size)]
-        parray2d.__distribution = _COMM.allgather(parray2d.__local_index)
+                                                         (parray2d.__local_index[0][
+                                                              1] + 1) * col_size)]
+        parray2d.__distribution = [
+            _local_index(parray2d.__distribution_direction, col_size, line_size, i) for i in
+            range(_NPROCS)]
 
         return parray2d
 
@@ -87,10 +80,11 @@ def distribute(self: 'PArray2D') -> 'PArray2D':
         col_size = self.__global_index[1][1] - self.__global_index[1][0] + 1
         line_size = self.__global_index[0][1] - self.__global_index[0][0] + 1
 
-        parray2d.__local_index = _local_index(Distribution.COLUMN, col_size, line_size)
-
-        parray2d.__distribution = _COMM.allgather(parray2d.__local_index)
+        parray2d.__local_index = _local_index(Distribution.COLUMN, col_size, line_size, _PID)
         parray2d.__distribution_direction = Distribution.COLUMN
+        parray2d.__distribution = [
+            _local_index(parray2d.__distribution_direction, col_size, line_size, i) for i in
+            range(_NPROCS)]
 
         # update content for each process
         for i in range(0, _NPROCS):

From 724b88f09b40a535ed715a1c6ff8bf9ea9c991ce Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Mon, 21 Jun 2021 15:29:44 +0200
Subject: [PATCH 38/53] callable init function with line and column parameters

---
 pyske/core/array/parray2d.py        | 9 +++++----
 pyske/examples/array/hello_world.py | 4 +++-
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py
index b4814b8..bc9e776 100644
--- a/pyske/core/array/parray2d.py
+++ b/pyske/core/array/parray2d.py
@@ -52,7 +52,7 @@ def __str__(self) -> str:
                "  content: " + str(self.__content) + "\n"
 
     @staticmethod
-    def init(value_at: Callable[[int], int], col_size: int = _NPROCS, line_size: int = _NPROCS):
+    def init(value_at: Callable[[int, int], int], col_size: int = _NPROCS, line_size: int = _NPROCS):
         assert _NPROCS <= col_size
         assert _NPROCS <= line_size
 
@@ -61,9 +61,10 @@ def init(value_at: Callable[[int], int], col_size: int = _NPROCS, line_size: int
 
         parray2d.__local_index = _local_index(Distribution.LINE, col_size, line_size, _PID)
 
-        parray2d.__content = [value_at(i) for i in range(parray2d.__local_index[0][0] * col_size,
-                                                         (parray2d.__local_index[0][
-                                                              1] + 1) * col_size)]
+        for line in range(parray2d.__local_index[0][0], parray2d.__local_index[0][1] + 1):
+            for column in range(parray2d.__local_index[1][0], parray2d.__local_index[1][1] + 1):
+                parray2d.__content.append(value_at(line, column))
+
         parray2d.__distribution = [
             _local_index(parray2d.__distribution_direction, col_size, line_size, i) for i in
             range(_NPROCS)]
diff --git a/pyske/examples/array/hello_world.py b/pyske/examples/array/hello_world.py
index 66cabec..d599ce0 100644
--- a/pyske/examples/array/hello_world.py
+++ b/pyske/examples/array/hello_world.py
@@ -6,7 +6,9 @@
 
 
 def __main():
-    parray2d = PArray2D.init(lambda x: x, 6, 12)
+    col_size = 6
+    line_size = 12
+    parray2d = PArray2D.init(lambda line, column: line * col_size + column, col_size, line_size)
     #print(parray2d)
     parray2d = parray2d.distribute()
     print(parray2d)

From 84cc587c9296a17844c45ac20e27ae55da034d90 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Mon, 21 Jun 2021 18:18:03 +0200
Subject: [PATCH 39/53] init column and line distribution

---
 pyske/core/array/parray2d.py        | 40 +++++++++++++++++++++++++++--
 pyske/examples/array/hello_world.py | 16 +++++++++---
 2 files changed, 50 insertions(+), 6 deletions(-)

diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py
index bc9e776..eee6a5c 100644
--- a/pyske/core/array/parray2d.py
+++ b/pyske/core/array/parray2d.py
@@ -17,7 +17,6 @@ class Distribution(Enum):
     LINE = 'LINE'
     COLUMN = 'COLUMN'
 
-
 def _local_index(distribution: Enum, col_size: int, line_size: int, pid: int):
     local_sizes = SList([])
     for i in range(_NPROCS):
@@ -52,7 +51,15 @@ def __str__(self) -> str:
                "  content: " + str(self.__content) + "\n"
 
     @staticmethod
-    def init(value_at: Callable[[int, int], int], col_size: int = _NPROCS, line_size: int = _NPROCS):
+    def init_line(value_at: Callable[[int, int], int], col_size: int = _NPROCS,
+                  line_size: int = _NPROCS):
+        """
+        Return an array built using a function per line on each processor
+
+        :param value_at: binary function
+        :return: an 2d array of the given line and column size, where for all valid line column
+            i, j, the value at this index is value_at(i, j)
+        """
         assert _NPROCS <= col_size
         assert _NPROCS <= line_size
 
@@ -71,6 +78,35 @@ def init(value_at: Callable[[int, int], int], col_size: int = _NPROCS, line_size
 
         return parray2d
 
+    @staticmethod
+    def init_column(value_at: Callable[[int, int], int], col_size: int = _NPROCS,
+                    line_size: int = _NPROCS):
+        """
+        Return an array built using a function per column on each processor
+
+        :param value_at: binary function
+        :return: an 2d array of the given line and column size, where for all valid line column
+            i, j, the value at this index is value_at(i, j)
+        """
+        assert _NPROCS <= col_size
+        assert _NPROCS <= line_size
+
+        parray2d = PArray2D()
+        parray2d.__global_index = ((0, line_size - 1), (0, col_size - 1))
+
+        parray2d.__local_index = _local_index(Distribution.COLUMN, col_size, line_size, _PID)
+
+        for line in range(parray2d.__local_index[0][0], parray2d.__local_index[0][1] + 1):
+            for column in range(parray2d.__local_index[1][0], parray2d.__local_index[1][1] + 1):
+                parray2d.__content.append(value_at(line, column))
+
+        parray2d.__distribution_direction = Distribution.COLUMN
+        parray2d.__distribution = [
+            _local_index(parray2d.__distribution_direction, col_size, line_size, i) for i in
+            range(_NPROCS)]
+
+        return parray2d
+
     def distribute(self: 'PArray2D') -> 'PArray2D':
         """
         Distribute line to column
diff --git a/pyske/examples/array/hello_world.py b/pyske/examples/array/hello_world.py
index d599ce0..6064f00 100644
--- a/pyske/examples/array/hello_world.py
+++ b/pyske/examples/array/hello_world.py
@@ -8,10 +8,18 @@
 def __main():
     col_size = 6
     line_size = 12
-    parray2d = PArray2D.init(lambda line, column: line * col_size + column, col_size, line_size)
-    #print(parray2d)
-    parray2d = parray2d.distribute()
-    print(parray2d)
+
+    print("Line initialization")
+    parray2d_line_init = PArray2D.init_line(lambda line, column: line * col_size + column, col_size, line_size)
+    print(parray2d_line_init)
+
+    print("Line to column distribution")
+    parray2d_column = parray2d_line_init.distribute()
+    print(parray2d_column)
+
+    print("Column initialization")
+    parray2d_column_init = PArray2D.init_column(lambda line, column: line * col_size + column, col_size, line_size)
+    print(parray2d_column_init)
 
 
 if __name__ == '__main__':

From efcd451188a9fc2cffa4ed6c310c995cdb4a3797 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Tue, 22 Jun 2021 10:34:56 +0200
Subject: [PATCH 40/53] map function, merge of init function, generic type

---
 pyske/core/array/parray2d.py        | 66 ++++++++++++-----------------
 pyske/examples/array/hello_world.py |  6 +--
 2 files changed, 30 insertions(+), 42 deletions(-)

diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py
index eee6a5c..9f91457 100644
--- a/pyske/core/array/parray2d.py
+++ b/pyske/core/array/parray2d.py
@@ -3,7 +3,7 @@
 
 class PArray2D: parallel arrays.
 """
-from typing import Callable
+from typing import Callable, TypeVar, Generic
 from enum import Enum
 
 from pyske.core import SList
@@ -13,6 +13,9 @@ class PArray2D: parallel arrays.
 _NPROCS: int = parimpl.NPROCS
 _COMM = parimpl.COMM
 
+T = TypeVar('T')  # pylint: disable=invalid-name
+V = TypeVar('V')  # pylint: disable=invalid-name
+
 class Distribution(Enum):
     LINE = 'LINE'
     COLUMN = 'COLUMN'
@@ -30,20 +33,20 @@ def _local_index(distribution: Enum, col_size: int, line_size: int, pid: int):
     return (0, line_size - 1), (start_indexes[pid], start_indexes[pid] + local_sizes[pid] - 1)
 
 
-class PArray2D:
+class PArray2D(Generic[T]):
     # pylint: disable=protected-access
     """
     Distributed arrays
     """
 
-    def __init__(self):
+    def __init__(self: 'PArray2D[T]'):
         self.__global_index = ((-1, -1), (-1, -1))
         self.__local_index = ((-1, -1), (-1, -1))
         self.__content = []
-        self.__distribution = [0 for _ in range(0, _NPROCS)]
+        self.__distribution = [((-1, -1), (-1, -1)) for _ in range(0, _NPROCS)]
         self.__distribution_direction = Distribution.LINE
 
-    def __str__(self) -> str:
+    def __str__(self: 'PArray2D[T]') -> str:
         return "PID[" + str(_PID) + "]:\n" + \
                "  global_index: " + str(self.__global_index) + "\n" + \
                "  local_index: " + str(self.__local_index) + "\n" + \
@@ -51,12 +54,16 @@ def __str__(self) -> str:
                "  content: " + str(self.__content) + "\n"
 
     @staticmethod
-    def init_line(value_at: Callable[[int, int], int], col_size: int = _NPROCS,
-                  line_size: int = _NPROCS):
+    def init(value_at: Callable[[int, int], V], distribution: Distribution,
+             col_size: int = _NPROCS,
+             line_size: int = _NPROCS) -> 'PArray2D[V]':
         """
         Return an array built using a function per line on each processor
 
         :param value_at: binary function
+        :param distribution: the distribution direction (LINE, COLUMN)
+        :param col_size: number of columns
+        :param line_size: number of lines
         :return: an 2d array of the given line and column size, where for all valid line column
             i, j, the value at this index is value_at(i, j)
         """
@@ -66,48 +73,19 @@ def init_line(value_at: Callable[[int, int], int], col_size: int = _NPROCS,
         parray2d = PArray2D()
         parray2d.__global_index = ((0, line_size - 1), (0, col_size - 1))
 
-        parray2d.__local_index = _local_index(Distribution.LINE, col_size, line_size, _PID)
-
-        for line in range(parray2d.__local_index[0][0], parray2d.__local_index[0][1] + 1):
-            for column in range(parray2d.__local_index[1][0], parray2d.__local_index[1][1] + 1):
-                parray2d.__content.append(value_at(line, column))
-
-        parray2d.__distribution = [
-            _local_index(parray2d.__distribution_direction, col_size, line_size, i) for i in
-            range(_NPROCS)]
-
-        return parray2d
-
-    @staticmethod
-    def init_column(value_at: Callable[[int, int], int], col_size: int = _NPROCS,
-                    line_size: int = _NPROCS):
-        """
-        Return an array built using a function per column on each processor
-
-        :param value_at: binary function
-        :return: an 2d array of the given line and column size, where for all valid line column
-            i, j, the value at this index is value_at(i, j)
-        """
-        assert _NPROCS <= col_size
-        assert _NPROCS <= line_size
-
-        parray2d = PArray2D()
-        parray2d.__global_index = ((0, line_size - 1), (0, col_size - 1))
-
-        parray2d.__local_index = _local_index(Distribution.COLUMN, col_size, line_size, _PID)
+        parray2d.__local_index = _local_index(distribution, col_size, line_size, _PID)
 
         for line in range(parray2d.__local_index[0][0], parray2d.__local_index[0][1] + 1):
             for column in range(parray2d.__local_index[1][0], parray2d.__local_index[1][1] + 1):
                 parray2d.__content.append(value_at(line, column))
-
-        parray2d.__distribution_direction = Distribution.COLUMN
+        parray2d.__distribution_direction = Distribution.LINE
         parray2d.__distribution = [
             _local_index(parray2d.__distribution_direction, col_size, line_size, i) for i in
             range(_NPROCS)]
 
         return parray2d
 
-    def distribute(self: 'PArray2D') -> 'PArray2D':
+    def distribute(self: 'PArray2D[T]') -> 'PArray2D[T]':
         """
         Distribute line to column
         """
@@ -138,3 +116,13 @@ def distribute(self: 'PArray2D') -> 'PArray2D':
                 _COMM.gather(content_to_send, i)
 
         return parray2d
+
+    def map(self: 'PArray2D[T]', unary_op: Callable[[T], V]) -> 'PArray2D[V]':
+        """
+        Apply a function to all the elements.
+
+        The returned array has the same shape (same size, same distribution)
+        than the initial array.
+        """
+        self.__content = [unary_op(elem) for elem in self.__content]
+        return self
diff --git a/pyske/examples/array/hello_world.py b/pyske/examples/array/hello_world.py
index 6064f00..270c55a 100644
--- a/pyske/examples/array/hello_world.py
+++ b/pyske/examples/array/hello_world.py
@@ -2,7 +2,7 @@
 Example: various manipulations on a parallel array
 """
 
-from pyske.core.array.parray2d import PArray2D
+from pyske.core.array.parray2d import PArray2D, Distribution
 
 
 def __main():
@@ -10,7 +10,7 @@ def __main():
     line_size = 12
 
     print("Line initialization")
-    parray2d_line_init = PArray2D.init_line(lambda line, column: line * col_size + column, col_size, line_size)
+    parray2d_line_init = PArray2D.init(lambda line, column: line * col_size + column, Distribution.LINE, col_size, line_size)
     print(parray2d_line_init)
 
     print("Line to column distribution")
@@ -18,7 +18,7 @@ def __main():
     print(parray2d_column)
 
     print("Column initialization")
-    parray2d_column_init = PArray2D.init_column(lambda line, column: line * col_size + column, col_size, line_size)
+    parray2d_column_init = PArray2D.init(lambda line, column: line * col_size + column, Distribution.COLUMN, col_size, line_size)
     print(parray2d_column_init)
 
 

From f318b51e9f46f2b442dcaeefd126178fc1d47c1a Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Tue, 22 Jun 2021 11:55:21 +0200
Subject: [PATCH 41/53] reduce function

---
 pyske/core/array/parray2d.py        | 27 ++++++++++++++++++++++++++-
 pyske/examples/array/hello_world.py | 11 +++++++++--
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py
index 9f91457..226b7ae 100644
--- a/pyske/core/array/parray2d.py
+++ b/pyske/core/array/parray2d.py
@@ -3,7 +3,8 @@
 
 class PArray2D: parallel arrays.
 """
-from typing import Callable, TypeVar, Generic
+import functools
+from typing import Callable, TypeVar, Generic, Optional
 from enum import Enum
 
 from pyske.core import SList
@@ -16,10 +17,12 @@ class PArray2D: parallel arrays.
 T = TypeVar('T')  # pylint: disable=invalid-name
 V = TypeVar('V')  # pylint: disable=invalid-name
 
+
 class Distribution(Enum):
     LINE = 'LINE'
     COLUMN = 'COLUMN'
 
+
 def _local_index(distribution: Enum, col_size: int, line_size: int, pid: int):
     local_sizes = SList([])
     for i in range(_NPROCS):
@@ -126,3 +129,25 @@ def map(self: 'PArray2D[T]', unary_op: Callable[[T], V]) -> 'PArray2D[V]':
         """
         self.__content = [unary_op(elem) for elem in self.__content]
         return self
+
+    def reduce(self: 'PArray2D[T]', binary_op: Callable[[T, T], T],
+               neutral: Optional[T] = None) -> T:
+        """
+        Reduce an array of value to one value.
+
+        :param binary_op: a binary associative and commutative operation
+        :param neutral: (optional):
+            a value that should be a neutral element for the operation,
+            i.e. for all element e,
+                ``binary_op(neutral, e) == binary_op(e, neutral) == e``.
+            If this argument is omitted the list should not be empty.
+        :return: a value
+        """
+        if neutral is None:
+            assert self.__global_index != ((-1, -1), (-1, -1))
+            partial = functools.reduce(binary_op, self.__content)
+            partials = _COMM.allgather(partial)
+            return functools.reduce(binary_op, partials)
+        partial = functools.reduce(binary_op, self.__content, neutral)
+        partials = _COMM.allgather(partial)
+        return functools.reduce(binary_op, partials, neutral)
diff --git a/pyske/examples/array/hello_world.py b/pyske/examples/array/hello_world.py
index 270c55a..ed38f4b 100644
--- a/pyske/examples/array/hello_world.py
+++ b/pyske/examples/array/hello_world.py
@@ -10,7 +10,8 @@ def __main():
     line_size = 12
 
     print("Line initialization")
-    parray2d_line_init = PArray2D.init(lambda line, column: line * col_size + column, Distribution.LINE, col_size, line_size)
+    parray2d_line_init = PArray2D.init(lambda line, column: line * col_size + column,
+                                       Distribution.LINE, col_size, line_size)
     print(parray2d_line_init)
 
     print("Line to column distribution")
@@ -18,9 +19,15 @@ def __main():
     print(parray2d_column)
 
     print("Column initialization")
-    parray2d_column_init = PArray2D.init(lambda line, column: line * col_size + column, Distribution.COLUMN, col_size, line_size)
+    parray2d_column_init = PArray2D.init(lambda line, column: line * col_size + column,
+                                         Distribution.COLUMN, col_size, line_size)
     print(parray2d_column_init)
 
+    print(parray2d_column_init.reduce(lambda x, y: x + y, 0))
+    print(parray2d_line_init.reduce(lambda x, y: x + y, 0))
+    print(parray2d_column.reduce(lambda x, y: x + y, 0))
+    print(PArray2D().reduce(lambda x, y: x + y, 0))
+
 
 if __name__ == '__main__':
     __main()

From 8282befe53fece4a9263c226b11cbfbe241803bd Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Wed, 23 Jun 2021 10:40:21 +0200
Subject: [PATCH 42/53] array interface

---
 pyske/core/array/array_interface.py | 100 ++++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)
 create mode 100644 pyske/core/array/array_interface.py

diff --git a/pyske/core/array/array_interface.py b/pyske/core/array/array_interface.py
new file mode 100644
index 0000000..1709262
--- /dev/null
+++ b/pyske/core/array/array_interface.py
@@ -0,0 +1,100 @@
+"""
+Interface for PySke array.
+
+Interfaces: Array2D.
+"""
+
+from abc import ABC, abstractmethod
+from typing import Callable, Generic, TypeVar, Optional
+
+from pyske.core.array.parray2d import Distribution
+# pylint: disable=unused-import
+from pyske.core.interface import List
+from pyske.core.support import parallel as parimpl
+
+T = TypeVar('T')  # pylint: disable=invalid-name
+V = TypeVar('V')  # pylint: disable=invalid-name
+
+_PID: int = parimpl.PID
+_NPROCS: int = parimpl.NPROCS
+_COMM = parimpl.COMM
+
+class Array2D(ABC, Generic[T]):
+    """
+        PySke array2d (interface)
+
+        Static methods:
+            init.
+
+        Methods:
+            map, reduce, distribute,
+            get_partition.
+        """
+
+    @abstractmethod
+    def __init__(self: 'Array2D[T]') -> None:
+        """
+        Return an empty list.
+        """
+
+    @staticmethod
+    @abstractmethod
+    def init(value_at: Callable[[int, int], V], distribution: Distribution,
+             col_size: int = _NPROCS,
+             line_size: int = _NPROCS) -> 'Array2D[V]':
+        """
+        Return an array built using a function per line on each processor
+
+        :param value_at: binary function
+        :param distribution: the distribution direction (LINE, COLUMN)
+        :param col_size: number of columns
+        :param line_size: number of lines
+        :return: an 2d array of the given line and column size, where for all valid line column
+            i, j, the value at this index is value_at(i, j)
+        """
+
+    @abstractmethod
+    def distribute(self: 'Array2D[T]') -> 'Array2D[T]':
+        """
+        Copy the array while changing its distribution.
+
+        In sequential, it just returns ``self``. In parallel, communications
+        are performed to meet the new distribution.
+
+        :return: an array containing the same elements.
+        """
+
+    @abstractmethod
+    def map(self: 'Array2D[T]', unary_op: Callable[[T], V]) -> 'Array2D[V]':
+        """
+        Apply a function to all the elements.
+
+        The returned array has the same shape (same size, same distribution)
+        than the initial array.
+
+        :param unary_op: function to apply to elements
+        :return: a new array
+        """
+
+    @abstractmethod
+    def reduce(self: 'PArray2D[T]', binary_op: Callable[[T, T], T],
+               neutral: Optional[T] = None) -> T:
+        """
+        Reduce an array of value to one value.
+
+        :param binary_op: a binary associative and commutative operation
+        :param neutral: (optional):
+            a value that should be a neutral element for the operation,
+            i.e. for all element e,
+                ``binary_op(neutral, e) == binary_op(e, neutral) == e``.
+            If this argument is omitted the list should not be empty.
+        :return: a value
+        """
+
+    @abstractmethod
+    def get_partition(self: 'Array2D[T]') -> 'List[Array2D[T]]':
+        """
+        Make the distribution visible.
+
+        :return: a list of array.
+        """

From 0cec3c4c97c830c2b9bde7c3beaeeae84b4d71a9 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Wed, 23 Jun 2021 18:31:37 +0200
Subject: [PATCH 43/53] sarray2d class, changes parray2d content with sarray2d

---
 pyske/core/array/array_interface.py | 15 +++---
 pyske/core/array/parray2d.py        | 51 +++++++++++--------
 pyske/core/array/sarray2d.py        | 79 +++++++++++++++++++++++++++++
 pyske/examples/array/hello_world.py | 15 +++++-
 4 files changed, 132 insertions(+), 28 deletions(-)
 create mode 100644 pyske/core/array/sarray2d.py

diff --git a/pyske/core/array/array_interface.py b/pyske/core/array/array_interface.py
index 1709262..4b28a60 100644
--- a/pyske/core/array/array_interface.py
+++ b/pyske/core/array/array_interface.py
@@ -5,9 +5,9 @@
 """
 
 from abc import ABC, abstractmethod
+from enum import Enum
 from typing import Callable, Generic, TypeVar, Optional
 
-from pyske.core.array.parray2d import Distribution
 # pylint: disable=unused-import
 from pyske.core.interface import List
 from pyske.core.support import parallel as parimpl
@@ -19,6 +19,10 @@
 _NPROCS: int = parimpl.NPROCS
 _COMM = parimpl.COMM
 
+class Distribution(Enum):
+    LINE = 'LINE'
+    COLUMN = 'COLUMN'
+
 class Array2D(ABC, Generic[T]):
     """
         PySke array2d (interface)
@@ -39,14 +43,13 @@ def __init__(self: 'Array2D[T]') -> None:
 
     @staticmethod
     @abstractmethod
-    def init(value_at: Callable[[int, int], V], distribution: Distribution,
-             col_size: int = _NPROCS,
-             line_size: int = _NPROCS) -> 'Array2D[V]':
+    def init(value_at: Callable[[int, int], V], distribution: Distribution, col_size: int,
+             line_size: int) -> 'Array2D[V]':
         """
         Return an array built using a function per line on each processor
 
         :param value_at: binary function
-        :param distribution: the distribution direction (LINE, COLUMN)
+        :param distribution: the distribution direction (LINE, COLUMN), leave empty for sequential array
         :param col_size: number of columns
         :param line_size: number of lines
         :return: an 2d array of the given line and column size, where for all valid line column
@@ -77,7 +80,7 @@ def map(self: 'Array2D[T]', unary_op: Callable[[T], V]) -> 'Array2D[V]':
         """
 
     @abstractmethod
-    def reduce(self: 'PArray2D[T]', binary_op: Callable[[T, T], T],
+    def reduce(self: 'Array2D[T]', binary_op: Callable[[T, T], T],
                neutral: Optional[T] = None) -> T:
         """
         Reduce an array of value to one value.
diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py
index 226b7ae..73df1ff 100644
--- a/pyske/core/array/parray2d.py
+++ b/pyske/core/array/parray2d.py
@@ -3,11 +3,13 @@
 
 class PArray2D: parallel arrays.
 """
-import functools
 from typing import Callable, TypeVar, Generic, Optional
 from enum import Enum
 
 from pyske.core import SList
+from pyske.core.array import array_interface
+from pyske.core.array.array_interface import Distribution
+from pyske.core.array.sarray2d import SArray2D
 from pyske.core.support import parallel as parimpl
 
 _PID: int = parimpl.PID
@@ -18,11 +20,6 @@ class PArray2D: parallel arrays.
 V = TypeVar('V')  # pylint: disable=invalid-name
 
 
-class Distribution(Enum):
-    LINE = 'LINE'
-    COLUMN = 'COLUMN'
-
-
 def _local_index(distribution: Enum, col_size: int, line_size: int, pid: int):
     local_sizes = SList([])
     for i in range(_NPROCS):
@@ -36,16 +33,17 @@ def _local_index(distribution: Enum, col_size: int, line_size: int, pid: int):
     return (0, line_size - 1), (start_indexes[pid], start_indexes[pid] + local_sizes[pid] - 1)
 
 
-class PArray2D(Generic[T]):
+class PArray2D(array_interface.Array2D, Generic[T]):
     # pylint: disable=protected-access
     """
     Distributed arrays
     """
 
     def __init__(self: 'PArray2D[T]'):
+        super().__init__()
         self.__global_index = ((-1, -1), (-1, -1))
         self.__local_index = ((-1, -1), (-1, -1))
-        self.__content = []
+        self.__content = SArray2D([], -1, -1)
         self.__distribution = [((-1, -1), (-1, -1)) for _ in range(0, _NPROCS)]
         self.__distribution_direction = Distribution.LINE
 
@@ -54,7 +52,7 @@ def __str__(self: 'PArray2D[T]') -> str:
                "  global_index: " + str(self.__global_index) + "\n" + \
                "  local_index: " + str(self.__local_index) + "\n" + \
                "  distribution: " + str(self.__distribution) + "\n" + \
-               "  content: " + str(self.__content) + "\n"
+               "  content: \n" + str(self.__content) + "\n"
 
     @staticmethod
     def init(value_at: Callable[[int, int], V], distribution: Distribution,
@@ -78,10 +76,15 @@ def init(value_at: Callable[[int, int], V], distribution: Distribution,
 
         parray2d.__local_index = _local_index(distribution, col_size, line_size, _PID)
 
+        content = []
         for line in range(parray2d.__local_index[0][0], parray2d.__local_index[0][1] + 1):
             for column in range(parray2d.__local_index[1][0], parray2d.__local_index[1][1] + 1):
-                parray2d.__content.append(value_at(line, column))
-        parray2d.__distribution_direction = Distribution.LINE
+                content.append(value_at(line, column))
+        local_line_size = parray2d.__local_index[0][1] - parray2d.__local_index[0][0] + 1
+        local_col_size = parray2d.__local_index[1][1] - parray2d.__local_index[1][0] + 1
+        parray2d.__content = SArray2D(content, local_line_size, local_col_size)
+
+        parray2d.__distribution_direction = distribution
         parray2d.__distribution = [
             _local_index(parray2d.__distribution_direction, col_size, line_size, i) for i in
             range(_NPROCS)]
@@ -103,6 +106,8 @@ def distribute(self: 'PArray2D[T]') -> 'PArray2D[T]':
         parray2d.__distribution = [
             _local_index(parray2d.__distribution_direction, col_size, line_size, i) for i in
             range(_NPROCS)]
+        local_line_size = parray2d.__local_index[0][1] - parray2d.__local_index[0][0] + 1
+        local_col_size = parray2d.__local_index[1][1] - parray2d.__local_index[1][0] + 1
 
         # update content for each process
         for i in range(0, _NPROCS):
@@ -110,11 +115,12 @@ def distribute(self: 'PArray2D[T]') -> 'PArray2D[T]':
             for j in range(len(self.__content)):
                 if j % col_size in range(parray2d.__distribution[i][1][0],
                                          parray2d.__distribution[i][1][1] + 1):
-                    content_to_send.append(self.__content[j])
+                    content_to_send.append(self.__content.values[j])
             if i == _PID:
-                parray2d.__content = _COMM.gather(content_to_send, i)
+                content = _COMM.gather(content_to_send, i)
                 # flatten the list
-                parray2d.__content = [item for items in parray2d.__content for item in items]
+                content = [item for items in content for item in items]
+                parray2d.__content = SArray2D(content, local_line_size, local_col_size)
             else:
                 _COMM.gather(content_to_send, i)
 
@@ -127,7 +133,7 @@ def map(self: 'PArray2D[T]', unary_op: Callable[[T], V]) -> 'PArray2D[V]':
         The returned array has the same shape (same size, same distribution)
         than the initial array.
         """
-        self.__content = [unary_op(elem) for elem in self.__content]
+        self.__content = self.__content.map(unary_op)
         return self
 
     def reduce(self: 'PArray2D[T]', binary_op: Callable[[T, T], T],
@@ -145,9 +151,12 @@ def reduce(self: 'PArray2D[T]', binary_op: Callable[[T, T], T],
         """
         if neutral is None:
             assert self.__global_index != ((-1, -1), (-1, -1))
-            partial = functools.reduce(binary_op, self.__content)
-            partials = _COMM.allgather(partial)
-            return functools.reduce(binary_op, partials)
-        partial = functools.reduce(binary_op, self.__content, neutral)
-        partials = _COMM.allgather(partial)
-        return functools.reduce(binary_op, partials, neutral)
+            partial = self.__content.reduce(binary_op)
+            partials = SArray2D(_COMM.allgather(partial), self.__content.line_size, self.__content.column_size)
+        else:
+            partial = self.__content.reduce(binary_op, neutral)
+            partials = SArray2D(_COMM.allgather(partial), self.__content.line_size, self.__content.column_size)
+        return partials.reduce(binary_op, neutral)
+
+    def get_partition(self: 'PArray2D[T]') -> 'SList[PArray2D[T]]':
+        pass
diff --git a/pyske/core/array/sarray2d.py b/pyske/core/array/sarray2d.py
new file mode 100644
index 0000000..5ea30be
--- /dev/null
+++ b/pyske/core/array/sarray2d.py
@@ -0,0 +1,79 @@
+"""
+A module of sequential arrays and associated skeletons
+
+class SArray2D: sequential arrays.
+"""
+import functools
+from typing import TypeVar, Generic, Callable, Optional
+
+# pylint: disable=unused-import
+from pyske.core import SList
+from pyske.core.array.array_interface import Array2D, Distribution
+
+T = TypeVar('T')  # pylint: disable=invalid-name
+V = TypeVar('V')  # pylint: disable=invalid-name
+
+
+class SArray2D(Array2D, Generic[T]):
+    """
+    Sequential arrays
+    """
+
+    def __init__(self, content: list, line_size, col_size):
+        super().__init__()
+        self.__line_size = line_size
+        self.__column_size = col_size
+        self.__values = content
+
+    @property
+    def values(self):
+        return self.__values
+
+    @property
+    def line_size(self):
+        return self.__line_size
+
+    @property
+    def column_size(self):
+        return self.__column_size
+
+    def __str__(self):
+        content = ""
+        for i in range(self.__line_size):
+            content += "[ "
+            for j in range(self.__column_size):
+                content += str(self.__values[i * self.__column_size + j]) + " "
+            content += "]"
+            content += "\n"
+        return content
+
+    def __len__(self):
+        return self.__column_size * self.__line_size
+
+    @staticmethod
+    def init(value_at: Callable[[int, int], V], _: Distribution, col_size: int,
+             line_size: int) -> 'SArray2D[V]':
+        assert col_size > 0
+        assert line_size > 0
+        content = []
+        for line in range(line_size):
+            for column in range(col_size):
+                content.append(value_at(line, column))
+        sarray2d = SArray2D(content, line_size, col_size)
+        return sarray2d
+
+    def map(self: 'SArray2D[T]', unary_op: Callable[[T], V]) -> 'SArray2D[V]':
+        content = list(map(unary_op, self.__values))
+        return SArray2D(content, self.__line_size, self.__column_size)
+
+    def reduce(self: 'SArray2D[T]', binary_op: Callable[[T, T], T],
+               neutral: Optional[T] = None) -> T:
+        if neutral is None:
+            return functools.reduce(binary_op, self.__values)
+        return functools.reduce(binary_op, self.__values, neutral)
+
+    def get_partition(self: 'SArray2D[T]') -> 'SList[SArray2D[T]]':
+        pass
+
+    def distribute(self: 'SArray2D[T]') -> 'SArray2D[T]':
+        return self
diff --git a/pyske/examples/array/hello_world.py b/pyske/examples/array/hello_world.py
index ed38f4b..382dbb2 100644
--- a/pyske/examples/array/hello_world.py
+++ b/pyske/examples/array/hello_world.py
@@ -2,7 +2,9 @@
 Example: various manipulations on a parallel array
 """
 
-from pyske.core.array.parray2d import PArray2D, Distribution
+from pyske.core.array.parray2d import PArray2D
+from pyske.core.array.array_interface import Distribution
+from pyske.core.array.sarray2d import SArray2D
 
 
 def __main():
@@ -23,11 +25,22 @@ def __main():
                                          Distribution.COLUMN, col_size, line_size)
     print(parray2d_column_init)
 
+    print("Reduce Test")
     print(parray2d_column_init.reduce(lambda x, y: x + y, 0))
+    print(parray2d_column_init.reduce(lambda x, y: x + y))
     print(parray2d_line_init.reduce(lambda x, y: x + y, 0))
     print(parray2d_column.reduce(lambda x, y: x + y, 0))
     print(PArray2D().reduce(lambda x, y: x + y, 0))
 
+    print("Mapped array")
+    parray2d_map = parray2d_line_init.map(lambda x: x + 1)
+    print(parray2d_map)
+
+    print("Sarray initialization")
+    sarray2d = SArray2D.init(lambda line, column: line * col_size + column, Distribution.LINE,
+                             col_size, line_size)
+    print(sarray2d)
+
 
 if __name__ == '__main__':
     __main()

From 07fd6476e860c16a3dabaf695069470a90f86a8e Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Thu, 24 Jun 2021 12:09:20 +0200
Subject: [PATCH 44/53] array get_partition

---
 pyske/core/array/array_interface.py |  2 +-
 pyske/core/array/parray2d.py        | 12 +++++++-----
 pyske/core/array/sarray2d.py        |  5 ++++-
 pyske/examples/array/hello_world.py |  3 +++
 4 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/pyske/core/array/array_interface.py b/pyske/core/array/array_interface.py
index 4b28a60..8e28e2d 100644
--- a/pyske/core/array/array_interface.py
+++ b/pyske/core/array/array_interface.py
@@ -38,7 +38,7 @@ class Array2D(ABC, Generic[T]):
     @abstractmethod
     def __init__(self: 'Array2D[T]') -> None:
         """
-        Return an empty list.
+        Return an empty array.
         """
 
     @staticmethod
diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py
index 73df1ff..4695c28 100644
--- a/pyske/core/array/parray2d.py
+++ b/pyske/core/array/parray2d.py
@@ -6,7 +6,7 @@ class PArray2D: parallel arrays.
 from typing import Callable, TypeVar, Generic, Optional
 from enum import Enum
 
-from pyske.core import SList
+from pyske.core import SList, PList
 from pyske.core.array import array_interface
 from pyske.core.array.array_interface import Distribution
 from pyske.core.array.sarray2d import SArray2D
@@ -152,11 +152,13 @@ def reduce(self: 'PArray2D[T]', binary_op: Callable[[T, T], T],
         if neutral is None:
             assert self.__global_index != ((-1, -1), (-1, -1))
             partial = self.__content.reduce(binary_op)
-            partials = SArray2D(_COMM.allgather(partial), self.__content.line_size, self.__content.column_size)
         else:
             partial = self.__content.reduce(binary_op, neutral)
-            partials = SArray2D(_COMM.allgather(partial), self.__content.line_size, self.__content.column_size)
+        partials = SArray2D(_COMM.allgather(partial), self.__content.line_size,
+                            self.__content.column_size)
         return partials.reduce(binary_op, neutral)
 
-    def get_partition(self: 'PArray2D[T]') -> 'SList[PArray2D[T]]':
-        pass
+    def get_partition(self: 'PArray2D[T]') -> 'PList[SArray2D[T]]':
+        contents = _COMM.allgather(self.__content)
+        p_list = PList().init(lambda i: contents[i], _NPROCS)
+        return p_list
diff --git a/pyske/core/array/sarray2d.py b/pyske/core/array/sarray2d.py
index 5ea30be..71a6f22 100644
--- a/pyske/core/array/sarray2d.py
+++ b/pyske/core/array/sarray2d.py
@@ -47,6 +47,9 @@ def __str__(self):
             content += "\n"
         return content
 
+    def __repr__(self):
+        return str(self)
+
     def __len__(self):
         return self.__column_size * self.__line_size
 
@@ -73,7 +76,7 @@ def reduce(self: 'SArray2D[T]', binary_op: Callable[[T, T], T],
         return functools.reduce(binary_op, self.__values, neutral)
 
     def get_partition(self: 'SArray2D[T]') -> 'SList[SArray2D[T]]':
-        pass
+        return SList([self])
 
     def distribute(self: 'SArray2D[T]') -> 'SArray2D[T]':
         return self
diff --git a/pyske/examples/array/hello_world.py b/pyske/examples/array/hello_world.py
index 382dbb2..2d79649 100644
--- a/pyske/examples/array/hello_world.py
+++ b/pyske/examples/array/hello_world.py
@@ -41,6 +41,9 @@ def __main():
                              col_size, line_size)
     print(sarray2d)
 
+    print("Get partition")
+    print(parray2d_column_init.get_partition())
+
 
 if __name__ == '__main__':
     __main()

From aea60636517673f6465f037d78b9d8e38eef9853 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Thu, 24 Jun 2021 15:14:47 +0200
Subject: [PATCH 45/53] doctest, docstring array interface

---
 pyske/core/array/array_interface.py | 48 +++++++++++++++++++++++++++--
 pyske/core/array/parray2d.py        | 37 +++++-----------------
 pyske/core/array/sarray2d.py        | 21 ++++++++++---
 3 files changed, 68 insertions(+), 38 deletions(-)

diff --git a/pyske/core/array/array_interface.py b/pyske/core/array/array_interface.py
index 8e28e2d..808edfb 100644
--- a/pyske/core/array/array_interface.py
+++ b/pyske/core/array/array_interface.py
@@ -46,10 +46,21 @@ def __init__(self: 'Array2D[T]') -> None:
     def init(value_at: Callable[[int, int], V], distribution: Distribution, col_size: int,
              line_size: int) -> 'Array2D[V]':
         """
-        Return an array built using a function per line on each processor
+        Return an array built using a function
+
+        Example::
+
+            >>> from pyske.core.array.sarray2d import SArray2D
+            >>> from pyske.core.array.array_interface import Distribution
+            >>> number_line = 2
+            >>> number_column = 2
+            >>> init_function = lambda line, column: line * number_column + column
+            >>> SArray2D.init(init_function, Distribution.LINE, number_column, number_line)
+            (   0   1   )
+            (   2   3   )
 
         :param value_at: binary function
-        :param distribution: the distribution direction (LINE, COLUMN), leave empty for sequential array
+        :param distribution: the distribution direction (LINE, COLUMN)
         :param col_size: number of columns
         :param line_size: number of lines
         :return: an 2d array of the given line and column size, where for all valid line column
@@ -62,7 +73,7 @@ def distribute(self: 'Array2D[T]') -> 'Array2D[T]':
         Copy the array while changing its distribution.
 
         In sequential, it just returns ``self``. In parallel, communications
-        are performed to meet the new distribution.
+        are performed to meet line or column distribution.
 
         :return: an array containing the same elements.
         """
@@ -75,6 +86,16 @@ def map(self: 'Array2D[T]', unary_op: Callable[[T], V]) -> 'Array2D[V]':
         The returned array has the same shape (same size, same distribution)
         than the initial array.
 
+        Examples::
+
+            >>> from pyske.core.array.sarray2d import SArray2D
+            >>> from pyske.core.array.array_interface import Distribution
+            >>> col_size = 2
+            >>> line_size = 2
+            >>> SArray2D.init(lambda i, j: 1, Distribution.LINE, col_size, line_size).map(lambda x: x + 1)
+            (   2   2   )
+            (   2   2   )
+
         :param unary_op: function to apply to elements
         :return: a new array
         """
@@ -85,6 +106,17 @@ def reduce(self: 'Array2D[T]', binary_op: Callable[[T, T], T],
         """
         Reduce an array of value to one value.
 
+        Examples::
+
+            >>> from pyske.core.array.sarray2d import SArray2D
+            >>> from pyske.core.array.parray2d import PArray2D
+            >>> from pyske.core.array.array_interface import Distribution
+            >>> parray2d = PArray2D.init(lambda i, j: 1, Distribution.COLUMN, col_size=2, line_size=2)
+            >>> parray2d.reduce(lambda x, y: x + y)
+            4
+            >>> SArray2D().reduce(lambda x, y: x + y, 0)
+            0
+
         :param binary_op: a binary associative and commutative operation
         :param neutral: (optional):
             a value that should be a neutral element for the operation,
@@ -99,5 +131,15 @@ def get_partition(self: 'Array2D[T]') -> 'List[Array2D[T]]':
         """
         Make the distribution visible.
 
+        Examples::
+
+            >>> from pyske.core.array.sarray2d import SArray2D
+            >>> from pyske.core.array.array_interface import Distribution
+            >>> col_size = 2
+            >>> line_size = 2
+            >>> SArray2D.init(lambda i, j: 1, Distribution.LINE, col_size, line_size).get_partition()
+            [(   1   1   )
+            (   1   1   )]
+
         :return: a list of array.
         """
diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py
index 4695c28..6410f66 100644
--- a/pyske/core/array/parray2d.py
+++ b/pyske/core/array/parray2d.py
@@ -37,6 +37,13 @@ class PArray2D(array_interface.Array2D, Generic[T]):
     # pylint: disable=protected-access
     """
     Distributed arrays
+
+    Static methods from interface Array2D:
+        init.
+
+    Methods from interface Array2D:
+        map,reduce,
+        get_partition,distribute
     """
 
     def __init__(self: 'PArray2D[T]'):
@@ -58,16 +65,6 @@ def __str__(self: 'PArray2D[T]') -> str:
     def init(value_at: Callable[[int, int], V], distribution: Distribution,
              col_size: int = _NPROCS,
              line_size: int = _NPROCS) -> 'PArray2D[V]':
-        """
-        Return an array built using a function per line on each processor
-
-        :param value_at: binary function
-        :param distribution: the distribution direction (LINE, COLUMN)
-        :param col_size: number of columns
-        :param line_size: number of lines
-        :return: an 2d array of the given line and column size, where for all valid line column
-            i, j, the value at this index is value_at(i, j)
-        """
         assert _NPROCS <= col_size
         assert _NPROCS <= line_size
 
@@ -92,9 +89,6 @@ def init(value_at: Callable[[int, int], V], distribution: Distribution,
         return parray2d
 
     def distribute(self: 'PArray2D[T]') -> 'PArray2D[T]':
-        """
-        Distribute line to column
-        """
         parray2d = PArray2D()
         parray2d.__global_index = self.__global_index
 
@@ -127,28 +121,11 @@ def distribute(self: 'PArray2D[T]') -> 'PArray2D[T]':
         return parray2d
 
     def map(self: 'PArray2D[T]', unary_op: Callable[[T], V]) -> 'PArray2D[V]':
-        """
-        Apply a function to all the elements.
-
-        The returned array has the same shape (same size, same distribution)
-        than the initial array.
-        """
         self.__content = self.__content.map(unary_op)
         return self
 
     def reduce(self: 'PArray2D[T]', binary_op: Callable[[T, T], T],
                neutral: Optional[T] = None) -> T:
-        """
-        Reduce an array of value to one value.
-
-        :param binary_op: a binary associative and commutative operation
-        :param neutral: (optional):
-            a value that should be a neutral element for the operation,
-            i.e. for all element e,
-                ``binary_op(neutral, e) == binary_op(e, neutral) == e``.
-            If this argument is omitted the list should not be empty.
-        :return: a value
-        """
         if neutral is None:
             assert self.__global_index != ((-1, -1), (-1, -1))
             partial = self.__content.reduce(binary_op)
diff --git a/pyske/core/array/sarray2d.py b/pyske/core/array/sarray2d.py
index 71a6f22..edd8b96 100644
--- a/pyske/core/array/sarray2d.py
+++ b/pyske/core/array/sarray2d.py
@@ -17,10 +17,20 @@ class SArray2D: sequential arrays.
 class SArray2D(Array2D, Generic[T]):
     """
     Sequential arrays
+
+    Static methods from interface Array2D:
+        init.
+
+    Methods from interface Array2D:
+        map,reduce,
+        get_partition,distribute
+
     """
 
-    def __init__(self, content: list, line_size, col_size):
+    def __init__(self, content=None, line_size=-1, col_size=-1):
         super().__init__()
+        if content is None:
+            content = []
         self.__line_size = line_size
         self.__column_size = col_size
         self.__values = content
@@ -40,11 +50,12 @@ def column_size(self):
     def __str__(self):
         content = ""
         for i in range(self.__line_size):
-            content += "[ "
+            content += "("
             for j in range(self.__column_size):
-                content += str(self.__values[i * self.__column_size + j]) + " "
-            content += "]"
-            content += "\n"
+                content += '%4s' % self.__values[i * self.__column_size + j]
+            content += '%4s' % ')'
+            if i != self.line_size - 1:
+                content += '\n'
         return content
 
     def __repr__(self):

From d2717a1048904345ffd69de12872f65c3aa8afa9 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Fri, 25 Jun 2021 11:20:51 +0200
Subject: [PATCH 46/53] map2 skeleton

---
 pyske/core/array/array_interface.py | 25 +++++++++++++++++++++++++
 pyske/core/array/parray2d.py        | 21 +++++++++++++++++++--
 pyske/core/array/sarray2d.py        |  8 ++++++++
 pyske/examples/array/hello_world.py |  7 +++++++
 4 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/pyske/core/array/array_interface.py b/pyske/core/array/array_interface.py
index 808edfb..c38b920 100644
--- a/pyske/core/array/array_interface.py
+++ b/pyske/core/array/array_interface.py
@@ -13,6 +13,7 @@
 from pyske.core.support import parallel as parimpl
 
 T = TypeVar('T')  # pylint: disable=invalid-name
+U = TypeVar('U')  # pylint: disable=invalid-name
 V = TypeVar('V')  # pylint: disable=invalid-name
 
 _PID: int = parimpl.PID
@@ -143,3 +144,27 @@ def get_partition(self: 'Array2D[T]') -> 'List[Array2D[T]]':
 
         :return: a list of array.
         """
+
+    @abstractmethod
+    def map2(self: 'Array2D[T]', binary_op: Callable[[T, U], V],
+             a_array: 'Array2D[U]') -> 'Array2D[V]':
+        """
+        Apply a function to all the elements of ``self`` and an array.
+
+        The returned array has the same shape (same size, same distribution)
+        as the initial arrays.
+
+        Examples::
+
+            >>> from pyske.core.array.sarray2d import SArray2D
+            >>> from pyske.core.array.array_interface import Distribution
+            >>> sarray2d = SArray2D.init(lambda line, column: 1, Distribution.LINE, col_size = 2, line_size = 2)
+            >>> sarray2d.map2(lambda x, y: x + y, sarray2d)
+            (   2   2   )
+            (   2   2   )
+
+        :param binary_op: function to apply to each pair of elements
+        :param a_array: the second array.
+            The second array must have same column and line size than `self`.
+        :return: a new array.
+        """
diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py
index 6410f66..9d67a45 100644
--- a/pyske/core/array/parray2d.py
+++ b/pyske/core/array/parray2d.py
@@ -17,6 +17,7 @@ class PArray2D: parallel arrays.
 _COMM = parimpl.COMM
 
 T = TypeVar('T')  # pylint: disable=invalid-name
+U = TypeVar('U')  # pylint: disable=invalid-name
 V = TypeVar('V')  # pylint: disable=invalid-name
 
 
@@ -54,6 +55,14 @@ def __init__(self: 'PArray2D[T]'):
         self.__distribution = [((-1, -1), (-1, -1)) for _ in range(0, _NPROCS)]
         self.__distribution_direction = Distribution.LINE
 
+    def __get_shape(self: 'PArray2D[T]') -> 'PArray2D':
+        p_array2d = PArray2D()
+        p_array2d.__global_index = self.__global_index
+        p_array2d.__local_index = self.__local_index
+        p_array2d.__distribution = self.__distribution
+        p_array2d.__distribution_direction = self.__distribution_direction
+        return p_array2d
+
     def __str__(self: 'PArray2D[T]') -> str:
         return "PID[" + str(_PID) + "]:\n" + \
                "  global_index: " + str(self.__global_index) + "\n" + \
@@ -121,8 +130,9 @@ def distribute(self: 'PArray2D[T]') -> 'PArray2D[T]':
         return parray2d
 
     def map(self: 'PArray2D[T]', unary_op: Callable[[T], V]) -> 'PArray2D[V]':
-        self.__content = self.__content.map(unary_op)
-        return self
+        p_array2d = self.__get_shape()
+        p_array2d.__content = self.__content.map(unary_op)
+        return p_array2d
 
     def reduce(self: 'PArray2D[T]', binary_op: Callable[[T, T], T],
                neutral: Optional[T] = None) -> T:
@@ -139,3 +149,10 @@ def get_partition(self: 'PArray2D[T]') -> 'PList[SArray2D[T]]':
         contents = _COMM.allgather(self.__content)
         p_list = PList().init(lambda i: contents[i], _NPROCS)
         return p_list
+
+    def map2(self: 'PArray2D[T]', binary_op: Callable[[T, U], V],
+             a_array: 'PArray2D[U]') -> 'PArray2D[V]':
+        assert self.__distribution == a_array.__distribution
+        p_array2d = self.__get_shape()
+        p_array2d.__content = self.__content.map2(binary_op, a_array.__content)
+        return p_array2d
diff --git a/pyske/core/array/sarray2d.py b/pyske/core/array/sarray2d.py
index edd8b96..cda9ffd 100644
--- a/pyske/core/array/sarray2d.py
+++ b/pyske/core/array/sarray2d.py
@@ -11,6 +11,7 @@ class SArray2D: sequential arrays.
 from pyske.core.array.array_interface import Array2D, Distribution
 
 T = TypeVar('T')  # pylint: disable=invalid-name
+U = TypeVar('U')  # pylint: disable=invalid-name
 V = TypeVar('V')  # pylint: disable=invalid-name
 
 
@@ -91,3 +92,10 @@ def get_partition(self: 'SArray2D[T]') -> 'SList[SArray2D[T]]':
 
     def distribute(self: 'SArray2D[T]') -> 'SArray2D[T]':
         return self
+
+    def map2(self: 'SArray2D[T]', binary_op: Callable[[T, U], V],
+             a_array: 'SArray2D[U]') -> 'SArray2D[V]':
+        assert self.__line_size == a_array.line_size
+        assert self.__column_size == a_array.column_size
+        content = [binary_op(left, right) for (left, right) in zip(self.__values, a_array.values)]
+        return SArray2D(content, self.__line_size, self.__column_size)
diff --git a/pyske/examples/array/hello_world.py b/pyske/examples/array/hello_world.py
index 2d79649..3c9a55b 100644
--- a/pyske/examples/array/hello_world.py
+++ b/pyske/examples/array/hello_world.py
@@ -44,6 +44,13 @@ def __main():
     print("Get partition")
     print(parray2d_column_init.get_partition())
 
+    b_sarray2d = SArray2D.init(lambda line, column: 1, Distribution.LINE, col_size, line_size)
+
+    print("Map2 array")
+    print(sarray2d.map2(lambda x, y: x + y, b_sarray2d))
+    print(parray2d_line_init.map2(lambda x, y: x + y, parray2d_line_init))
+    print(parray2d_column_init.map2(lambda x, y: x + y, parray2d_column))
+
 
 if __name__ == '__main__':
     __main()

From 22481844af843f04292fbd106276c222c5ab0930 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Mon, 28 Jun 2021 11:53:10 +0200
Subject: [PATCH 47/53] adding to_seq skeleton

---
 pyske/core/array/array_interface.py | 30 +++++++++++++++++++++++++++++
 pyske/core/array/parray2d.py        |  9 +++++++++
 pyske/core/array/sarray2d.py        |  9 +++++++++
 pyske/examples/array/hello_world.py |  3 +++
 4 files changed, 51 insertions(+)

diff --git a/pyske/core/array/array_interface.py b/pyske/core/array/array_interface.py
index c38b920..a4ba0e7 100644
--- a/pyske/core/array/array_interface.py
+++ b/pyske/core/array/array_interface.py
@@ -76,6 +76,14 @@ def distribute(self: 'Array2D[T]') -> 'Array2D[T]':
         In sequential, it just returns ``self``. In parallel, communications
         are performed to meet line or column distribution.
 
+        Examples::
+
+            >>> from pyske.core.array.sarray2d import SArray2D
+            >>> sarray2d = SArray2D.init(lambda i, j: 1, Distribution.LINE, col_size=2, line_size=2)
+            >>> sarray2d.distribute()
+            (   1   1   )
+            (   1   1   )
+
         :return: an array containing the same elements.
         """
 
@@ -168,3 +176,25 @@ def map2(self: 'Array2D[T]', binary_op: Callable[[T, U], V],
             The second array must have same column and line size than `self`.
         :return: a new array.
         """
+
+    @abstractmethod
+    def to_seq(self: 'Array2D[T]') -> 'Array2D[T]':
+        """
+        Return a sequential array with same content.
+
+        The distribution must be per line.
+
+        Examples::
+
+            >>> from pyske.core.array.sarray2d import SArray2D
+            >>> from pyske.core.array.parray2d import PArray2D
+            >>> from pyske.core.array.array_interface import Distribution
+            >>> PArray2D.init(lambda i, j: 1, Distribution.LINE, col_size=2, line_size=2).to_seq()
+            (   1   1   )
+            (   1   1   )
+            >>> SArray2D.init(lambda line, column: 1, Distribution.LINE, col_size = 2, line_size = 2).to_seq()
+            (   1   1   )
+            (   1   1   )
+
+        :return: a sequential array.
+        """
diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py
index 9d67a45..446afea 100644
--- a/pyske/core/array/parray2d.py
+++ b/pyske/core/array/parray2d.py
@@ -156,3 +156,12 @@ def map2(self: 'PArray2D[T]', binary_op: Callable[[T, U], V],
         p_array2d = self.__get_shape()
         p_array2d.__content = self.__content.map2(binary_op, a_array.__content)
         return p_array2d
+
+    def to_seq(self: 'PArray2D[T]') -> 'SArray2D[T]':
+        assert self.__distribution_direction == Distribution.LINE
+        col_size = self.__global_index[1][1] - self.__global_index[1][0] + 1
+        line_size = self.__global_index[0][1] - self.__global_index[0][0] + 1
+        content = self.get_partition()\
+            .reduce(lambda a_sarray, b_sarray: SArray2D.concat(a_sarray, b_sarray),
+                    SArray2D([], 0, 0)).values
+        return SArray2D(content, line_size, col_size)
diff --git a/pyske/core/array/sarray2d.py b/pyske/core/array/sarray2d.py
index cda9ffd..e9f75f5 100644
--- a/pyske/core/array/sarray2d.py
+++ b/pyske/core/array/sarray2d.py
@@ -99,3 +99,12 @@ def map2(self: 'SArray2D[T]', binary_op: Callable[[T, U], V],
         assert self.__column_size == a_array.column_size
         content = [binary_op(left, right) for (left, right) in zip(self.__values, a_array.values)]
         return SArray2D(content, self.__line_size, self.__column_size)
+
+    def to_seq(self: 'SArray2D[T]') -> 'SArray2D[T]':
+        return self
+
+    @staticmethod
+    def concat(a_sarray: 'SArray2D[T]', b_sarray: 'SArray2D[T]') -> 'SArray2D[T]':
+        line_size = a_sarray.line_size + b_sarray.line_size
+        col_size = a_sarray.column_size
+        return SArray2D(a_sarray.values + b_sarray.values, line_size, col_size)
diff --git a/pyske/examples/array/hello_world.py b/pyske/examples/array/hello_world.py
index 3c9a55b..65e10f2 100644
--- a/pyske/examples/array/hello_world.py
+++ b/pyske/examples/array/hello_world.py
@@ -51,6 +51,9 @@ def __main():
     print(parray2d_line_init.map2(lambda x, y: x + y, parray2d_line_init))
     print(parray2d_column_init.map2(lambda x, y: x + y, parray2d_column))
 
+    print("To seq")
+    print(parray2d_line_init.to_seq())
+
 
 if __name__ == '__main__':
     __main()

From 91a8978bc559da37d603d031ab87b2b534df6749 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Mon, 28 Jun 2021 12:19:37 +0200
Subject: [PATCH 48/53] new doctests with to_seq

---
 pyske/core/array/array_interface.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/pyske/core/array/array_interface.py b/pyske/core/array/array_interface.py
index a4ba0e7..c34d744 100644
--- a/pyske/core/array/array_interface.py
+++ b/pyske/core/array/array_interface.py
@@ -79,6 +79,7 @@ def distribute(self: 'Array2D[T]') -> 'Array2D[T]':
         Examples::
 
             >>> from pyske.core.array.sarray2d import SArray2D
+            >>> from pyske.core.array.array_interface import Distribution
             >>> sarray2d = SArray2D.init(lambda i, j: 1, Distribution.LINE, col_size=2, line_size=2)
             >>> sarray2d.distribute()
             (   1   1   )
@@ -98,12 +99,17 @@ def map(self: 'Array2D[T]', unary_op: Callable[[T], V]) -> 'Array2D[V]':
         Examples::
 
             >>> from pyske.core.array.sarray2d import SArray2D
+            >>> from pyske.core.array.parray2d import PArray2D
             >>> from pyske.core.array.array_interface import Distribution
             >>> col_size = 2
             >>> line_size = 2
             >>> SArray2D.init(lambda i, j: 1, Distribution.LINE, col_size, line_size).map(lambda x: x + 1)
             (   2   2   )
             (   2   2   )
+            >>> parray2d = PArray2D.init(lambda i, j: 1, Distribution.LINE, col_size=2, line_size=2).map(lambda x: x + 1)
+            >>> parray2d.to_seq()
+            (   2   2   )
+            (   2   2   )
 
         :param unary_op: function to apply to elements
         :return: a new array
@@ -143,12 +149,21 @@ def get_partition(self: 'Array2D[T]') -> 'List[Array2D[T]]':
         Examples::
 
             >>> from pyske.core.array.sarray2d import SArray2D
+            >>> from pyske.core.array.parray2d import PArray2D
             >>> from pyske.core.array.array_interface import Distribution
+            >>> from pyske.core.util import par
             >>> col_size = 2
             >>> line_size = 2
-            >>> SArray2D.init(lambda i, j: 1, Distribution.LINE, col_size, line_size).get_partition()
-            [(   1   1   )
-            (   1   1   )]
+            >>> init_function = lambda line, column: line * col_size + column
+            >>> SArray2D.init(init_function, Distribution.LINE, col_size, line_size).get_partition()
+            [(   0   1   )
+            (   2   3   )]
+            >>> parray2d = PArray2D.init(init_function, Distribution.LINE, col_size=2, line_size=2)
+            >>> parray2d.get_partition().to_seq() if par.procs() == [0, 1] else [(0, 1), (2, 3)]
+            [(0, 1), (2, 3)]
+
+
+
 
         :return: a list of array.
         """

From e0e02a0e3e52ed986ad05e15242665666fd179d1 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Mon, 28 Jun 2021 12:37:53 +0200
Subject: [PATCH 49/53] correction doctest get_partition

---
 pyske/core/array/array_interface.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/pyske/core/array/array_interface.py b/pyske/core/array/array_interface.py
index c34d744..acb03f8 100644
--- a/pyske/core/array/array_interface.py
+++ b/pyske/core/array/array_interface.py
@@ -159,11 +159,8 @@ def get_partition(self: 'Array2D[T]') -> 'List[Array2D[T]]':
             [(   0   1   )
             (   2   3   )]
             >>> parray2d = PArray2D.init(init_function, Distribution.LINE, col_size=2, line_size=2)
-            >>> parray2d.get_partition().to_seq() if par.procs() == [0, 1] else [(0, 1), (2, 3)]
-            [(0, 1), (2, 3)]
-
-
-
+            >>> parray2d.get_partition().to_seq() if par.procs() == [0, 1] else '[(   0   1   ), (   2   3   )]'
+            '[(   0   1   ), (   2   3   )]'
 
         :return: a list of array.
         """

From b51b9d3c7a1e746e3ce39b6b8e467bca25a8b91d Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Mon, 28 Jun 2021 19:24:06 +0200
Subject: [PATCH 50/53] column to line distribution

---
 pyske/core/array/array_interface.py |  6 +--
 pyske/core/array/parray2d.py        | 74 +++++++++++++++++++++--------
 pyske/examples/array/hello_world.py |  8 +++-
 3 files changed, 61 insertions(+), 27 deletions(-)

diff --git a/pyske/core/array/array_interface.py b/pyske/core/array/array_interface.py
index acb03f8..a366011 100644
--- a/pyske/core/array/array_interface.py
+++ b/pyske/core/array/array_interface.py
@@ -69,7 +69,7 @@ def init(value_at: Callable[[int, int], V], distribution: Distribution, col_size
         """
 
     @abstractmethod
-    def distribute(self: 'Array2D[T]') -> 'Array2D[T]':
+    def distribute(self: 'Array2D[T]', distribution_direction: Distribution) -> 'Array2D[T]':
         """
         Copy the array while changing its distribution.
 
@@ -194,14 +194,12 @@ def to_seq(self: 'Array2D[T]') -> 'Array2D[T]':
         """
         Return a sequential array with same content.
 
-        The distribution must be per line.
-
         Examples::
 
             >>> from pyske.core.array.sarray2d import SArray2D
             >>> from pyske.core.array.parray2d import PArray2D
             >>> from pyske.core.array.array_interface import Distribution
-            >>> PArray2D.init(lambda i, j: 1, Distribution.LINE, col_size=2, line_size=2).to_seq()
+            >>> PArray2D.init(lambda i, j: 1, Distribution.COLUMN, col_size=2, line_size=2).to_seq()
             (   1   1   )
             (   1   1   )
             >>> SArray2D.init(lambda line, column: 1, Distribution.LINE, col_size = 2, line_size = 2).to_seq()
diff --git a/pyske/core/array/parray2d.py b/pyske/core/array/parray2d.py
index 446afea..28e88c0 100644
--- a/pyske/core/array/parray2d.py
+++ b/pyske/core/array/parray2d.py
@@ -97,36 +97,66 @@ def init(value_at: Callable[[int, int], V], distribution: Distribution,
 
         return parray2d
 
-    def distribute(self: 'PArray2D[T]') -> 'PArray2D[T]':
-        parray2d = PArray2D()
-        parray2d.__global_index = self.__global_index
-
+    def __distribute_column(self: 'PArray2D[T]', new_parray: 'PArray2D[T]', local_line_size,
+                            local_col_size):
         col_size = self.__global_index[1][1] - self.__global_index[1][0] + 1
-        line_size = self.__global_index[0][1] - self.__global_index[0][0] + 1
-
-        parray2d.__local_index = _local_index(Distribution.COLUMN, col_size, line_size, _PID)
-        parray2d.__distribution_direction = Distribution.COLUMN
-        parray2d.__distribution = [
-            _local_index(parray2d.__distribution_direction, col_size, line_size, i) for i in
-            range(_NPROCS)]
-        local_line_size = parray2d.__local_index[0][1] - parray2d.__local_index[0][0] + 1
-        local_col_size = parray2d.__local_index[1][1] - parray2d.__local_index[1][0] + 1
-
         # update content for each process
         for i in range(0, _NPROCS):
             content_to_send = []
             for j in range(len(self.__content)):
-                if j % col_size in range(parray2d.__distribution[i][1][0],
-                                         parray2d.__distribution[i][1][1] + 1):
+                if j % col_size in range(new_parray.__distribution[i][1][0],
+                                         new_parray.__distribution[i][1][1] + 1):
                     content_to_send.append(self.__content.values[j])
             if i == _PID:
                 content = _COMM.gather(content_to_send, i)
                 # flatten the list
                 content = [item for items in content for item in items]
-                parray2d.__content = SArray2D(content, local_line_size, local_col_size)
+                new_parray.__content = SArray2D(content, local_line_size, local_col_size)
             else:
                 _COMM.gather(content_to_send, i)
 
+        return new_parray
+
+    def __distribute_line(self: 'PArray2D[T]', new_parray: 'PArray2D[T]', local_line_size,
+                          local_col_size, old_distribution):
+        # update content for each process
+        for i in range(0, _NPROCS):
+            content = []
+            old_local_col_size = old_distribution[_PID][1][1] - old_distribution[_PID][1][0] + 1
+            start_index = new_parray.__distribution[i][0][0] * old_local_col_size
+            stop_index = (new_parray.__distribution[i][0][1] + 1) * old_local_col_size
+            for j in range(start_index, stop_index, old_local_col_size):
+                content_to_send = self.__content.values[j:j+old_local_col_size]
+                content_to_send = _COMM.allgather(content_to_send)
+                content.extend([item for items in content_to_send for item in items])
+            if i == _PID:
+                new_parray.__content = SArray2D(content, local_line_size, local_col_size)
+
+        return new_parray
+
+    def distribute(self: 'PArray2D[T]', distribution_direction: Distribution) -> 'PArray2D[T]':
+        if distribution_direction == self.__distribution_direction:
+            return self
+        parray2d = PArray2D()
+        parray2d.__global_index = self.__global_index
+
+        line_size = self.__global_index[0][1] - self.__global_index[0][0] + 1
+        col_size = self.__global_index[1][1] - self.__global_index[1][0] + 1
+
+        old_distribution = self.__distribution
+
+        parray2d.__local_index = _local_index(distribution_direction, col_size, line_size, _PID)
+        parray2d.__distribution_direction = distribution_direction
+        parray2d.__distribution = [
+            _local_index(parray2d.__distribution_direction, col_size, line_size, i) for i in
+            range(_NPROCS)]
+        local_line_size = parray2d.__local_index[0][1] - parray2d.__local_index[0][0] + 1
+        local_col_size = parray2d.__local_index[1][1] - parray2d.__local_index[1][0] + 1
+        if distribution_direction == Distribution.COLUMN:
+            parray2d = self.__distribute_column(parray2d, local_line_size, local_col_size)
+        else:
+            parray2d = self.__distribute_line(parray2d, local_line_size, local_col_size, old_distribution)
+
         return parray2d
 
     def map(self: 'PArray2D[T]', unary_op: Callable[[T], V]) -> 'PArray2D[V]':
@@ -158,10 +188,12 @@ def map2(self: 'PArray2D[T]', binary_op: Callable[[T, U], V],
         return p_array2d
 
     def to_seq(self: 'PArray2D[T]') -> 'SArray2D[T]':
-        assert self.__distribution_direction == Distribution.LINE
-        col_size = self.__global_index[1][1] - self.__global_index[1][0] + 1
-        line_size = self.__global_index[0][1] - self.__global_index[0][0] + 1
-        content = self.get_partition()\
+        parray2d = self
+        if self.__distribution_direction == Distribution.COLUMN:
+            parray2d = self.distribute(Distribution.LINE)
+        col_size = parray2d.__global_index[1][1] - parray2d.__global_index[1][0] + 1
+        line_size = parray2d.__global_index[0][1] - parray2d.__global_index[0][0] + 1
+        content = parray2d.get_partition() \
             .reduce(lambda a_sarray, b_sarray: SArray2D.concat(a_sarray, b_sarray),
                     SArray2D([], 0, 0)).values
         return SArray2D(content, line_size, col_size)
diff --git a/pyske/examples/array/hello_world.py b/pyske/examples/array/hello_world.py
index 65e10f2..7f4f4eb 100644
--- a/pyske/examples/array/hello_world.py
+++ b/pyske/examples/array/hello_world.py
@@ -17,9 +17,13 @@ def __main():
     print(parray2d_line_init)
 
     print("Line to column distribution")
-    parray2d_column = parray2d_line_init.distribute()
+    parray2d_column = parray2d_line_init.distribute(Distribution.COLUMN)
     print(parray2d_column)
 
+    print("Column to line distribution")
+    parray2d_line = parray2d_column.distribute(Distribution.LINE)
+    print(parray2d_line)
+
     print("Column initialization")
     parray2d_column_init = PArray2D.init(lambda line, column: line * col_size + column,
                                          Distribution.COLUMN, col_size, line_size)
@@ -52,7 +56,7 @@ def __main():
     print(parray2d_column_init.map2(lambda x, y: x + y, parray2d_column))
 
     print("To seq")
-    print(parray2d_line_init.to_seq())
+    print(parray2d_column.to_seq())
 
 
 if __name__ == '__main__':

From afc28a0bfc35b0fd456f0b51cbc775e4805601b7 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Tue, 29 Jun 2021 13:50:52 +0200
Subject: [PATCH 51/53] Fix SArray2D.distribute signature (add distribution_direction)

---
 pyske/core/array/sarray2d.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyske/core/array/sarray2d.py b/pyske/core/array/sarray2d.py
index e9f75f5..432df3f 100644
--- a/pyske/core/array/sarray2d.py
+++ b/pyske/core/array/sarray2d.py
@@ -90,7 +90,7 @@ def reduce(self: 'SArray2D[T]', binary_op: Callable[[T, T], T],
     def get_partition(self: 'SArray2D[T]') -> 'SList[SArray2D[T]]':
         return SList([self])
 
-    def distribute(self: 'SArray2D[T]') -> 'SArray2D[T]':
+    def distribute(self: 'SArray2D[T]', distribution_direction: Distribution) -> 'SArray2D[T]':
         return self
 
     def map2(self: 'SArray2D[T]', binary_op: Callable[[T, U], V],

From 543bf9cb95deb6d45bebd2e52c71279e043c5005 Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Wed, 30 Jun 2021 10:27:09 +0200
Subject: [PATCH 52/53] Document distribution_direction parameter of distribute

---
 pyske/core/array/array_interface.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyske/core/array/array_interface.py b/pyske/core/array/array_interface.py
index a366011..ecc6afb 100644
--- a/pyske/core/array/array_interface.py
+++ b/pyske/core/array/array_interface.py
@@ -84,7 +84,7 @@ def distribute(self: 'Array2D[T]', distribution_direction: Distribution) -> 'Arr
             >>> sarray2d.distribute()
             (   1   1   )
             (   1   1   )
-
+        :param distribution_direction: the distribution direction (LINE, COLUMN)
         :return: an array containing the same elements.
         """
 

From c16c720ec1bfcaf22d1bb12cbeb480e8f7369f4f Mon Sep 17 00:00:00 2001
From: Besnard Clement <clement.besnard2@etu.univ-orleans.fr>
Date: Wed, 30 Jun 2021 13:58:38 +0200
Subject: [PATCH 53/53] Add missing parameter to distribute doctest

---
 pyske/core/array/array_interface.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyske/core/array/array_interface.py b/pyske/core/array/array_interface.py
index ecc6afb..35d3a24 100644
--- a/pyske/core/array/array_interface.py
+++ b/pyske/core/array/array_interface.py
@@ -81,9 +81,10 @@ def distribute(self: 'Array2D[T]', distribution_direction: Distribution) -> 'Arr
             >>> from pyske.core.array.sarray2d import SArray2D
             >>> from pyske.core.array.array_interface import Distribution
             >>> sarray2d = SArray2D.init(lambda i, j: 1, Distribution.LINE, col_size=2, line_size=2)
-            >>> sarray2d.distribute()
+            >>> sarray2d.distribute(Distribution.COLUMN)
             (   1   1   )
             (   1   1   )
+
         :param distribution_direction: the distribution direction (LINE, COLUMN)
         :return: an array containing the same elements.
         """