From 038a5daee0b941ef0c1143acf3618391f26a65ba Mon Sep 17 00:00:00 2001
From: Arthur <arthur_gaillard@yahoo.com>
Date: Sat, 15 Nov 2025 15:05:15 +0100
Subject: [PATCH 1/5] Part B ArthurGaillard: implement numpy questions and OneNearestNeighbor

---
 numpy_questions.py   | 26 +++++++++---
 sklearn_questions.py | 95 ++++++++++++++++++++++++++++++++++----------
 2 files changed, 94 insertions(+), 27 deletions(-)

diff --git a/numpy_questions.py b/numpy_questions.py
index 21fcec4b..a0f57401 100644
--- a/numpy_questions.py
+++ b/numpy_questions.py
@@ -15,6 +15,7 @@
 This will be enforced with `flake8`. You can check that there is no flake8
 errors by calling `flake8` at the root of the repo.
 """
+
 import numpy as np
 
 
@@ -37,12 +38,20 @@ def max_index(X):
         If the input is not a numpy array or
         if the shape is not 2D.
     """
-    i = 0
-    j = 0
+    if not isinstance(X, np.ndarray) or X.ndim != 2:
+        raise ValueError("X must be a 2D numpy array.")
+    if X.size == 0:
+        # An empty array has no maximum: report that explicitly rather
+        # than relying on the error np.argmax would raise further down.
+        raise ValueError("X must contain at least one element.")
 
-    # TODO
+    # np.argmax works on the flattened (row-major) array and returns the
+    # first occurrence of the maximum; np.unravel_index maps that flat
+    # position back to a (row, column) pair.
+    flat_idx = np.argmax(X)
+    i, j = np.unravel_index(flat_idx, X.shape)
 
-    return i, j
+    return int(i), int(j)
 
 
 def wallis_product(n_terms):
@@ -64,4 +73,11 @@ def wallis_product(n_terms):
     """
     # XXX : The n_terms is an int that corresponds to the number of
     # terms in the product. For example 10000.
-    return 0.
+    # The empty product equals 1, so zero terms must give 2 * 1 = 2; the
+    # loop simply does not run in that case, no special-casing is needed.
+    half_pi = 1.0
+    for k in range(1, n_terms + 1):
+        four_k_sq = 4 * k**2
+        half_pi = half_pi * four_k_sq / (four_k_sq - 1)
+
+    return 2 * half_pi
diff --git a/sklearn_questions.py b/sklearn_questions.py
index f65038c6..0c1d4f01 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -20,55 +20,106 @@
 `pydocstyle` that you can also call at the root of the repo.
 """
 import numpy as np
-from sklearn.base import BaseEstimator
-from sklearn.base import ClassifierMixin
+from sklearn.base import ClassifierMixin, BaseEstimator
 from sklearn.utils.validation import check_X_y
-from sklearn.utils.validation import check_array
+from sklearn.utils.validation import validate_data
 from sklearn.utils.validation import check_is_fitted
 from sklearn.utils.multiclass import check_classification_targets
 
 
 class OneNearestNeighbor(BaseEstimator, ClassifierMixin):
-    "OneNearestNeighbor classifier."
+    """OneNearestNeighbor classifier."""
 
     def __init__(self):  # noqa: D107
         pass
 
     def fit(self, X, y):
-        """Write docstring.
-
-        And describe parameters
+        """Fit the OneNearestNeighbor classifier.
+    
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training samples.
+        y : array-like of shape (n_samples,)
+            Target labels.
+
+        Returns
+        -------
+        self : object
+            Fitted estimator.
         """
-        X, y = check_X_y(X, y)
+        X, y = validate_data(
+            self,
+            X, y,
+            ensure_2d=True,
+            dtype=None
+            )
+        
         check_classification_targets(y)
+        
+        self.X_ = X
+        self.y_ = y
+
         self.classes_ = np.unique(y)
         self.n_features_in_ = X.shape[1]
 
-        # XXX fix
         return self
 
     def predict(self, X):
-        """Write docstring.
-
-        And describe parameters
+        """Predict the label of each sample in X.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            The samples for which we want to guess the label. They must have the 
+            same number of features as the data used during ``fit``.
+
+        Returns
+        -------
+        y_pred : ndarray of shape (n_samples,)
+            The predicted labels. For each sample, the model looks for the closest
+            point in the training set and returns its label.
         """
+
         check_is_fitted(self)
-        X = check_array(X)
-        y_pred = np.full(
-            shape=len(X), fill_value=self.classes_[0],
-            dtype=self.classes_.dtype
+        X = validate_data(
+            self,
+            X,
+            ensure_2d=True,
+            dtype=None,
+            reset=False
         )
 
-        # XXX fix
+        distance = np.linalg.norm(self.X_[None,:,:] - X[:,None,:], axis=2)
+        nearest_index = np.argmin(distance, axis=1)
+        y_pred = self.y_[nearest_index]
+        
         return y_pred
 
     def score(self, X, y):
-        """Write docstring.
+        """Compute the accuracy of the classifier.
 
-        And describe parameters
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Test samples used to evaluate the model.
+
+        y : array-like of shape (n_samples,)
+            True labels corresponding to X.
+
+        Returns
+        -------
+        score : float
+            The accuracy of the predictions, between 0 and 1.
         """
-        X, y = check_X_y(X, y)
+        X, y = validate_data(
+            self,
+            X, y,
+            ensure_2d=True,
+            dtype=None,
+            reset=False
+        )
         y_pred = self.predict(X)
 
-        # XXX fix
-        return y_pred.sum()
+
+        return (y_pred == y).mean()

From d046cb003cd142a187636c8270d25cfe703cd18c Mon Sep 17 00:00:00 2001
From: Arthur <arthur_gaillard@yahoo.com>
Date: Sat, 15 Nov 2025 15:17:36 +0100
Subject: [PATCH 2/5] Part B ArthurGaillard: fix flake8 issues, mixin order and input dtype

---
 sklearn_questions.py | 37 +++++++++++++++----------------------
 1 file changed, 15 insertions(+), 22 deletions(-)

diff --git a/sklearn_questions.py b/sklearn_questions.py
index 0c1d4f01..2d855827 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -19,15 +19,15 @@
 for the methods you code and for the class. The docstring will be checked using
 `pydocstyle` that you can also call at the root of the repo.
 """
+
 import numpy as np
 from sklearn.base import ClassifierMixin, BaseEstimator
-from sklearn.utils.validation import check_X_y
 from sklearn.utils.validation import validate_data
 from sklearn.utils.validation import check_is_fitted
 from sklearn.utils.multiclass import check_classification_targets
 
 
-class OneNearestNeighbor(BaseEstimator, ClassifierMixin):
+class OneNearestNeighbor(ClassifierMixin, BaseEstimator):
     """OneNearestNeighbor classifier."""
 
     def __init__(self):  # noqa: D107
@@ -35,7 +35,7 @@ def __init__(self):  # noqa: D107
 
     def fit(self, X, y):
         """Fit the OneNearestNeighbor classifier.
-    
+
         Parameters
         ----------
         X : array-like of shape (n_samples, n_features)
@@ -48,15 +48,10 @@ def fit(self, X, y):
         self : object
             Fitted estimator.
         """
-        X, y = validate_data(
-            self,
-            X, y,
-            ensure_2d=True,
-            dtype=None
-            )
-        
+        X, y = validate_data(self, X, y, ensure_2d=True, dtype="numeric")
+
         check_classification_targets(y)
-        
+
         self.X_ = X
         self.y_ = y
 
@@ -71,29 +66,28 @@ def predict(self, X):
         Parameters
         ----------
         X : array-like of shape (n_samples, n_features)
-            The samples for which we want to guess the label. They must have the 
-            same number of features as the data used during ``fit``.
+            The samples for which we want to guess the label. They must have
+            the same number of features as the data used during ``fit``.
 
         Returns
         -------
         y_pred : ndarray of shape (n_samples,)
-            The predicted labels. For each sample, the model looks for the closest
-            point in the training set and returns its label.
+            The predicted labels. For each sample, the model looks for the
+            closest point in the training set and returns its label.
         """
-
         check_is_fitted(self)
         X = validate_data(
             self,
             X,
             ensure_2d=True,
-            dtype=None,
+            dtype="numeric",
             reset=False
-        )
+            )
 
-        distance = np.linalg.norm(self.X_[None,:,:] - X[:,None,:], axis=2)
+        distance = np.linalg.norm(self.X_[None, :, :] - X[:, None, :], axis=2)
         nearest_index = np.argmin(distance, axis=1)
         y_pred = self.y_[nearest_index]
-        
+
         return y_pred
 
     def score(self, X, y):
@@ -118,8 +112,7 @@ def score(self, X, y):
             ensure_2d=True,
             dtype=None,
             reset=False
-        )
+            )
         y_pred = self.predict(X)
 
-
         return (y_pred == y).mean()

From 02b6f95f4db0461b4527210b838d78c983af81c5 Mon Sep 17 00:00:00 2001
From: Arthur <arthur_gaillard@yahoo.com>
Date: Sat, 15 Nov 2025 15:32:37 +0100
Subject: [PATCH 3/5] Part B ArthurGaillard: reformat validate_data call in fit

---
 sklearn_questions.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/sklearn_questions.py b/sklearn_questions.py
index 2d855827..ce921e0a 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -48,7 +48,12 @@ def fit(self, X, y):
         self : object
             Fitted estimator.
         """
-        X, y = validate_data(self, X, y, ensure_2d=True, dtype="numeric")
+        X, y = validate_data(
+            self,
+            X,
+            y,
+            ensure_2d=True,
+            dtype="numeric")
 
         check_classification_targets(y)
 

From a73702cbc454fe6e281a4e3702a6237eb322e606 Mon Sep 17 00:00:00 2001
From: Arthur <arthur_gaillard@yahoo.com>
Date: Sat, 15 Nov 2025 16:03:51 +0100
Subject: [PATCH 4/5] Part B ArthurGaillard: replace validate_data with check_array

---
 sklearn_questions.py | 75 ++++++++++++++------------------------------
 1 file changed, 24 insertions(+), 51 deletions(-)

diff --git a/sklearn_questions.py b/sklearn_questions.py
index ce921e0a..41366c14 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -1,29 +1,8 @@
-"""Assignment - making a sklearn estimator.
-
-The goal of this assignment is to implement by yourself a scikit-learn
-estimator for the OneNearestNeighbor and check that it is working properly.
-
-The nearest neighbor classifier predicts for a point X_i the target y_k of
-the training sample X_k which is the closest to X_i. We measure proximity with
-the Euclidean distance. The model will be evaluated with the accuracy (average
-number of samples corectly classified). You need to implement the `fit`,
-`predict` and `score` methods for this class. The code you write should pass
-the test we implemented. You can run the tests by calling at the root of the
-repo `pytest test_sklearn_questions.py`.
-
-We also ask to respect the pep8 convention: https://pep8.org. This will be
-enforced with `flake8`. You can check that there is no flake8 errors by
-calling `flake8` at the root of the repo.
-
-Finally, you need to write docstring similar to the one in `numpy_questions`
-for the methods you code and for the class. The docstring will be checked using
-`pydocstyle` that you can also call at the root of the repo.
-"""
+"""Assignment - making a sklearn estimator."""
 
 import numpy as np
 from sklearn.base import ClassifierMixin, BaseEstimator
-from sklearn.utils.validation import validate_data
-from sklearn.utils.validation import check_is_fitted
+from sklearn.utils.validation import check_array, check_is_fitted
 from sklearn.utils.multiclass import check_classification_targets
 
 
@@ -48,18 +27,12 @@ def fit(self, X, y):
         self : object
             Fitted estimator.
         """
-        X, y = validate_data(
-            self,
-            X,
-            y,
-            ensure_2d=True,
-            dtype="numeric")
-
+        X = check_array(X, ensure_2d=True)
+        y = check_array(y, ensure_2d=False)
         check_classification_targets(y)
 
         self.X_ = X
         self.y_ = y
-
         self.classes_ = np.unique(y)
         self.n_features_in_ = X.shape[1]
 
@@ -71,29 +44,27 @@ def predict(self, X):
         Parameters
         ----------
         X : array-like of shape (n_samples, n_features)
-            The samples for which we want to guess the label. They must have
-            the same number of features as the data used during ``fit``.
+            Samples for which to predict labels.
 
         Returns
         -------
         y_pred : ndarray of shape (n_samples,)
-            The predicted labels. For each sample, the model looks for the
-            closest point in the training set and returns its label.
+            Predicted labels.
         """
         check_is_fitted(self)
-        X = validate_data(
-            self,
-            X,
-            ensure_2d=True,
-            dtype="numeric",
-            reset=False
+        X = check_array(X, ensure_2d=True)
+
+        if X.shape[1] != self.n_features_in_:
+            raise ValueError(
+                f"X has {X.shape[1]} features, but "
+                f"{self.__class__.__name__} is expecting "
+                f"{self.n_features_in_} features as input"
             )
 
         distance = np.linalg.norm(self.X_[None, :, :] - X[:, None, :], axis=2)
         nearest_index = np.argmin(distance, axis=1)
-        y_pred = self.y_[nearest_index]
 
-        return y_pred
+        return self.y_[nearest_index]
 
     def score(self, X, y):
         """Compute the accuracy of the classifier.
@@ -102,21 +73,23 @@ def score(self, X, y):
         ----------
         X : array-like of shape (n_samples, n_features)
             Test samples used to evaluate the model.
-
         y : array-like of shape (n_samples,)
-            True labels corresponding to X.
+            True labels.
 
         Returns
         -------
         score : float
             The accuracy of the predictions, between 0 and 1.
         """
-        X, y = validate_data(
-            self,
-            X, y,
-            ensure_2d=True,
-            dtype=None,
-            reset=False
+        check_is_fitted(self)
+        X = check_array(X, ensure_2d=True)
+        y = check_array(y, ensure_2d=False)
+
+        if X.shape[1] != self.n_features_in_:
+            raise ValueError(
+                f"X has {X.shape[1]} features, but "
+                f"{self.__class__.__name__} is expecting "
+                f"{self.n_features_in_} features as input"
             )
         y_pred = self.predict(X)
 

From db97db64ccbc421929836f83855f37192fbff3c4 Mon Sep 17 00:00:00 2001
From: Arthur <arthur_gaillard@yahoo.com>
Date: Sat, 15 Nov 2025 16:13:44 +0100
Subject: [PATCH 5/5] Part B ArthurGaillard: validate X and y with check_X_y

---
 sklearn_questions.py | 39 +++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/sklearn_questions.py b/sklearn_questions.py
index 41366c14..94115f3c 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -1,8 +1,8 @@
 """Assignment - making a sklearn estimator."""
 
 import numpy as np
-from sklearn.base import ClassifierMixin, BaseEstimator
-from sklearn.utils.validation import check_array, check_is_fitted
+from sklearn.base import BaseEstimator, ClassifierMixin
+from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
 from sklearn.utils.multiclass import check_classification_targets
 
 
@@ -27,8 +27,7 @@ def fit(self, X, y):
         self : object
             Fitted estimator.
         """
-        X = check_array(X, ensure_2d=True)
-        y = check_array(y, ensure_2d=False)
+        X, y = check_X_y(X, y)
         check_classification_targets(y)
 
         self.X_ = X
@@ -39,32 +38,33 @@ def fit(self, X, y):
         return self
 
     def predict(self, X):
-        """Predict the label of each sample in X.
+        """Predict labels for the input samples.
 
         Parameters
         ----------
         X : array-like of shape (n_samples, n_features)
-            Samples for which to predict labels.
+            Samples to classify. Must have same number of features
+            as training data.
 
         Returns
         -------
         y_pred : ndarray of shape (n_samples,)
-            Predicted labels.
+            Predicted labels for each input sample.
         """
         check_is_fitted(self)
-        X = check_array(X, ensure_2d=True)
+        X = check_array(X)
 
         if X.shape[1] != self.n_features_in_:
             raise ValueError(
                 f"X has {X.shape[1]} features, but "
                 f"{self.__class__.__name__} is expecting "
-                f"{self.n_features_in_} features as input"
+                f"{self.n_features_in_} features as input."
             )
 
-        distance = np.linalg.norm(self.X_[None, :, :] - X[:, None, :], axis=2)
-        nearest_index = np.argmin(distance, axis=1)
+        distances = np.linalg.norm(self.X_[None, :, :] - X[:, None, :], axis=2)
+        nearest_idx = np.argmin(distances, axis=1)
 
-        return self.y_[nearest_index]
+        return self.y_[nearest_idx]
 
     def score(self, X, y):
         """Compute the accuracy of the classifier.
@@ -72,25 +72,24 @@ def score(self, X, y):
         Parameters
         ----------
         X : array-like of shape (n_samples, n_features)
-            Test samples used to evaluate the model.
+            Samples used to evaluate the model.
         y : array-like of shape (n_samples,)
-            True labels.
+            True labels corresponding to X.
 
         Returns
         -------
         score : float
-            The accuracy of the predictions, between 0 and 1.
+            Accuracy of predictions, between 0 and 1.
         """
         check_is_fitted(self)
-        X = check_array(X, ensure_2d=True)
-        y = check_array(y, ensure_2d=False)
+        X, y = check_X_y(X, y)
 
         if X.shape[1] != self.n_features_in_:
             raise ValueError(
                 f"X has {X.shape[1]} features, but "
                 f"{self.__class__.__name__} is expecting "
-                f"{self.n_features_in_} features as input"
+                f"{self.n_features_in_} features as input."
             )
-        y_pred = self.predict(X)
 
-        return (y_pred == y).mean()
+        y_pred = self.predict(X)
+        return np.mean(y_pred == y)