From f31b7d6e1393280f190b109bc5fd7785aba8c9bc Mon Sep 17 00:00:00 2001
From: Mathis Le Lagadec <mathislelagadec@Mathisfang-MacBook-Pro.local>
Date: Fri, 14 Nov 2025 14:46:54 +0100
Subject: [PATCH 1/4] Complete numpy and OneNearestNeighbor assignment

---
 numpy_questions.py   |  25 ++++++----
 sklearn_questions.py | 107 +++++++++++++++++++++++++++++++------------
 2 files changed, 94 insertions(+), 38 deletions(-)

diff --git a/numpy_questions.py b/numpy_questions.py
index 21fcec4b..a271c77e 100644
--- a/numpy_questions.py
+++ b/numpy_questions.py
@@ -37,12 +37,14 @@ def max_index(X):
         If the input is not a numpy array or
         if the shape is not 2D.
     """
-    i = 0
-    j = 0
+    if not isinstance(X, np.ndarray):
+        raise ValueError("X must be a numpy array.")
+    if X.ndim != 2:
+        raise ValueError("X must be a 2D array.")
 
-    # TODO
-
-    return i, j
+    flat_idx = np.argmax(X)
+    i, j = np.unravel_index(flat_idx, X.shape)
+    return int(i), int(j)
 
 
 def wallis_product(n_terms):
@@ -62,6 +64,13 @@ def wallis_product(n_terms):
     pi : float
         The approximation of order `n_terms` of pi using the Wallis product.
     """
-    # XXX : The n_terms is an int that corresponds to the number of
-    # terms in the product. For example 10000.
-    return 0.
+    if n_terms < 0:
+        raise ValueError("n_terms must be non-negative.")
+
+    # Zero terms is the empty product (1), so the approximation is 2.0;
+    # np.prod of an empty array is 1.0, hence no special case is needed.
+
+    n = np.arange(1, n_terms + 1, dtype=float)
+    terms = (2 * n / (2 * n - 1)) * (2 * n / (2 * n + 1))
+    product = np.prod(terms)
+    return float(2 * product)
diff --git a/sklearn_questions.py b/sklearn_questions.py
index f65038c6..8ccee0dd 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -6,7 +6,7 @@
 The nearest neighbor classifier predicts for a point X_i the target y_k of
 the training sample X_k which is the closest to X_i. We measure proximity with
 the Euclidean distance. The model will be evaluated with the accuracy (average
-number of samples corectly classified). You need to implement the `fit`,
+number of samples correctly classified). You need to implement the `fit`,
 `predict` and `score` methods for this class. The code you write should pass
 the test we implemented. You can run the tests by calling at the root of the
 repo `pytest test_sklearn_questions.py`.
@@ -16,59 +16,106 @@
 calling `flake8` at the root of the repo.
 
 Finally, you need to write docstring similar to the one in `numpy_questions`
-for the methods you code and for the class. The docstring will be checked using
-`pydocstyle` that you can also call at the root of the repo.
+for the methods you code and for the class. The docstring will be checked
+using `pydocstyle` that you can also call at the root of the repo.
 """
 import numpy as np
-from sklearn.base import BaseEstimator
-from sklearn.base import ClassifierMixin
+from sklearn.base import ClassifierMixin, BaseEstimator
 from sklearn.utils.validation import check_X_y
-from sklearn.utils.validation import check_array
 from sklearn.utils.validation import check_is_fitted
+from sklearn.utils.validation import validate_data
 from sklearn.utils.multiclass import check_classification_targets
 
 
-class OneNearestNeighbor(BaseEstimator, ClassifierMixin):
-    "OneNearestNeighbor classifier."
+class OneNearestNeighbor(ClassifierMixin, BaseEstimator):
+    """One-nearest-neighbor classifier.
+
+    This classifier predicts, for each input sample, the target of the
+    closest training sample in Euclidean distance.
+    """
 
     def __init__(self):  # noqa: D107
+        """Initialize the OneNearestNeighbor classifier."""
+        # This estimator has no hyper-parameters.
         pass
 
     def fit(self, X, y):
-        """Write docstring.
-
-        And describe parameters
+        """Fit the one-nearest-neighbor classifier.
+
+        The training samples and their labels are stored so that predictions
+        can be made by finding the closest training sample to each new point.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training data.
+        y : array-like of shape (n_samples,)
+            Target labels.
+
+        Returns
+        -------
+        self : OneNearestNeighbor
+            Fitted estimator.
         """
-        X, y = check_X_y(X, y)
+        # validate_data will set n_features_in_ and perform basic checks
+        X, y = validate_data(self, X, y)
         check_classification_targets(y)
+
+        # Store training data and targets
+        self.X_ = X
+        self.y_ = y
+
+        # Attributes expected by scikit-learn
         self.classes_ = np.unique(y)
-        self.n_features_in_ = X.shape[1]
 
-        # XXX fix
         return self
 
     def predict(self, X):
-        """Write docstring.
+        """Predict class labels for samples in X.
+
+        Each sample in X is assigned the label of the closest training sample
+        stored during :meth:`fit`, using the Euclidean distance.
 
-        And describe parameters
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Input samples to classify.
+
+        Returns
+        -------
+        y_pred : ndarray of shape (n_samples,)
+            Predicted class labels for each sample in X.
         """
-        check_is_fitted(self)
-        X = check_array(X)
-        y_pred = np.full(
-            shape=len(X), fill_value=self.classes_[0],
-            dtype=self.classes_.dtype
-        )
-
-        # XXX fix
+        check_is_fitted(self, attributes=["X_", "y_"])
+        # reset=False enforces consistency with n_features_in_
+        X = validate_data(self, X, reset=False)
+
+        # Compute squared Euclidean distances to all training samples:
+        # diff shape: (n_samples_test, n_samples_train, n_features)
+        diff = X[:, np.newaxis, :] - self.X_[np.newaxis, :, :]
+        distances = np.sum(diff**2, axis=2)
+
+        # Index of nearest neighbor in the training set for each test sample
+        nearest_idx = np.argmin(distances, axis=1)
+        y_pred = self.y_[nearest_idx]
+
         return y_pred
 
     def score(self, X, y):
-        """Write docstring.
-
-        And describe parameters
+        """Return the mean accuracy on the given test data and labels.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Test samples.
+        y : array-like of shape (n_samples,)
+            True labels for X.
+
+        Returns
+        -------
+        score : float
+            Mean accuracy of predictions on X with respect to y.
         """
         X, y = check_X_y(X, y)
         y_pred = self.predict(X)
-
-        # XXX fix
-        return y_pred.sum()
+        return float(np.mean(y_pred == y))
\ No newline at end of file

From 166aa825a0fc9a2b157f6469f44bf593de102e08 Mon Sep 17 00:00:00 2001
From: Mathis Le Lagadec <mathislelagadec@Mathisfang-MacBook-Pro.local>
Date: Fri, 14 Nov 2025 14:53:40 +0100
Subject: [PATCH 2/4] Restore original spelling in module docstring

---
 sklearn_questions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn_questions.py b/sklearn_questions.py
index 8ccee0dd..4110c9c6 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -6,7 +6,7 @@
 The nearest neighbor classifier predicts for a point X_i the target y_k of
 the training sample X_k which is the closest to X_i. We measure proximity with
 the Euclidean distance. The model will be evaluated with the accuracy (average
-number of samples correctly classified). You need to implement the `fit`,
+number of samples corectly classified). You need to implement the `fit`,
 `predict` and `score` methods for this class. The code you write should pass
 the test we implemented. You can run the tests by calling at the root of the
 repo `pytest test_sklearn_questions.py`.

From 3a7d362db076799b8f6b577b38b612ecadb76be1 Mon Sep 17 00:00:00 2001
From: Mathis Le Lagadec <mathislelagadec@Mathisfang-MacBook-Pro.local>
Date: Fri, 14 Nov 2025 15:08:56 +0100
Subject: [PATCH 3/4] Complete numpy + OneNearestNeighbor solution and pass all
 tests

---
 sklearn_questions.py | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/sklearn_questions.py b/sklearn_questions.py
index 4110c9c6..6098b8fe 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -6,7 +6,7 @@
 The nearest neighbor classifier predicts for a point X_i the target y_k of
 the training sample X_k which is the closest to X_i. We measure proximity with
 the Euclidean distance. The model will be evaluated with the accuracy (average
-number of samples corectly classified). You need to implement the `fit`,
+number of samples correctly classified). You need to implement the `fit`,
 `predict` and `score` methods for this class. The code you write should pass
 the test we implemented. You can run the tests by calling at the root of the
 repo `pytest test_sklearn_questions.py`.
@@ -21,9 +21,7 @@
 """
 import numpy as np
 from sklearn.base import ClassifierMixin, BaseEstimator
-from sklearn.utils.validation import check_X_y
-from sklearn.utils.validation import check_is_fitted
-from sklearn.utils.validation import validate_data
+from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
 from sklearn.utils.multiclass import check_classification_targets
 
 
@@ -57,8 +55,7 @@ def fit(self, X, y):
         self : OneNearestNeighbor
             Fitted estimator.
         """
-        # validate_data will set n_features_in_ and perform basic checks
-        X, y = validate_data(self, X, y)
+        X, y = check_X_y(X, y)
         check_classification_targets(y)
 
         # Store training data and targets
@@ -67,6 +64,7 @@ def fit(self, X, y):
 
         # Attributes expected by scikit-learn
         self.classes_ = np.unique(y)
+        self.n_features_in_ = X.shape[1]
 
         return self
 
@@ -86,14 +84,22 @@ def predict(self, X):
         y_pred : ndarray of shape (n_samples,)
             Predicted class labels for each sample in X.
         """
-        check_is_fitted(self, attributes=["X_", "y_"])
-        # reset=False enforces consistency with n_features_in_
-        X = validate_data(self, X, reset=False)
+        check_is_fitted(self, attributes=["X_", "y_", "n_features_in_"])
+        X = check_array(X)
+
+        # Enforce consistency with the number of features seen during fit
+        n_features = X.shape[1]
+        if n_features != self.n_features_in_:
+            msg = (
+                f"X has {n_features} features, but {self.__class__.__name__} "
+                f"is expecting {self.n_features_in_} features as input"
+            )
+            raise ValueError(msg)
 
         # Compute squared Euclidean distances to all training samples:
         # diff shape: (n_samples_test, n_samples_train, n_features)
         diff = X[:, np.newaxis, :] - self.X_[np.newaxis, :, :]
-        distances = np.sum(diff**2, axis=2)
+        distances = np.sum(diff ** 2, axis=2)
 
         # Index of nearest neighbor in the training set for each test sample
         nearest_idx = np.argmin(distances, axis=1)
@@ -118,4 +124,4 @@ def score(self, X, y):
         """
         X, y = check_X_y(X, y)
         y_pred = self.predict(X)
-        return float(np.mean(y_pred == y))
\ No newline at end of file
+        return float(np.mean(y_pred == y))

From 1fe36464f503ea07b262c37aec0b83bd13c21d24 Mon Sep 17 00:00:00 2001
From: Mathis Le Lagadec <mathislelagadec@Mathisfang-MacBook-Pro.local>
Date: Fri, 14 Nov 2025 15:14:04 +0100
Subject: [PATCH 4/4] Restore original spelling in module docstring

---
 sklearn_questions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn_questions.py b/sklearn_questions.py
index 6098b8fe..436f85e2 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -6,7 +6,7 @@
 The nearest neighbor classifier predicts for a point X_i the target y_k of
 the training sample X_k which is the closest to X_i. We measure proximity with
 the Euclidean distance. The model will be evaluated with the accuracy (average
-number of samples correctly classified). You need to implement the `fit`,
+number of samples corectly classified). You need to implement the `fit`,
 `predict` and `score` methods for this class. The code you write should pass
 the test we implemented. You can run the tests by calling at the root of the
 repo `pytest test_sklearn_questions.py`.