From 559dccc88025241c14a2e9c85da32cf3971de142 Mon Sep 17 00:00:00 2001
From: ryanbalech1 <ryan.balech@polytechnique.edu>
Date: Sun, 16 Nov 2025 14:46:06 +0100
Subject: [PATCH 1/6] Complete numpy assignment (Ryan Balech)

---
 numpy_questions.py | 88 ++++++++++++++++++----------------------------
 1 file changed, 35 insertions(+), 53 deletions(-)

diff --git a/numpy_questions.py b/numpy_questions.py
index 21fcec4b..ff84d8c8 100644
--- a/numpy_questions.py
+++ b/numpy_questions.py
@@ -1,67 +1,49 @@
-"""Assignment - using numpy and making a PR.
-
-The goals of this assignment are:
-    * Use numpy in practice with two easy exercises.
-    * Use automated tools to validate the code (`pytest` and `flake8`)
-    * Submit a Pull-Request on github to practice `git`.
-
-The two functions below are skeleton functions. The docstrings explain what
-are the inputs, the outputs and the expected error. Fill the function to
-complete the assignment. The code should be able to pass the test that we
-wrote. To run the tests, use `pytest test_numpy_questions.py` at the root of
-the repo. It should say that 2 tests ran with success.
-
-We also ask to respect the pep8 convention: https://pep8.org.
-This will be enforced with `flake8`. You can check that there is no flake8
-errors by calling `flake8` at the root of the repo.
-"""
+# """Assignment - using numpy and making a PR.
+
+# The goals of this assignment are:
+#     * Use numpy in practice with two easy exercises.
+#     * Use automated tools to validate the code (`pytest` and `flake8`)
+#     * Submit a Pull-Request on github to practice `git`.
+
+# The two functions below are skeleton functions. The docstrings explain what
+# are the inputs, the outputs and the expected error. Fill the function to
+# complete the assignment. The code should be able to pass the test that we
+# wrote. To run the tests, use `pytest test_numpy_questions.py` at the root of
+# the repo. It should say that 2 tests ran with success.
+
+# We also ask to respect the pep8 convention: https://pep8.org.
+# This will be enforced with `flake8`. You can check that there is no flake8
+# errors by calling `flake8` at the root of the repo.
+# """
 import numpy as np
 
 
 def max_index(X):
-    """Return the index of the maximum in a numpy array.
-
-    Parameters
-    ----------
-    X : ndarray of shape (n_samples, n_features)
-        The input array.
+    if not isinstance(X, np.ndarray):
+        raise ValueError("Input must be a numpy array.")
+    if X.ndim != 2:
+        raise ValueError("Input must be a 2D numpy array.")
 
-    Returns
-    -------
-    (i, j) : tuple(int)
-        The row and columnd index of the maximum.
+    # Flatten index of the max
+    flat_index = np.argmax(X)
 
-    Raises
-    ------
-    ValueError
-        If the input is not a numpy array or
-        if the shape is not 2D.
-    """
-    i = 0
-    j = 0
-
-    # TODO
+    # Convert to 2D index
+    i, j = np.unravel_index(flat_index, X.shape)
 
     return i, j
 
 
 def wallis_product(n_terms):
-    """Implement the Wallis product to compute an approximation of pi.
+    if n_terms < 0:
+        raise ValueError("n_terms must be non-negative.")
+
+    if n_terms == 0:
+        return 1.0
+
+    n = np.arange(1, n_terms + 1)
+    terms = (4 * n * n) / (4 * n * n - 1)
 
-    See:
-    https://en.wikipedia.org/wiki/Wallis_product
+    product = np.prod(terms)
 
-    Parameters
-    ----------
-    n_terms : int
-        Number of steps in the Wallis product. Note that `n_terms=0` will
-        consider the product to be `1`.
+    return 2 * product
 
-    Returns
-    -------
-    pi : float
-        The approximation of order `n_terms` of pi using the Wallis product.
-    """
-    # XXX : The n_terms is an int that corresponds to the number of
-    # terms in the product. For example 10000.
-    return 0.

From 814f8b395ec4416bb5127c774e870a103099a3fd Mon Sep 17 00:00:00 2001
From: ryanbalech1 <ryan.balech@polytechnique.edu>
Date: Sun, 16 Nov 2025 14:53:45 +0100
Subject: [PATCH 2/6] Restore docstrings and fix flake8

---
 numpy_questions.py | 77 ++++++++++++++++++++++++++++++----------------
 1 file changed, 51 insertions(+), 26 deletions(-)

diff --git a/numpy_questions.py b/numpy_questions.py
index ff84d8c8..a53ad088 100644
--- a/numpy_questions.py
+++ b/numpy_questions.py
@@ -1,49 +1,74 @@
-# """Assignment - using numpy and making a PR.
-
-# The goals of this assignment are:
-#     * Use numpy in practice with two easy exercises.
-#     * Use automated tools to validate the code (`pytest` and `flake8`)
-#     * Submit a Pull-Request on github to practice `git`.
-
-# The two functions below are skeleton functions. The docstrings explain what
-# are the inputs, the outputs and the expected error. Fill the function to
-# complete the assignment. The code should be able to pass the test that we
-# wrote. To run the tests, use `pytest test_numpy_questions.py` at the root of
-# the repo. It should say that 2 tests ran with success.
-
-# We also ask to respect the pep8 convention: https://pep8.org.
-# This will be enforced with `flake8`. You can check that there is no flake8
-# errors by calling `flake8` at the root of the repo.
-# """
+"""Assignment - using numpy and making a PR.
+
+The goals of this assignment are:
+    * Use numpy in practice with two easy exercises.
+    * Use automated tools to validate the code (`pytest` and `flake8`)
+    * Submit a Pull-Request on github to practice `git`.
+
+The two functions below are skeleton functions. The docstrings explain what
+are the inputs, the outputs and the expected error. Fill the function to
+complete the assignment. The code should be able to pass the test that we
+wrote. To run the tests, use `pytest test_numpy_questions.py` at the root of
+the repo. It should say that 2 tests ran with success.
+
+We also ask to respect the pep8 convention: https://pep8.org.
+This will be enforced with `flake8`. You can check that there is no flake8
+errors by calling `flake8` at the root of the repo.
+"""
 import numpy as np
 
 
 def max_index(X):
+    """Return the index of the maximum in a numpy array.
+
+    Parameters
+    ----------
+    X : ndarray of shape (n_samples, n_features)
+        The input array.
+
+    Returns
+    -------
+    (i, j) : tuple(int)
+        The row and column index of the maximum.
+
+    Raises
+    ------
+    ValueError
+        If the input is not a numpy array or
+        if the shape is not 2D.
+    """
     if not isinstance(X, np.ndarray):
         raise ValueError("Input must be a numpy array.")
     if X.ndim != 2:
         raise ValueError("Input must be a 2D numpy array.")
 
-    # Flatten index of the max
     flat_index = np.argmax(X)
-
-    # Convert to 2D index
     i, j = np.unravel_index(flat_index, X.shape)
-
     return i, j
 
 
 def wallis_product(n_terms):
+    """Implement the Wallis product to compute an approximation of pi.
+
+    See:
+    https://en.wikipedia.org/wiki/Wallis_product
+
+    Parameters
+    ----------
+    n_terms : int
+        Number of steps in the Wallis product. Note that `n_terms=0` will
+        consider the product to be `1`.
+
+    Returns
+    -------
+    pi : float
+        The approximation of order `n_terms` of pi using the Wallis product.
+    """
     if n_terms < 0:
         raise ValueError("n_terms must be non-negative.")
-
     if n_terms == 0:
         return 1.0
 
     n = np.arange(1, n_terms + 1)
     terms = (4 * n * n) / (4 * n * n - 1)
-
     product = np.prod(terms)
-
-    return 2 * product
-

From b9784aa1f45558988e37d5a07648e6113291ee24 Mon Sep 17 00:00:00 2001
From: ryanbalech1 <ryan.balech@polytechnique.edu>
Date: Sun, 16 Nov 2025 14:59:07 +0100
Subject: [PATCH 3/6] Fix Wallis product return

---
 numpy_questions.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/numpy_questions.py b/numpy_questions.py
index a53ad088..853790ed 100644
--- a/numpy_questions.py
+++ b/numpy_questions.py
@@ -69,6 +69,8 @@ def wallis_product(n_terms):
     if n_terms == 0:
-        return 1.0
+        # The empty product is 1, so the approximation of pi is 2 * 1.
+        return 2.0
 
     n = np.arange(1, n_terms + 1)
     terms = (4 * n * n) / (4 * n * n - 1)
     product = np.prod(terms)
+    return 2 * product

From b20641bcb19d409f4114e69d019c1c968f948769 Mon Sep 17 00:00:00 2001
From: ryanbalech1 <ryan.balech@polytechnique.edu>
Date: Sun, 16 Nov 2025 15:25:32 +0100
Subject: [PATCH 4/6] Fix OneNearestNeighbor implementation and mixin order

---
 sklearn_questions.py | 99 +++++++++++++++++++++++++++++++-------------
 1 file changed, 71 insertions(+), 28 deletions(-)

diff --git a/sklearn_questions.py b/sklearn_questions.py
index f65038c6..bf6b6299 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -20,55 +20,98 @@
 `pydocstyle` that you can also call at the root of the repo.
 """
 import numpy as np
-from sklearn.base import BaseEstimator
-from sklearn.base import ClassifierMixin
-from sklearn.utils.validation import check_X_y
-from sklearn.utils.validation import check_array
-from sklearn.utils.validation import check_is_fitted
+from sklearn.base import BaseEstimator, ClassifierMixin
+from sklearn.utils.validation import validate_data, check_is_fitted
 from sklearn.utils.multiclass import check_classification_targets
 
 
-class OneNearestNeighbor(BaseEstimator, ClassifierMixin):
-    "OneNearestNeighbor classifier."
+class OneNearestNeighbor(ClassifierMixin, BaseEstimator):
+    """One nearest neighbor classifier.
+
+    This estimator predicts the label of each sample using the label of
+    the closest training sample according to the Euclidean distance.
+    """
 
     def __init__(self):  # noqa: D107
+        # No hyper-parameters for this simple estimator.
         pass
 
     def fit(self, X, y):
-        """Write docstring.
-
-        And describe parameters
+        """Fit the OneNearestNeighbor classifier.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training input samples.
+        y : array-like of shape (n_samples,)
+            Target labels.
+
+        Returns
+        -------
+        self : OneNearestNeighbor
+            Fitted estimator.
         """
-        X, y = check_X_y(X, y)
+        # validate_data sets n_features_in_ and does input validation
+        X, y = validate_data(self, X, y)
         check_classification_targets(y)
+
+        # Store training data
+        self.X_train_ = X
+        self.y_train_ = y
+
+        # Attributes expected by scikit-learn
         self.classes_ = np.unique(y)
-        self.n_features_in_ = X.shape[1]
 
-        # XXX fix
         return self
 
     def predict(self, X):
-        """Write docstring.
+        """Predict class labels for the provided samples.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Test samples.
 
-        And describe parameters
+        Returns
+        -------
+        y_pred : ndarray of shape (n_samples,)
+            Predicted class labels.
         """
         check_is_fitted(self)
-        X = check_array(X)
-        y_pred = np.full(
-            shape=len(X), fill_value=self.classes_[0],
-            dtype=self.classes_.dtype
-        )
 
-        # XXX fix
+        # Use validate_data with reset=False so n_features_in_ is checked
+        X = validate_data(self, X, reset=False)
+
+        # Compute Euclidean distance from each test point to each train point
+        # X shape: (n_test, n_features)
+        # self.X_train_ shape: (n_train, n_features)
+        diff = X[:, np.newaxis, :] - self.X_train_[np.newaxis, :, :]
+        distances = np.linalg.norm(diff, axis=2)  # (n_test, n_train)
+
+        # Index of the nearest neighbor for each test sample
+        nearest_idx = np.argmin(distances, axis=1)
+
+        # Predicted label is label of the nearest training point
+        y_pred = self.y_train_[nearest_idx]
+
         return y_pred
 
     def score(self, X, y):
-        """Write docstring.
-
-        And describe parameters
+        """Compute the mean accuracy on the given test data and labels.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Test samples.
+        y : array-like of shape (n_samples,)
+            True labels for X.
+
+        Returns
+        -------
+        score : float
+            Mean accuracy of predictions on X versus y.
         """
-        X, y = check_X_y(X, y)
+        # Validate and also check n_features_in_ against what was seen in fit
+        X, y = validate_data(self, X, y, reset=False)
         y_pred = self.predict(X)
-
-        # XXX fix
-        return y_pred.sum()
+        return float(np.mean(y_pred == y))

From 8ef8b66e9a97120bf90b119ccc447ed9b53ec539 Mon Sep 17 00:00:00 2001
From: ryanbalech1 <ryan.balech@polytechnique.edu>
Date: Sun, 16 Nov 2025 15:35:21 +0100
Subject: [PATCH 5/6] Remove validate_data import to fix CI

---
 sklearn_questions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn_questions.py b/sklearn_questions.py
index bf6b6299..cec65b08 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -20,8 +20,8 @@
 `pydocstyle` that you can also call at the root of the repo.
 """
 import numpy as np
-from sklearn.base import BaseEstimator, ClassifierMixin
-from sklearn.utils.validation import validate_data, check_is_fitted
+from sklearn.base import ClassifierMixin, BaseEstimator
+from sklearn.utils.validation import check_is_fitted
 from sklearn.utils.multiclass import check_classification_targets
 
 

From 84c554308c7c25add4dff8e6cb9901adce3fd02a Mon Sep 17 00:00:00 2001
From: ryanbalech1 <ryan.balech@polytechnique.edu>
Date: Sun, 16 Nov 2025 15:46:01 +0100
Subject: [PATCH 6/6] Implement OneNearestNeighbor without validate_data

---
 sklearn_questions.py | 86 ++++++++++++++++++--------------------------
 1 file changed, 34 insertions(+), 52 deletions(-)

diff --git a/sklearn_questions.py b/sklearn_questions.py
index cec65b08..879a9f3f 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -2,38 +2,18 @@
 
 The goal of this assignment is to implement by yourself a scikit-learn
 estimator for the OneNearestNeighbor and check that it is working properly.
-
-The nearest neighbor classifier predicts for a point X_i the target y_k of
-the training sample X_k which is the closest to X_i. We measure proximity with
-the Euclidean distance. The model will be evaluated with the accuracy (average
-number of samples corectly classified). You need to implement the `fit`,
-`predict` and `score` methods for this class. The code you write should pass
-the test we implemented. You can run the tests by calling at the root of the
-repo `pytest test_sklearn_questions.py`.
-
-We also ask to respect the pep8 convention: https://pep8.org. This will be
-enforced with `flake8`. You can check that there is no flake8 errors by
-calling `flake8` at the root of the repo.
-
-Finally, you need to write docstring similar to the one in `numpy_questions`
-for the methods you code and for the class. The docstring will be checked using
-`pydocstyle` that you can also call at the root of the repo.
 """
 import numpy as np
-from sklearn.base import ClassifierMixin, BaseEstimator
-from sklearn.utils.validation import check_is_fitted
+from sklearn.base import BaseEstimator, ClassifierMixin
+from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
 from sklearn.utils.multiclass import check_classification_targets
 
 
 class OneNearestNeighbor(ClassifierMixin, BaseEstimator):
-    """One nearest neighbor classifier.
-
-    This estimator predicts the label of each sample using the label of
-    the closest training sample according to the Euclidean distance.
-    """
+    """One nearest neighbor classifier."""
 
     def __init__(self):  # noqa: D107
-        # No hyper-parameters for this simple estimator.
+        # no hyperparameters
         pass
 
     def fit(self, X, y):
@@ -43,24 +23,22 @@ def fit(self, X, y):
         ----------
         X : array-like of shape (n_samples, n_features)
             Training input samples.
+
         y : array-like of shape (n_samples,)
-            Target labels.
+            Training class labels.
 
         Returns
         -------
         self : OneNearestNeighbor
-            Fitted estimator.
+            The fitted classifier.
         """
-        # validate_data sets n_features_in_ and does input validation
-        X, y = validate_data(self, X, y)
+        X, y = check_X_y(X, y)
         check_classification_targets(y)
 
-        # Store training data
-        self.X_train_ = X
-        self.y_train_ = y
-
-        # Attributes expected by scikit-learn
         self.classes_ = np.unique(y)
+        self.X_ = X
+        self.y_ = y
+        self.n_features_in_ = X.shape[1]
 
         return self
 
@@ -77,41 +55,45 @@ def predict(self, X):
         y_pred : ndarray of shape (n_samples,)
             Predicted class labels.
         """
-        check_is_fitted(self)
-
-        # Use validate_data with reset=False so n_features_in_ is checked
-        X = validate_data(self, X, reset=False)
-
-        # Compute Euclidean distance from each test point to each train point
+        check_is_fitted(self, attributes=["X_", "y_"])
+        X = check_array(X)
+
+        # Enforce consistency of number of features with training data
+        if X.shape[1] != self.n_features_in_:
+            raise ValueError(
+                f"X has {X.shape[1]} features, but "
+                f"{self.__class__.__name__} is expecting "
+                f"{self.n_features_in_} features as input"
+            )
+
+        # Compute Euclidean distances to all training points
         # X shape: (n_test, n_features)
-        # self.X_train_ shape: (n_train, n_features)
-        diff = X[:, np.newaxis, :] - self.X_train_[np.newaxis, :, :]
-        distances = np.linalg.norm(diff, axis=2)  # (n_test, n_train)
-
-        # Index of the nearest neighbor for each test sample
+        # self.X_ shape: (n_train, n_features)
+        distances = np.linalg.norm(
+            self.X_[np.newaxis, :, :] - X[:, np.newaxis, :],
+            axis=2,
+        )
         nearest_idx = np.argmin(distances, axis=1)
-
-        # Predicted label is label of the nearest training point
-        y_pred = self.y_train_[nearest_idx]
+        y_pred = self.y_[nearest_idx]
 
         return y_pred
 
     def score(self, X, y):
-        """Compute the mean accuracy on the given test data and labels.
+        """Return the accuracy of the classifier on the given test data.
 
         Parameters
         ----------
         X : array-like of shape (n_samples, n_features)
             Test samples.
+
         y : array-like of shape (n_samples,)
-            True labels for X.
+            True class labels.
 
         Returns
         -------
         score : float
-            Mean accuracy of predictions on X versus y.
+            Classification accuracy.
         """
-        # Validate and also check n_features_in_ against what was seen in fit
-        X, y = validate_data(self, X, y, reset=False)
+        X, y = check_X_y(X, y)
         y_pred = self.predict(X)
         return float(np.mean(y_pred == y))