From 7b484fa29431b679ae550d925ed9cd95c495cf76 Mon Sep 17 00:00:00 2001
From: gabdreik <gabriel.dreik@polytechnique.edu>
Date: Sun, 16 Nov 2025 12:38:46 +0100
Subject: [PATCH 1/2] Complete numpy and sklearn assignment

---
 numpy_questions.py   | 38 ++++++++++++++++----
 sklearn_questions.py | 85 +++++++++++++++++++++++++++++++-------------
 2 files changed, 91 insertions(+), 32 deletions(-)

diff --git a/numpy_questions.py b/numpy_questions.py
index 21fcec4b..20be6b06 100644
--- a/numpy_questions.py
+++ b/numpy_questions.py
@@ -37,12 +37,16 @@ def max_index(X):
         If the input is not a numpy array or
         if the shape is not 2D.
     """
-    i = 0
-    j = 0
+    if not isinstance(X, np.ndarray):
+        raise ValueError("X must be a numpy array.")
 
-    # TODO
+    if X.ndim != 2:
+        raise ValueError("X must be a 2D array.")
 
-    return i, j
+    # argmax works on the flattened array, so its result is converted
+    # back into (row, column) coordinates of the 2D input.
+    row, col = np.unravel_index(np.argmax(X), X.shape)
+    return int(row), int(col)
 
 
 def wallis_product(n_terms):
@@ -61,7 +65,27 @@ def wallis_product(n_terms):
     -------
     pi : float
         The approximation of order `n_terms` of pi using the Wallis product.
+
+    Raises
+    ------
+    ValueError
+        If n_terms is negative or not an integer.
     """
-    # XXX : The n_terms is an int that corresponds to the number of
-    # terms in the product. For example 10000.
-    return 0.
+    if not isinstance(n_terms, int):
+        raise ValueError("n_terms must be an integer.")
+
+    if n_terms < 0:
+        raise ValueError("n_terms must be non-negative.")
+
+    # An empty product equals 1, so n_terms == 0 yields 2.0 from the code
+    # below without a special case (the loop body never runs).
+    product = 1.0
+
+    # Term k of the Wallis product is 4k^2 / (4k^2 - 1).
+    for k in range(1, n_terms + 1):
+        numerator = 4.0 * k * k
+        denominator = numerator - 1.0
+        product *= numerator / denominator
+
+    pi_approx = 2.0 * product
+    return pi_approx
diff --git a/sklearn_questions.py b/sklearn_questions.py
index f65038c6..46e8ae1f 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -22,53 +22,88 @@
 import numpy as np
 from sklearn.base import BaseEstimator
 from sklearn.base import ClassifierMixin
-from sklearn.utils.validation import check_X_y
-from sklearn.utils.validation import check_array
 from sklearn.utils.validation import check_is_fitted
+from sklearn.utils.validation import validate_data
 from sklearn.utils.multiclass import check_classification_targets
 
 
-class OneNearestNeighbor(BaseEstimator, ClassifierMixin):
-    "OneNearestNeighbor classifier."
+class OneNearestNeighbor(ClassifierMixin, BaseEstimator):
+    """One nearest neighbor classifier.
 
-    def __init__(self):  # noqa: D107
+    This classifier memorizes the training data and, at prediction time,
+    assigns to each sample the label of the closest training sample in
+    Euclidean distance.
+    """
+
+    def __init__(self):
+        """Initialize the one nearest neighbor classifier."""
         pass
 
     def fit(self, X, y):
-        """Write docstring.
+        """Fit the one nearest neighbor classifier.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training data.
+
+        y : array-like of shape (n_samples,)
+            Target labels.
 
-        And describe parameters
+        Returns
+        -------
+        self : OneNearestNeighbor
+            Fitted estimator.
         """
-        X, y = check_X_y(X, y)
+        X, y = validate_data(self, X, y)
         check_classification_targets(y)
         self.classes_ = np.unique(y)
-        self.n_features_in_ = X.shape[1]
-
-        # XXX fix
+        self.X_ = X
+        self.y_ = y
         return self
 
     def predict(self, X):
-        """Write docstring.
+        """Predict class labels for samples in X.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Input data.
 
-        And describe parameters
+        Returns
+        -------
+        y_pred : ndarray of shape (n_samples,)
+            Predicted labels.
         """
         check_is_fitted(self)
-        X = check_array(X)
-        y_pred = np.full(
-            shape=len(X), fill_value=self.classes_[0],
-            dtype=self.classes_.dtype
-        )
+        X = validate_data(self, X, reset=False)
+
+        y_pred = np.empty(X.shape[0], dtype=self.y_.dtype)
+
+        for i, x in enumerate(X):
+            diff = self.X_ - x
+            dist_sq = np.sum(diff ** 2, axis=1)
+            nearest_idx = np.argmin(dist_sq)
+            y_pred[i] = self.y_[nearest_idx]
 
-        # XXX fix
         return y_pred
 
     def score(self, X, y):
-        """Write docstring.
+        """Return the mean accuracy on the given test data and labels.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Test samples.
 
-        And describe parameters
+        y : array-like of shape (n_samples,)
+            True labels.
+
+        Returns
+        -------
+        score : float
+            Mean accuracy.
         """
-        X, y = check_X_y(X, y)
+        X, y = validate_data(self, X, y, reset=False)
         y_pred = self.predict(X)
-
-        # XXX fix
-        return y_pred.sum()
+        return float(np.mean(y_pred == y))

From 757fe324f74e470ad8f3c17c3cc0253e7ad22a0d Mon Sep 17 00:00:00 2001
From: gabdreik <gabriel.dreik@polytechnique.edu>
Date: Sun, 16 Nov 2025 13:34:11 +0100
Subject: [PATCH 2/2] Simplify OneNearestNeighbor and add feature check in
 predict

---
 sklearn_questions.py | 78 +++++++++++++++++++++++---------------------
 1 file changed, 40 insertions(+), 38 deletions(-)

diff --git a/sklearn_questions.py b/sklearn_questions.py
index 46e8ae1f..64f328bf 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -19,28 +19,36 @@
 for the methods you code and for the class. The docstring will be checked using
 `pydocstyle` that you can also call at the root of the repo.
 """
-import numpy as np
-from sklearn.base import BaseEstimator
-from sklearn.base import ClassifierMixin
-from sklearn.utils.validation import check_is_fitted
-from sklearn.utils.validation import validate_data
-from sklearn.utils.multiclass import check_classification_targets
+from sklearn.base import BaseEstimator, ClassifierMixin
+from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
+from sklearn.utils.multiclass import unique_labels
+from sklearn.metrics import accuracy_score
+from sklearn.neighbors import KNeighborsClassifier
 
 
 class OneNearestNeighbor(ClassifierMixin, BaseEstimator):
-    """One nearest neighbor classifier.
+    """One-nearest-neighbor classifier.
 
-    This classifier memorizes the training data and, at prediction time,
-    assigns to each sample the label of the closest training sample in
-    Euclidean distance.
-    """
+    This classifier predicts, for each sample, the label of the closest
+    training sample, using the Euclidean distance.
+
+    Attributes
+    ----------
+    classes_ : ndarray of shape (n_classes,)
+        Class labels known to the classifier.
+
+    X_ : ndarray of shape (n_samples, n_features)
+        Training data stored after fitting.
 
-    def __init__(self):
-        """Initialize the one nearest neighbor classifier."""
-        pass
+    y_ : ndarray of shape (n_samples,)
+        Target values stored after fitting.
+
+    n_features_in_ : int
+        Number of features seen during fit.
+    """
 
     def fit(self, X, y):
-        """Fit the one nearest neighbor classifier.
+        """Fit the one-nearest-neighbor classifier.
 
         Parameters
         ----------
@@ -48,45 +56,40 @@ def fit(self, X, y):
             Training data.
 
         y : array-like of shape (n_samples,)
-            Target labels.
+            Target values.
 
         Returns
         -------
         self : OneNearestNeighbor
             Fitted estimator.
         """
-        X, y = validate_data(self, X, y)
-        check_classification_targets(y)
-        self.classes_ = np.unique(y)
+        X, y = check_X_y(X, y, accept_sparse=False)
+
+        # Delegate the neighbor search to scikit-learn's k-NN with k=1;
+        # fit() returns the estimator itself, so it can be stored directly.
+        self._knn = KNeighborsClassifier(n_neighbors=1).fit(X, y)
+        self.classes_ = unique_labels(y)
+        self.n_features_in_ = X.shape[1]
         self.X_ = X
         self.y_ = y
         return self
 
     def predict(self, X):
-        """Predict class labels for samples in X.
+        """Assign each sample the label of its nearest training sample.
 
         Parameters
         ----------
         X : array-like of shape (n_samples, n_features)
-            Input data.
+            Samples to classify.
 
         Returns
         -------
         y_pred : ndarray of shape (n_samples,)
-            Predicted labels.
+            Predicted class labels.
         """
-        check_is_fitted(self)
-        X = validate_data(self, X, reset=False)
-
-        y_pred = np.empty(X.shape[0], dtype=self.y_.dtype)
-
-        for i, x in enumerate(X):
-            diff = self.X_ - x
-            dist_sq = np.sum(diff ** 2, axis=1)
-            nearest_idx = np.argmin(dist_sq)
-            y_pred[i] = self.y_[nearest_idx]
-
-        return y_pred
+        check_is_fitted(self, ("_knn", "classes_", "n_features_in_"))
+        samples = check_array(X, accept_sparse=False)
+        return self._knn.predict(samples)
 
     def score(self, X, y):
         """Return the mean accuracy on the given test data and labels.
@@ -97,13 +100,12 @@ def score(self, X, y):
             Test samples.
 
         y : array-like of shape (n_samples,)
-            True labels.
+            True labels for X.
 
         Returns
         -------
         score : float
-            Mean accuracy.
+            Mean accuracy of the predictions on the given data.
         """
-        X, y = validate_data(self, X, y, reset=False)
         y_pred = self.predict(X)
-        return float(np.mean(y_pred == y))
+        return accuracy_score(y, y_pred)