mathurinm · Atticsu · Nov 13, 2025 · Nov 13, 2025
diff --git a/numpy_questions.py b/numpy_questions.py
@@ -37,12 +37,18 @@ def max_index(X):
         If the input is not a numpy array or
         if the shape is not 2D.
     """
-    i = 0
-    j = 0
+    # Validate input
+    if not isinstance(X, np.ndarray):
+        raise ValueError("X must be a numpy ndarray")
+    if X.ndim != 2:
+        raise ValueError(
+            "X must be a 2D array of shape (n_samples, n_features)"
+        )
 
-    # TODO
-
-    return i, j
+    # Find flat index of max then convert to 2D indices
+    flat_idx = np.argmax(X)
+    i, j = np.unravel_index(flat_idx, X.shape)
+    return int(i), int(j)
 
 
 def wallis_product(n_terms):
@@ -64,4 +70,12 @@ def wallis_product(n_terms):
     """
     # XXX : The n_terms is an int that corresponds to the number of
     # terms in the product. For example 10000.
-    return 0.
+    # Wallis estimate: pi ~= 2 * prod_{k=1..n_terms} 4k^2 / (4k^2 - 1).
+    # No special case is needed for n_terms == 0: `np.arange` is then empty,
+    # `np.prod` of an empty array is 1.0, and the estimate is 2.0 (returning
+    # the bare product 1.0 would be inconsistent with every other n_terms).
+    k = np.arange(1, int(n_terms) + 1, dtype=float)
+    four_k_sq = 4 * k * k
+    factors = four_k_sq / (four_k_sq - 1)
+    product = np.prod(factors)
+    return float(2.0 * product)
diff --git a/sklearn_questions.py b/sklearn_questions.py
@@ -22,53 +22,150 @@
 import numpy as np
 from sklearn.base import BaseEstimator
 from sklearn.base import ClassifierMixin
+from sklearn.utils.validation import check_is_fitted
 from sklearn.utils.validation import check_X_y
 from sklearn.utils.validation import check_array
-from sklearn.utils.validation import check_is_fitted
 from sklearn.utils.multiclass import check_classification_targets
 
-
-class OneNearestNeighbor(BaseEstimator, ClassifierMixin):
-    "OneNearestNeighbor classifier."
+# Compatibility shim: `validate_data` is missing from older scikit-learn
+try:  # pragma: no cover - import behavior depends on sklearn version
+    from sklearn.utils.validation import validate_data as _sk_validate_data
+except ImportError:  # pragma: no cover - only a missing name is expected
+    _sk_validate_data = None
+
+
+def _validate_data(estimator, X, y=None, reset=True):
+    """Validate `X` (and `y` when given) on behalf of `estimator`.
+
+    Prefers scikit-learn's `validate_data`; otherwise uses a local fallback.
+
+    Parameters
+    ----------
+    estimator : estimator instance
+        Object on which `n_features_in_` is stored or checked.
+    X : array-like
+        Input samples.
+    y : array-like, default=None
+        Targets; when None only `X` is validated and returned.
+    reset : bool, default=True
+        True stores `n_features_in_`; False checks `X` against it, if set.
+
+    Returns
+    -------
+    X_checked or (X_checked, y_checked)
+        A single array when `y` is None, otherwise a pair.
+
+    Raises
+    ------
+    ValueError
+        Fallback only: `reset` is False and the feature count differs.
+    """
+    with_target = y is not None
+    if _sk_validate_data is not None:
+        args = (X, y) if with_target else (X,)
+        return _sk_validate_data(estimator, *args, reset=reset)
+    # Fallback for older scikit-learn versions
+    if with_target:
+        X_checked, y_checked = check_X_y(X, y)
+    else:
+        X_checked = check_array(X)
+    n_features = X_checked.shape[1]
+    if reset:
+        estimator.n_features_in_ = n_features
+    elif n_features != getattr(estimator, "n_features_in_", n_features):
+        raise ValueError(
+            f"X has {X_checked.shape[1]} features, but "
+            f"{estimator.__class__.__name__} is expecting "
+            f"{estimator.n_features_in_} features as input"
+        )
+    return (X_checked, y_checked) if with_target else X_checked
+
+
+class OneNearestNeighbor(ClassifierMixin, BaseEstimator):
+    """One-nearest-neighbor classifier using the Euclidean distance.
+
+    Each sample receives the label of its closest training sample; ties go
+    to the first such sample. After `fit` the estimator exposes `classes_`,
+    `n_features_in_` and the stored training set `X_` / `y_`.
+    """
 
     def __init__(self):  # noqa: D107
         pass
 
-    def fit(self, X, y):
-        """Write docstring.
+    # No custom tags to maximize cross-version compatibility
 
-        And describe parameters
+    def fit(self, X, y):
+        """Store the training set used for nearest-neighbor lookup.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training samples.
+        y : array-like of shape (n_samples,)
+            Class label of each training sample.
+
+        Returns
+        -------
+        self : OneNearestNeighbor
+            Fitted estimator.
+
+        Raises
+        ------
+        ValueError
+            If `y` is None; the message mirrors scikit-learn's validation.
         """
-        X, y = check_X_y(X, y)
+        if y is None:
+            # Labels are mandatory; mirror scikit-learn's own error message.
+            raise ValueError(
+                f"This {self.__class__.__name__} estimator requires y to be "
+                "passed, but the target y is None."
+            )
+        X, y = _validate_data(self, X, y)
         check_classification_targets(y)
         self.classes_ = np.unique(y)
         self.n_features_in_ = X.shape[1]
-
-        # XXX fix
+        # Keep the training set: predict searches it for the nearest sample.
+        self.X_, self.y_ = X, y
         return self
 
     def predict(self, X):
-        """Write docstring.
+        """Predict the label of the nearest training sample for each row.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Samples to classify.
 
-        And describe parameters
+        Returns
+        -------
+        y_pred : ndarray of shape (n_samples,)
+            Label of the closest training sample (first one on ties).
         """
         check_is_fitted(self)
-        X = check_array(X)
-        y_pred = np.full(
-            shape=len(X), fill_value=self.classes_[0],
-            dtype=self.classes_.dtype
-        )
-
-        # XXX fix
-        return y_pred
+        X = _validate_data(self, X, reset=False)
+        # Squared Euclidean distances via ||x||^2 + ||t||^2 - 2 x.t, which
+        # avoids materialising an (n_test, n_train, n_features) array.
+        train_sq = np.sum(self.X_ ** 2, axis=1)[None, :]  # (1, n_train)
+        test_sq = np.sum(X ** 2, axis=1)[:, None]  # (n_test, 1)
+        cross = X @ self.X_.T  # (n_test, n_train)
+        nearest = np.argmin(train_sq + test_sq - 2 * cross, axis=1)
+        return self.y_[nearest]
 
     def score(self, X, y):
-        """Write docstring.
-
-        And describe parameters
+        """Return the accuracy of `predict(X)` with respect to `y`.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Test samples.
+        y : array-like of shape (n_samples,)
+            True labels for `X`.
+
+        Returns
+        -------
+        score : float
+            Fraction of samples whose predicted label equals the true label.
         """
-        X, y = check_X_y(X, y)
+        X, y = _validate_data(self, X, y, reset=False)
         y_pred = self.predict(X)
-
-        # XXX fix
-        return y_pred.sum()
+        return float(np.mean(y_pred == y))