From b515b2b3182d5bcd3165058cb9b65e7545b25483 Mon Sep 17 00:00:00 2001 From: NASR Date: Thu, 13 Nov 2025 17:52:51 +0100 Subject: [PATCH 1/4] Implement answers in numpy_questions.py --- numpy_questions.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index 21fcec4b..b713978a 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -40,7 +40,8 @@ def max_index(X): i = 0 j = 0 - # TODO + lin_idx = np.argmax(X) + i, j = np.unravel_index(lin_idx, X.shape) return i, j @@ -64,4 +65,14 @@ def wallis_product(n_terms): """ # XXX : The n_terms is an int that corresponds to the number of # terms in the product. For example 10000. - return 0. + + if not isinstance(n_terms, int) or n_terms < 0: + raise ValueError("n_terms must be a non-negative integer") + + if n_terms == 0: + return 2.0 # produit vide = 1 ⇒ pi ≈ 2 * 1 + + k = np.arange(1, n_terms + 1, dtype=float) + terms = (4 * k * k) / (4 * k * k - 1) + return 2.0 * float(np.prod(terms, dtype=float)) + From 5246ed15e7ece5dd1108b033f81850b26d818786 Mon Sep 17 00:00:00 2001 From: NASR Date: Thu, 13 Nov 2025 19:13:54 +0100 Subject: [PATCH 2/4] Najib Nasr: implement numpy & sklearn questions (tests + style OK) --- numpy_questions.py | 14 ++++---- sklearn_questions.py | 83 ++++++++++++++++++++++++++++---------------- 2 files changed, 61 insertions(+), 36 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index b713978a..908d0a9a 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -5,7 +5,7 @@ * Use automated tools to validate the code (`pytest` and `flake8`) * Submit a Pull-Request on github to practice `git`. -The two functions below are skeleton functions. The docstrings explain what +The two functions below are skeleton functions. The docstrings explain wha are the inputs, the outputs and the expected error. Fill the function to complete the assignment. The code should be able to pass the test that we wrote. To run the tests, use `pytest test_numpy_questions.py` at the root of @@ -39,7 +39,8 @@ def max_index(X): """ i = 0 j = 0 - + if not isinstance(X, np.ndarray) or X.ndim != 2: + raise ValueError("X must be a 2D numpy.ndarray") lin_idx = np.argmax(X) i, j = np.unravel_index(lin_idx, X.shape) @@ -50,17 +51,17 @@ def wallis_product(n_terms): """Implement the Wallis product to compute an approximation of pi. See: - https://en.wikipedia.org/wiki/Wallis_product + https://en.wikipedia.org/wiki/Wallis_produc Parameters ---------- - n_terms : int + n_terms : in Number of steps in the Wallis product. Note that `n_terms=0` will consider the product to be `1`. Returns ------- - pi : float + pi : floa The approximation of order `n_terms` of pi using the Wallis product. """ # XXX : The n_terms is an int that corresponds to the number of @@ -70,9 +71,8 @@ def wallis_product(n_terms): raise ValueError("n_terms must be a non-negative integer") if n_terms == 0: - return 2.0 # produit vide = 1 ⇒ pi ≈ 2 * 1 + return 1.0 k = np.arange(1, n_terms + 1, dtype=float) terms = (4 * k * k) / (4 * k * k - 1) return 2.0 * float(np.prod(terms, dtype=float)) - diff --git a/sklearn_questions.py b/sklearn_questions.py index f65038c6..ee6bcd0d 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -22,53 +22,78 @@ import numpy as np from sklearn.base import BaseEstimator from sklearn.base import ClassifierMixin -from sklearn.utils.validation import check_X_y -from sklearn.utils.validation import check_array -from sklearn.utils.validation import check_is_fitted +from sklearn.utils.validation import check_is_fitted, validate_data from sklearn.utils.multiclass import check_classification_targets -class OneNearestNeighbor(BaseEstimator, ClassifierMixin): - "OneNearestNeighbor classifier." +class OneNearestNeighbor(ClassifierMixin, BaseEstimator): + """OneNearestNeighbor classifier. + + A simple 1-nearest-neighbor classifier using Euclidean distance. + """ def __init__(self): # noqa: D107 pass def fit(self, X, y): - """Write docstring. - - And describe parameters + """Fit the classifier. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Training samples. + y : array-like of shape (n_samples,) + Target labels. + + Returns + ------- + self : OneNearestNeighbor + Fitted estimator. """ - X, y = check_X_y(X, y) + X, y = validate_data(self, X, y) check_classification_targets(y) self.classes_ = np.unique(y) - self.n_features_in_ = X.shape[1] - - # XXX fix + self.X_ = X + self.y_ = y return self def predict(self, X): - """Write docstring. + """Predict class labels for samples in X. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Samples to classify. - And describe parameters + Returns + ------- + y_pred : ndarray of shape (n_samples,) + Predicted labels. """ - check_is_fitted(self) - X = check_array(X) - y_pred = np.full( - shape=len(X), fill_value=self.classes_[0], - dtype=self.classes_.dtype - ) + check_is_fitted(self, attributes=["X_", "y_"]) - # XXX fix - return y_pred + X = validate_data(self, X, reset=False) - def score(self, X, y): - """Write docstring. + diff = X[:, None, :] - self.X_[None, :, :] + dist2 = (diff ** 2).sum(axis=2) + nn_idx = np.argmin(dist2, axis=1) + return self.y_[nn_idx] - And describe parameters + def score(self, X, y): + """Return the mean accuracy on the given test data and labels. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Test samples. + y : array-like of shape (n_samples,) + True labels. + + Returns + ------- + score : float + Mean accuracy of predictions w.r.t. `y`. """ - X, y = check_X_y(X, y) + X, y = validate_data(self, X, y, reset=False) y_pred = self.predict(X) - - # XXX fix - return y_pred.sum() + return float(np.mean(y_pred == y)) From b3fb6914a43193f1ecb358dbc655aa4dda6c1580 Mon Sep 17 00:00:00 2001 From: NASR Date: Thu, 13 Nov 2025 19:19:58 +0100 Subject: [PATCH 3/4] CI: retrigger From 0dfcd0fb5cd091eb5ec137938f826bd1116b6bba Mon Sep 17 00:00:00 2001 From: NASR Date: Thu, 13 Nov 2025 20:19:24 +0100 Subject: [PATCH 4/4] Fix: resolved compatibility and validation issues in OneNearestNeighbor --- sklearn_questions.py | 71 ++++++++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 22 deletions(-) diff --git a/sklearn_questions.py b/sklearn_questions.py index ee6bcd0d..2083c223 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -20,9 +20,8 @@ `pydocstyle` that you can also call at the root of the repo. """ import numpy as np -from sklearn.base import BaseEstimator -from sklearn.base import ClassifierMixin -from sklearn.utils.validation import check_is_fitted, validate_data +from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.utils.validation import check_is_fitted, check_X_y, check_array from sklearn.utils.multiclass import check_classification_targets @@ -35,6 +34,36 @@ class OneNearestNeighbor(ClassifierMixin, BaseEstimator): def __init__(self): # noqa: D107 pass + # ----------------- compatibility helpers ----------------- + def _fit_validate_compat(self, X, y): + """Validate X,y and set n_features_in_ (new/old sklearn).""" + try: + # new sklearn exposes _validate_data on estimators + X, y = self._validate_data(X, y) + except AttributeError: + # old sklearn fallback + X, y = check_X_y(X, y) + self.n_features_in_ = X.shape[1] + return X, y + + def _predict_validate_compat(self, X): + """Validate X at predict time; check n_features_in_ if needed.""" + try: + X = self._validate_data(X, reset=False) + except AttributeError: + X = check_array(X) + nfi = getattr(self, "n_features_in_", None) + if nfi is not None and X.shape[1] != nfi: + msg = ( + f"X has {X.shape[1]} features, but " + f"{self.__class__.__name__} is expecting " + f"{nfi} features as input" + ) + raise ValueError(msg) + + return X + # --------------------------------------------------------- + def fit(self, X, y): """Fit the classifier. @@ -50,7 +79,7 @@ def fit(self, X, y): self : OneNearestNeighbor Fitted estimator. """ - X, y = validate_data(self, X, y) + X, y = self._fit_validate_compat(X, y) check_classification_targets(y) self.classes_ = np.unique(y) self.X_ = X @@ -63,37 +92,35 @@ def predict(self, X): Parameters ---------- X : array-like of shape (n_samples, n_features) - Samples to classify. Returns ------- y_pred : ndarray of shape (n_samples,) - Predicted labels. """ check_is_fitted(self, attributes=["X_", "y_"]) + X = self._predict_validate_compat(X) - X = validate_data(self, X, reset=False) - + # squared distances between X (n,d) and self.X_ (m,d) diff = X[:, None, :] - self.X_[None, :, :] dist2 = (diff ** 2).sum(axis=2) nn_idx = np.argmin(dist2, axis=1) return self.y_[nn_idx] def score(self, X, y): - """Return the mean accuracy on the given test data and labels. - - Parameters - ---------- - X : array-like of shape (n_samples, n_features) - Test samples. - y : array-like of shape (n_samples,) - True labels. + """Return the mean accuracy on the given test data and labels.""" + try: + X, y = self._validate_data(X, y, reset=False) + except AttributeError: + X_chk, y_chk = check_X_y(X, y) + nfi = getattr(self, "n_features_in_", None) + if nfi is not None and X_chk.shape[1] != nfi: + msg = ( + f"X has {X_chk.shape[1]} features, but " + f"{self.__class__.__name__} is expecting " + f"{nfi} features as input" + ) + raise ValueError(msg) + X, y = X_chk, y_chk - Returns - ------- - score : float - Mean accuracy of predictions w.r.t. `y`. - """ - X, y = validate_data(self, X, y, reset=False) y_pred = self.predict(X) return float(np.mean(y_pred == y))