From 567990c59896a52e154840ee5cc5ac45229a9eb2 Mon Sep 17 00:00:00 2001 From: andrfr21 Date: Fri, 14 Nov 2025 19:09:58 +0100 Subject: [PATCH 1/4] Complete numpy assignment functions --- numpy_questions.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index 21fcec4b..a5425bcd 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -40,7 +40,16 @@ def max_index(X): i = 0 j = 0 - # TODO + if not isinstance(X, np.ndarray): + raise ValueError("Input must be a numpy array") + + if X.ndim != 2: + raise ValueError("Input must be a 2D array") + + flat_index = np.argmax(X) + i, j = np.unravel_index(flat_index, X.shape) + + i, j = int(i), int(j) return i, j @@ -64,4 +73,11 @@ def wallis_product(n_terms): """ # XXX : The n_terms is an int that corresponds to the number of # terms in the product. For example 10000. - return 0. + if n_terms == 0: + return 2.0 + + product = 1.0 + for n in range(1, n_terms + 1): + product *= (4 * n * n) / (4 * n * n - 1) + + return 2 * product From 612b85583cd4836dd3efe30a2a8c1d8660edcb23 Mon Sep 17 00:00:00 2001 From: andrfr21 Date: Fri, 14 Nov 2025 19:17:02 +0100 Subject: [PATCH 2/4] Fix flake8 whitespace errors --- numpy_questions.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index a5425bcd..a44aa1e9 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -42,13 +42,13 @@ def max_index(X): if not isinstance(X, np.ndarray): raise ValueError("Input must be a numpy array") - + if X.ndim != 2: raise ValueError("Input must be a 2D array") - + flat_index = np.argmax(X) i, j = np.unravel_index(flat_index, X.shape) - + i, j = int(i), int(j) return i, j @@ -74,10 +74,10 @@ def wallis_product(n_terms): # XXX : The n_terms is an int that corresponds to the number of # terms in the product. For example 10000. if n_terms == 0: - return 2.0 - + return 1.0 + product = 1.0 for n in range(1, n_terms + 1): product *= (4 * n * n) / (4 * n * n - 1) - + return 2 * product From 65523e61d2f3cdd44048ff4ad729c5ec01352a45 Mon Sep 17 00:00:00 2001 From: andrfr21 Date: Fri, 14 Nov 2025 19:30:11 +0100 Subject: [PATCH 3/4] Implement OneNearestNeighbor classifier --- sklearn_questions.py | 87 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 69 insertions(+), 18 deletions(-) diff --git a/sklearn_questions.py b/sklearn_questions.py index f65038c6..85550d22 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -22,53 +22,104 @@ import numpy as np from sklearn.base import BaseEstimator from sklearn.base import ClassifierMixin -from sklearn.utils.validation import check_X_y -from sklearn.utils.validation import check_array from sklearn.utils.validation import check_is_fitted from sklearn.utils.multiclass import check_classification_targets +from sklearn.utils.validation import validate_data -class OneNearestNeighbor(BaseEstimator, ClassifierMixin): - "OneNearestNeighbor classifier." +class OneNearestNeighbor(ClassifierMixin, BaseEstimator): + """OneNearestNeighbor classifier. + + This classifier predicts the class of a sample based on the class of + its nearest neighbor in the training set, using Euclidean distance. + """ def __init__(self): # noqa: D107 pass def fit(self, X, y): - """Write docstring. + """Fit the OneNearestNeighbor classifier. + + Store the training data to use for predictions. + + Parameters + ---------- + X : ndarray of shape (n_samples, n_features) + Training data. + y : ndarray of shape (n_samples,) + Target values (class labels). - And describe parameters + Returns + ------- + self : object + Returns self. """ - X, y = check_X_y(X, y) + X, y = validate_data(self, X, y) check_classification_targets(y) self.classes_ = np.unique(y) - self.n_features_in_ = X.shape[1] - # XXX fix + # Store training data + self.X_ = X + self.y_ = y + return self def predict(self, X): - """Write docstring. + """Predict class labels for samples in X. + + For each sample, find the nearest neighbor in the training set + and return its class label. + + Parameters + ---------- + X : ndarray of shape (n_samples, n_features) + Samples to predict. - And describe parameters + Returns + ------- + y_pred : ndarray of shape (n_samples,) + Predicted class labels. """ check_is_fitted(self) - X = check_array(X) + X = validate_data(self, X, reset=False) + y_pred = np.full( shape=len(X), fill_value=self.classes_[0], dtype=self.classes_.dtype ) - # XXX fix + # For each test sample, find the nearest training sample + for i, x_test in enumerate(X): + # Compute Euclidean distances to all training samples + distances = np.sqrt(np.sum((self.X_ - x_test) ** 2, axis=1)) + # Find the index of the nearest neighbor + nearest_idx = np.argmin(distances) + # Predict the class of the nearest neighbor + y_pred[i] = self.y_[nearest_idx] + return y_pred def score(self, X, y): - """Write docstring. + """Calculate the accuracy score. + + Compute the mean accuracy of predictions on the given test data. - And describe parameters + Parameters + ---------- + X : ndarray of shape (n_samples, n_features) + Test samples. + y : ndarray of shape (n_samples,) + True labels for X. + + Returns + ------- + score : float + Mean accuracy of predictions. """ - X, y = check_X_y(X, y) + X, y = validate_data(self, X, y, reset=False) y_pred = self.predict(X) - # XXX fix - return y_pred.sum() + # Calculate accuracy: proportion of correct predictions + accuracy = np.mean(y_pred == y) + + return accuracy From 6e1b5c4111c380cf30fbd1a5112432de92d40a6f Mon Sep 17 00:00:00 2001 From: andrfr21 Date: Sun, 16 Nov 2025 13:10:23 +0100 Subject: [PATCH 4/4] Part B Submission Francois Andreani --- sklearn_questions.py | 80 ++++++++++++++++++++------------------------ 1 file changed, 36 insertions(+), 44 deletions(-) diff --git a/sklearn_questions.py b/sklearn_questions.py index 85550d22..64c645ba 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -6,7 +6,7 @@ The nearest neighbor classifier predicts for a point X_i the target y_k of the training sample X_k which is the closest to X_i. We measure proximity with the Euclidean distance. The model will be evaluated with the accuracy (average -number of samples corectly classified). You need to implement the `fit`, +number of samples correctly classified). You need to implement the `fit`, `predict` and `score` methods for this class. The code you write should pass the test we implemented. You can run the tests by calling at the root of the repo `pytest test_sklearn_questions.py`. @@ -20,11 +20,9 @@ `pydocstyle` that you can also call at the root of the repo. """ import numpy as np -from sklearn.base import BaseEstimator -from sklearn.base import ClassifierMixin -from sklearn.utils.validation import check_is_fitted +from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.utils.validation import check_X_y, check_array, check_is_fitted from sklearn.utils.multiclass import check_classification_targets -from sklearn.utils.validation import validate_data class OneNearestNeighbor(ClassifierMixin, BaseEstimator): @@ -40,23 +38,23 @@ def __init__(self): # noqa: D107 def fit(self, X, y): """Fit the OneNearestNeighbor classifier. - Store the training data to use for predictions. - Parameters ---------- X : ndarray of shape (n_samples, n_features) - Training data. + Training input samples. y : ndarray of shape (n_samples,) - Target values (class labels). + Target labels associated with each training sample. Returns ------- - self : object - Returns self. + self : OneNearestNeighbor + Fitted estimator. """ - X, y = validate_data(self, X, y) + X, y = check_X_y(X, y) check_classification_targets(y) + self.classes_ = np.unique(y) + self.n_features_in_ = X.shape[1] # Store training data self.X_ = X @@ -65,61 +63,55 @@ def fit(self, X, y): return self def predict(self, X): - """Predict class labels for samples in X. - - For each sample, find the nearest neighbor in the training set - and return its class label. + """Predict class labels for given samples. Parameters ---------- X : ndarray of shape (n_samples, n_features) - Samples to predict. + Samples for which to predict labels. Returns ------- y_pred : ndarray of shape (n_samples,) - Predicted class labels. + Predicted class label for each sample. """ check_is_fitted(self) - X = validate_data(self, X, reset=False) - - y_pred = np.full( - shape=len(X), fill_value=self.classes_[0], - dtype=self.classes_.dtype - ) - - # For each test sample, find the nearest training sample - for i, x_test in enumerate(X): - # Compute Euclidean distances to all training samples - distances = np.sqrt(np.sum((self.X_ - x_test) ** 2, axis=1)) - # Find the index of the nearest neighbor - nearest_idx = np.argmin(distances) - # Predict the class of the nearest neighbor - y_pred[i] = self.y_[nearest_idx] + X = check_array(X) + + # Check number of features against what was seen in fit + if X.shape[1] != self.n_features_in_: + raise ValueError( + f"X has {X.shape[1]} features, but {self.__class__.__name__} " + f"is expecting {self.n_features_in_} features as input" + ) + + # Compute squared Euclidean distances to all training samples + diff = X[:, np.newaxis, :] - self.X_[np.newaxis, :, :] + distances = np.sum(diff ** 2, axis=2) + + # Index of nearest neighbor for each sample + nearest_idx = np.argmin(distances, axis=1) + + # Predicted labels + y_pred = self.y_[nearest_idx] return y_pred def score(self, X, y): - """Calculate the accuracy score. - - Compute the mean accuracy of predictions on the given test data. + """Compute accuracy of the classifier. Parameters ---------- X : ndarray of shape (n_samples, n_features) Test samples. y : ndarray of shape (n_samples,) - True labels for X. + True target labels. Returns ------- score : float - Mean accuracy of predictions. + Accuracy of predictions: fraction of correctly classified samples. """ - X, y = validate_data(self, X, y, reset=False) + X, y = check_X_y(X, y) y_pred = self.predict(X) - - # Calculate accuracy: proportion of correct predictions - accuracy = np.mean(y_pred == y) - - return accuracy + return float(np.mean(y_pred == y))