From 7b484fa29431b679ae550d925ed9cd95c495cf76 Mon Sep 17 00:00:00 2001 From: gabdreik Date: Sun, 16 Nov 2025 12:38:46 +0100 Subject: [PATCH 1/2] Complete numpy and sklearn assignment --- numpy_questions.py | 38 ++++++++++++++++---- sklearn_questions.py | 85 +++++++++++++++++++++++++++++++------------- 2 files changed, 91 insertions(+), 32 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index 21fcec4b..20be6b06 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -37,12 +37,16 @@ def max_index(X): If the input is not a numpy array or if the shape is not 2D. """ - i = 0 - j = 0 + if not isinstance(X, np.ndarray): + raise ValueError("X must be a numpy array.") - # TODO + if X.ndim != 2: + raise ValueError("X must be a 2D array.") - return i, j + flat_index = np.argmax(X) + i, j = np.unravel_index(flat_index, X.shape) + + return int(i), int(j) def wallis_product(n_terms): @@ -61,7 +65,27 @@ def wallis_product(n_terms): ------- pi : float The approximation of order `n_terms` of pi using the Wallis product. + + Raises + ------ + ValueError + If n_terms is negative or not an integer. """ - # XXX : The n_terms is an int that corresponds to the number of - # terms in the product. For example 10000. - return 0. + if not isinstance(n_terms, int): + raise ValueError("n_terms must be an integer.") + + if n_terms < 0: + raise ValueError("n_terms must be non-negative.") + + if n_terms == 0: + return 1.0 + + product = 1.0 + + for k in range(1, n_terms + 1): + numerator = 4.0 * k * k + denominator = numerator - 1.0 + product *= numerator / denominator + + pi_approx = 2.0 * product + return pi_approx diff --git a/sklearn_questions.py b/sklearn_questions.py index f65038c6..46e8ae1f 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -22,53 +22,88 @@ import numpy as np from sklearn.base import BaseEstimator from sklearn.base import ClassifierMixin -from sklearn.utils.validation import check_X_y -from sklearn.utils.validation import check_array from sklearn.utils.validation import check_is_fitted +from sklearn.utils.validation import validate_data from sklearn.utils.multiclass import check_classification_targets -class OneNearestNeighbor(BaseEstimator, ClassifierMixin): - "OneNearestNeighbor classifier." +class OneNearestNeighbor(ClassifierMixin, BaseEstimator): + """One nearest neighbor classifier. - def __init__(self): # noqa: D107 + This classifier memorizes the training data and, at prediction time, + assigns to each sample the label of the closest training sample in + Euclidean distance. + """ + + def __init__(self): + """Initialize the one nearest neighbor classifier.""" pass def fit(self, X, y): - """Write docstring. + """Fit the one nearest neighbor classifier. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Training data. + + y : array-like of shape (n_samples,) + Target labels. - And describe parameters + Returns + ------- + self : OneNearestNeighbor + Fitted estimator. """ - X, y = check_X_y(X, y) + X, y = validate_data(self, X, y) check_classification_targets(y) self.classes_ = np.unique(y) - self.n_features_in_ = X.shape[1] - - # XXX fix + self.X_ = X + self.y_ = y return self def predict(self, X): - """Write docstring. + """Predict class labels for samples in X. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Input data. - And describe parameters + Returns + ------- + y_pred : ndarray of shape (n_samples,) + Predicted labels. """ check_is_fitted(self) - X = check_array(X) - y_pred = np.full( - shape=len(X), fill_value=self.classes_[0], - dtype=self.classes_.dtype - ) + X = validate_data(self, X, reset=False) + + y_pred = np.empty(X.shape[0], dtype=self.y_.dtype) + + for i, x in enumerate(X): + diff = self.X_ - x + dist_sq = np.sum(diff ** 2, axis=1) + nearest_idx = np.argmin(dist_sq) + y_pred[i] = self.y_[nearest_idx] - # XXX fix return y_pred def score(self, X, y): - """Write docstring. + """Return the mean accuracy on the given test data and labels. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Test samples. - And describe parameters + y : array-like of shape (n_samples,) + True labels. + + Returns + ------- + score : float + Mean accuracy. """ - X, y = check_X_y(X, y) + X, y = validate_data(self, X, y, reset=False) y_pred = self.predict(X) - - # XXX fix - return y_pred.sum() + return float(np.mean(y_pred == y)) From 757fe324f74e470ad8f3c17c3cc0253e7ad22a0d Mon Sep 17 00:00:00 2001 From: gabdreik Date: Sun, 16 Nov 2025 13:34:11 +0100 Subject: [PATCH 2/2] Simplify OneNearestNeighbor and add feature check in predict --- sklearn_questions.py | 78 +++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 38 deletions(-) diff --git a/sklearn_questions.py b/sklearn_questions.py index 46e8ae1f..64f328bf 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -19,28 +19,36 @@ for the methods you code and for the class. The docstring will be checked using `pydocstyle` that you can also call at the root of the repo. """ -import numpy as np -from sklearn.base import BaseEstimator -from sklearn.base import ClassifierMixin -from sklearn.utils.validation import check_is_fitted -from sklearn.utils.validation import validate_data -from sklearn.utils.multiclass import check_classification_targets +from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.utils.validation import check_X_y, check_array, check_is_fitted +from sklearn.utils.multiclass import unique_labels +from sklearn.metrics import accuracy_score +from sklearn.neighbors import KNeighborsClassifier class OneNearestNeighbor(ClassifierMixin, BaseEstimator): - """One nearest neighbor classifier. + """One-nearest-neighbor classifier. - This classifier memorizes the training data and, at prediction time, - assigns to each sample the label of the closest training sample in - Euclidean distance. - """ + This classifier predicts, for each sample, the label of the closest + training sample, using the Euclidean distance. + + Attributes + ---------- + classes_ : ndarray of shape (n_classes,) + Class labels known to the classifier. + + X_ : ndarray of shape (n_samples, n_features) + Training data stored after fitting. - def __init__(self): - """Initialize the one nearest neighbor classifier.""" - pass + y_ : ndarray of shape (n_samples,) + Target values stored after fitting. + + n_features_in_ : int + Number of features seen during fit. + """ def fit(self, X, y): - """Fit the one nearest neighbor classifier. + """Fit the one-nearest-neighbor classifier. Parameters ---------- @@ -48,45 +56,40 @@ def fit(self, X, y): Training data. y : array-like of shape (n_samples,) - Target labels. + Target values. Returns ------- self : OneNearestNeighbor Fitted estimator. """ - X, y = validate_data(self, X, y) - check_classification_targets(y) - self.classes_ = np.unique(y) + X, y = check_X_y(X, y, accept_sparse=False) + knn = KNeighborsClassifier(n_neighbors=1) + knn.fit(X, y) + + self._knn = knn + self.classes_ = unique_labels(y) + self.n_features_in_ = X.shape[1] self.X_ = X self.y_ = y return self def predict(self, X): - """Predict class labels for samples in X. + """Predict class labels for the provided data. Parameters ---------- X : array-like of shape (n_samples, n_features) - Input data. + Samples to classify. Returns ------- y_pred : ndarray of shape (n_samples,) - Predicted labels. + Predicted class labels. """ - check_is_fitted(self) - X = validate_data(self, X, reset=False) - - y_pred = np.empty(X.shape[0], dtype=self.y_.dtype) - - for i, x in enumerate(X): - diff = self.X_ - x - dist_sq = np.sum(diff ** 2, axis=1) - nearest_idx = np.argmin(dist_sq) - y_pred[i] = self.y_[nearest_idx] - - return y_pred + check_is_fitted(self, ("_knn", "classes_", "n_features_in_")) + X = check_array(X, accept_sparse=False) + return self._knn.predict(X) def score(self, X, y): """Return the mean accuracy on the given test data and labels. @@ -97,13 +100,12 @@ def score(self, X, y): Test samples. y : array-like of shape (n_samples,) - True labels. + True labels for X. Returns ------- score : float - Mean accuracy. + Mean accuracy of the predictions on the given data. """ - X, y = validate_data(self, X, y, reset=False) y_pred = self.predict(X) - return float(np.mean(y_pred == y)) + return accuracy_score(y, y_pred)