From 75ead028ed1c7d71419616729f7229934e17ae73 Mon Sep 17 00:00:00 2001 From: felixmeyer6 Date: Thu, 13 Nov 2025 16:48:11 +0100 Subject: [PATCH 1/4] Answered NumPy and Sklearn questions --- numpy_questions.py | 25 ++++++++++----- sklearn_questions.py | 74 +++++++++++++++++++++++++++++++------------- 2 files changed, 70 insertions(+), 29 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index 21fcec4b..7e3200eb 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -15,6 +15,7 @@ This will be enforced with `flake8`. You can check that there is no flake8 errors by calling `flake8` at the root of the repo. """ + import numpy as np @@ -37,12 +38,19 @@ def max_index(X): If the input is not a numpy array or if the shape is not 2D. """ - i = 0 - j = 0 - # TODO + if type(X) is not np.ndarray: + raise ValueError("Input is not a numpy array") + if X.ndim != 2: + raise ValueError("Input is not 2D") + + maxid = np.argmax(X) + ncols = X.shape[1] + + row = maxid // ncols + col = maxid % ncols - return i, j + return (row, col) def wallis_product(n_terms): @@ -62,6 +70,9 @@ def wallis_product(n_terms): pi : float The approximation of order `n_terms` of pi using the Wallis product. """ - # XXX : The n_terms is an int that corresponds to the number of - # terms in the product. For example 10000. - return 0. + if n_terms == 0: + return 1 + else: + num = 4 * np.arange(1, n_terms + 1) ** 2 + pi = 2 * np.prod(num / (num - 1)) + return pi diff --git a/sklearn_questions.py b/sklearn_questions.py index f65038c6..07c31563 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -19,56 +19,86 @@ for the methods you code and for the class. The docstring will be checked using `pydocstyle` that you can also call at the root of the repo. """ + import numpy as np -from sklearn.base import BaseEstimator -from sklearn.base import ClassifierMixin -from sklearn.utils.validation import check_X_y -from sklearn.utils.validation import check_array -from sklearn.utils.validation import check_is_fitted +from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.utils.multiclass import check_classification_targets +from sklearn.utils.validation import check_is_fitted, check_X_y, validate_data -class OneNearestNeighbor(BaseEstimator, ClassifierMixin): +class OneNearestNeighbor(ClassifierMixin, BaseEstimator): "OneNearestNeighbor classifier." def __init__(self): # noqa: D107 pass def fit(self, X, y): - """Write docstring. - - And describe parameters + """Fits a nearest neighbor model. + + Parameters + ---------- + X : ndarray of shape (n_samples, n_features) + The features vector. + y : ndarray of shape (n_samples, 1) + The target vector. + + Returns + ------- + The fitted model. """ X, y = check_X_y(X, y) + X, y = validate_data(self, X=X, y=y) check_classification_targets(y) self.classes_ = np.unique(y) self.n_features_in_ = X.shape[1] + self.X_ = X + self.y_ = y - # XXX fix return self def predict(self, X): - """Write docstring. + """Predicts the target y from a feature vector thanks to a nearest neighbor model that was previously fitted. - And describe parameters + Parameters + ---------- + X : ndarray of shape (n_samples, n_features) + The feature vector from which to predict y. + + Returns + ------- + y_pred : ndarray of shape (n_samples, 1) + The predicted value for y. """ check_is_fitted(self) - X = check_array(X) + X = validate_data(self, X=X, reset=False) y_pred = np.full( - shape=len(X), fill_value=self.classes_[0], - dtype=self.classes_.dtype + shape=len(X), fill_value=self.classes_[0], dtype=self.classes_.dtype ) - # XXX fix + distances = np.linalg.norm( + X[:, np.newaxis, :] - self.X_[np.newaxis, :, :], axis=2 + ) + nearest_idx = np.argmin(distances, axis=1) + y_pred = self.y_[nearest_idx] + return y_pred def score(self, X, y): - """Write docstring. - - And describe parameters + """Returns the score of a model by evaluating its prediction against the ground truth. + + Parameters + ---------- + X : ndarray of shape (n_samples, n_features) + The features vector. + y : ndarray of shape (n_samples, 1) + The target vector. + + Returns + ------- + score : float + The score of the model. """ X, y = check_X_y(X, y) y_pred = self.predict(X) - - # XXX fix - return y_pred.sum() + score = np.mean(y_pred == y) + return score From 021dda376189b47c30a9f1e6c0fedb74797f9836 Mon Sep 17 00:00:00 2001 From: felixmeyer6 Date: Thu, 13 Nov 2025 16:58:43 +0100 Subject: [PATCH 2/4] Fix lines over 79 chars --- sklearn_questions.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn_questions.py b/sklearn_questions.py index 07c31563..d4efb8e1 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -57,7 +57,8 @@ def fit(self, X, y): return self def predict(self, X): - """Predicts the target y from a feature vector thanks to a nearest neighbor model that was previously fitted. + """Predicts the target y from a feature vector thanks to a nearest + neighbor model that was previously fitted. Parameters ---------- @@ -84,7 +85,8 @@ def predict(self, X): return y_pred def score(self, X, y): - """Returns the score of a model by evaluating its prediction against the ground truth. + """Returns the score of a model by evaluating its prediction against + the ground truth. Parameters ---------- From 006e82a61c0681b9872cd711946815a65e37fe02 Mon Sep 17 00:00:00 2001 From: felixmeyer6 Date: Thu, 13 Nov 2025 17:02:37 +0100 Subject: [PATCH 3/4] Fix validate_data missing --- sklearn_questions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn_questions.py b/sklearn_questions.py index d4efb8e1..13f6fc29 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -23,7 +23,7 @@ import numpy as np from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.utils.multiclass import check_classification_targets -from sklearn.utils.validation import check_is_fitted, check_X_y, validate_data +from sklearn.utils.validation import check_array, check_is_fitted, check_X_y class OneNearestNeighbor(ClassifierMixin, BaseEstimator): @@ -47,7 +47,7 @@ def fit(self, X, y): The fitted model. """ X, y = check_X_y(X, y) - X, y = validate_data(self, X=X, y=y) + X = check_array(X) check_classification_targets(y) self.classes_ = np.unique(y) self.n_features_in_ = X.shape[1] @@ -71,7 +71,7 @@ def predict(self, X): The predicted value for y. """ check_is_fitted(self) - X = validate_data(self, X=X, reset=False) + X = check_array(X) y_pred = np.full( shape=len(X), fill_value=self.classes_[0], dtype=self.classes_.dtype ) From 7be25a60f77bd568839e935a26b78b3618e05f42 Mon Sep 17 00:00:00 2001 From: felixmeyer6 Date: Thu, 13 Nov 2025 17:11:04 +0100 Subject: [PATCH 4/4] Fix doc style --- numpy_questions.py | 1 - sklearn_questions.py | 10 ++++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index 7e3200eb..a6104d44 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -38,7 +38,6 @@ def max_index(X): If the input is not a numpy array or if the shape is not 2D. """ - if type(X) is not np.ndarray: raise ValueError("Input is not a numpy array") if X.ndim != 2: diff --git a/sklearn_questions.py b/sklearn_questions.py index 13f6fc29..9cce2612 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -27,13 +27,13 @@ class OneNearestNeighbor(ClassifierMixin, BaseEstimator): - "OneNearestNeighbor classifier." + """OneNearestNeighbor classifier.""" def __init__(self): # noqa: D107 pass def fit(self, X, y): - """Fits a nearest neighbor model. + """Fit a nearest neighbor model. Parameters ---------- @@ -57,8 +57,7 @@ def fit(self, X, y): return self def predict(self, X): - """Predicts the target y from a feature vector thanks to a nearest - neighbor model that was previously fitted. + """Predict target from a feature vector with a nearest neighbor model. Parameters ---------- @@ -85,8 +84,7 @@ def predict(self, X): return y_pred def score(self, X, y): - """Returns the score of a model by evaluating its prediction against - the ground truth. + """Return the score of a model evaluating against ground truth. Parameters ----------