From 4834c8afc5eb728b2ead2f78a0249a753b433b7c Mon Sep 17 00:00:00 2001 From: Justin Martin Date: Thu, 13 Nov 2025 14:09:51 +0100 Subject: [PATCH 1/5] Update Justin's last name to 'Justin X' --- students.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/students.txt b/students.txt index 91aaa9d6..7984ff0c 100644 --- a/students.txt +++ b/students.txt @@ -61,7 +61,7 @@ Liu Guangyue Liu Yunxian Lucille Maximilien Mahé Blanche -Martin Justin +Martin Justin X Massias Mathurin Massoud Alexandre Mayette Scott From f2ded824c4f49ba2ead3fffb48be19a6b1c3b0af Mon Sep 17 00:00:00 2001 From: justinmartin Date: Thu, 13 Nov 2025 15:41:56 +0100 Subject: [PATCH 2/5] Changes made to the questions files --- numpy_questions.py | 21 +++++++++---- sklearn_questions.py | 72 ++++++++++++++++++++++++++++++++------------ 2 files changed, 67 insertions(+), 26 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index 21fcec4b..0a5b45c5 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -15,6 +15,8 @@ This will be enforced with `flake8`. You can check that there is no flake8 errors by calling `flake8` at the root of the repo. """ + + import numpy as np @@ -39,9 +41,13 @@ def max_index(X): """ i = 0 j = 0 - - # TODO - + if not isinstance(X, np.ndarray): + raise ValueError("Input must be a numpy array") + if len(X.shape) != 2: + raise ValueError("Input must be a 2D array") + + id = np.argmax(X) + i, j = np.unravel_index(id, X.shape) return i, j @@ -62,6 +68,9 @@ def wallis_product(n_terms): pi : float The approximation of order `n_terms` of pi using the Wallis product. """ - # XXX : The n_terms is an int that corresponds to the number of - # terms in the product. For example 10000. - return 0. + if n_terms == 0: + return 1 + pi = 1 + for i in range(1, n_terms + 1): + pi = pi * (4 * i**2) / (4 * i**2 - 1) + return pi * 2 diff --git a/sklearn_questions.py b/sklearn_questions.py index f65038c6..0c0da5da 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -23,52 +23,84 @@ from sklearn.base import BaseEstimator from sklearn.base import ClassifierMixin from sklearn.utils.validation import check_X_y -from sklearn.utils.validation import check_array from sklearn.utils.validation import check_is_fitted +from sklearn.utils.validation import validate_data from sklearn.utils.multiclass import check_classification_targets -class OneNearestNeighbor(BaseEstimator, ClassifierMixin): +class OneNearestNeighbor(ClassifierMixin, BaseEstimator): "OneNearestNeighbor classifier." def __init__(self): # noqa: D107 pass def fit(self, X, y): - """Write docstring. - - And describe parameters """ - X, y = check_X_y(X, y) + Fit the OneNearestNeighbor classifier according to X, y. + Parameters + ---------- + X : ndarray of shape (n_samples, n_features) + Training data, with n_samples the number of samples and + n_features the number of features. + y : ndarray of shape (n_samples,) + Target data, with n_samples the number of samples + + Returns + ------- + self : object + Fitted estimator. + """ + X, y = validate_data(self, X, y) check_classification_targets(y) self.classes_ = np.unique(y) - self.n_features_in_ = X.shape[1] - - # XXX fix + self.X_ = X + self.y_ = y return self def predict(self, X): - """Write docstring. + """ + Predict the labels based on X (the data provided) with + the NearestNeighbor Estimator. + Parameters + ---------- + X : ndarray of shape (n_samples, n_features) + Data to predict, with n_samples the number of samples and + n_features the number of features. + + Returns + ------- + y_pred : ndarray of shape (n_samples,) with the predicted values. + Predicted values for X. - And describe parameters """ check_is_fitted(self) - X = check_array(X) + X = validate_data(self, X, reset=False) y_pred = np.full( shape=len(X), fill_value=self.classes_[0], dtype=self.classes_.dtype ) - - # XXX fix + for i in range(len(X)): + d = np.linalg.norm(self.X_ - X[i, :], axis=1) + nearest_index = d.argmin() + y_pred[i] = self.y_[nearest_index] return y_pred def score(self, X, y): - """Write docstring. - - And describe parameters + """ + Score the prediction by comparing the data with + the output of the predict function. + Parameters + ---------- + X : ndarray of shape (n_sample, n_features) + Data to predict. + y : ndarray of shape (n_sample, ) + Targeted data. + Returns + ------- + score : float + Mean accuracy of the model on the X, y dataset. """ X, y = check_X_y(X, y) y_pred = self.predict(X) - - # XXX fix - return y_pred.sum() + y_pred = (y_pred == y) + return y_pred.sum()/len(y_pred) From ac0764d2cc02d313a8f628caa4f75330ef6c8419 Mon Sep 17 00:00:00 2001 From: justinmartin Date: Thu, 13 Nov 2025 15:59:38 +0100 Subject: [PATCH 3/5] Updated sklearn questions --- sklearn_questions.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sklearn_questions.py b/sklearn_questions.py index 0c0da5da..3d39a8ae 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -23,8 +23,8 @@ from sklearn.base import BaseEstimator from sklearn.base import ClassifierMixin from sklearn.utils.validation import check_X_y +from sklearn.utils.validation import check_array from sklearn.utils.validation import check_is_fitted -from sklearn.utils.validation import validate_data from sklearn.utils.multiclass import check_classification_targets @@ -50,9 +50,10 @@ def fit(self, X, y): self : object Fitted estimator. """ - X, y = validate_data(self, X, y) + X, y = check_X_y(X, y) check_classification_targets(y) self.classes_ = np.unique(y) + self.n_features_in_ = X.shape[1] self.X_ = X self.y_ = y return self @@ -74,7 +75,7 @@ def predict(self, X): """ check_is_fitted(self) - X = validate_data(self, X, reset=False) + X = check_array(X) y_pred = np.full( shape=len(X), fill_value=self.classes_[0], dtype=self.classes_.dtype From 3de8b752f1731d2aa09a3582c98ce924416a6bb9 Mon Sep 17 00:00:00 2001 From: justinmartin Date: Thu, 13 Nov 2025 16:29:49 +0100 Subject: [PATCH 4/5] Updates made on the format --- sklearn_questions.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/sklearn_questions.py b/sklearn_questions.py index 3d39a8ae..ba924a6e 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -29,7 +29,7 @@ class OneNearestNeighbor(ClassifierMixin, BaseEstimator): - "OneNearestNeighbor classifier." + """OneNearestNeighbor classifier.""" def __init__(self): # noqa: D107 pass @@ -37,6 +37,7 @@ def __init__(self): # noqa: D107 def fit(self, X, y): """ Fit the OneNearestNeighbor classifier according to X, y. + Parameters ---------- X : ndarray of shape (n_samples, n_features) @@ -56,12 +57,13 @@ def fit(self, X, y): self.n_features_in_ = X.shape[1] self.X_ = X self.y_ = y + return self def predict(self, X): """ - Predict the labels based on X (the data provided) with - the NearestNeighbor Estimator. + Predict the labels based on X (the data provided) with the NearestNeighbor Estimator. + Parameters ---------- X : ndarray of shape (n_samples, n_features) @@ -84,12 +86,13 @@ def predict(self, X): d = np.linalg.norm(self.X_ - X[i, :], axis=1) nearest_index = d.argmin() y_pred[i] = self.y_[nearest_index] + return y_pred def score(self, X, y): """ - Score the prediction by comparing the data with - the output of the predict function. + Score the prediction by comparing the data with the output of the predict function. + Parameters ---------- X : ndarray of shape (n_sample, n_features) @@ -104,4 +107,5 @@ def score(self, X, y): X, y = check_X_y(X, y) y_pred = self.predict(X) y_pred = (y_pred == y) + return y_pred.sum()/len(y_pred) From efcf4402288a3b875e2d6d0ccfbd7ddd1f44b9fe Mon Sep 17 00:00:00 2001 From: justinmartin Date: Thu, 13 Nov 2025 16:35:57 +0100 Subject: [PATCH 5/5] Formatting update --- numpy_questions.py | 1 - sklearn_questions.py | 7 ++----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index 0a5b45c5..d17d5a5a 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -45,7 +45,6 @@ def max_index(X): raise ValueError("Input must be a numpy array") if len(X.shape) != 2: raise ValueError("Input must be a 2D array") - id = np.argmax(X) i, j = np.unravel_index(id, X.shape) return i, j diff --git a/sklearn_questions.py b/sklearn_questions.py index ba924a6e..eae45ad9 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -57,12 +57,11 @@ def fit(self, X, y): self.n_features_in_ = X.shape[1] self.X_ = X self.y_ = y - return self def predict(self, X): """ - Predict the labels based on X (the data provided) with the NearestNeighbor Estimator. + Predict the labels based on X with the NearestNeighbor Estimator. Parameters ---------- @@ -86,12 +85,11 @@ def predict(self, X): d = np.linalg.norm(self.X_ - X[i, :], axis=1) nearest_index = d.argmin() y_pred[i] = self.y_[nearest_index] - return y_pred def score(self, X, y): """ - Score the prediction by comparing the data with the output of the predict function. + Score the prediction with the predict function. Parameters ---------- @@ -107,5 +105,4 @@ def score(self, X, y): X, y = check_X_y(X, y) y_pred = self.predict(X) y_pred = (y_pred == y) - return y_pred.sum()/len(y_pred)