From 60c14c934ffc403f97c6e1e6d3ac1c3d50dd4f53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=B9=BF=E6=82=A6?= Date: Fri, 14 Nov 2025 08:43:02 +0100 Subject: [PATCH 1/7] Implement OneNearestNeighbor and fix numpy_questions --- numpy_questions.py | 54 +++-------- sklearn_questions.py | 96 ++++++++++--------- ...7\232\204GitHub\351\202\256\347\256\261\"" | 7 ++ ...2\204GitHub\351\202\256\347\256\261\".pub" | 1 + 4 files changed, 73 insertions(+), 85 deletions(-) create mode 100644 "ssh-keygen -t ed25519 -C \"\344\275\240\347\232\204GitHub\351\202\256\347\256\261\"" create mode 100644 "ssh-keygen -t ed25519 -C \"\344\275\240\347\232\204GitHub\351\202\256\347\256\261\".pub" diff --git a/numpy_questions.py b/numpy_questions.py index 21fcec4b..a9ddba9b 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -19,49 +19,25 @@ def max_index(X): - """Return the index of the maximum in a numpy array. - - Parameters - ---------- - X : ndarray of shape (n_samples, n_features) - The input array. - - Returns - ------- - (i, j) : tuple(int) - The row and columnd index of the maximum. + if not isinstance(X, np.ndarray): + raise ValueError("Input must be a numpy array") + if X.ndim != 2: + raise ValueError("Input must be a 2D array") + + max_idx = np.argmax(X) + i, j = np.unravel_index(max_idx, X.shape) + + return i, j - Raises - ------ - ValueError - If the input is not a numpy array or - if the shape is not 2D. - """ - i = 0 - j = 0 - # TODO - return i, j def wallis_product(n_terms): - """Implement the Wallis product to compute an approximation of pi. - - See: - https://en.wikipedia.org/wiki/Wallis_product - - Parameters - ---------- - n_terms : int - Number of steps in the Wallis product. Note that `n_terms=0` will - consider the product to be `1`. + if n_terms == 0: + return 1.0 - Returns - ------- - pi : float - The approximation of order `n_terms` of pi using the Wallis product. - """ - # XXX : The n_terms is an int that corresponds to the number of - # terms in the product. For example 10000. - return 0. + product = 1.0 + for k in range(1, n_terms + 1): + product *= (4 * k**2) / (4 * k**2 - 1) + return 2 * product diff --git a/sklearn_questions.py b/sklearn_questions.py index f65038c6..df23ac6e 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -1,74 +1,78 @@ """Assignment - making a sklearn estimator. -The goal of this assignment is to implement by yourself a scikit-learn -estimator for the OneNearestNeighbor and check that it is working properly. - -The nearest neighbor classifier predicts for a point X_i the target y_k of -the training sample X_k which is the closest to X_i. We measure proximity with -the Euclidean distance. The model will be evaluated with the accuracy (average -number of samples corectly classified). You need to implement the `fit`, -`predict` and `score` methods for this class. The code you write should pass -the test we implemented. You can run the tests by calling at the root of the -repo `pytest test_sklearn_questions.py`. - -We also ask to respect the pep8 convention: https://pep8.org. This will be -enforced with `flake8`. You can check that there is no flake8 errors by -calling `flake8` at the root of the repo. - -Finally, you need to write docstring similar to the one in `numpy_questions` -for the methods you code and for the class. The docstring will be checked using -`pydocstyle` that you can also call at the root of the repo. +Custom implementation of a OneNearestNeighbor classifier. """ + import numpy as np -from sklearn.base import BaseEstimator -from sklearn.base import ClassifierMixin -from sklearn.utils.validation import check_X_y -from sklearn.utils.validation import check_array -from sklearn.utils.validation import check_is_fitted +from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.utils.validation import ( + check_X_y, check_is_fitted +) from sklearn.utils.multiclass import check_classification_targets -class OneNearestNeighbor(BaseEstimator, ClassifierMixin): - "OneNearestNeighbor classifier." +class OneNearestNeighbor(ClassifierMixin, BaseEstimator): + """One Nearest Neighbor classifier. + + Assigns to each sample the label of the closest training point + using Euclidean distance. + """ - def __init__(self): # noqa: D107 + def __init__(self): pass def fit(self, X, y): - """Write docstring. + """Fit the OneNearestNeighbor classifier. + + Parameters + ---------- + X : ndarray of shape (n_samples, n_features) + Training data. - And describe parameters + y : ndarray of shape (n_samples,) + Target labels. + + Returns + ------- + self : object + Fitted classifier. """ - X, y = check_X_y(X, y) + X, y = self._validate_data(X, y, reset=True) check_classification_targets(y) + + self.X_train_ = X + self.y_train_ = y self.classes_ = np.unique(y) - self.n_features_in_ = X.shape[1] - # XXX fix return self def predict(self, X): - """Write docstring. + """Predict class labels for samples in X. - And describe parameters + Parameters + ---------- + X : ndarray of shape (n_samples, n_features) + Input samples. + + Returns + ------- + y_pred : ndarray of shape (n_samples,) + Predicted labels. """ check_is_fitted(self) - X = check_array(X) - y_pred = np.full( - shape=len(X), fill_value=self.classes_[0], - dtype=self.classes_.dtype + + X = self._validate_data(X, reset=False) + + distances = np.linalg.norm( + X[:, None, :] - self.X_train_[None, :, :], + axis=2 ) - # XXX fix - return y_pred + nearest_idx = np.argmin(distances, axis=1) + return self.y_train_[nearest_idx] def score(self, X, y): - """Write docstring. - - And describe parameters - """ + """Return accuracy score.""" X, y = check_X_y(X, y) y_pred = self.predict(X) - - # XXX fix - return y_pred.sum() + return np.mean(y_pred == y) diff --git "a/ssh-keygen -t ed25519 -C \"\344\275\240\347\232\204GitHub\351\202\256\347\256\261\"" "b/ssh-keygen -t ed25519 -C \"\344\275\240\347\232\204GitHub\351\202\256\347\256\261\"" new file mode 100644 index 00000000..7669557c --- /dev/null +++ "b/ssh-keygen -t ed25519 -C \"\344\275\240\347\232\204GitHub\351\202\256\347\256\261\"" @@ -0,0 +1,7 @@ +-----BEGIN OPENSSH PRIVATE KEY----- +b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW +QyNTUxOQAAACC+nIN2ZmV7i3tLTK82iLZ55hTImb3J2vo2gxHCqFY1PgAAAJg6M29MOjNv +TAAAAAtzc2gtZWQyNTUxOQAAACC+nIN2ZmV7i3tLTK82iLZ55hTImb3J2vo2gxHCqFY1Pg +AAAEAvo6TJa/cpJpuuaNQAx+6V9yzlXpTyZVqILJlLmTsZh76cg3ZmZXuLe0tMrzaItnnm +FMiZvcna+jaDEcKoVjU+AAAAEuS9oOeahEdpdEh1YumCrueusQECAw== +-----END OPENSSH PRIVATE KEY----- diff --git "a/ssh-keygen -t ed25519 -C \"\344\275\240\347\232\204GitHub\351\202\256\347\256\261\".pub" "b/ssh-keygen -t ed25519 -C \"\344\275\240\347\232\204GitHub\351\202\256\347\256\261\".pub" new file mode 100644 index 00000000..e79c515d --- /dev/null +++ "b/ssh-keygen -t ed25519 -C \"\344\275\240\347\232\204GitHub\351\202\256\347\256\261\".pub" @@ -0,0 +1 @@ +ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL6cg3ZmZXuLe0tMrzaItnnmFMiZvcna+jaDEcKoVjU+ 你的GitHub邮箱 From 7ea550e7518c146b2e9171218cd190ffb8dff371 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=B9=BF=E6=82=A6?= Date: Fri, 14 Nov 2025 09:10:23 +0100 Subject: [PATCH 2/7] Fix flake8 style issues and remove extra blank lines --- sklearn_questions.py | 54 ++++---------------------------------------- 1 file changed, 5 insertions(+), 49 deletions(-) diff --git a/sklearn_questions.py b/sklearn_questions.py index df23ac6e..5db378c8 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -1,43 +1,15 @@ -"""Assignment - making a sklearn estimator. - -Custom implementation of a OneNearestNeighbor classifier. -""" - import numpy as np from sklearn.base import BaseEstimator, ClassifierMixin -from sklearn.utils.validation import ( - check_X_y, check_is_fitted -) +from sklearn.utils.validation import check_X_y, check_array, check_is_fitted from sklearn.utils.multiclass import check_classification_targets -class OneNearestNeighbor(ClassifierMixin, BaseEstimator): - """One Nearest Neighbor classifier. - - Assigns to each sample the label of the closest training point - using Euclidean distance. - """ - +class OneNearestNeighbor(BaseEstimator, ClassifierMixin): def __init__(self): pass def fit(self, X, y): - """Fit the OneNearestNeighbor classifier. - - Parameters - ---------- - X : ndarray of shape (n_samples, n_features) - Training data. - - y : ndarray of shape (n_samples,) - Target labels. - - Returns - ------- - self : object - Fitted classifier. - """ - X, y = self._validate_data(X, y, reset=True) + X, y = check_X_y(X, y) check_classification_targets(y) self.X_train_ = X @@ -47,32 +19,16 @@ def fit(self, X, y): return self def predict(self, X): - """Predict class labels for samples in X. - - Parameters - ---------- - X : ndarray of shape (n_samples, n_features) - Input samples. - - Returns - ------- - y_pred : ndarray of shape (n_samples,) - Predicted labels. - """ check_is_fitted(self) - - X = self._validate_data(X, reset=False) + X = check_array(X) distances = np.linalg.norm( X[:, None, :] - self.X_train_[None, :, :], - axis=2 - ) - + axis=2) nearest_idx = np.argmin(distances, axis=1) return self.y_train_[nearest_idx] def score(self, X, y): - """Return accuracy score.""" X, y = check_X_y(X, y) y_pred = self.predict(X) return np.mean(y_pred == y) From 3765321678412b9622142f084b871885122a06c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=B9=BF=E6=82=A6?= Date: Fri, 14 Nov 2025 09:14:01 +0100 Subject: [PATCH 3/7] Fix flake8 style issues and remove extra blank lines --- numpy_questions.py | 27 --------------------------- sklearn_questions.py | 7 ------- 2 files changed, 34 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index a9ddba9b..4d83d0c1 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -1,42 +1,15 @@ -"""Assignment - using numpy and making a PR. - -The goals of this assignment are: - * Use numpy in practice with two easy exercises. - * Use automated tools to validate the code (`pytest` and `flake8`) - * Submit a Pull-Request on github to practice `git`. - -The two functions below are skeleton functions. The docstrings explain what -are the inputs, the outputs and the expected error. Fill the function to -complete the assignment. The code should be able to pass the test that we -wrote. To run the tests, use `pytest test_numpy_questions.py` at the root of -the repo. It should say that 2 tests ran with success. - -We also ask to respect the pep8 convention: https://pep8.org. -This will be enforced with `flake8`. You can check that there is no flake8 -errors by calling `flake8` at the root of the repo. -""" import numpy as np - - def max_index(X): if not isinstance(X, np.ndarray): raise ValueError("Input must be a numpy array") if X.ndim != 2: raise ValueError("Input must be a 2D array") - max_idx = np.argmax(X) i, j = np.unravel_index(max_idx, X.shape) - return i, j - - - - - def wallis_product(n_terms): if n_terms == 0: return 1.0 - product = 1.0 for k in range(1, n_terms + 1): product *= (4 * k**2) / (4 * k**2 - 1) diff --git a/sklearn_questions.py b/sklearn_questions.py index 5db378c8..5d43d0d0 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -2,32 +2,25 @@ from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.utils.validation import check_X_y, check_array, check_is_fitted from sklearn.utils.multiclass import check_classification_targets - - class OneNearestNeighbor(BaseEstimator, ClassifierMixin): def __init__(self): pass - def fit(self, X, y): X, y = check_X_y(X, y) check_classification_targets(y) - self.X_train_ = X self.y_train_ = y self.classes_ = np.unique(y) - return self def predict(self, X): check_is_fitted(self) X = check_array(X) - distances = np.linalg.norm( X[:, None, :] - self.X_train_[None, :, :], axis=2) nearest_idx = np.argmin(distances, axis=1) return self.y_train_[nearest_idx] - def score(self, X, y): X, y = check_X_y(X, y) y_pred = self.predict(X) From ed3dca203442e16b1d27998d7935f6126f20b5fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=B9=BF=E6=82=A6?= Date: Fri, 14 Nov 2025 09:51:20 +0100 Subject: [PATCH 4/7] Fix flake8 formatting errors --- numpy_questions.py | 9 ++++++++- sklearn_questions.py | 11 ++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index 4d83d0c1..aa397f7c 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -1,16 +1,23 @@ import numpy as np + + def max_index(X): if not isinstance(X, np.ndarray): raise ValueError("Input must be a numpy array") if X.ndim != 2: raise ValueError("Input must be a 2D array") + max_idx = np.argmax(X) i, j = np.unravel_index(max_idx, X.shape) return i, j + + def wallis_product(n_terms): if n_terms == 0: return 1.0 + product = 1.0 for k in range(1, n_terms + 1): - product *= (4 * k**2) / (4 * k**2 - 1) + product *= (4 * k ** 2) / (4 * k ** 2 - 1) + return 2 * product diff --git a/sklearn_questions.py b/sklearn_questions.py index 5d43d0d0..1354fd92 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -2,25 +2,34 @@ from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.utils.validation import check_X_y, check_array, check_is_fitted from sklearn.utils.multiclass import check_classification_targets + + class OneNearestNeighbor(BaseEstimator, ClassifierMixin): def __init__(self): pass + def fit(self, X, y): X, y = check_X_y(X, y) check_classification_targets(y) + self.X_train_ = X self.y_train_ = y self.classes_ = np.unique(y) + return self def predict(self, X): check_is_fitted(self) X = check_array(X) + distances = np.linalg.norm( X[:, None, :] - self.X_train_[None, :, :], - axis=2) + axis=2 + ) + nearest_idx = np.argmin(distances, axis=1) return self.y_train_[nearest_idx] + def score(self, X, y): X, y = check_X_y(X, y) y_pred = self.predict(X) From 77893d997ce0036db82d05c61f4a75ac12f749c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=B9=BF=E6=82=A6?= Date: Fri, 14 Nov 2025 09:55:38 +0100 Subject: [PATCH 5/7] Fix flake8 formatting errors --- numpy_questions.py | 33 +++++++++++++++++++++++++------ sklearn_questions.py | 46 +++++++++++++++++++++++++++++++++++--------- 2 files changed, 64 insertions(+), 15 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index aa397f7c..ed049655 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -1,23 +1,44 @@ -import numpy as np +"""Numpy utility functions.""" +import numpy as np def max_index(X): + """Return the indices of the maximum value in a 2D array. + + Parameters + ---------- + X : np.ndarray, shape (n_rows, n_cols) + Input 2D array. + + Returns + ------- + i, j : int, int + Row and column indices of the maximum element. + """ if not isinstance(X, np.ndarray): raise ValueError("Input must be a numpy array") if X.ndim != 2: raise ValueError("Input must be a 2D array") - max_idx = np.argmax(X) i, j = np.unravel_index(max_idx, X.shape) return i, j - def wallis_product(n_terms): + """Compute the Wallis product approximation of pi. + + Parameters + ---------- + n_terms : int + Number of terms in the product. + + Returns + ------- + float + Approximation of pi using the Wallis formula. + """ if n_terms == 0: return 1.0 - product = 1.0 for k in range(1, n_terms + 1): - product *= (4 * k ** 2) / (4 * k ** 2 - 1) - + product *= (4 * k**2) / (4 * k**2 - 1) return 2 * product diff --git a/sklearn_questions.py b/sklearn_questions.py index 1354fd92..9c3309dd 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -1,36 +1,64 @@ +"""Custom One Nearest Neighbor classifier using sklearn interface.""" + import numpy as np from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.utils.validation import check_X_y, check_array, check_is_fitted from sklearn.utils.multiclass import check_classification_targets - class OneNearestNeighbor(BaseEstimator, ClassifierMixin): + """One Nearest Neighbor classifier. + + Predicts the label of a sample as the label of the closest training point + using Euclidean distance. + """ + def __init__(self): + """Initialize the classifier (no parameters).""" pass def fit(self, X, y): + """Fit the classifier on training data. + + Parameters + ---------- + X : ndarray of shape (n_samples, n_features) + Training data. + y : ndarray of shape (n_samples,) + Target labels. + + Returns + ------- + self : OneNearestNeighbor + Fitted classifier. + """ X, y = check_X_y(X, y) check_classification_targets(y) - self.X_train_ = X self.y_train_ = y self.classes_ = np.unique(y) - return self def predict(self, X): - check_is_fitted(self) - X = check_array(X) + """Predict labels for input samples X. - distances = np.linalg.norm( - X[:, None, :] - self.X_train_[None, :, :], - axis=2 - ) + Parameters + ---------- + X : ndarray of shape (n_samples, n_features) + Input data. + Returns + ------- + y_pred : ndarray of shape (n_samples,) + Predicted labels. + """ + check_is_fitted(self) + X = check_array(X) + distances = np.linalg.norm(X[:, None, :] - self.X_train_[None, :, :], axis=2) nearest_idx = np.argmin(distances, axis=1) return self.y_train_[nearest_idx] def score(self, X, y): + """Compute accuracy of the classifier on test data.""" X, y = check_X_y(X, y) y_pred = self.predict(X) return np.mean(y_pred == y) From 1a9209c77663026f026838524ecb2d804638db2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=B9=BF=E6=82=A6?= Date: Fri, 14 Nov 2025 10:01:35 +0100 Subject: [PATCH 6/7] Fix flake8 formatting errors --- numpy_questions.py | 22 +++++++++++++--------- sklearn_questions.py | 37 +++++++++++++++++++++++++++++-------- 2 files changed, 42 insertions(+), 17 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index ed049655..6096e914 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -1,43 +1,47 @@ -"""Numpy utility functions.""" +"""Numpy related utility functions.""" import numpy as np + def max_index(X): - """Return the indices of the maximum value in a 2D array. + """Return the indices of the maximum value in a 2D numpy array. Parameters ---------- - X : np.ndarray, shape (n_rows, n_cols) - Input 2D array. + X : np.ndarray + Input 2D array Returns ------- - i, j : int, int - Row and column indices of the maximum element. + tuple + Tuple of (row_index, column_index) of the maximum element. """ if not isinstance(X, np.ndarray): raise ValueError("Input must be a numpy array") if X.ndim != 2: raise ValueError("Input must be a 2D array") + max_idx = np.argmax(X) i, j = np.unravel_index(max_idx, X.shape) return i, j + def wallis_product(n_terms): - """Compute the Wallis product approximation of pi. + """Compute approximation of pi using Wallis product formula. Parameters ---------- n_terms : int - Number of terms in the product. + Number of terms to include in the product. Returns ------- float - Approximation of pi using the Wallis formula. + Approximation of pi. """ if n_terms == 0: return 1.0 + product = 1.0 for k in range(1, n_terms + 1): product *= (4 * k**2) / (4 * k**2 - 1) diff --git a/sklearn_questions.py b/sklearn_questions.py index 9c3309dd..46c148a3 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -1,14 +1,15 @@ -"""Custom One Nearest Neighbor classifier using sklearn interface.""" +"""Custom sklearn estimator: One Nearest Neighbor classifier.""" import numpy as np from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.utils.validation import check_X_y, check_array, check_is_fitted from sklearn.utils.multiclass import check_classification_targets + class OneNearestNeighbor(BaseEstimator, ClassifierMixin): """One Nearest Neighbor classifier. - Predicts the label of a sample as the label of the closest training point + Assigns to each sample the label of the closest training point using Euclidean distance. """ @@ -17,34 +18,37 @@ def __init__(self): pass def fit(self, X, y): - """Fit the classifier on training data. + """Fit the OneNearestNeighbor classifier. Parameters ---------- X : ndarray of shape (n_samples, n_features) Training data. + y : ndarray of shape (n_samples,) Target labels. Returns ------- - self : OneNearestNeighbor + self : object Fitted classifier. """ X, y = check_X_y(X, y) check_classification_targets(y) + self.X_train_ = X self.y_train_ = y self.classes_ = np.unique(y) + return self def predict(self, X): - """Predict labels for input samples X. + """Predict class labels for samples in X. Parameters ---------- X : ndarray of shape (n_samples, n_features) - Input data. + Input samples. Returns ------- @@ -53,12 +57,29 @@ def predict(self, X): """ check_is_fitted(self) X = check_array(X) - distances = np.linalg.norm(X[:, None, :] - self.X_train_[None, :, :], axis=2) + + distances = np.linalg.norm( + X[:, None, :] - self.X_train_[None, :, :], + axis=2 + ) nearest_idx = np.argmin(distances, axis=1) return self.y_train_[nearest_idx] def score(self, X, y): - """Compute accuracy of the classifier on test data.""" + """Return accuracy score of the classifier. + + Parameters + ---------- + X : ndarray of shape (n_samples, n_features) + Input samples. + y : ndarray of shape (n_samples,) + True labels. + + Returns + ------- + float + Accuracy score. + """ X, y = check_X_y(X, y) y_pred = self.predict(X) return np.mean(y_pred == y) From 33de9645b1fe3a8cba391ffb9e0a8ce028938c1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E5=B9=BF=E6=82=A6?= Date: Fri, 14 Nov 2025 10:05:19 +0100 Subject: [PATCH 7/7] Fix flake8 formatting errors --- sklearn_questions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn_questions.py b/sklearn_questions.py index 46c148a3..7cea3119 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -39,6 +39,7 @@ def fit(self, X, y): self.X_train_ = X self.y_train_ = y self.classes_ = np.unique(y) + self.n_features_in_ = X.shape[1] return self