From 92ad2126f5009aacb3e4ae141b66af138437d978 Mon Sep 17 00:00:00 2001
From: romaissaemelhaoui-ux
Date: Fri, 14 Nov 2025 20:14:05 +0100
Subject: [PATCH 1/3] work done numpy file

---
 numpy_questions.py | 55 ++++++++++++++++++++++++++--------------------
 1 file changed, 31 insertions(+), 24 deletions(-)

diff --git a/numpy_questions.py b/numpy_questions.py
index 21fcec4b..7308a56c 100644
--- a/numpy_questions.py
+++ b/numpy_questions.py
@@ -1,20 +1,10 @@
-"""Assignment - using numpy and making a PR.
-
-The goals of this assignment are:
-    * Use numpy in practice with two easy exercises.
-    * Use automated tools to validate the code (`pytest` and `flake8`)
-    * Submit a Pull-Request on github to practice `git`.
-
-The two functions below are skeleton functions. The docstrings explain what
-are the inputs, the outputs and the expected error. Fill the function to
-complete the assignment. The code should be able to pass the test that we
-wrote. To run the tests, use `pytest test_numpy_questions.py` at the root of
-the repo. It should say that 2 tests ran with success.
-
-We also ask to respect the pep8 convention: https://pep8.org.
-This will be enforced with `flake8`. You can check that there is no flake8
-errors by calling `flake8` at the root of the repo.
 """
+Assignment - using numpy and making a PR.
+
+This module implements two simple numerical functions: max_index and
+wallis_product. These functions are tested using pytest.
+"""
+
 import numpy as np
 
 
@@ -29,7 +19,7 @@ def max_index(X):
     Returns
     -------
     (i, j) : tuple(int)
-        The row and columnd index of the maximum.
+        The row and column index of the maximum.
 
     Raises
     ------
@@ -37,12 +27,19 @@
         If the input is not a numpy array or
         if the shape is not 2D.
     """
-    i = 0
-    j = 0
+    # Check type
+    if not isinstance(X, np.ndarray):
+        raise ValueError("X must be a numpy ndarray.")
+
+    # Check shape (must be 2D)
+    if X.ndim != 2:
+        raise ValueError("X must be a 2D array.")
 
-    # TODO
+    # Find flat index of max and convert it to 2D indices
+    flat_idx = np.argmax(X)
+    i, j = np.unravel_index(flat_idx, X.shape)
 
-    return i, j
+    return int(i), int(j)
 
 
 def wallis_product(n_terms):
@@ -62,6 +59,16 @@
     pi : float
        The approximation of order `n_terms` of pi using the Wallis product.
     """
-    # XXX : The n_terms is an int that corresponds to the number of
-    # terms in the product. For example 10000.
-    return 0.
+    if not isinstance(n_terms, (int, np.integer)) or n_terms < 0:
+        raise ValueError("n_terms must be a non-negative integer.")
+
+    # Empty product convention: product = 1 when n_terms = 0
+    if n_terms == 0:
+        return 1.0
+
+    k = np.arange(1, n_terms + 1, dtype=float)
+    terms = (2 * k) ** 2 / ((2 * k - 1) * (2 * k + 1))
+    product = np.prod(terms)
+
+    # Wallis product gives pi / 2, so multiply by 2
+    return 2.0 * product

-estimator for the OneNearestNeighbor and check that it is working properly.
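Note (illustrative, not part of the patch above): a quick local sanity check for the two functions added in this commit might look like the sketch below. The import path comes from the patched module; the example array and the tolerance are assumptions, not part of the assignment's test suite.

import numpy as np

from numpy_questions import max_index, wallis_product

# max_index returns the (row, column) position of the largest entry.
X = np.array([[1.0, 7.0, 3.0],
              [4.0, 5.0, 6.0]])
assert max_index(X) == (0, 1)

# The truncated Wallis product converges to pi from below, with an error
# of roughly pi / (2 * n_terms), so 1e5 terms is ample for a 1e-4 tolerance.
assert abs(wallis_product(100_000) - np.pi) < 1e-4

The official checks remain `pytest test_numpy_questions.py` and `flake8`, as described in the skeleton docstring that this patch removes.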
From 82066403725fe0d37955ad4bb3300b45a182b0f1 Mon Sep 17 00:00:00 2001
From: romaissaemelhaoui-ux
Date: Fri, 14 Nov 2025 20:16:53 +0100
Subject: [PATCH 2/3] question 2 done

---
 sklearn_questions.py | 120 +++++++++++++++++++++++++++----------------
 1 file changed, 75 insertions(+), 45 deletions(-)

diff --git a/sklearn_questions.py b/sklearn_questions.py
index f65038c6..3766b908 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -1,74 +1,104 @@
-"""Assignment - making a sklearn estimator.
-
-The goal of this assignment is to implement by yourself a scikit-learn
-estimator for the OneNearestNeighbor and check that it is working properly.
-
-The nearest neighbor classifier predicts for a point X_i the target y_k of
-the training sample X_k which is the closest to X_i. We measure proximity with
-the Euclidean distance. The model will be evaluated with the accuracy (average
-number of samples corectly classified). You need to implement the `fit`,
-`predict` and `score` methods for this class. The code you write should pass
-the test we implemented. You can run the tests by calling at the root of the
-repo `pytest test_sklearn_questions.py`.
-
-We also ask to respect the pep8 convention: https://pep8.org. This will be
-enforced with `flake8`. You can check that there is no flake8 errors by
-calling `flake8` at the root of the repo.
-
-Finally, you need to write docstring similar to the one in `numpy_questions`
-for the methods you code and for the class. The docstring will be checked using
-`pydocstyle` that you can also call at the root of the repo.
 """
+Assignment - creating a scikit-learn estimator.
+
+This module implements a OneNearestNeighbor classifier that follows the
+scikit-learn API, including fit, predict, and score methods.
+"""
+
 import numpy as np
-from sklearn.base import BaseEstimator
-from sklearn.base import ClassifierMixin
-from sklearn.utils.validation import check_X_y
-from sklearn.utils.validation import check_array
-from sklearn.utils.validation import check_is_fitted
+from sklearn.base import BaseEstimator, ClassifierMixin
+from sklearn.utils.validation import (
+    check_X_y,
+    check_is_fitted,
+    validate_data,
+)
 from sklearn.utils.multiclass import check_classification_targets
 
 
-class OneNearestNeighbor(BaseEstimator, ClassifierMixin):
-    "OneNearestNeighbor classifier."
+class OneNearestNeighbor(ClassifierMixin, BaseEstimator):
+    """One-Nearest-Neighbor classifier.
+
+    This classifier assigns to each test sample the label of the closest
+    training sample according to the Euclidean distance.
+    """
 
     def __init__(self):  # noqa: D107
         pass
 
     def fit(self, X, y):
-        """Write docstring.
+        """Fit the OneNearestNeighbor classifier.
 
-        And describe parameters
+        Parameters
+        ----------
+        X : ndarray of shape (n_samples, n_features)
+            Training input samples.
+
+        y : ndarray of shape (n_samples,)
+            Target labels.
+
+        Returns
+        -------
+        self : OneNearestNeighbor
+            Fitted estimator.
         """
-        X, y = check_X_y(X, y)
+        # validate_data will:
+        # - check shapes and types
+        # - set self.n_features_in_
+        X, y = validate_data(self, X, y)
         check_classification_targets(y)
+
+        self.X_ = X
+        self.y_ = y
         self.classes_ = np.unique(y)
-        self.n_features_in_ = X.shape[1]
-
-        # XXX fix
         return self
 
     def predict(self, X):
-        """Write docstring.
+        """Predict class labels for samples in X.
 
-        And describe parameters
+        Parameters
+        ----------
+        X : ndarray of shape (n_samples, n_features)
+            Input samples.
+
+        Returns
+        -------
+        y_pred : ndarray of shape (n_samples,)
+            Predicted labels for each input sample.
         """
         check_is_fitted(self)
-        X = check_array(X)
-        y_pred = np.full(
-            shape=len(X), fill_value=self.classes_[0],
-            dtype=self.classes_.dtype
+
+        # reset=False -> do not overwrite n_features_in_,
+        # but check consistency with what was seen in fit.
+        X = validate_data(self, X, reset=False)
+
+        # Pairwise Euclidean distances: shape (n_test, n_train)
+        distances = np.linalg.norm(
+            X[:, np.newaxis, :] - self.X_[np.newaxis, :, :],
+            axis=2,
         )
-
-        # XXX fix
-        return y_pred
+        # Index of closest training point for each test sample
+        nn_idx = np.argmin(distances, axis=1)
+
+        return self.y_[nn_idx]
 
     def score(self, X, y):
-        """Write docstring.
+        """Compute the accuracy of predictions on X compared to y.
 
-        And describe parameters
+        Parameters
+        ----------
+        X : ndarray of shape (n_samples, n_features)
+            Test input samples.
+
+        y : ndarray of shape (n_samples,)
+            True labels.
+
+        Returns
+        -------
+        score : float
+            Accuracy = proportion of correctly classified samples.
         """
         X, y = check_X_y(X, y)
         y_pred = self.predict(X)
-
-        # XXX fix
-        return y_pred.sum()
+        return np.mean(y_pred == y)
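Note (illustrative, not part of the patch above): `validate_data` only exists in `sklearn.utils.validation` from scikit-learn 1.6 onwards, which is presumably why the next commit falls back to the older `check_X_y` / `check_array` pattern. Independently of which validation helper is used, one way to exercise the class against scikit-learn's generic API checks is sketched below; whether this particular implementation passes every check is not claimed here.

from sklearn.utils.estimator_checks import check_estimator

from sklearn_questions import OneNearestNeighbor

# Runs scikit-learn's battery of API-compliance checks on the estimator.
check_estimator(OneNearestNeighbor())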
From 5d89b88c35478c2c109625c227202618da9a9633 Mon Sep 17 00:00:00 2001
From: romaissaemelhaoui-ux
Date: Sun, 16 Nov 2025 12:37:21 +0100
Subject: [PATCH 3/3] part B completed

---
 sklearn_questions.py | 75 ++++++++++++++++++++++----------------------
 1 file changed, 37 insertions(+), 38 deletions(-)

diff --git a/sklearn_questions.py b/sklearn_questions.py
index 3766b908..3fef7710 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -6,21 +6,16 @@
 """
 
 import numpy as np
-from sklearn.base import BaseEstimator, ClassifierMixin
-from sklearn.utils.validation import (
-    check_X_y,
-    check_is_fitted,
-    validate_data,
-)
+from sklearn.base import BaseEstimator
+from sklearn.base import ClassifierMixin
+from sklearn.utils.validation import check_X_y
+from sklearn.utils.validation import check_array
+from sklearn.utils.validation import check_is_fitted
 from sklearn.utils.multiclass import check_classification_targets
 
 
 class OneNearestNeighbor(ClassifierMixin, BaseEstimator):
-    """One-Nearest-Neighbor classifier.
-
-    This classifier assigns to each test sample the label of the closest
-    training sample according to the Euclidean distance.
-    """
+    """OneNearestNeighbor classifier."""
 
     def __init__(self):  # noqa: D107
         pass
@@ -32,73 +27,77 @@ def fit(self, X, y):
         ----------
         X : ndarray of shape (n_samples, n_features)
             Training input samples.
-
         y : ndarray of shape (n_samples,)
-            Target labels.
+            Target labels associated with each training sample.
 
         Returns
         -------
-        self : OneNearestNeighbor
+        self : object
             Fitted estimator.
         """
-        # validate_data will:
-        # - check shapes and types
-        # - set self.n_features_in_
-        X, y = validate_data(self, X, y)
+        X, y = check_X_y(X, y)
         check_classification_targets(y)
-
+        self.classes_ = np.unique(y)
+        self.n_features_in_ = X.shape[1]
+        # XXX fix
         self.X_ = X
         self.y_ = y
-        self.classes_ = np.unique(y)
+
         return self
 
     def predict(self, X):
-        """Predict class labels for samples in X.
+        """Predict class labels for given samples.
 
         Parameters
         ----------
         X : ndarray of shape (n_samples, n_features)
-            Input samples.
+            Samples for which to predict labels.
 
         Returns
         -------
         y_pred : ndarray of shape (n_samples,)
-            Predicted labels for each input sample.
+            Predicted class label for each sample.
         """
         check_is_fitted(self)
+        X = check_array(X)
+        y_pred = np.full(
+            shape=len(X), fill_value=self.classes_[0],
+            dtype=self.classes_.dtype
+        )
 
-        # reset=False -> do not overwrite n_features_in_,
-        # but check consistency with what was seen in fit.
-        X = validate_data(self, X, reset=False)
+        # XXX fix
+        if X.shape[1] != self.n_features_in_:
+            raise ValueError(
+                f"X has {X.shape[1]} features, but OneNearestNeighbor "
+                f"is expecting {self.n_features_in_} features as input"
+            )
 
-        # Pairwise Euclidean distances: shape (n_test, n_train)
-        distances = np.linalg.norm(
-            X[:, np.newaxis, :] - self.X_[np.newaxis, :, :],
-            axis=2,
-        )
+        diff = X[:, np.newaxis, :] - self.X_[np.newaxis, :, :]
+        distances = np.sum(diff ** 2, axis=2)
 
-        # Index of closest training point for each test sample
-        nn_idx = np.argmin(distances, axis=1)
+        nearest_idx = np.argmin(distances, axis=1)
+        y_pred[:] = self.y_[nearest_idx]
 
-        return self.y_[nn_idx]
+        return y_pred
 
     def score(self, X, y):
-        """Compute the accuracy of predictions on X compared to y.
+        """Compute accuracy of the classifier.
 
         Parameters
         ----------
         X : ndarray of shape (n_samples, n_features)
-            Test input samples.
-
+            Test samples.
         y : ndarray of shape (n_samples,)
-            True labels.
+            True target labels.
 
         Returns
         -------
         score : float
-            Accuracy = proportion of correctly classified samples.
+            Accuracy of predictions: fraction of correctly classified samples.
         """
         X, y = check_X_y(X, y)
         y_pred = self.predict(X)
+
+        # XXX fix
         return np.mean(y_pred == y)
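Note (illustrative, not part of the patches above): as an end-to-end sketch, the classifier from the last commit can be compared against scikit-learn's own 1-nearest-neighbor model. The dataset, split, and variable names below are assumptions made for illustration; only `OneNearestNeighbor` and its `fit` / `predict` / `score` methods come from the patch.

import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

from sklearn_questions import OneNearestNeighbor

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

ours = OneNearestNeighbor().fit(X_train, y_train)
reference = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)

# Both are 1-NN with Euclidean distance, so the accuracies should match
# (up to tie-breaking when a test point is equidistant to two neighbors).
print("OneNearestNeighbor :", ours.score(X_test, y_test))
print("KNeighborsClassifier:", reference.score(X_test, y_test))

Two design notes on the final `predict`: the squared distances are enough for the argmin, so skipping the square root is fine, and the broadcasting builds an intermediate array of shape (n_test, n_train, n_features), which is acceptable for assignment-sized data but would not scale to large training sets.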