From 8736b0405573231a221e1c63fa46fa896604d8bf Mon Sep 17 00:00:00 2001
From: blanchemahe
Date: Fri, 14 Nov 2025 13:32:22 +0100
Subject: [PATCH 1/4] added X after my name

---
 students.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/students.txt b/students.txt
index 7aeb6a0d..77b0d545 100644
--- a/students.txt
+++ b/students.txt
@@ -60,7 +60,7 @@ Liard Eléanor
 Liu Guangyue X
 Liu Yunxian
 Lucille Maximilien X
-Mahé Blanche
+Mahé Blanche X
 Martin Justin X
 Massias Mathurin
 Massoud Alexandre.....X

From 20b8aa9fcd51ad6fa9a27f733121b05992417280 Mon Sep 17 00:00:00 2001
From: blanchemahe
Date: Sun, 16 Nov 2025 10:28:52 +0100
Subject: [PATCH 2/4] completed numpy and sklearn questions

---
 numpy_questions.py   | 20 ++++++++++++++-
 sklearn_questions.py | 58 +++++++++++++++++++++++++++++++++++++-------
 2 files changed, 68 insertions(+), 10 deletions(-)

diff --git a/numpy_questions.py b/numpy_questions.py
index 21fcec4b..8f43d2a0 100644
--- a/numpy_questions.py
+++ b/numpy_questions.py
@@ -41,6 +41,18 @@ def max_index(X):
     j = 0
 
     # TODO
+    if not isinstance(X, np.ndarray):
+        raise ValueError
+    if X.ndim != 2:
+        raise ValueError
+
+    max = X[i,j]
+    for index_row in range(X.shape[0]):
+        for index_col in range(X.shape[1]):
+            if X[index_row, index_col] > max:
+                max = X[index_row, index_col]
+                i = index_row
+                j = index_col
     return i, j
 
 
@@ -64,4 +76,10 @@ def wallis_product(n_terms):
     """
     # XXX : The n_terms is an int that corresponds to the number of
     # terms in the product. For example 10000.
-    return 0.
+    if n_terms == 0:
+        return 1
+
+    pi_over_2 = 1
+    for n in range(1, n_terms+1):
+        pi_over_2 *= (4*(n**2))/(4*(n**2)-1)
+    return 2*pi_over_2
diff --git a/sklearn_questions.py b/sklearn_questions.py
index f65038c6..608377b5 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -35,9 +35,20 @@ def __init__(self):  # noqa: D107
         pass
 
     def fit(self, X, y):
-        """Write docstring.
-
-        And describe parameters
+        """Fit the OneNearestNeighbor classifier on training data.
+
+        Parameters
+        ----------
+        self : defines the instance of the class OneNearestNeighbor we are
+            working on
+        X : ndarray of the training data,
+            with shape (n_observations, p_features)
+        y : 1-darray of the labels associated with each dimension of X,
+            with shape (n_observations)
+
+        Returns
+        -------
+        self : maintains the instance
         """
         X, y = check_X_y(X, y)
         check_classification_targets(y)
@@ -45,12 +56,23 @@ def fit(self, X, y):
         self.n_features_in_ = X.shape[1]
 
         # XXX fix
+        self.X_ = X
+        self.y_ = y  # store X and y as data the model has "learned",
+        # ensure we have trained on X and y
         return self
 
     def predict(self, X):
-        """Write docstring.
+        """Predict y label for input data X using the OneNearestNeighbor rule
+
+        Parameters
+        -------
+        self : still maintain the instance of the class
+        X : ndarray of test data, with shape (n_observations, p_features)
 
-        And describe parameters
+        Returns
+        -------
+        y_pred : 1-darray of labels predicted for the test data X,
+            with shape (n_observations,)
         """
         check_is_fitted(self)
         X = check_array(X)
@@ -60,15 +82,33 @@ def predict(self, X):
         y_pred = np.full(
             shape=len(X), fill_value=self.classes_[0],
         )
 
         # XXX fix
+        idx = 0
+        for x in X:
+            euclidean_distances = np.sqrt(np.sum((self.X_ - x) ** 2, axis=1))
+            NN_index = np.argmin(euclidean_distances)
+            NN = self.y_[NN_index]
+            y_pred[idx] = NN
+            idx += 1
         return y_pred
 
     def score(self, X, y):
-        """Write docstring.
-
-        And describe parameters
+        """Score the model performance by evaluating
+        the proportion of y_pred that were accurate predictions.
+
+        Parameters
+        -------
+        X : ndarray of test data, with shape (n_observations, p_features)
+        y : 1d-array of the true labels associated with test samples X,
+            with shape (n_observations,)
+
+        Returns
+        -------
+        a score : float in [0,1]
+            reflecting the proportion of accurate predictions
         """
         X, y = check_X_y(X, y)
         y_pred = self.predict(X)
 
         # XXX fix
-        return y_pred.sum()
+        #return y_pred.sum()
+        return sum(y_pred == y)/len(y)
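
(Side note, not part of the patch series: a minimal sanity check for the two functions completed in PATCH 2/4. It is only a sketch and assumes numpy_questions.py is importable from the working directory and that NumPy is installed.)

# Quick local check of the functions added in PATCH 2/4 (illustrative only).
import numpy as np

from numpy_questions import max_index, wallis_product

X = np.random.randn(10, 15)
i, j = max_index(X)
# The nested loops should agree with numpy's built-in argmax.
assert (i, j) == np.unravel_index(np.argmax(X), X.shape)

# The Wallis product approaches pi (~3.1415) from below, slowly.
print(wallis_product(10000))

np.unravel_index(np.argmax(X), X.shape) is a vectorized equivalent of the nested loops in max_index.
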
From 915163cee6a075c812045e6eee62ca0c2cdc4da0 Mon Sep 17 00:00:00 2001
From: blanchemahe
Date: Sun, 16 Nov 2025 11:33:24 +0100
Subject: [PATCH 3/4] amended code with flake8 and pydocstyle tests locally

---
 numpy_questions.py   |  4 ++--
 sklearn_questions.py | 24 +++++++++++-------------
 2 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/numpy_questions.py b/numpy_questions.py
index 8f43d2a0..e2b0885d 100644
--- a/numpy_questions.py
+++ b/numpy_questions.py
@@ -46,7 +46,7 @@ def max_index(X):
     if X.ndim != 2:
         raise ValueError
 
-    max = X[i,j]
+    max = X[i, j]
     for index_row in range(X.shape[0]):
         for index_col in range(X.shape[1]):
             if X[index_row, index_col] > max:
@@ -78,7 +78,7 @@ def wallis_product(n_terms):
     # terms in the product. For example 10000.
     if n_terms == 0:
         return 1
-    
+
     pi_over_2 = 1
     for n in range(1, n_terms+1):
         pi_over_2 *= (4*(n**2))/(4*(n**2)-1)
diff --git a/sklearn_questions.py b/sklearn_questions.py
index 608377b5..f9582d91 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -29,7 +29,7 @@
 
 
 class OneNearestNeighbor(BaseEstimator, ClassifierMixin):
-    "OneNearestNeighbor classifier."
+    """OneNearestNeighbor classifier."""
 
     def __init__(self):  # noqa: D107
         pass
@@ -39,11 +39,11 @@ def fit(self, X, y):
         Parameters
         ----------
-        self : defines the instance of the class OneNearestNeighbor we are 
+        self : defines the instance of the class OneNearestNeighbor we are
             working on
-        X : ndarray of the training data, 
+        X : ndarray of the training data,
            with shape (n_observations, p_features)
-        y : 1-darray of the labels associated with each dimension of X, 
+        y : 1-darray of the labels associated with each dimension of X,
            with shape (n_observations)
 
        Returns
        -------
@@ -57,12 +57,12 @@ def fit(self, X, y):
 
         # XXX fix
         self.X_ = X
-        self.y_ = y  # store X and y as data the model has "learned",
-        # ensure we have trained on X and y
+        self.y_ = y  # store X and y as "learned" data, 
+        # ensure we have trained on X and y
         return self
 
     def predict(self, X):
-        """Predict y label for input data X using the OneNearestNeighbor rule
+        """Predict y label for input data X using the OneNearestNeighbor rule.
 
         Parameters
         -------
@@ -71,7 +71,7 @@ def predict(self, X):
         Returns
         -------
-        y_pred : 1-darray of labels predicted for the test data X, 
+        y_pred : 1-darray of labels predicted for the test data X,
             with shape (n_observations,)
         """
         check_is_fitted(self)
         X = check_array(X)
@@ -92,23 +92,21 @@ def predict(self, X):
         return y_pred
 
     def score(self, X, y):
-        """Score the model performance by evaluating
-        the proportion of y_pred that were accurate predictions. 
+        """Score model performance by evaluating proportion of accurate y_pred.
 
         Parameters
         -------
         X : ndarray of test data, with shape (n_observations, p_features)
-        y : 1d-array of the true labels associated with test samples X, 
+        y : 1d-array of the true labels associated with test samples X,
             with shape (n_observations,)
 
         Returns
         -------
-        a score : float in [0,1] 
+        a score : float in [0,1]
             reflecting the proportion of accurate predictions
         """
         X, y = check_X_y(X, y)
         y_pred = self.predict(X)
 
         # XXX fix
-        #return y_pred.sum()
         return sum(y_pred == y)/len(y)
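
(Side note, not part of the patch series: a minimal usage sketch of the OneNearestNeighbor classifier completed in PATCH 2/4 and 3/4, on made-up toy data. It assumes sklearn_questions.py is importable from the working directory and that scikit-learn and NumPy are installed.)

# Toy usage of the 1-NN classifier (illustrative only; the data is made up).
import numpy as np

from sklearn_questions import OneNearestNeighbor

X_train = np.array([[0.0, 0.0], [0.0, 1.0], [5.0, 5.0], [5.0, 6.0]])
y_train = np.array(["a", "a", "b", "b"])

clf = OneNearestNeighbor().fit(X_train, y_train)
print(clf.predict(np.array([[0.2, 0.1], [4.9, 5.1]])))  # expected: ['a' 'b']
print(clf.score(X_train, y_train))  # each point is its own neighbour -> 1.0

Each prediction copies the label of the closest training sample in Euclidean distance, which is exactly what the loop added in PATCH 2/4 computes.
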
From b22d0ebaf4c4a5fac9caf928fe35af1d1a97e949 Mon Sep 17 00:00:00 2001
From: blanchemahe
Date: Sun, 16 Nov 2025 11:43:04 +0100
Subject: [PATCH 4/4] fixed inconsistency with flake8

---
 sklearn_questions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn_questions.py b/sklearn_questions.py
index f9582d91..eedf237e 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -57,7 +57,7 @@ def fit(self, X, y):
 
         # XXX fix
         self.X_ = X
-        self.y_ = y  # store X and y as "learned" data, 
+        self.y_ = y  # store X and y as "learned" data,
         # ensure we have trained on X and y
         return self
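
(Side note, not part of the patch series: the value returned by score after PATCH 2/4 and 3/4 is plain classification accuracy. The sketch below, on made-up labels, shows two equivalent ways to write it.)

# Equivalent formulations of the accuracy computed by score() (illustrative only).
import numpy as np
from sklearn.metrics import accuracy_score

y_true = np.array([0, 1, 1, 0])
y_pred = np.array([0, 1, 0, 0])

acc_patch = sum(y_pred == y_true) / len(y_true)  # as written in the patch
acc_numpy = np.mean(y_pred == y_true)            # vectorized equivalent
acc_sklearn = accuracy_score(y_true, y_pred)     # library equivalent
assert acc_patch == acc_numpy == acc_sklearn == 0.75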