From 72912821cb5ef7fecf0ffa69dcb505361676f931 Mon Sep 17 00:00:00 2001 From: coasensi Date: Thu, 13 Nov 2025 16:21:31 +0100 Subject: [PATCH 1/4] questions --- numpy_questions.py | 23 ++++++++++++++++++- sklearn_questions.py | 54 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 74 insertions(+), 3 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index 21fcec4b..1faff71f 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -39,6 +39,15 @@ def max_index(X): """ i = 0 j = 0 + max = 0 + + for row in range(X.shape[0]): + for column in range(X.shape[1]): + if X[row,column] > max: + i = row + j = column + max = X[row,column] + # TODO @@ -64,4 +73,16 @@ def wallis_product(n_terms): """ # XXX : The n_terms is an int that corresponds to the number of # terms in the product. For example 10000. - return 0. + + pi=2 + + + if n_terms == 0: + n_terms = 1 + + for i in range(1,n_terms): + pi *= (4*i**2)/(4*i**2-1) + + return pi + + diff --git a/sklearn_questions.py b/sklearn_questions.py index f65038c6..f20cfa5b 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -38,19 +38,47 @@ def fit(self, X, y): """Write docstring. And describe parameters + + Parameters + ----------- + self : instance of the class (OneNearestNeighbor) + + X : matrix of the features ; independent and explanatory variables + + y : matrix of the explained variable + + Returns + ------- + self : fitted estimator + """ X, y = check_X_y(X, y) check_classification_targets(y) self.classes_ = np.unique(y) self.n_features_in_ = X.shape[1] - + # XXX fix + + self.X_train_ = X + self.y_train_ = y return self def predict(self, X): """Write docstring. And describe parameters + + Parameters + ---------- + + self : instance of the class (OneNearestNeighbor) + + X : features matrix + + Returns + ------- + + y_pred : predictions for y based on inputted X """ check_is_fitted(self) X = check_array(X) @@ -60,15 +88,37 @@ def predict(self, X): ) # XXX fix + + for i, X in enumerate(X): + #compute distance to all training samples + distances = np.linalg.norm(self.X_train_ - x, axis=1) + #pick smallest distance index + index_min = np.argmin(distances) + #assign the label + y_pred[i] = self.y_train_[index_min] return y_pred def score(self, X, y): """Write docstring. And describe parameters + + Parameters + ---------- + + self : instance of the class (OneNearestNeighbor) + + X : features matrix + + y : explained variables matrix + + Returns + ------- + + """ X, y = check_X_y(X, y) y_pred = self.predict(X) # XXX fix - return y_pred.sum() + return np.mean(y_pred == y) From d57f18c2fe013c2e54803f9574d22ec6e2b4b9de Mon Sep 17 00:00:00 2001 From: coasensi Date: Thu, 13 Nov 2025 17:48:58 +0100 Subject: [PATCH 2/4] commit 2 --- numpy_questions.py | 41 +++++++++---------------- sklearn_questions.py | 72 ++++++++++++++++++-------------------------- 2 files changed, 45 insertions(+), 68 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index 1faff71f..e4bc7fce 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -37,22 +37,18 @@ def max_index(X): If the input is not a numpy array or if the shape is not 2D. """ - i = 0 - j = 0 - max = 0 + if X.ndim() != 2: + raise ValueError("input is not of dimension 2") + if type(X) is not np.ndarray: + raise ValueError("input is not np array") + + max = np.argmax(X) + n_columns = X.shape[1] - for row in range(X.shape[0]): - for column in range(X.shape[1]): - if X[row,column] > max: - i = row - j = column - max = X[row,column] - - - # TODO - - return i, j + row = max // n_columns + column = max % n_columns + return (row, column) def wallis_product(n_terms): """Implement the Wallis product to compute an approximation of pi. @@ -71,18 +67,11 @@ def wallis_product(n_terms): pi : float The approximation of order `n_terms` of pi using the Wallis product. """ - # XXX : The n_terms is an int that corresponds to the number of - # terms in the product. For example 10000. - - pi=2 - - if n_terms == 0: - n_terms = 1 - - for i in range(1,n_terms): - pi *= (4*i**2)/(4*i**2-1) - - return pi + return 1 + else: + n = 4 * np.arange(1, n_terms + 1) ** 2 + pi = 2 * np.prod(n / (n - 1)) + return pi diff --git a/sklearn_questions.py b/sklearn_questions.py index f20cfa5b..b86cfad4 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -35,90 +35,78 @@ def __init__(self): # noqa: D107 pass def fit(self, X, y): - """Write docstring. - - And describe parameters + """ Parameters ----------- self : instance of the class (OneNearestNeighbor) - X : matrix of the features ; independent and explanatory variables + X : array of shape(n_samples, n_features) + matrix of the features; independent and explanatory variables - y : matrix of the explained variable + y : array of shape(n_samples,) + matrix of the explained variable Returns ------- - self : fitted estimator + self : object + fitted estimator """ X, y = check_X_y(X, y) check_classification_targets(y) + self.classes_ = np.unique(y) self.n_features_in_ = X.shape[1] - - # XXX fix self.X_train_ = X self.y_train_ = y + return self def predict(self, X): - """Write docstring. - - And describe parameters - + + """ Parameters ---------- - self : instance of the class (OneNearestNeighbor) - - X : features matrix + X : array of shape(n_samples, n_features) + matrix of the features; independent and explanatory variables Returns ------- - y_pred : predictions for y based on inputted X + y_pred : array of shape(n_samples,) + predictions for y """ check_is_fitted(self) X = check_array(X) - y_pred = np.full( - shape=len(X), fill_value=self.classes_[0], - dtype=self.classes_.dtype - ) - - # XXX fix - - for i, X in enumerate(X): - #compute distance to all training samples - distances = np.linalg.norm(self.X_train_ - x, axis=1) - #pick smallest distance index - index_min = np.argmin(distances) - #assign the label - y_pred[i] = self.y_train_[index_min] - return y_pred + y_pred = [] + if X.shape[1] != self.n_features_in_: + raise ValueError(f"X has {X.shape[1]} features but expects {self.n_features_in_} features as input") - def score(self, X, y): - """Write docstring. + for value in X: + distance = np.linalg.norm(self.X_train_ - value, axis=1) + index = np.argmin(distance) + y_pred.append(self.y_train_[index]) - And describe parameters + return np.array(y_pred) - Parameters + def score(self, X, y): + """Parameters ---------- - self : instance of the class (OneNearestNeighbor) - - X : features matrix + X : array of shape(n_samples, n_features) + matrix of the features; independent and explanatory variables - y : explained variables matrix + y : array of shape(n_samples,) + explained variable Returns ------- - + accuracy score : type float """ X, y = check_X_y(X, y) y_pred = self.predict(X) - - # XXX fix return np.mean(y_pred == y) From e933f7ffe0e2d865e07fe7d3e4a117a885f57945 Mon Sep 17 00:00:00 2001 From: coasensi Date: Fri, 14 Nov 2025 12:24:46 +0100 Subject: [PATCH 3/4] tests --- numpy_questions.py | 13 +++++++------ sklearn_questions.py | 24 +++++++++++++++--------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index e4bc7fce..59e66dd7 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -37,11 +37,13 @@ def max_index(X): If the input is not a numpy array or if the shape is not 2D. """ - if X.ndim() != 2: - raise ValueError("input is not of dimension 2") + if type(X) is not np.ndarray: raise ValueError("input is not np array") - + + if X.ndim != 2: + raise ValueError("input is not of dimension 2") + max = np.argmax(X) n_columns = X.shape[1] @@ -50,6 +52,7 @@ def max_index(X): return (row, column) + def wallis_product(n_terms): """Implement the Wallis product to compute an approximation of pi. @@ -68,10 +71,8 @@ def wallis_product(n_terms): The approximation of order `n_terms` of pi using the Wallis product. """ if n_terms == 0: - return 1 + return 1 else: n = 4 * np.arange(1, n_terms + 1) ** 2 pi = 2 * np.prod(n / (n - 1)) return pi - - diff --git a/sklearn_questions.py b/sklearn_questions.py index b86cfad4..68e806af 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -22,13 +22,12 @@ import numpy as np from sklearn.base import BaseEstimator from sklearn.base import ClassifierMixin -from sklearn.utils.validation import check_X_y -from sklearn.utils.validation import check_array +from sklearn.utils.validation import validate_data from sklearn.utils.validation import check_is_fitted from sklearn.utils.multiclass import check_classification_targets -class OneNearestNeighbor(BaseEstimator, ClassifierMixin): +class OneNearestNeighbor(ClassifierMixin, BaseEstimator): "OneNearestNeighbor classifier." def __init__(self): # noqa: D107 @@ -53,11 +52,13 @@ def fit(self, X, y): fitted estimator """ - X, y = check_X_y(X, y) + # X, y = check_X_y(X, y) + X, y = validate_data(self, X, y, ensure_2d=True) + check_classification_targets(y) self.classes_ = np.unique(y) - self.n_features_in_ = X.shape[1] + # self.n_features_in_ = X.shape[1] self.X_train_ = X self.y_train_ = y @@ -65,7 +66,6 @@ def fit(self, X, y): return self def predict(self, X): - """ Parameters ---------- @@ -80,10 +80,15 @@ def predict(self, X): predictions for y """ check_is_fitted(self) - X = check_array(X) + + # X = check_array(X) + X = validate_data(self, X, reset=False, ensure_2d=True) + y_pred = [] if X.shape[1] != self.n_features_in_: - raise ValueError(f"X has {X.shape[1]} features but expects {self.n_features_in_} features as input") + raise ValueError( + f"""X has {X.shape[1]} features but expects + {self.n_features_in_} features as input""") for value in X: distance = np.linalg.norm(self.X_train_ - value, axis=1) @@ -107,6 +112,7 @@ def score(self, X, y): accuracy score : type float """ - X, y = check_X_y(X, y) + check_is_fitted(self) + X, y = validate_data(self, X, y, ensure_2d=True, reset=False) y_pred = self.predict(X) return np.mean(y_pred == y) From 7167b068ca96681a0c706cd479d16a5885c2f8db Mon Sep 17 00:00:00 2001 From: charlie Date: Fri, 14 Nov 2025 14:20:08 +0100 Subject: [PATCH 4/4] tests + python 3.8 compatibility --- numpy_questions.py | 1 - sklearn_questions.py | 26 ++++++++++++-------------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index 59e66dd7..2b964fb3 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -37,7 +37,6 @@ def max_index(X): If the input is not a numpy array or if the shape is not 2D. """ - if type(X) is not np.ndarray: raise ValueError("input is not np array") diff --git a/sklearn_questions.py b/sklearn_questions.py index 68e806af..477e5f35 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -22,20 +22,20 @@ import numpy as np from sklearn.base import BaseEstimator from sklearn.base import ClassifierMixin -from sklearn.utils.validation import validate_data +from sklearn.utils.validation import check_X_y, check_array from sklearn.utils.validation import check_is_fitted from sklearn.utils.multiclass import check_classification_targets class OneNearestNeighbor(ClassifierMixin, BaseEstimator): - "OneNearestNeighbor classifier." + """OneNearestNeighbor classifier.""" def __init__(self): # noqa: D107 pass def fit(self, X, y): + """Fit the OneNearestNeighbor model. - """ Parameters ----------- self : instance of the class (OneNearestNeighbor) @@ -52,13 +52,12 @@ def fit(self, X, y): fitted estimator """ - # X, y = check_X_y(X, y) - X, y = validate_data(self, X, y, ensure_2d=True) + X, y = check_X_y(X, y) check_classification_targets(y) self.classes_ = np.unique(y) - # self.n_features_in_ = X.shape[1] + self.n_features_in_ = X.shape[1] self.X_train_ = X self.y_train_ = y @@ -66,23 +65,21 @@ def fit(self, X, y): return self def predict(self, X): - """ + """Predict the class. + Parameters ---------- - X : array of shape(n_samples, n_features) matrix of the features; independent and explanatory variables Returns ------- - y_pred : array of shape(n_samples,) predictions for y """ check_is_fitted(self) - # X = check_array(X) - X = validate_data(self, X, reset=False, ensure_2d=True) + X = check_array(X) y_pred = [] if X.shape[1] != self.n_features_in_: @@ -98,9 +95,10 @@ def predict(self, X): return np.array(y_pred) def score(self, X, y): - """Parameters - ---------- + """Compute accuracy score. + Parameters + ---------- X : array of shape(n_samples, n_features) matrix of the features; independent and explanatory variables @@ -113,6 +111,6 @@ def score(self, X, y): """ check_is_fitted(self) - X, y = validate_data(self, X, y, ensure_2d=True, reset=False) + X, y = check_X_y(X, y) y_pred = self.predict(X) return np.mean(y_pred == y)