From 9757ba78d812c75b89dedcddcb12627698c073d4 Mon Sep 17 00:00:00 2001 From: Mukesh Kumar Chauhan Date: Sun, 16 Nov 2025 15:40:59 +0100 Subject: [PATCH 1/6] Complete Part B: numpy and sklearn questions --- numpy_questions.py | 24 ++++++++++++-------- sklearn_questions.py | 54 +++++++++++++++++++++++++------------------- 2 files changed, 46 insertions(+), 32 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index 21fcec4b..98a30318 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -37,10 +37,13 @@ def max_index(X): If the input is not a numpy array or if the shape is not 2D. """ - i = 0 - j = 0 + if not isinstance(X, np.ndarray): + raise ValueError("Input must be a numpy array.") + if X.ndim != 2: + raise ValueError("Input must be a 2D array.") - # TODO + flat_index = np.argmax(X) + i, j = np.unravel_index(flat_index, X.shape) return i, j @@ -48,9 +51,6 @@ def max_index(X): def wallis_product(n_terms): """Implement the Wallis product to compute an approximation of pi. - See: - https://en.wikipedia.org/wiki/Wallis_product - Parameters ---------- n_terms : int @@ -62,6 +62,12 @@ def wallis_product(n_terms): pi : float The approximation of order `n_terms` of pi using the Wallis product. """ - # XXX : The n_terms is an int that corresponds to the number of - # terms in the product. For example 10000. - return 0. + if n_terms == 0: + return 1.0 + + product = 1.0 + + for k in range(1, n_terms + 1): + product *= (4 * k * k) / (4 * k * k - 1) + + return 2 * product diff --git a/sklearn_questions.py b/sklearn_questions.py index f65038c6..5969ef29 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -26,49 +26,57 @@ from sklearn.utils.validation import check_array from sklearn.utils.validation import check_is_fitted from sklearn.utils.multiclass import check_classification_targets +from sklearn.utils.validation import validate_data -class OneNearestNeighbor(BaseEstimator, ClassifierMixin): +class OneNearestNeighbor(ClassifierMixin, BaseEstimator): "OneNearestNeighbor classifier." def __init__(self): # noqa: D107 pass def fit(self, X, y): - """Write docstring. + """Fit the OneNearestNeighbor classifier.""" + X, y = validate_data( + self, X, y, + dtype="numeric", + ensure_2d=True + ) - And describe parameters - """ - X, y = check_X_y(X, y) check_classification_targets(y) + + # Store training data + self.X_ = X + self.y_ = y + + # Unique classes self.classes_ = np.unique(y) - self.n_features_in_ = X.shape[1] - # XXX fix return self def predict(self, X): - """Write docstring. - - And describe parameters - """ + """Predict class labels using 1-nearest neighbor.""" check_is_fitted(self) - X = check_array(X) - y_pred = np.full( - shape=len(X), fill_value=self.classes_[0], - dtype=self.classes_.dtype + + X = validate_data( + self, X, + dtype="numeric", + ensure_2d=True, + reset=False ) - # XXX fix + n_test = X.shape[0] + y_pred = np.empty(n_test, dtype=self.y_.dtype) + + for i in range(n_test): + distances = np.sum((self.X_ - X[i]) ** 2, axis=1) + nn_index = np.argmin(distances) + y_pred[i] = self.y_[nn_index] + return y_pred def score(self, X, y): - """Write docstring. - - And describe parameters - """ + """Return mean accuracy.""" X, y = check_X_y(X, y) y_pred = self.predict(X) - - # XXX fix - return y_pred.sum() + return np.mean(y_pred == y) From 8e26399f74ce43cee97da22781745a31303debcc Mon Sep 17 00:00:00 2001 From: Mukesh Kumar Chauhan Date: Sun, 16 Nov 2025 15:50:45 +0100 Subject: [PATCH 2/6] Bhavesh: Fixed flake8 and pydocstyle issues --- numpy_questions.py | 1 - sklearn_questions.py | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index 98a30318..0c2b4506 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -64,7 +64,6 @@ def wallis_product(n_terms): """ if n_terms == 0: return 1.0 - product = 1.0 for k in range(1, n_terms + 1): diff --git a/sklearn_questions.py b/sklearn_questions.py index 5969ef29..5bcc74d4 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -23,14 +23,13 @@ from sklearn.base import BaseEstimator from sklearn.base import ClassifierMixin from sklearn.utils.validation import check_X_y -from sklearn.utils.validation import check_array from sklearn.utils.validation import check_is_fitted from sklearn.utils.multiclass import check_classification_targets from sklearn.utils.validation import validate_data class OneNearestNeighbor(ClassifierMixin, BaseEstimator): - "OneNearestNeighbor classifier." + """OneNearestNeighbor classifier.""" def __init__(self): # noqa: D107 pass From 2beb1c62fea549bed491e62c66422351cb1d4d5d Mon Sep 17 00:00:00 2001 From: Mukesh Kumar Chauhan Date: Sun, 16 Nov 2025 16:09:44 +0100 Subject: [PATCH 3/6] Bhavesh: CI Fix 1 --- sklearn_questions.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/sklearn_questions.py b/sklearn_questions.py index 5bcc74d4..a9c10b9d 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -26,6 +26,7 @@ from sklearn.utils.validation import check_is_fitted from sklearn.utils.multiclass import check_classification_targets from sklearn.utils.validation import validate_data +from sklearn.utils.validation import check_array class OneNearestNeighbor(ClassifierMixin, BaseEstimator): @@ -36,11 +37,8 @@ def __init__(self): # noqa: D107 def fit(self, X, y): """Fit the OneNearestNeighbor classifier.""" - X, y = validate_data( - self, X, y, - dtype="numeric", - ensure_2d=True - ) + # Validate input (numeric only) + X, y = check_X_y(X, y, dtype="numeric") check_classification_targets(y) @@ -48,15 +46,19 @@ def fit(self, X, y): self.X_ = X self.y_ = y - # Unique classes + # Required sklearn attribute self.classes_ = np.unique(y) + # Required for compatibility with predict + self.n_features_in_ = X.shape[1] + return self def predict(self, X): """Predict class labels using 1-nearest neighbor.""" check_is_fitted(self) + # Validate input and check feature consistency X = validate_data( self, X, dtype="numeric", @@ -76,6 +78,6 @@ def predict(self, X): def score(self, X, y): """Return mean accuracy.""" - X, y = check_X_y(X, y) + X, y = check_X_y(X, y, dtype="numeric") y_pred = self.predict(X) return np.mean(y_pred == y) From 2354fb044e708809a01084dfa0c4224540d66c07 Mon Sep 17 00:00:00 2001 From: Mukesh Kumar Chauhan Date: Sun, 16 Nov 2025 16:27:54 +0100 Subject: [PATCH 4/6] Bhavesh: CI Fix 2 --- sklearn_questions.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/sklearn_questions.py b/sklearn_questions.py index a9c10b9d..f547d6cc 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -20,13 +20,37 @@ `pydocstyle` that you can also call at the root of the repo. """ import numpy as np + from sklearn.base import BaseEstimator from sklearn.base import ClassifierMixin + from sklearn.utils.validation import check_X_y +from sklearn.utils.validation import check_array from sklearn.utils.validation import check_is_fitted from sklearn.utils.multiclass import check_classification_targets -from sklearn.utils.validation import validate_data -from sklearn.utils.validation import check_array + +# Try importing validate_data from newer sklearn +try: + from sklearn.utils.validation import validate_data +except ImportError: + # Fallback validate_data for older sklearn versions (used on CI) + def validate_data(estimator, X, y=None, **kwargs): + """Fallback implementation of validate_data + for older sklearn versions.""" + if y is not None: + X, y = check_X_y(X, y, **kwargs) + estimator.n_features_in_ = X.shape[1] + return X, y + else: + X_checked = check_array(X, **kwargs) + if kwargs.get("reset") is False: + if X_checked.shape[1] != estimator.n_features_in_: + raise ValueError( + f"X has {X_checked.shape[1]} features, but " + f"{estimator.__class__.__name__} was fitted with " + f"{estimator.n_features_in_} features." + ) + return X_checked class OneNearestNeighbor(ClassifierMixin, BaseEstimator): From 6191e47743924c4cac16633b87db10fdce363a49 Mon Sep 17 00:00:00 2001 From: Mukesh Kumar Chauhan Date: Sun, 16 Nov 2025 16:40:02 +0100 Subject: [PATCH 5/6] Bhavesh: CI Fix 3 --- sklearn_questions.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/sklearn_questions.py b/sklearn_questions.py index f547d6cc..32abb587 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -35,8 +35,27 @@ except ImportError: # Fallback validate_data for older sklearn versions (used on CI) def validate_data(estimator, X, y=None, **kwargs): - """Fallback implementation of validate_data - for older sklearn versions.""" + """ + Fallback implementation of validate_data for older sklearn versions. + + Parameters + ---------- + estimator : estimator instance + The estimator calling this function. + X : array-like + Input data. + y : array-like, optional + Target values. + kwargs : dict + Additional arguments, such as dtype or ensure_2d. + + Returns + ------- + X : ndarray + Validated input data. + y : ndarray, optional + Validated target values when provided. + """ if y is not None: X, y = check_X_y(X, y, **kwargs) estimator.n_features_in_ = X.shape[1] From 6493ddb15ff2bfd90693dd5d22e15c1b01c72d34 Mon Sep 17 00:00:00 2001 From: Mukesh Kumar Chauhan Date: Sun, 16 Nov 2025 16:43:15 +0100 Subject: [PATCH 6/6] Bhavesh: CI Fix 4 --- sklearn_questions.py | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/sklearn_questions.py b/sklearn_questions.py index 32abb587..9df31309 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -47,28 +47,17 @@ def validate_data(estimator, X, y=None, **kwargs): y : array-like, optional Target values. kwargs : dict - Additional arguments, such as dtype or ensure_2d. - - Returns - ------- - X : ndarray - Validated input data. - y : ndarray, optional - Validated target values when provided. + Additional arguments (e.g., dtype, ensure_2d, reset). """ + # Remove unsupported kwargs for check_array/check_X_y + kwargs.pop("reset", None) + if y is not None: X, y = check_X_y(X, y, **kwargs) estimator.n_features_in_ = X.shape[1] return X, y else: X_checked = check_array(X, **kwargs) - if kwargs.get("reset") is False: - if X_checked.shape[1] != estimator.n_features_in_: - raise ValueError( - f"X has {X_checked.shape[1]} features, but " - f"{estimator.__class__.__name__} was fitted with " - f"{estimator.n_features_in_} features." - ) return X_checked