Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 20 additions & 6 deletions numpy_questions.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,18 @@ def max_index(X):
If the input is not a numpy array or
if the shape is not 2D.
"""
i = 0
j = 0
# Validate input
if not isinstance(X, np.ndarray):
raise ValueError("X must be a numpy ndarray")
if X.ndim != 2:
raise ValueError(
"X must be a 2D array of shape (n_samples, n_features)"
)

# TODO

return i, j
# Find flat index of max then convert to 2D indices
flat_idx = np.argmax(X)
i, j = np.unravel_index(flat_idx, X.shape)
return int(i), int(j)


def wallis_product(n_terms):
    """Approximate pi with the Wallis product truncated to ``n_terms`` factors.

    The k-th factor is ``4 k**2 / (4 k**2 - 1)`` for ``k = 1, ..., n_terms``;
    the returned value is twice the product of those factors.

    Parameters
    ----------
    n_terms : int
        Number of factors in the product, for example 10000. With
        ``n_terms == 0`` the product is empty (equal to 1), so the
        approximation is ``2.0``.

    Returns
    -------
    pi : float
        Twice the truncated Wallis product.
    """
    # XXX : The n_terms is an int that corresponds to the number of
    # terms in the product. For example 10000.
    n = np.arange(1, int(n_terms) + 1, dtype=float)
    terms = (4 * n * n) / (4 * n * n - 1)
    # np.prod of an empty array is 1.0, so the zero-term case needs no
    # special branch and consistently yields 2 * 1 = 2.0 like every other
    # path (the previous special case returned the bare product, 1.0).
    product = np.prod(terms)
    # pi = 2 * product
    return float(2.0 * product)
151 changes: 124 additions & 27 deletions sklearn_questions.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,53 +22,150 @@
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.validation import check_X_y
from sklearn.utils.validation import check_array
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.multiclass import check_classification_targets


class OneNearestNeighbor(BaseEstimator, ClassifierMixin):
"OneNearestNeighbor classifier."
# Compatibility shim for scikit-learn versions without `validate_data`.
# Only ImportError is caught: that is what a missing name raises, and any
# other failure while importing should surface instead of being masked.
try:  # pragma: no cover - import behavior depends on sklearn version
    from sklearn.utils.validation import validate_data as _sk_validate_data
except ImportError:  # pragma: no cover
    _sk_validate_data = None


def _validate_data(estimator, X, y=None, reset=True):
    """Validate X and optional y with sklearn or a local fallback.

    - If sklearn provides `validate_data`, delegate to it.
    - Otherwise, use `check_X_y` / `check_array` and ensure `n_features_in_`
      is set during fit (reset=True) and enforced during predict/score
      (reset=False) with the standard error message format expected by
      estimator checks.

    Parameters
    ----------
    estimator : object
        Estimator whose `n_features_in_` attribute is set or checked.
    X : array-like
        Input data to validate.
    y : array-like or None, default=None
        Optional targets; when given, they are validated together with X.
    reset : bool, default=True
        If True, record the number of features of X on the estimator.
        If False, compare it with a previously recorded value, if any.

    Returns
    -------
    X_checked or (X_checked, y_checked)
        The validated X alone when `y` is None, otherwise the validated pair.

    Raises
    ------
    ValueError
        In the fallback path, when `reset` is False and the number of
        columns of X differs from `estimator.n_features_in_`.
    """
    has_y = y is not None

    if _sk_validate_data is not None:
        if has_y:
            return _sk_validate_data(estimator, X, y, reset=reset)
        return _sk_validate_data(estimator, X, reset=reset)

    # Fallback for older scikit-learn versions
    if has_y:
        X_checked, y_checked = check_X_y(X, y)
    else:
        X_checked = check_array(X)

    # Single feature-count bookkeeping path shared by both cases.
    if reset:
        estimator.n_features_in_ = X_checked.shape[1]
    elif (
        hasattr(estimator, "n_features_in_")
        and X_checked.shape[1] != estimator.n_features_in_
    ):
        raise ValueError(
            f"X has {X_checked.shape[1]} features, but "
            f"{estimator.__class__.__name__} is expecting "
            f"{estimator.n_features_in_} features as input"
        )

    if has_y:
        return X_checked, y_checked
    return X_checked


class OneNearestNeighbor(ClassifierMixin, BaseEstimator):
    """One-nearest neighbor classifier.

    This estimator assigns to each input sample the target of the closest
    training sample using the Euclidean distance.

    The classifier exposes `classes_` and `n_features_in_` after fitting and
    follows the scikit-learn estimator API.
    """

    def __init__(self):  # noqa: D107
        pass

    # No custom tags to maximize cross-version compatibility

    def fit(self, X, y):
        """Fit the classifier on the training data.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Training data.
        y : ndarray of shape (n_samples,)
            Target labels.

        Returns
        -------
        self : OneNearestNeighbor
            Fitted estimator.
        """
        if y is None:
            # Be tolerant for older/newer sklearn checks that may call
            # fit(X, None) when requires_y tag is not enforced.
            X = _validate_data(self, X, reset=True)
            self.X_ = X
            self.y_ = None
            self.n_features_in_ = X.shape[1]
            return self
        X, y = _validate_data(self, X, y)
        check_classification_targets(y)
        self.classes_ = np.unique(y)
        self.n_features_in_ = X.shape[1]

        # store training set for nearest neighbor lookup
        self.X_ = X
        self.y_ = y
        return self

    def predict(self, X):
        """Predict class labels for samples in X.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Input samples.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            Predicted class labels.

        Raises
        ------
        ValueError
            If the estimator was fitted with ``y=None`` and therefore holds
            no labels to return.
        """
        check_is_fitted(self)
        X = _validate_data(self, X, reset=False)
        if self.y_ is None:
            # fit(X, None) stores no targets; fail with a clear message
            # rather than an opaque error from indexing None below.
            raise ValueError(
                "This OneNearestNeighbor instance was fitted without "
                "targets (y=None) and cannot predict."
            )
        # Compute pairwise squared Euclidean distances efficiently via
        # ||x - t||^2 = ||t||^2 + ||x||^2 - 2 x.t
        A = np.sum(self.X_ ** 2, axis=1)[None, :]  # shape (1, n_train)
        B = np.sum(X ** 2, axis=1)[:, None]  # shape (n_test, 1)
        C = X @ self.X_.T  # shape (n_test, n_train)
        d2 = A + B - 2 * C
        nn_index = np.argmin(d2, axis=1)
        return self.y_[nn_index]

    def score(self, X, y):
        """Return the mean accuracy on the given test data and labels.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Test samples.
        y : ndarray of shape (n_samples,)
            True labels for X.

        Returns
        -------
        score : float
            Mean accuracy of predictions on X with respect to y.
        """
        X, y = _validate_data(self, X, y, reset=False)
        y_pred = self.predict(X)
        return float(np.mean(y_pred == y))