25 changes: 17 additions & 8 deletions numpy_questions.py
@@ -37,12 +37,14 @@ def max_index(X):
If the input is not a numpy array or
if the shape is not 2D.
"""
i = 0
j = 0
if not isinstance(X, np.ndarray):
raise ValueError("X must be a numpy array.")
if X.ndim != 2:
raise ValueError("X must be a 2D array.")

# TODO

return i, j
flat_idx = np.argmax(X)
i, j = np.unravel_index(flat_idx, X.shape)
return int(i), int(j)
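As a quick sanity check of the argmax/unravel_index approach (a standalone sketch with a made-up array, not part of the submitted file):

import numpy as np

X = np.array([[1.0, 7.0, 3.0],
              [4.0, 5.0, 6.0]])
flat_idx = np.argmax(X)                      # 1: position of the max in the flattened array
i, j = np.unravel_index(flat_idx, X.shape)   # maps back to row/column indices
print(int(i), int(j))                        # 0 1 -> X[0, 1] == 7.0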


def wallis_product(n_terms):
@@ -62,6 +64,13 @@ def wallis_product(n_terms):
pi : float
The approximation of order `n_terms` of pi using the Wallis product.
"""
# XXX : The n_terms is an int that corresponds to the number of
# terms in the product. For example 10000.
return 0.
if n_terms < 0:
raise ValueError("n_terms must be non-negative.")

if n_terms == 0:
return 1.0

n = np.arange(1, n_terms + 1, dtype=float)
terms = (2 * n / (2 * n - 1)) * (2 * n / (2 * n + 1))
product = np.prod(terms)
return float(2 * product)
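The vectorized code above implements the Wallis product pi/2 = prod_{n>=1} (2n/(2n-1)) * (2n/(2n+1)). A rough convergence check (standalone sketch that reproduces the same formula so it runs on its own):

import numpy as np

def wallis_product(n_terms):
    # Same vectorized Wallis product as in the diff above.
    if n_terms == 0:
        return 1.0
    n = np.arange(1, n_terms + 1, dtype=float)
    terms = (2 * n / (2 * n - 1)) * (2 * n / (2 * n + 1))
    return float(2 * np.prod(terms))

print(abs(wallis_product(100_000) - np.pi))  # on the order of 1e-5; the error shrinks like 1/n_terms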
107 changes: 80 additions & 27 deletions sklearn_questions.py
@@ -16,59 +16,112 @@
calling `flake8` at the root of the repo.

Finally, you need to write docstring similar to the one in `numpy_questions`
for the methods you code and for the class. The docstring will be checked using
`pydocstyle` that you can also call at the root of the repo.
for the methods you code and for the class. The docstring will be checked
using `pydocstyle` that you can also call at the root of the repo.
"""
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin
from sklearn.utils.validation import check_X_y
from sklearn.utils.validation import check_array
from sklearn.utils.validation import check_is_fitted
from sklearn.base import ClassifierMixin, BaseEstimator
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.utils.multiclass import check_classification_targets


class OneNearestNeighbor(BaseEstimator, ClassifierMixin):
"OneNearestNeighbor classifier."
class OneNearestNeighbor(ClassifierMixin, BaseEstimator):
"""One-nearest-neighbor classifier.

This classifier predicts, for each input sample, the target of the
closest training sample in Euclidean distance.
"""

def __init__(self): # noqa: D107
"""Initialize the OneNearestNeighbor classifier."""
# This estimator has no hyper-parameters.
pass

def fit(self, X, y):
"""Write docstring.

And describe parameters
"""Fit the one-nearest-neighbor classifier.

The training samples and their labels are stored so that predictions
can be made by finding the closest training sample to each new point.

Parameters
----------
X : array-like of shape (n_samples, n_features)
Training data.
y : array-like of shape (n_samples,)
Target labels.

Returns
-------
self : OneNearestNeighbor
Fitted estimator.
"""
X, y = check_X_y(X, y)
check_classification_targets(y)

# Store training data and targets
self.X_ = X
self.y_ = y

# Attributes expected by scikit-learn
self.classes_ = np.unique(y)
self.n_features_in_ = X.shape[1]

# XXX fix
return self
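A minimal sketch of what fit stores, using made-up toy data and assuming the class is importable from sklearn_questions:

import numpy as np
from sklearn_questions import OneNearestNeighbor  # assumed import path

X_train = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]])
y_train = np.array(["a", "b", "b"])

clf = OneNearestNeighbor().fit(X_train, y_train)
print(clf.classes_)        # ['a' 'b']
print(clf.n_features_in_)  # 2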

def predict(self, X):
"""Write docstring.
"""Predict class labels for samples in X.

Each sample in X is assigned the label of the closest training sample
stored during :meth:`fit`, using the Euclidean distance.

Parameters
----------
X : array-like of shape (n_samples, n_features)
Input samples to classify.

And describe parameters
Returns
-------
y_pred : ndarray of shape (n_samples,)
Predicted class labels for each sample in X.
"""
check_is_fitted(self)
check_is_fitted(self, attributes=["X_", "y_", "n_features_in_"])
X = check_array(X)
y_pred = np.full(
shape=len(X), fill_value=self.classes_[0],
dtype=self.classes_.dtype
)

# XXX fix
# Enforce consistency with the number of features seen during fit
n_features = X.shape[1]
if n_features != self.n_features_in_:
msg = (
f"X has {n_features} features, but {self.__class__.__name__} "
f"is expecting {self.n_features_in_} features as input"
)
raise ValueError(msg)

# Compute squared Euclidean distances to all training samples:
# diff shape: (n_samples_test, n_samples_train, n_features)
diff = X[:, np.newaxis, :] - self.X_[np.newaxis, :, :]
distances = np.sum(diff ** 2, axis=2)

# Index of nearest neighbor in the training set for each test sample
nearest_idx = np.argmin(distances, axis=1)
y_pred = self.y_[nearest_idx]

return y_pred
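To illustrate the broadcasting step used above (a standalone sketch with random arrays), the squared distances can be cross-checked against scikit-learn's pairwise euclidean_distances:

import numpy as np
from sklearn.metrics.pairwise import euclidean_distances

rng = np.random.RandomState(0)
X_test = rng.rand(4, 3)   # 4 test samples, 3 features
X_train = rng.rand(5, 3)  # 5 training samples

# (4, 1, 3) - (1, 5, 3) broadcasts to (4, 5, 3); summing over features gives (4, 5).
diff = X_test[:, np.newaxis, :] - X_train[np.newaxis, :, :]
sq_dist = np.sum(diff ** 2, axis=2)

assert np.allclose(sq_dist, euclidean_distances(X_test, X_train) ** 2)
print(np.argmin(sq_dist, axis=1))  # nearest training index for each test sample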

def score(self, X, y):
"""Write docstring.

And describe parameters
"""Return the mean accuracy on the given test data and labels.

Parameters
----------
X : array-like of shape (n_samples, n_features)
Test samples.
y : array-like of shape (n_samples,)
True labels for X.

Returns
-------
score : float
Mean accuracy of predictions on X with respect to y.
"""
X, y = check_X_y(X, y)
y_pred = self.predict(X)

# XXX fix
return y_pred.sum()
return float(np.mean(y_pred == y))
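An end-to-end sketch of the estimator on made-up data (assuming the module is importable as sklearn_questions). Since every training point is its own nearest neighbour at distance zero, a 1-NN classifier scores 1.0 on its own training set:

import numpy as np
from sklearn_questions import OneNearestNeighbor  # assumed import path

X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0, 0, 1, 1])

clf = OneNearestNeighbor().fit(X, y)
print(clf.predict(np.array([[0.2], [2.8]])))  # [0 1]: labels of the nearest training points
print(clf.score(X, y))                        # 1.0 on the training data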