Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions numpy_questions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,16 @@ def max_index(X):
i = 0
j = 0

# TODO
if not isinstance(X, np.ndarray):
raise ValueError("Input must be a numpy array")

if X.ndim != 2:
raise ValueError("Input must be a 2D array")

flat_index = np.argmax(X)
i, j = np.unravel_index(flat_index, X.shape)

i, j = int(i), int(j)

return i, j

Expand All @@ -64,4 +73,11 @@ def wallis_product(n_terms):
"""
# XXX : The n_terms is an int that corresponds to the number of
# terms in the product. For example 10000.
return 0.
if n_terms == 0:
return 1.0

product = 1.0
for n in range(1, n_terms + 1):
product *= (4 * n * n) / (4 * n * n - 1)

return 2 * product
93 changes: 68 additions & 25 deletions sklearn_questions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
The nearest neighbor classifier predicts for a point X_i the target y_k of
the training sample X_k which is the closest to X_i. We measure proximity with
the Euclidean distance. The model will be evaluated with the accuracy (average
number of samples correctly classified). You need to implement the `fit`,
`predict` and `score` methods for this class. The code you write should pass
the test we implemented. You can run the tests by calling at the root of the
repo `pytest test_sklearn_questions.py`.
Expand All @@ -20,55 +20,98 @@
`pydocstyle` that you can also call at the root of the repo.
"""
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin
from sklearn.utils.validation import check_X_y
from sklearn.utils.validation import check_array
from sklearn.utils.validation import check_is_fitted
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.utils.multiclass import check_classification_targets


class OneNearestNeighbor(ClassifierMixin, BaseEstimator):
    """OneNearestNeighbor classifier.

    This classifier predicts the class of a sample based on the class of
    its nearest neighbor in the training set, using Euclidean distance.
    """

    def __init__(self):  # noqa: D107
        pass

    def fit(self, X, y):
        """Fit the OneNearestNeighbor classifier.

        The model is non-parametric: fitting only validates the inputs and
        memorizes the training set.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Training input samples.
        y : ndarray of shape (n_samples,)
            Target labels associated with each training sample.

        Returns
        -------
        self : OneNearestNeighbor
            Fitted estimator.
        """
        X, y = check_X_y(X, y)
        check_classification_targets(y)

        self.classes_ = np.unique(y)
        self.n_features_in_ = X.shape[1]

        # Keep the validated training data; predict() looks neighbors up in it.
        self.X_ = X
        self.y_ = y

        return self

    def predict(self, X):
        """Predict class labels for given samples.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Samples for which to predict labels.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            Predicted class label for each sample.

        Raises
        ------
        ValueError
            If the number of features of X differs from the number of
            features seen during fit.
        """
        check_is_fitted(self)
        X = check_array(X)

        # Check number of features against what was seen in fit.
        if X.shape[1] != self.n_features_in_:
            raise ValueError(
                f"X has {X.shape[1]} features, but {self.__class__.__name__} "
                f"is expecting {self.n_features_in_} features as input"
            )

        # Pairwise squared Euclidean distances via broadcasting; the
        # intermediate has shape (n_queries, n_train, n_features). The square
        # root is skipped because it does not change which neighbor is nearest.
        diff = X[:, np.newaxis, :] - self.X_[np.newaxis, :, :]
        distances = np.sum(diff ** 2, axis=2)

        # argmin returns the first minimum, so ties go to the earliest
        # training sample.
        nearest_idx = np.argmin(distances, axis=1)

        return self.y_[nearest_idx]

    def score(self, X, y):
        """Compute accuracy of the classifier.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Test samples.
        y : ndarray of shape (n_samples,)
            True target labels.

        Returns
        -------
        score : float
            Accuracy of predictions: fraction of correctly classified samples.
        """
        X, y = check_X_y(X, y)
        y_pred = self.predict(X)

        # Mean of the element-wise match mask is the accuracy.
        return float(np.mean(y_pred == y))