Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 18 additions & 6 deletions numpy_questions.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,13 @@ def max_index(X):
If the input is not a numpy array or
if the shape is not 2D.
"""
i = 0
j = 0
if not isinstance(X, np.ndarray):
raise ValueError("Input must be a numpy array.")
if X.ndim != 2:
raise ValueError(f"Input array must be 2D, not {X.ndim} dimensions.")

# TODO
index = np.argmax(X)
(i, j) = np.unravel_index(index, X.shape)

return i, j

Expand All @@ -62,6 +65,15 @@ def wallis_product(n_terms):
pi : float
The approximation of order `n_terms` of pi using the Wallis product.
"""
# XXX : The n_terms is an int that corresponds to the number of
# terms in the product. For example 10000.
return 0.
if n_terms == 0:
pi = 1.0

else:
product = 1.0
for n in range(1, n_terms + 1):
num = 4.0 * n ** 2
den = num - 1.0
product *= (num / den)
pi = 2.0 * product

return pi
103 changes: 66 additions & 37 deletions sklearn_questions.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,9 @@
"""Assignment - making a sklearn estimator.

The goal of this assignment is to implement by yourself a scikit-learn
estimator for the OneNearestNeighbor and check that it is working properly.

The nearest neighbor classifier predicts for a point X_i the target y_k of
the training sample X_k which is the closest to X_i. We measure proximity with
the Euclidean distance. The model will be evaluated with the accuracy (average
number of samples correctly classified). You need to implement the `fit`,
`predict` and `score` methods for this class. The code you write should pass
the test we implemented. You can run the tests by calling at the root of the
repo `pytest test_sklearn_questions.py`.

We also ask to respect the pep8 convention: https://pep8.org. This will be
enforced with `flake8`. You can check that there is no flake8 errors by
calling `flake8` at the root of the repo.

Finally, you need to write docstring similar to the one in `numpy_questions`
for the methods you code and for the class. The docstring will be checked using
`pydocstyle` that you can also call at the root of the repo.
"""
Implementation of a One-Nearest Neighbor classifier.

Implementation of a One-Nearest Neighbor classifier adhering to
the scikit-learn estimator interface.

"""
import numpy as np
from sklearn.base import BaseEstimator
Expand All @@ -29,46 +15,89 @@


class OneNearestNeighbor(BaseEstimator, ClassifierMixin):
    """One-Nearest-Neighbor classifier.

    This classifier predicts the class of a sample by finding the
    closest sample in the training data (using Euclidean distance)
    and assigning its class.

    Attributes
    ----------
    classes_ : ndarray
        Unique target values seen during `fit`.
    n_features_in_ : int
        Number of columns of the training data seen during `fit`.
    X_ : ndarray of shape (n_samples, n_features)
        Training samples stored by `fit`.
    y_ : ndarray of shape (n_samples,)
        Training targets stored by `fit`.
    """

    def __init__(self):
        """Initialize the OneNearestNeighbor classifier."""
        pass

    def fit(self, X, y):
        """Fit the One-Nearest-Neighbor classifier.

        This method stores the training data (X and y) to be used
        during prediction.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.
        y : array-like of shape (n_samples,)
            Target values.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        X, y = check_X_y(X, y)
        check_classification_targets(y)
        self.classes_ = np.unique(y)
        self.n_features_in_ = X.shape[1]

        # A nearest-neighbor model has nothing to learn: it only memorizes
        # the validated training set for look-up at prediction time.
        self.X_ = X
        self.y_ = y

        return self

    def predict(self, X):
        """Predict the class labels for provided data.

        For each sample in X, finds the closest training sample
        (using Euclidean distance) and returns its label.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test samples.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            Predicted class labels.
        """
        check_is_fitted(self)
        X = check_array(X)

        # Every slot is overwritten in the loop below, so an uninitialized
        # buffer with the dtype of the training targets is sufficient.
        y_pred = np.empty(shape=len(X), dtype=self.y_.dtype)

        for i, x_test in enumerate(X):
            # Squared distances preserve the ordering of Euclidean
            # distances, so the square root can be skipped.
            sq_distances = np.sum((self.X_ - x_test) ** 2, axis=1)
            nearest_index = np.argmin(sq_distances)
            y_pred[i] = self.y_[nearest_index]

        return y_pred

    def score(self, X, y):
        """Return the mean accuracy on the given test data and labels.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test samples.
        y : array-like of shape (n_samples,)
            True labels for X.

        Returns
        -------
        score : float
            Mean accuracy of self.predict(X) with respect to y.
        """
        X, y = check_X_y(X, y)
        y_pred = self.predict(X)

        # Fraction of predictions that match the true labels.
        return np.mean(y_pred == y)