Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 21 additions & 5 deletions numpy_questions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
This will be enforced with `flake8`. You can check that there are no flake8
errors by calling `flake8` at the root of the repo.
"""

import numpy as np


Expand All @@ -37,12 +38,20 @@ def max_index(X):
If the input is not a numpy array or
if the shape is not 2D.
"""
i = 0
j = 0
if not isinstance(X, np.ndarray) or (X.ndim != 2):
raise ValueError

# TODO
max_i = 0
max_j = 0
max_val = X[0][0]

return i, j
for i in range(X.shape[0]):
for j in range(X.shape[1]):
if X[i][j] > max_val:
max_val = X[i][j]
max_i = i
max_j = j
return max_i, max_j


def wallis_product(n_terms):
Expand All @@ -64,4 +73,11 @@ def wallis_product(n_terms):
"""
# XXX : The n_terms is an int that corresponds to the number of
# terms in the product. For example 10000.
return 0.
pi = 1

if n_terms == 0:
return pi
else:
for i in range(1, n_terms + 1):
pi = pi * (4 * i**2) / ((4 * i**2) - 1)
return 2 * pi
115 changes: 68 additions & 47 deletions sklearn_questions.py
Original file line number Diff line number Diff line change
@@ -1,74 +1,95 @@
"""Assignment - making a sklearn estimator.

The goal of this assignment is to implement by yourself a scikit-learn
estimator for the OneNearestNeighbor and check that it is working properly.

The nearest neighbor classifier predicts for a point X_i the target y_k of
the training sample X_k which is the closest to X_i. We measure proximity with
the Euclidean distance. The model will be evaluated with the accuracy (average
number of samples correctly classified). You need to implement the `fit`,
`predict` and `score` methods for this class. The code you write should pass
the test we implemented. You can run the tests by calling at the root of the
repo `pytest test_sklearn_questions.py`.

We also ask to respect the pep8 convention: https://pep8.org. This will be
enforced with `flake8`. You can check that there are no flake8 errors by
calling `flake8` at the root of the repo.

Finally, you need to write docstrings similar to the ones in `numpy_questions`
for the methods you code and for the class. The docstrings will be checked
using `pydocstyle`, which you can also call at the root of the repo.
"""
"""Assignment - making a sklearn estimator."""

import numpy as np
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin
from sklearn.utils.validation import check_X_y
from sklearn.utils.validation import check_array
from sklearn.utils.validation import check_is_fitted
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.utils.multiclass import check_classification_targets


class OneNearestNeighbor(ClassifierMixin, BaseEstimator):
    """One-nearest-neighbor classifier using the Euclidean distance.

    Each sample is assigned the label of the closest training sample.

    Attributes
    ----------
    X_ : ndarray of shape (n_samples, n_features)
        Validated training samples stored by `fit`.
    y_ : ndarray of shape (n_samples,)
        Validated training labels stored by `fit`.
    classes_ : ndarray
        Sorted unique labels seen during `fit`.
    n_features_in_ : int
        Number of features seen during `fit`.
    """

    def __init__(self):  # noqa: D107
        pass

    def fit(self, X, y):
        """Fit the OneNearestNeighbor classifier.

        The model simply memorizes the validated training set.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples.
        y : array-like of shape (n_samples,)
            Target labels.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        X, y = check_X_y(X, y)
        check_classification_targets(y)

        self.X_ = X
        self.y_ = y
        self.classes_ = np.unique(y)
        self.n_features_in_ = X.shape[1]
        return self

    def predict(self, X):
        """Predict labels for the input samples.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to classify. Must have the same number of features
            as the training data.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            Predicted label for each input sample.

        Raises
        ------
        ValueError
            If the number of features of `X` differs from the number of
            features seen during `fit`.
        """
        check_is_fitted(self)
        X = check_array(X)

        if X.shape[1] != self.n_features_in_:
            raise ValueError(
                f"X has {X.shape[1]} features, but "
                f"{self.__class__.__name__} is expecting "
                f"{self.n_features_in_} features as input."
            )

        # Broadcast to (n_queries, n_train, n_features) so that
        # distances[i, k] is the Euclidean distance between query i and
        # training sample k.
        differences = self.X_[None, :, :] - X[:, None, :]
        distances = np.linalg.norm(differences, axis=2)

        # argmin keeps the first minimum, so ties go to the training sample
        # that appears first in X_.
        nearest_idx = np.argmin(distances, axis=1)
        return self.y_[nearest_idx]

    def score(self, X, y):
        """Compute the accuracy of the classifier.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples used to evaluate the model.
        y : array-like of shape (n_samples,)
            True labels corresponding to X.

        Returns
        -------
        score : float
            Fraction of samples whose predicted label equals the true
            label, between 0 and 1.
        """
        check_is_fitted(self)
        X, y = check_X_y(X, y)

        # predict() already rejects a mismatching number of features, so
        # that check is not repeated here.
        y_pred = self.predict(X)
        return np.mean(y_pred == y)