Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 17 additions & 6 deletions numpy_questions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@
This will be enforced with `flake8`. You can check that there are no flake8
errors by calling `flake8` at the root of the repo.
"""

import numpy as np


def max_index(X):
    """Return the index of the maximum in a 2D numpy array.

    Parameters
    ----------
    X : ndarray of shape (n_rows, n_columns)
        The 2D array in which to look for the maximum.

    Returns
    -------
    i : int
        Row index of the maximum.
    j : int
        Column index of the maximum.

    Raises
    ------
    ValueError
        If the input is not a numpy array or
        if the shape is not 2D.
    """
    # Validate the input before touching any array attribute.
    if not isinstance(X, np.ndarray):
        raise ValueError("Input X must be a numpy array")
    if X.ndim != 2:
        raise ValueError("Input X's shape must be 2D")

    # argmax works on the flattened array; unravel_index maps that flat
    # position back to a (row, column) pair.
    i, j = np.unravel_index(np.argmax(X), X.shape)

    # Return plain Python ints rather than numpy integer scalars.
    return int(i), int(j)


def wallis_product(n_terms):
    """Approximate pi using the Wallis product truncated to `n_terms` terms.

    Parameters
    ----------
    n_terms : int
        Number of terms in the product. For example 10000.

    Returns
    -------
    pi : float
        The approximation of order `n_terms` of pi using the Wallis product.
    """
    # Wallis: pi / 2 = prod_{n >= 1} 4 n^2 / (4 n^2 - 1).
    # With zero terms the product is empty (equal to 1), so the loop is
    # skipped and the approximation is 2.0 -- no special case is needed.
    half_pi = 1.0
    for n in range(1, n_terms + 1):
        four_n_squared = 4 * n * n
        half_pi *= four_n_squared / (four_n_squared - 1)
    return 2.0 * half_pi
76 changes: 58 additions & 18 deletions sklearn_questions.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
for the methods you code and for the class. The docstrings will be checked
using `pydocstyle`, which you can also call at the root of the repo.
"""

import numpy as np
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin
Expand All @@ -29,46 +30,85 @@


class OneNearestNeighbor(BaseEstimator, ClassifierMixin):
    """OneNearestNeighbor classifier.

    Each sample is assigned the label of the training sample that is the
    closest to it according to the Euclidean distance.
    """

    def __init__(self):  # noqa: D107
        pass  # no parameters

    def fit(self, X, y):
        """Fit the OneNearestNeighbor classifier.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Input array.

        y : ndarray of shape (n_samples,)
            True labels of X.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        X, y = check_X_y(X, y)
        check_classification_targets(y)
        self.classes_ = np.unique(y)
        self.n_features_in_ = X.shape[1]

        # Fitting only memorizes the training set; all the work is done
        # at prediction time.
        self.X_ = X
        self.y_ = y
        return self

    def predict(self, X):
        """Predict class labels of input samples.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Input array whose labels need to be predicted.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            Predicted class labels.

        Raises
        ------
        ValueError
            If the number of features of X differs from the one seen in fit.
        """
        check_is_fitted(self)
        X = check_array(X)
        # Explicit check so that both predict and score reject inputs
        # whose width does not match the training data.
        if X.shape[1] != self.n_features_in_:
            raise ValueError(
                f"X has {X.shape[1]} features, but "
                f"{self.__class__.__name__} is expecting "
                f"{self.n_features_in_} features as input."
            )

        # For each query row, index of the training sample at the smallest
        # Euclidean distance (argmin keeps the first one on ties).
        nearest = np.array(
            [
                np.argmin(np.linalg.norm(self.X_ - sample, axis=1))
                for sample in X
            ],
            dtype=np.intp,
        )
        return self.y_[nearest]

    def score(self, X, y):
        """Compute accuracy score of OneNearestNeighbor classifier.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Test samples.

        y : ndarray of shape (n_samples,)
            True labels of test samples (X).

        Returns
        -------
        score : float
            Classification accuracy score.
        """
        X, y = check_X_y(X, y)
        y_pred = self.predict(X)
        # Accuracy is the fraction of predictions equal to the true label.
        return np.mean(y_pred == y)