Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions numpy_questions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
This will be enforced with `flake8`. You can check that there is no flake8
errors by calling `flake8` at the root of the repo.
"""

import numpy as np


Expand All @@ -40,7 +41,12 @@ def max_index(X):
i = 0
j = 0

# TODO
if not isinstance(X, np.ndarray):
raise ValueError("Input is not a numpy array")
if X.ndim != 2:
raise ValueError("Input array is not 2D")

i, j = np.unravel_index(np.argmax(X), X.shape)

return i, j

Expand All @@ -62,6 +68,12 @@ def wallis_product(n_terms):
pi : float
The approximation of order `n_terms` of pi using the Wallis product.
"""
# XXX : The n_terms is an int that corresponds to the number of
# terms in the product. For example 10000.
return 0.
pi_half = 1.0

if n_terms == 0:
return pi_half

for n in range(1, n_terms + 1):
pi_half *= (4 * n * n) / (4 * n * n - 1)

return pi_half * 2
110 changes: 89 additions & 21 deletions sklearn_questions.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
for the methods you code and for the class. The docstring will be checked using
`pydocstyle` that you can also call at the root of the repo.
"""

import numpy as np
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin
Expand All @@ -28,47 +29,114 @@
from sklearn.utils.multiclass import check_classification_targets


class OneNearestNeighbor(ClassifierMixin, BaseEstimator):
    """One-nearest-neighbor classifier.

    Each test sample receives the label of the single closest training
    sample, where closeness is measured with the Euclidean distance.

    Attributes
    ----------
    X_train_ : ndarray of shape (n_samples, n_features)
        Training samples stored by `fit`.
    y_train_ : ndarray of shape (n_samples,)
        Training labels stored by `fit`.
    classes_ : ndarray of shape (n_classes,)
        Unique labels seen during `fit`.
    n_features_in_ : int
        Number of features seen during `fit`.
    """

    def __init__(self):
        """Initialize the OneNearestNeighbor classifier."""

    def fit(self, X, y):
        """Fit the OneNearestNeighbor model from the training dataset.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.
        y : array-like of shape (n_samples,)
            Target values.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        # Validate X and y together; dense, 2-D, numeric input only.
        X, y = check_X_y(
            X,
            y,
            accept_sparse=False,
            ensure_2d=True,
            dtype=[np.float64, np.float32, np.int64, np.int32],
        )
        check_classification_targets(y)

        # 1-NN has no training step beyond memorising the samples.
        self.X_train_ = X
        self.y_train_ = y
        self.classes_ = np.unique(y)
        # Recorded by hand so `predict` can check feature consistency.
        self.n_features_in_ = X.shape[1]

        return self

    def predict(self, X):
        """Predict class labels for samples in X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test samples.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            Predicted class labels for each sample.

        Raises
        ------
        ValueError
            If the number of features in X differs from the number seen
            during `fit`.
        """
        check_is_fitted(self, ["X_train_", "y_train_", "n_features_in_"])

        # Same validation settings as in `fit`.
        X = check_array(
            X,
            accept_sparse=False,
            ensure_2d=True,
            dtype=[np.float64, np.float32, np.int64, np.int32],
        )

        if X.shape[1] != self.n_features_in_:
            raise ValueError(
                f"X has {X.shape[1]} features, but OneNearestNeighbor is "
                f"expecting {self.n_features_in_} features as input."
            )

        # Broadcast to (n_samples_test, n_samples_train, n_features) and
        # reduce over the feature axis to get pairwise Euclidean distances.
        distances = np.linalg.norm(
            self.X_train_[np.newaxis, :, :] - X[:, np.newaxis, :],
            axis=2,
        )

        # argmin keeps the first training sample in case of ties.
        nearest_idx = np.argmin(distances, axis=1)
        y_pred = self.y_train_[nearest_idx]

        return y_pred

    def score(self, X, y):
        """Return the mean accuracy on the given test data and labels.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test samples.
        y : array-like of shape (n_samples,)
            True labels for X.

        Returns
        -------
        score : float
            Mean accuracy of predictions.
        """
        X, y = check_X_y(X, y)
        y_pred = self.predict(X)

        # Fraction of samples whose predicted label equals the true label.
        return np.mean(y_pred == y)