Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 23 additions & 42 deletions numpy_questions.py
Original file line number Diff line number Diff line change
@@ -1,67 +1,48 @@
"""Assignment - using numpy and making a PR.
"""Numpy related utility functions."""

The goals of this assignment are:
* Use numpy in practice with two easy exercises.
* Use automated tools to validate the code (`pytest` and `flake8`)
* Submit a Pull-Request on github to practice `git`.

The two functions below are skeleton functions. The docstrings explain what
are the inputs, the outputs and the expected error. Fill the function to
complete the assignment. The code should be able to pass the test that we
wrote. To run the tests, use `pytest test_numpy_questions.py` at the root of
the repo. It should say that 2 tests ran with success.

We also ask to respect the pep8 convention: https://pep8.org.
This will be enforced with `flake8`. You can check that there are no flake8
errors by calling `flake8` at the root of the repo.
"""
import numpy as np


def max_index(X):
    """Return the indices of the maximum value in a 2D numpy array.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        The input array.

    Returns
    -------
    (i, j) : tuple(int)
        The row and column index of the maximum. If the maximum value
        occurs several times, the first occurrence in row-major order
        is returned.

    Raises
    ------
    ValueError
        If the input is not a numpy array or
        if the shape is not 2D.
    """
    if not isinstance(X, np.ndarray):
        raise ValueError("Input must be a numpy array")
    if X.ndim != 2:
        raise ValueError("Input must be a 2D array")

    # argmax works on the flattened array; unravel_index converts that
    # flat position back into (row, column) coordinates.
    flat_position = np.argmax(X)
    i, j = np.unravel_index(flat_position, X.shape)
    # Convert numpy integer scalars to plain ints, as documented.
    return int(i), int(j)


def wallis_product(n_terms):
    """Compute an approximation of pi using the Wallis product.

    See:
    https://en.wikipedia.org/wiki/Wallis_product

    Parameters
    ----------
    n_terms : int
        Number of terms in the Wallis product. Note that `n_terms=0` will
        consider the product to be `1`, so the returned value is `2.0`.

    Returns
    -------
    pi : float
        The approximation of order `n_terms` of pi using the Wallis product.
    """
    # The Wallis product converges to pi / 2. Starting from the empty
    # product (1.0) means n_terms=0 needs no special case.
    half_pi = 1.0
    for k in range(1, n_terms + 1):
        four_k_squared = 4 * k * k
        half_pi *= four_k_squared / (four_k_squared - 1)
    return 2 * half_pi
102 changes: 57 additions & 45 deletions sklearn_questions.py
Original file line number Diff line number Diff line change
@@ -1,74 +1,86 @@
"""Assignment - making a sklearn estimator.

The goal of this assignment is to implement by yourself a scikit-learn
estimator for the OneNearestNeighbor and check that it is working properly.

The nearest neighbor classifier predicts for a point X_i the target y_k of
the training sample X_k which is the closest to X_i. We measure proximity with
the Euclidean distance. The model will be evaluated with the accuracy (average
number of samples correctly classified). You need to implement the `fit`,
`predict` and `score` methods for this class. The code you write should pass
the test we implemented. You can run the tests by calling at the root of the
repo `pytest test_sklearn_questions.py`.

We also ask to respect the pep8 convention: https://pep8.org. This will be
enforced with `flake8`. You can check that there are no flake8 errors by
calling `flake8` at the root of the repo.

Finally, you need to write docstring similar to the one in `numpy_questions`
for the methods you code and for the class. The docstring will be checked using
`pydocstyle` that you can also call at the root of the repo.
"""
"""Custom sklearn estimator: One Nearest Neighbor classifier."""

import numpy as np
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin
from sklearn.utils.validation import check_X_y
from sklearn.utils.validation import check_array
from sklearn.utils.validation import check_is_fitted
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.utils.multiclass import check_classification_targets


class OneNearestNeighbor(BaseEstimator, ClassifierMixin):
    """One Nearest Neighbor classifier.

    Assigns to each sample the label of the closest training point
    using Euclidean distance.
    """

    def __init__(self):
        """Initialize the classifier (no parameters)."""

    def fit(self, X, y):
        """Fit the OneNearestNeighbor classifier.

        The training samples and their labels are stored as-is; all the
        work happens at prediction time.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Training data.
        y : ndarray of shape (n_samples,)
            Target labels.

        Returns
        -------
        self : object
            Fitted classifier.
        """
        X, y = check_X_y(X, y)
        check_classification_targets(y)

        self.X_train_ = X
        self.y_train_ = y
        self.classes_ = np.unique(y)
        self.n_features_in_ = X.shape[1]
        return self

    def predict(self, X):
        """Predict class labels for samples in X.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Input samples.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            Predicted labels, i.e. the label of the nearest training sample.

        Raises
        ------
        ValueError
            If the number of features of `X` differs from the number of
            features seen in `fit`.
        """
        check_is_fitted(self)
        X = check_array(X)
        # Explicit check: without it a single-column X would silently
        # broadcast against the training data instead of failing.
        if X.shape[1] != self.n_features_in_:
            raise ValueError(
                f"X has {X.shape[1]} features, but "
                f"{type(self).__name__} is expecting "
                f"{self.n_features_in_} features as input."
            )

        # Pairwise Euclidean distances, shape (n_samples, n_train).
        distances = np.linalg.norm(
            X[:, None, :] - self.X_train_[None, :, :],
            axis=2
        )
        nearest_idx = np.argmin(distances, axis=1)
        return self.y_train_[nearest_idx]

    def score(self, X, y):
        """Return accuracy score of the classifier.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Input samples.
        y : ndarray of shape (n_samples,)
            True labels.

        Returns
        -------
        float
            Fraction of samples whose predicted label equals the true label.
        """
        X, y = check_X_y(X, y)
        y_pred = self.predict(X)
        return float(np.mean(y_pred == y))
7 changes: 7 additions & 0 deletions ssh-keygen -t ed25519 -C "你的GitHub邮箱"
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-----BEGIN OPENSSH PRIVATE KEY-----
b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW
QyNTUxOQAAACC+nIN2ZmV7i3tLTK82iLZ55hTImb3J2vo2gxHCqFY1PgAAAJg6M29MOjNv
TAAAAAtzc2gtZWQyNTUxOQAAACC+nIN2ZmV7i3tLTK82iLZ55hTImb3J2vo2gxHCqFY1Pg
AAAEAvo6TJa/cpJpuuaNQAx+6V9yzlXpTyZVqILJlLmTsZh76cg3ZmZXuLe0tMrzaItnnm
FMiZvcna+jaDEcKoVjU+AAAAEuS9oOeahEdpdEh1YumCrueusQECAw==
-----END OPENSSH PRIVATE KEY-----
1 change: 1 addition & 0 deletions ssh-keygen -t ed25519 -C "你的GitHub邮箱".pub
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL6cg3ZmZXuLe0tMrzaItnnmFMiZvcna+jaDEcKoVjU+ 你的GitHub邮箱