diff --git a/numpy_questions.py b/numpy_questions.py index 21fcec4b..30b4b059 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -39,8 +39,16 @@ def max_index(X): """ i = 0 j = 0 - - # TODO + if X is None: + raise ValueError + if not isinstance(X, (np.ndarray)): + raise ValueError + if X.ndim != 2: + raise ValueError + for k in range(X.shape[0]): + for m in range(X.shape[1]): + if X[i, j] < X[k, m]: + i, j = k, m return i, j @@ -64,4 +72,9 @@ def wallis_product(n_terms): """ # XXX : The n_terms is an int that corresponds to the number of # terms in the product. For example 10000. - return 0. + if n_terms == 0: + return 1 + pi = 2 + for i in range(n_terms): + pi = pi * (4 * (i+1)**2)/((4 * (i+1)**2)-1) + return pi diff --git a/sklearn_questions.py b/sklearn_questions.py index f65038c6..a313cdeb 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -28,47 +28,91 @@ from sklearn.utils.multiclass import check_classification_targets -class OneNearestNeighbor(BaseEstimator, ClassifierMixin): - "OneNearestNeighbor classifier." +class OneNearestNeighbor(ClassifierMixin, BaseEstimator): + """OneNearestNeighbor classifier. + + This classifier uses the 1-nearest neighbor method: + it assigns a new data point to the same class as the closest sample + in the training set, with closeness determined by Euclidean distance + """ def __init__(self): # noqa: D107 pass def fit(self, X, y): - """Write docstring. - - And describe parameters + """Train the 1-nearest neighbor classifier. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Training data, where n_samples is the number of samples and + n_features is the number of features. + y : array-like of shape (n_samples,) + Target values. + + Returns + ------- + self : object + Returns self. 
""" X, y = check_X_y(X, y) check_classification_targets(y) self.classes_ = np.unique(y) self.n_features_in_ = X.shape[1] - # XXX fix + self.X_train_ = X + self.y_train_ = y return self def predict(self, X): - """Write docstring. - - And describe parameters + """Predict the class labels for the provided data. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Test samples, where n_samples is the number of samples and + n_features is the number of features. + + Returns + ------- + y_pred : ndarray of shape (n_samples,) + Class labels for each data sample. """ - check_is_fitted(self) + check_is_fitted(self, ['X_train_', 'y_train_']) X = check_array(X) + if X.shape[1] != self.n_features_in_: + raise ValueError( + f"X has {X.shape[1]} features, but OneNearestNeighbor " + f"is expecting {self.n_features_in_} features as input." + ) y_pred = np.full( shape=len(X), fill_value=self.classes_[0], dtype=self.classes_.dtype ) + for i, x in enumerate(X): + distances = np.sqrt(np.sum((self.X_train_ - x)**2, axis=1)) + nearest_idx = np.argmin(distances) + y_pred[i] = self.y_train_[nearest_idx] - # XXX fix return y_pred def score(self, X, y): - """Write docstring. - - And describe parameters + """Return the accuracy on the given test data and labels. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Test samples, where n_samples is the number of samples and + n_features is the number of features. + y : array-like of shape (n_samples,) + True labels for X. + + Returns + ------- + score : float + Accuracy of the classifier, which is the fraction of correctly + classified samples. """ X, y = check_X_y(X, y) y_pred = self.predict(X) - - # XXX fix - return y_pred.sum() + return np.mean(y_pred == y)