From c350f0fa5cfb57a4d21f623fba348dc22dd52bca Mon Sep 17 00:00:00 2001 From: Tiziano Santoro Date: Fri, 30 Nov 2018 21:52:00 +0000 Subject: [PATCH] Autoformat crate This was done by running the `cargo fmt` command on the crate. --- benches/examples/cross_validation.rs | 30 ++- benches/examples/k_means.rs | 22 +- benches/examples/nnet.rs | 24 +- benches/examples/svm.rs | 23 +- benches/lib.rs | 2 +- examples/k-means_generating_cluster.rs | 31 +-- examples/naive_bayes_dogs.rs | 84 ++++--- examples/nnet-and_gate.rs | 24 +- examples/svm-sign_learner.rs | 16 +- src/analysis/confusion_matrix.rs | 52 ++-- src/analysis/cross_validation.rs | 124 ++++++---- src/analysis/score.rs | 83 ++++--- src/data/transforms/minmax.rs | 100 +++++--- src/data/transforms/mod.rs | 4 +- src/data/transforms/normalize.rs | 27 ++- src/data/transforms/shuffle.rs | 24 +- src/data/transforms/standardize.rs | 54 +++-- src/datasets/iris.rs | 21 +- src/datasets/mod.rs | 16 +- src/datasets/trees.rs | 17 +- src/learning/dbscan.rs | 66 +++-- src/learning/error.rs | 10 +- src/learning/glm.rs | 33 +-- src/learning/gmm.rs | 56 +++-- src/learning/gp.rs | 44 ++-- src/learning/k_means.rs | 77 +++--- src/learning/knn/binary_tree.rs | 320 +++++++++++++++---------- src/learning/knn/brute_force.rs | 22 +- src/learning/knn/mod.rs | 65 +++-- src/learning/lin_reg.rs | 24 +- src/learning/logistic_reg.rs | 33 +-- src/learning/naive_bayes.rs | 114 +++++---- src/learning/nnet/mod.rs | 179 ++++++++------ src/learning/nnet/net_layer.rs | 114 ++++++--- src/learning/optim/fmincg.rs | 18 +- src/learning/optim/grad_desc.rs | 136 ++++++----- src/learning/pca.rs | 70 +++--- src/learning/svm.rs | 20 +- src/learning/toolkit/activ_fn.rs | 8 +- src/learning/toolkit/cost_fn.rs | 2 +- src/learning/toolkit/kernel.rs | 37 ++- src/learning/toolkit/rand_utils.rs | 8 +- src/learning/toolkit/regularization.rs | 23 +- src/lib.rs | 42 ++-- src/prelude.rs | 6 +- src/stats/dist/exponential.rs | 12 +- src/stats/dist/gaussian.rs | 28 +-- 
src/stats/dist/mod.rs | 2 +- tests/learning/dbscan.rs | 25 +- tests/learning/gp.rs | 18 +- tests/learning/k_means.rs | 31 +-- tests/learning/knn.rs | 76 +++--- tests/learning/lin_reg.rs | 56 ++++- tests/learning/optim/grad_desc.rs | 102 +++++--- tests/learning/pca.rs | 164 ++++++++----- tests/lib.rs | 8 +- 56 files changed, 1641 insertions(+), 1186 deletions(-) diff --git a/benches/examples/cross_validation.rs b/benches/examples/cross_validation.rs index e0450fb3..9a9aba31 100644 --- a/benches/examples/cross_validation.rs +++ b/benches/examples/cross_validation.rs @@ -1,9 +1,9 @@ -use rusty_machine::linalg::{Matrix, BaseMatrix}; -use rusty_machine::learning::{LearningResult, SupModel}; -use rusty_machine::analysis::score::row_accuracy; -use rusty_machine::analysis::cross_validation::k_fold_validate; use rand::{thread_rng, Rng}; -use test::{Bencher, black_box}; +use rusty_machine::analysis::cross_validation::k_fold_validate; +use rusty_machine::analysis::score::row_accuracy; +use rusty_machine::learning::{LearningResult, SupModel}; +use rusty_machine::linalg::{BaseMatrix, Matrix}; +use test::{black_box, Bencher}; fn generate_data(rows: usize, cols: usize) -> Matrix { let mut rng = thread_rng(); @@ -22,14 +22,14 @@ fn generate_data(rows: usize, cols: usize) -> Matrix { /// matrices when trained. Its prediction for each row is the /// sum of the row's elements plus the precalculated training sum. struct DummyModel { - sum: f64 + sum: f64, } impl SupModel, Matrix> for DummyModel { fn predict(&self, inputs: &Matrix) -> LearningResult> { let predictions: Vec = inputs .row_iter() - .map(|row| { self.sum + sum(row.iter()) }) + .map(|row| self.sum + sum(row.iter())) .collect(); Ok(Matrix::new(inputs.rows(), 1, predictions)) } @@ -40,12 +40,12 @@ impl SupModel, Matrix> for DummyModel { } } -fn sum<'a, I: Iterator>(x: I) -> f64 { +fn sum<'a, I: Iterator>(x: I) -> f64 { x.fold(0f64, |acc, x| acc + x) } macro_rules! 
bench { - ($name:ident: $params:expr) => { + ($name:ident : $params:expr) => { #[bench] fn $name(b: &mut Bencher) { let (rows, cols, k) = $params; @@ -54,12 +54,16 @@ macro_rules! bench { b.iter(|| { let mut model = DummyModel { sum: 0f64 }; - let _ = black_box( - k_fold_validate(&mut model, &inputs, &targets, k, row_accuracy) - ); + let _ = black_box(k_fold_validate( + &mut model, + &inputs, + &targets, + k, + row_accuracy, + )); }); } - } + }; } bench!(bench_10_10_3: (10, 10, 3)); diff --git a/benches/examples/k_means.rs b/benches/examples/k_means.rs index 3791962f..2b768b76 100644 --- a/benches/examples/k_means.rs +++ b/benches/examples/k_means.rs @@ -1,19 +1,19 @@ -use rusty_machine::linalg::{Matrix, BaseMatrix}; use rusty_machine::learning::k_means::KMeansClassifier; use rusty_machine::learning::UnSupModel; +use rusty_machine::linalg::{BaseMatrix, Matrix}; -use rand::thread_rng; -use rand::distributions::IndependentSample; use rand::distributions::normal::Normal; +use rand::distributions::IndependentSample; +use rand::thread_rng; -use test::{Bencher, black_box}; +use test::{black_box, Bencher}; fn generate_data(centroids: &Matrix, points_per_centroid: usize, noise: f64) -> Matrix { assert!(centroids.cols() > 0, "Centroids cannot be empty."); assert!(centroids.rows() > 0, "Centroids cannot be empty."); assert!(noise >= 0f64, "Noise must be non-negative."); - let mut raw_cluster_data = Vec::with_capacity(centroids.rows() * points_per_centroid * - centroids.cols()); + let mut raw_cluster_data = + Vec::with_capacity(centroids.rows() * points_per_centroid * centroids.cols()); let mut rng = thread_rng(); let normal_rv = Normal::new(0f64, noise); @@ -32,14 +32,15 @@ fn generate_data(centroids: &Matrix, points_per_centroid: usize, noise: f64 } } - Matrix::new(centroids.rows() * points_per_centroid, - centroids.cols(), - raw_cluster_data) + Matrix::new( + centroids.rows() * points_per_centroid, + centroids.cols(), + raw_cluster_data, + ) } #[bench] fn k_means_train(b: 
&mut Bencher) { - const SAMPLES_PER_CENTROID: usize = 2000; // Choose two cluster centers, at (-0.5, -0.5) and (0, 0.5). let centroids = Matrix::new(2, 2, vec![-0.5, -0.5, 0.0, 0.5]); @@ -55,7 +56,6 @@ fn k_means_train(b: &mut Bencher) { #[bench] fn k_means_predict(b: &mut Bencher) { - const SAMPLES_PER_CENTROID: usize = 2000; // Choose two cluster centers, at (-0.5, -0.5) and (0, 0.5). let centroids = Matrix::new(2, 2, vec![-0.5, -0.5, 0.0, 0.5]); diff --git a/benches/examples/nnet.rs b/benches/examples/nnet.rs index d67e434d..331b0f1e 100644 --- a/benches/examples/nnet.rs +++ b/benches/examples/nnet.rs @@ -1,15 +1,15 @@ -use test::{Bencher, black_box}; +use test::{black_box, Bencher}; use rand::{random, Closed01}; use std::vec::Vec; -use rusty_machine::learning::nnet::{NeuralNet, BCECriterion}; -use rusty_machine::learning::toolkit::regularization::Regularization; -use rusty_machine::learning::toolkit::activ_fn::Sigmoid; +use rusty_machine::learning::nnet::{BCECriterion, NeuralNet}; use rusty_machine::learning::optim::grad_desc::StochasticGD; +use rusty_machine::learning::toolkit::activ_fn::Sigmoid; +use rusty_machine::learning::toolkit::regularization::Regularization; -use rusty_machine::linalg::Matrix; use rusty_machine::learning::SupModel; +use rusty_machine::linalg::Matrix; fn generate_data() -> (Matrix, Matrix, Matrix) { const THRESHOLD: f64 = 0.7; @@ -34,12 +34,7 @@ fn generate_data() -> (Matrix, Matrix, Matrix) { let inputs = Matrix::new(SAMPLES, 2, input_data); let targets = Matrix::new(SAMPLES, 1, label_data); - let test_cases = vec![ - 0.0, 0.0, - 0.0, 1.0, - 1.0, 1.0, - 1.0, 0.0, - ]; + let test_cases = vec![0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0]; let test_inputs = Matrix::new(test_cases.len() / 2, 2, test_cases); (inputs, targets, test_inputs) @@ -52,7 +47,12 @@ fn nnet_and_gate_train(b: &mut Bencher) { let criterion = BCECriterion::new(Regularization::L2(0.)); b.iter(|| { - let mut model = black_box(NeuralNet::mlp(layers, criterion, 
StochasticGD::default(), Sigmoid)); + let mut model = black_box(NeuralNet::mlp( + layers, + criterion, + StochasticGD::default(), + Sigmoid, + )); let _ = black_box(model.train(&inputs, &targets).unwrap()); }) } diff --git a/benches/examples/svm.rs b/benches/examples/svm.rs index 3fa68f5f..88d3dc74 100644 --- a/benches/examples/svm.rs +++ b/benches/examples/svm.rs @@ -1,23 +1,21 @@ use rusty_machine::learning::svm::SVM; // Necessary for the training trait. -use rusty_machine::learning::SupModel; use rusty_machine::learning::toolkit::kernel::HyperTan; +use rusty_machine::learning::SupModel; use rusty_machine::linalg::Matrix; use rusty_machine::linalg::Vector; -use test::{Bencher, black_box}; +use test::{black_box, Bencher}; fn generate_data() -> (Matrix, Vector) { // Training data - let inputs = Matrix::new(11, 1, vec![ - -0.1, -2., -9., -101., -666.7, - 0., 0.1, 1., 11., 99., 456.7 - ]); - let targets = Vector::new(vec![ - -1., -1., -1., -1., -1., - 1., 1., 1., 1., 1., 1. - ]); + let inputs = Matrix::new( + 11, + 1, + vec![-0.1, -2., -9., -101., -666.7, 0., 0.1, 1., 11., 99., 456.7], + ); + let targets = Vector::new(vec![-1., -1., -1., -1., -1., 1., 1., 1., 1., 1., 1.]); (inputs, targets) } @@ -43,7 +41,10 @@ fn svm_sign_learner_train(b: &mut Bencher) { fn svm_sign_learner_predict(b: &mut Bencher) { let (inputs, targets) = generate_data(); - let test_data = (-1000..1000).filter(|&x| x % 100 == 0).map(|x| x as f64).collect::>(); + let test_data = (-1000..1000) + .filter(|&x| x % 100 == 0) + .map(|x| x as f64) + .collect::>(); let test_inputs = Matrix::new(test_data.len(), 1, test_data); let mut svm_mod = SVM::new(HyperTan::new(100., 0.), 0.3); let _ = svm_mod.train(&inputs, &targets); diff --git a/benches/lib.rs b/benches/lib.rs index e617bcbe..ac5353d7 100644 --- a/benches/lib.rs +++ b/benches/lib.rs @@ -1,8 +1,8 @@ #![feature(test)] +extern crate rand; extern crate rusty_machine; extern crate test; -extern crate rand; mod examples { mod cross_validation; diff 
--git a/examples/k-means_generating_cluster.rs b/examples/k-means_generating_cluster.rs index 078851df..456f913b 100644 --- a/examples/k-means_generating_cluster.rs +++ b/examples/k-means_generating_cluster.rs @@ -1,23 +1,20 @@ -extern crate rusty_machine; extern crate rand; +extern crate rusty_machine; -use rusty_machine::linalg::{Matrix, BaseMatrix}; use rusty_machine::learning::k_means::KMeansClassifier; use rusty_machine::learning::UnSupModel; +use rusty_machine::linalg::{BaseMatrix, Matrix}; -use rand::thread_rng; -use rand::distributions::IndependentSample; use rand::distributions::normal::Normal; +use rand::distributions::IndependentSample; +use rand::thread_rng; -fn generate_data(centroids: &Matrix, - points_per_centroid: usize, - noise: f64) - -> Matrix { +fn generate_data(centroids: &Matrix, points_per_centroid: usize, noise: f64) -> Matrix { assert!(centroids.cols() > 0, "Centroids cannot be empty."); assert!(centroids.rows() > 0, "Centroids cannot be empty."); assert!(noise >= 0f64, "Noise must be non-negative."); - let mut raw_cluster_data = Vec::with_capacity(centroids.rows() * points_per_centroid * - centroids.cols()); + let mut raw_cluster_data = + Vec::with_capacity(centroids.rows() * points_per_centroid * centroids.cols()); let mut rng = thread_rng(); let normal_rv = Normal::new(0f64, noise); @@ -36,9 +33,11 @@ fn generate_data(centroids: &Matrix, } } - Matrix::new(centroids.rows() * points_per_centroid, - centroids.cols(), - raw_cluster_data) + Matrix::new( + centroids.rows() * points_per_centroid, + centroids.cols(), + raw_cluster_data, + ) } fn main() { @@ -46,8 +45,10 @@ fn main() { const SAMPLES_PER_CENTROID: usize = 2000; - println!("Generating {0} samples from each centroids:", - SAMPLES_PER_CENTROID); + println!( + "Generating {0} samples from each centroids:", + SAMPLES_PER_CENTROID + ); // Choose two cluster centers, at (-0.5, -0.5) and (0, 0.5). 
let centroids = Matrix::new(2, 2, vec![-0.5, -0.5, 0.0, 0.5]); println!("{}", centroids); diff --git a/examples/naive_bayes_dogs.rs b/examples/naive_bayes_dogs.rs index 8a4c45cc..33ad3ac5 100644 --- a/examples/naive_bayes_dogs.rs +++ b/examples/naive_bayes_dogs.rs @@ -1,13 +1,12 @@ -extern crate rusty_machine; extern crate rand; +extern crate rusty_machine; -use rand::Rand; -use rand::distributions::Sample; use rand::distributions::normal::Normal; +use rand::distributions::Sample; +use rand::Rand; use rusty_machine::learning::naive_bayes::{self, NaiveBayes}; -use rusty_machine::linalg::{Matrix, BaseMatrix}; use rusty_machine::learning::SupModel; - +use rusty_machine::linalg::{BaseMatrix, Matrix}; #[derive(Clone, Copy, Debug, Eq, PartialEq)] enum Color { @@ -49,45 +48,46 @@ impl Rand for Dog { furriness: red_dog_furriness.sample(rng), speed: red_dog_speed.sample(rng), } - }, - Color::White => { - Dog { - color: Color::White, - friendliness: white_dog_friendliness.sample(rng), - furriness: white_dog_furriness.sample(rng), - speed: white_dog_speed.sample(rng), - } + } + Color::White => Dog { + color: Color::White, + friendliness: white_dog_friendliness.sample(rng), + furriness: white_dog_furriness.sample(rng), + speed: white_dog_speed.sample(rng), }, } } } -fn generate_dog_data(training_set_size: u32, test_set_size: u32) - -> (Matrix, Matrix, Matrix, Vec) { - let mut randomness = rand::StdRng::new() - .expect("we should be able to get an RNG"); +fn generate_dog_data( + training_set_size: u32, + test_set_size: u32, +) -> (Matrix, Matrix, Matrix, Vec) { + let mut randomness = rand::StdRng::new().expect("we should be able to get an RNG"); let rng = &mut randomness; // We'll train the model on these dogs let training_dogs = (0..training_set_size) - .map(|_| { Dog::rand(rng) }) + .map(|_| Dog::rand(rng)) .collect::>(); // ... and then use the model to make predictions about these dogs' color // given only their trait measurements. 
let test_dogs = (0..test_set_size) - .map(|_| { Dog::rand(rng) }) + .map(|_| Dog::rand(rng)) .collect::>(); // The model's `.train` method will take two matrices, each with a row for // each dog in the training set: the rows in the first matrix contain the // trait measurements; the rows in the second are either [1, 0] or [0, 1] // to indicate color. - let training_data: Vec = training_dogs.iter() + let training_data: Vec = training_dogs + .iter() .flat_map(|dog| vec![dog.friendliness, dog.furriness, dog.speed]) .collect(); let training_matrix: Matrix = training_data.chunks(3).collect(); - let target_data: Vec = training_dogs.iter() + let target_data: Vec = training_dogs + .iter() .flat_map(|dog| match dog.color { Color::Red => vec![1., 0.], Color::White => vec![0., 1.], @@ -96,7 +96,8 @@ fn generate_dog_data(training_set_size: u32, test_set_size: u32) let target_matrix: Matrix = target_data.chunks(2).collect(); // Build another matrix for the test set of dogs to make predictions about. - let test_data: Vec = test_dogs.iter() + let test_data: Vec = test_dogs + .iter() .flat_map(|dog| vec![dog.friendliness, dog.furriness, dog.speed]) .collect(); let test_matrix: Matrix = test_data.chunks(3).collect(); @@ -121,34 +122,51 @@ fn evaluate_prediction(hits: &mut u32, dog: &Dog, prediction: &[f64]) -> (Color, fn main() { let (training_set_size, test_set_size) = (1000, 1000); // Generate all of our train and test data - let (training_matrix, target_matrix, test_matrix, test_dogs) = generate_dog_data(training_set_size, test_set_size); + let (training_matrix, target_matrix, test_matrix, test_dogs) = + generate_dog_data(training_set_size, test_set_size); // Train! let mut model = NaiveBayes::::new(); - model.train(&training_matrix, &target_matrix) + model + .train(&training_matrix, &target_matrix) .expect("failed to train model of dogs"); // Predict! 
- let predictions = model.predict(&test_matrix) + let predictions = model + .predict(&test_matrix) .expect("failed to predict dogs!?"); // Score how well we did. let mut hits = 0; let unprinted_total = test_set_size.saturating_sub(10) as usize; - for (dog, prediction) in test_dogs.iter().zip(predictions.row_iter()).take(unprinted_total) { + for (dog, prediction) in test_dogs + .iter() + .zip(predictions.row_iter()) + .take(unprinted_total) + { evaluate_prediction(&mut hits, dog, prediction.raw_slice()); } - + if unprinted_total > 0 { println!("..."); } - - for (dog, prediction) in test_dogs.iter().zip(predictions.row_iter()).skip(unprinted_total) { + + for (dog, prediction) in test_dogs + .iter() + .zip(predictions.row_iter()) + .skip(unprinted_total) + { let (actual_color, accurate) = evaluate_prediction(&mut hits, dog, prediction.raw_slice()); - println!("Predicted: {:?}; Actual: {:?}; Accurate? {:?}", - dog.color, actual_color, accurate); + println!( + "Predicted: {:?}; Actual: {:?}; Accurate? {:?}", + dog.color, actual_color, accurate + ); } - println!("Accuracy: {}/{} = {:.1}%", hits, test_set_size, - (f64::from(hits))/(f64::from(test_set_size)) * 100.); + println!( + "Accuracy: {}/{} = {:.1}%", + hits, + test_set_size, + (f64::from(hits)) / (f64::from(test_set_size)) * 100. 
+ ); } diff --git a/examples/nnet-and_gate.rs b/examples/nnet-and_gate.rs index 9fd7ac5e..7eb1a5e2 100644 --- a/examples/nnet-and_gate.rs +++ b/examples/nnet-and_gate.rs @@ -1,16 +1,16 @@ -extern crate rusty_machine; extern crate rand; +extern crate rusty_machine; use rand::{random, Closed01}; use std::vec::Vec; -use rusty_machine::learning::nnet::{NeuralNet, BCECriterion}; -use rusty_machine::learning::toolkit::regularization::Regularization; -use rusty_machine::learning::toolkit::activ_fn::Sigmoid; +use rusty_machine::learning::nnet::{BCECriterion, NeuralNet}; use rusty_machine::learning::optim::grad_desc::StochasticGD; +use rusty_machine::learning::toolkit::activ_fn::Sigmoid; +use rusty_machine::learning::toolkit::regularization::Regularization; -use rusty_machine::linalg::Matrix; use rusty_machine::learning::SupModel; +use rusty_machine::linalg::Matrix; // AND gate fn main() { @@ -50,18 +50,8 @@ fn main() { // Our train function returns a Result<(), E> model.train(&inputs, &targets).unwrap(); - let test_cases = vec![ - 0.0, 0.0, - 0.0, 1.0, - 1.0, 1.0, - 1.0, 0.0, - ]; - let expected = vec![ - 0.0, - 0.0, - 1.0, - 0.0, - ]; + let test_cases = vec![0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0]; + let expected = vec![0.0, 0.0, 1.0, 0.0]; let test_inputs = Matrix::new(test_cases.len() / 2, 2, test_cases); let res = model.predict(&test_inputs).unwrap(); diff --git a/examples/svm-sign_learner.rs b/examples/svm-sign_learner.rs index b1a16907..6403e354 100644 --- a/examples/svm-sign_learner.rs +++ b/examples/svm-sign_learner.rs @@ -2,8 +2,8 @@ extern crate rusty_machine; use rusty_machine::learning::svm::SVM; // Necessary for the training trait. 
-use rusty_machine::learning::SupModel; use rusty_machine::learning::toolkit::kernel::HyperTan; +use rusty_machine::learning::SupModel; use rusty_machine::linalg::Matrix; use rusty_machine::linalg::Vector; @@ -19,14 +19,12 @@ fn main() { println!("Training..."); // Training data - let inputs = Matrix::new(11, 1, vec![ - -0.1, -2., -9., -101., -666.7, - 0., 0.1, 1., 11., 99., 456.7 - ]); - let targets = Vector::new(vec![ - -1., -1., -1., -1., -1., - 1., 1., 1., 1., 1., 1. - ]); + let inputs = Matrix::new( + 11, + 1, + vec![-0.1, -2., -9., -101., -666.7, 0., 0.1, 1., 11., 99., 456.7], + ); + let targets = Vector::new(vec![-1., -1., -1., -1., -1., 1., 1., 1., 1., 1., 1.]); // Trainee let mut svm_mod = SVM::new(HyperTan::new(100., 0.), 0.3); diff --git a/src/analysis/confusion_matrix.rs b/src/analysis/confusion_matrix.rs index 7dc4f343..78219647 100644 --- a/src/analysis/confusion_matrix.rs +++ b/src/analysis/confusion_matrix.rs @@ -1,8 +1,8 @@ //! Module to compute the confusion matrix of a set of predictions. -use std::hash::Hash; -use std::collections::HashMap; use linalg::Matrix; +use std::collections::HashMap; +use std::hash::Hash; /// Returns a square matrix C where C_ij is the count of the samples which were /// predicted to lie in the class with jth label but actually lie in the class with @@ -41,29 +41,39 @@ use linalg::Matrix; /// /// - If user-provided labels are not distinct. /// - If predictions and targets have different lengths. 
-pub fn confusion_matrix(predictions: &[T], - targets: &[T], - labels: Option>) -> Matrix - where T: Ord + Eq + Hash + Copy +pub fn confusion_matrix( + predictions: &[T], + targets: &[T], + labels: Option>, +) -> Matrix +where + T: Ord + Eq + Hash + Copy, { - assert!(predictions.len() == targets.len(), - "predictions and targets have different lengths"); + assert!( + predictions.len() == targets.len(), + "predictions and targets have different lengths" + ); let labels = match labels { Some(ls) => ls, - None => ordered_distinct(predictions, targets) + None => ordered_distinct(predictions, targets), }; let mut label_to_index: HashMap = HashMap::new(); for (i, l) in labels.iter().enumerate() { match label_to_index.insert(*l, i) { - None => {}, - Some(_) => { panic!("labels must be distinct"); } + None => {} + Some(_) => { + panic!("labels must be distinct"); + } } } - let mut counts = Matrix::new(labels.len(), labels.len(), - vec![0usize; labels.len() * labels.len()]); + let mut counts = Matrix::new( + labels.len(), + labels.len(), + vec![0usize; labels.len() * labels.len()], + ); for (truth, pred) in targets.iter().zip(predictions) { if label_to_index.contains_key(truth) && label_to_index.contains_key(pred) { @@ -90,7 +100,7 @@ mod tests { #[test] fn confusion_matrix_no_labels() { - let truth = vec![2, 0, 2, 2, 0, 1]; + let truth = vec![2, 0, 2, 2, 0, 1]; let predictions = vec![0, 0, 2, 2, 0, 2]; let confusion = confusion_matrix(&predictions, &truth, None); @@ -104,7 +114,7 @@ mod tests { #[test] fn confusion_matrix_with_labels_a_permutation_of_classes() { - let truth = vec![2, 0, 2, 2, 0, 1]; + let truth = vec![2, 0, 2, 2, 0, 1]; let predictions = vec![0, 0, 2, 2, 0, 2]; let labels = vec![2, 1, 0]; @@ -119,8 +129,8 @@ mod tests { #[test] fn confusion_matrix_accepts_labels_intersecting_targets_and_disjoint_from_predictions() { - let truth = vec![2, 0, 2, 2, 3, 1]; - let predictions = vec![0, 0, 2, 2, 0, 2]; + let truth = vec![2, 0, 2, 2, 3, 1]; + let predictions = 
vec![0, 0, 2, 2, 0, 2]; let labels = vec![1, 3]; let confusion = confusion_matrix(&predictions, &truth, Some(labels)); @@ -133,7 +143,7 @@ mod tests { #[test] fn confusion_matrix_accepts_labels_intersecting_predictions_and_disjoint_from_targets() { - let truth = vec![0, 0, 2, 2, 0, 2]; + let truth = vec![0, 0, 2, 2, 0, 2]; let predictions = vec![2, 0, 2, 2, 3, 1]; let labels = vec![1, 3]; @@ -147,7 +157,7 @@ mod tests { #[test] fn confusion_matrix_accepts_labels_disjoint_from_predictions_and_targets() { - let truth = vec![0, 0, 2, 2, 0, 2]; + let truth = vec![0, 0, 2, 2, 0, 2]; let predictions = vec![2, 0, 2, 2, 3, 1]; let labels = vec![4, 5]; @@ -162,7 +172,7 @@ mod tests { #[test] #[should_panic] fn confusion_matrix_rejects_duplicate_labels() { - let truth = vec![0, 0, 2, 2, 0, 2]; + let truth = vec![0, 0, 2, 2, 0, 2]; let predictions = vec![2, 0, 2, 2, 3, 1]; let labels = vec![1, 1]; @@ -172,7 +182,7 @@ mod tests { #[test] #[should_panic] fn confusion_matrix_rejects_mismatched_prediction_and_target_lengths() { - let truth = vec![0, 0, 2, 2, 0, 2]; + let truth = vec![0, 0, 2, 2, 0, 2]; let predictions = vec![2, 0, 2, 2]; let _ = confusion_matrix(&predictions, &truth, None); } diff --git a/src/analysis/cross_validation.rs b/src/analysis/cross_validation.rs index 2baf581f..29fae05f 100644 --- a/src/analysis/cross_validation.rs +++ b/src/analysis/cross_validation.rs @@ -1,11 +1,11 @@ //! Module for performing cross-validation of models. +use learning::toolkit::rand_utils::in_place_fisher_yates; +use learning::{LearningResult, SupModel}; +use linalg::{BaseMatrix, Matrix}; use std::cmp; use std::iter::Chain; use std::slice::Iter; -use linalg::{BaseMatrix, Matrix}; -use learning::{LearningResult, SupModel}; -use learning::toolkit::rand_utils::in_place_fisher_yates; /// Randomly splits the inputs into k 'folds'. 
For each fold a model /// is trained using all inputs except for that fold, and tested on the @@ -45,13 +45,16 @@ use learning::toolkit::rand_utils::in_place_fisher_yates; /// row_accuracy /// ).unwrap(); /// ``` -pub fn k_fold_validate(model: &mut M, - inputs: &Matrix, - targets: &Matrix, - k: usize, - score: S) -> LearningResult> - where S: Fn(&Matrix, &Matrix) -> f64, - M: SupModel, Matrix>, +pub fn k_fold_validate( + model: &mut M, + inputs: &Matrix, + targets: &Matrix, + k: usize, + score: S, +) -> LearningResult> +where + S: Fn(&Matrix, &Matrix) -> f64, + M: SupModel, Matrix>, { assert_eq!(inputs.rows(), targets.rows()); let num_samples = inputs.rows(); @@ -89,7 +92,7 @@ fn create_shuffled_indices(num_samples: usize) -> ShuffledIndices { /// a training set and a test set. struct Partition<'a> { train_indices_iter: TrainingIndices<'a>, - test_indices_iter: TestIndices<'a> + test_indices_iter: TestIndices<'a>, } #[derive(Clone)] @@ -98,7 +101,7 @@ struct TestIndices<'a>(Iter<'a, usize>); #[derive(Clone)] struct TrainingIndices<'a> { chain: Chain, Iter<'a, usize>>, - size: usize + size: usize, } impl<'a> TestIndices<'a> { @@ -115,7 +118,7 @@ impl<'a> Iterator for TestIndices<'a> { } } -impl <'a> ExactSizeIterator for TestIndices<'a> { +impl<'a> ExactSizeIterator for TestIndices<'a> { fn len(&self) -> usize { self.0.len() } @@ -126,7 +129,7 @@ impl<'a> TrainingIndices<'a> { let chain = left.iter().chain(right.iter()); TrainingIndices { chain: chain, - size: left.len() + right.len() + size: left.len() + right.len(), } } } @@ -139,7 +142,7 @@ impl<'a> Iterator for TrainingIndices<'a> { } } -impl <'a> ExactSizeIterator for TrainingIndices<'a> { +impl<'a> ExactSizeIterator for TrainingIndices<'a> { fn len(&self) -> usize { self.size } @@ -148,8 +151,8 @@ impl <'a> ExactSizeIterator for TrainingIndices<'a> { /// An iterator over the sets of indices required for k-fold cross validation. 
struct Folds<'a> { num_folds: usize, - indices: &'a[usize], - count: usize + indices: &'a [usize], + count: usize, } impl<'a> Folds<'a> { @@ -159,13 +162,15 @@ impl<'a> Folds<'a> { /// folds are the same size.) fn new(indices: &'a ShuffledIndices, num_folds: usize) -> Folds<'a> { let num_samples = indices.0.len(); - assert!(num_folds > 1 && num_samples >= num_folds, - "Require num_folds > 1 && num_samples >= num_folds"); + assert!( + num_folds > 1 && num_samples >= num_folds, + "Require num_folds > 1 && num_samples >= num_folds" + ); Folds { num_folds: num_folds, indices: &indices.0, - count: 0 + count: 0, } } } @@ -182,7 +187,7 @@ impl<'a> Iterator for Folds<'a> { let q = num_samples / self.num_folds; let r = num_samples % self.num_folds; let fold_start = self.count * q + cmp::min(self.count, r); - let fold_size = if self.count >= r {q} else {q + 1}; + let fold_size = if self.count >= r { q } else { q + 1 }; let fold_end = fold_start + fold_size; self.count += 1; @@ -192,14 +197,14 @@ impl<'a> Iterator for Folds<'a> { let infix = &self.indices[fold_start..fold_end]; Some(Partition { train_indices_iter: TrainingIndices::new(prefix, suffix), - test_indices_iter: TestIndices::new(infix) + test_indices_iter: TestIndices::new(infix), }) } } #[cfg(test)] mod tests { - use super::{ShuffledIndices, Folds}; + use super::{Folds, ShuffledIndices}; // k % n == 0 #[test] @@ -207,11 +212,14 @@ mod tests { let idxs = ShuffledIndices(vec![0, 1, 2, 3, 4, 5]); let folds = collect_folds(Folds::new(&idxs, 3)); - assert_eq!(folds, vec![ - (vec![2, 3, 4, 5], vec![0, 1]), - (vec![0, 1, 4, 5], vec![2, 3]), - (vec![0, 1, 2, 3], vec![4, 5]) - ]); + assert_eq!( + folds, + vec![ + (vec![2, 3, 4, 5], vec![0, 1]), + (vec![0, 1, 4, 5], vec![2, 3]), + (vec![0, 1, 2, 3], vec![4, 5]), + ] + ); } // k % n == 1 @@ -220,10 +228,10 @@ mod tests { let idxs = ShuffledIndices(vec![0, 1, 2, 3, 4]); let folds = collect_folds(Folds::new(&idxs, 2)); - assert_eq!(folds, vec![ - (vec![3, 4], vec![0, 1, 2]), - 
(vec![0, 1, 2], vec![3, 4]) - ]); + assert_eq!( + folds, + vec![(vec![3, 4], vec![0, 1, 2]), (vec![0, 1, 2], vec![3, 4])] + ); } // k % n == 2 @@ -232,12 +240,15 @@ mod tests { let idxs = ShuffledIndices(vec![0, 1, 2, 3, 4, 5]); let folds = collect_folds(Folds::new(&idxs, 4)); - assert_eq!(folds, vec![ - (vec![2, 3, 4, 5], vec![0, 1]), - (vec![0, 1, 4, 5], vec![2, 3]), - (vec![0, 1, 2, 3, 5], vec![4]), - (vec![0, 1, 2, 3, 4], vec![5]) - ]); + assert_eq!( + folds, + vec![ + (vec![2, 3, 4, 5], vec![0, 1]), + (vec![0, 1, 4, 5], vec![2, 3]), + (vec![0, 1, 2, 3, 5], vec![4]), + (vec![0, 1, 2, 3, 4], vec![5]), + ] + ); } // k == n @@ -246,12 +257,15 @@ mod tests { let idxs = ShuffledIndices(vec![0, 1, 2, 3]); let folds = collect_folds(Folds::new(&idxs, 4)); - assert_eq!(folds, vec![ - (vec![1, 2, 3], vec![0]), - (vec![0, 2, 3], vec![1]), - (vec![0, 1, 3], vec![2]), - (vec![0, 1, 2], vec![3]) - ]); + assert_eq!( + folds, + vec![ + (vec![1, 2, 3], vec![0]), + (vec![0, 2, 3], vec![1]), + (vec![0, 1, 3], vec![2]), + (vec![0, 1, 2], vec![3]), + ] + ); } #[test] @@ -268,18 +282,24 @@ mod tests { let idxs = ShuffledIndices(vec![5, 4, 3, 2, 1, 0]); let folds = collect_folds(Folds::new(&idxs, 3)); - assert_eq!(folds, vec![ - (vec![3, 2, 1, 0], vec![5, 4]), - (vec![5, 4, 1, 0], vec![3, 2]), - (vec![5, 4, 3, 2], vec![1, 0]) - ]); + assert_eq!( + folds, + vec![ + (vec![3, 2, 1, 0], vec![5, 4]), + (vec![5, 4, 1, 0], vec![3, 2]), + (vec![5, 4, 3, 2], vec![1, 0]), + ] + ); } fn collect_folds<'a>(folds: Folds<'a>) -> Vec<(Vec, Vec)> { folds - .map(|p| - (p.train_indices_iter.map(|x| *x).collect::>(), - p.test_indices_iter.map(|x| *x).collect::>())) + .map(|p| { + ( + p.train_indices_iter.map(|x| *x).collect::>(), + p.test_indices_iter.map(|x| *x).collect::>(), + ) + }) .collect::, Vec)>>() } } diff --git a/src/analysis/score.rs b/src/analysis/score.rs index 48c4010e..beca3569 100644 --- a/src/analysis/score.rs +++ b/src/analysis/score.rs @@ -2,10 +2,10 @@ //! 
how close predictions and truth are. All functions in this //! module obey the convention that higher is better. -use libnum::{Zero, One}; +use libnum::{One, Zero}; -use linalg::{BaseMatrix, Matrix}; use learning::toolkit::cost_fn::{CostFunc, MeanSqError}; +use linalg::{BaseMatrix, Matrix}; // ************************************ // Classification Scores @@ -32,11 +32,15 @@ use learning::toolkit::cost_fn::{CostFunc, MeanSqError}; /// /// - outputs and targets have different length pub fn accuracy(outputs: I1, targets: I2) -> f64 - where T: PartialEq, - I1: ExactSizeIterator + Iterator, - I2: ExactSizeIterator + Iterator +where + T: PartialEq, + I1: ExactSizeIterator + Iterator, + I2: ExactSizeIterator + Iterator, { - assert!(outputs.len() == targets.len(), "outputs and targets must have the same length"); + assert!( + outputs.len() == targets.len(), + "outputs and targets must have the same length" + ); let len = outputs.len() as f64; let correct = outputs .zip(targets) @@ -47,8 +51,10 @@ pub fn accuracy(outputs: I1, targets: I2) -> f64 /// Returns the fraction of outputs rows which match their target. pub fn row_accuracy(outputs: &Matrix, targets: &Matrix) -> f64 { - accuracy(outputs.row_iter().map(|r| r.raw_slice()), - targets.row_iter().map(|r| r.raw_slice())) + accuracy( + outputs.row_iter().map(|r| r.raw_slice()), + targets.row_iter().map(|r| r.raw_slice()), + ) } /// Returns the precision score for 2 class classification. 
@@ -76,10 +82,14 @@ pub fn row_accuracy(outputs: &Matrix, targets: &Matrix) -> f64 { /// - outputs and targets have different length /// - outputs or targets contains a value which is not 0 or 1 pub fn precision<'a, I, T>(outputs: I, targets: I) -> f64 - where I: ExactSizeIterator, - T: 'a + PartialEq + Zero + One +where + I: ExactSizeIterator, + T: 'a + PartialEq + Zero + One, { - assert!(outputs.len() == targets.len(), "outputs and targets must have the same length"); + assert!( + outputs.len() == targets.len(), + "outputs and targets must have the same length" + ); let mut tpfp = 0.0f64; let mut tp = 0.0f64; @@ -91,8 +101,7 @@ pub fn precision<'a, I, T>(outputs: I, targets: I) -> f64 tp += 1.0f64; } } - if ((*t != &T::zero()) & (*t != &T::one())) | - ((*o != &T::zero()) & (*o != &T::one())) { + if ((*t != &T::zero()) & (*t != &T::one())) | ((*o != &T::zero()) & (*o != &T::one())) { panic!("precision must be used for 2 class classification") } } @@ -124,10 +133,14 @@ pub fn precision<'a, I, T>(outputs: I, targets: I) -> f64 /// - outputs and targets have different length /// - outputs or targets contains a value which is not 0 or 1 pub fn recall<'a, I, T>(outputs: I, targets: I) -> f64 - where I: ExactSizeIterator, - T: 'a + PartialEq + Zero + One +where + I: ExactSizeIterator, + T: 'a + PartialEq + Zero + One, { - assert!(outputs.len() == targets.len(), "outputs and targets must have the same length"); + assert!( + outputs.len() == targets.len(), + "outputs and targets must have the same length" + ); let mut tpfn = 0.0f64; let mut tp = 0.0f64; @@ -139,8 +152,7 @@ pub fn recall<'a, I, T>(outputs: I, targets: I) -> f64 tp += 1.0f64; } } - if ((*t != &T::zero()) & (*t != &T::one())) | - ((*o != &T::zero()) & (*o != &T::one())) { + if ((*t != &T::zero()) & (*t != &T::one())) | ((*o != &T::zero()) & (*o != &T::one())) { panic!("recall must be used for 2 class classification") } } @@ -172,10 +184,14 @@ pub fn recall<'a, I, T>(outputs: I, targets: I) -> f64 /// - 
outputs and targets have different length /// - outputs or targets contains a value which is not 0 or 1 pub fn f1<'a, I, T>(outputs: I, targets: I) -> f64 - where I: ExactSizeIterator, - T: 'a + PartialEq + Zero + One +where + I: ExactSizeIterator, + T: 'a + PartialEq + Zero + One, { - assert!(outputs.len() == targets.len(), "outputs and targets must have the same length"); + assert!( + outputs.len() == targets.len(), + "outputs and targets must have the same length" + ); let mut tpos = 0.0f64; let mut fpos = 0.0f64; @@ -189,8 +205,7 @@ pub fn f1<'a, I, T>(outputs: I, targets: I) -> f64 } else if *o == &T::one() { fneg += 1.0f64; } - if ((*t != &T::zero()) & (*t != &T::one())) | - ((*o != &T::zero()) & (*o != &T::one())) { + if ((*t != &T::zero()) & (*t != &T::one())) | ((*o != &T::zero()) & (*o != &T::one())) { panic!("f1-score must be used for 2 class classification") } } @@ -205,22 +220,21 @@ pub fn f1<'a, I, T>(outputs: I, targets: I) -> f64 /// Returns the additive inverse of the mean-squared-error of the /// outputs. So higher is better, and the returned value is always /// negative. -pub fn neg_mean_squared_error(outputs: &Matrix, targets: &Matrix) -> f64 -{ +pub fn neg_mean_squared_error(outputs: &Matrix, targets: &Matrix) -> f64 { // MeanSqError divides the actual mean squared error by two. 
-2f64 * MeanSqError::cost(outputs, targets) } #[cfg(test)] mod tests { + use super::{accuracy, f1, neg_mean_squared_error, precision, recall}; use linalg::Matrix; - use super::{accuracy, precision, recall, f1, neg_mean_squared_error}; #[test] fn test_accuracy() { let outputs = [1, 2, 3, 4, 5, 6]; let targets = [1, 2, 3, 3, 5, 1]; - assert_eq!(accuracy(outputs.iter(), targets.iter()), 2f64/3f64); + assert_eq!(accuracy(outputs.iter(), targets.iter()), 2f64 / 3f64); let outputs = [1, 1, 1, 0, 0, 0]; let targets = [1, 1, 1, 0, 0, 1]; @@ -316,7 +330,6 @@ mod tests { assert_eq!(f1(outputs.iter(), targets.iter()), 0.5); } - #[test] #[should_panic] fn test_f1_outputs_not_2class() { @@ -337,21 +350,13 @@ mod tests { fn test_neg_mean_squared_error_1d() { let outputs = Matrix::new(3, 1, vec![1f64, 2f64, 3f64]); let targets = Matrix::new(3, 1, vec![2f64, 4f64, 3f64]); - assert_eq!(neg_mean_squared_error(&outputs, &targets), -5f64/3f64); + assert_eq!(neg_mean_squared_error(&outputs, &targets), -5f64 / 3f64); } #[test] fn test_neg_mean_squared_error_2d() { - let outputs = Matrix::new(3, 2, vec![ - 1f64, 2f64, - 3f64, 4f64, - 5f64, 6f64 - ]); - let targets = Matrix::new(3, 2, vec![ - 1.5f64, 2.5f64, - 5f64, 6f64, - 5.5f64, 6.5f64 - ]); + let outputs = Matrix::new(3, 2, vec![1f64, 2f64, 3f64, 4f64, 5f64, 6f64]); + let targets = Matrix::new(3, 2, vec![1.5f64, 2.5f64, 5f64, 6f64, 5.5f64, 6.5f64]); assert_eq!(neg_mean_squared_error(&outputs, &targets), -3f64); } } diff --git a/src/data/transforms/minmax.rs b/src/data/transforms/minmax.rs index cb2a2d53..eaf8406a 100644 --- a/src/data/transforms/minmax.rs +++ b/src/data/transforms/minmax.rs @@ -25,10 +25,10 @@ //! let transformed = transformer.transform(inputs).unwrap(); //! 
``` +use super::{Invertible, TransformFitter, Transformer}; use learning::error::{Error, ErrorKind}; use learning::LearningResult; -use linalg::{Matrix, BaseMatrix, BaseMatrixMut, Vector}; -use super::{Invertible, Transformer, TransformFitter}; +use linalg::{BaseMatrix, BaseMatrixMut, Matrix, Vector}; use rulinalg::utils; @@ -38,14 +38,14 @@ use libnum::Float; #[derive(Debug)] pub struct MinMaxFitter { scaled_min: T, - scaled_max: T + scaled_max: T, } impl Default for MinMaxFitter { fn default() -> Self { MinMaxFitter { scaled_min: T::zero(), - scaled_max: T::one() + scaled_max: T::one(), } } } @@ -74,7 +74,7 @@ impl MinMaxFitter { pub fn new(min: T, max: T) -> Self { MinMaxFitter { scaled_min: min, - scaled_max: max + scaled_max: max, } } } @@ -88,12 +88,18 @@ impl TransformFitter, MinMaxScaler> for MinMaxFitter { let mut input_min_max = vec![(T::max_value(), T::min_value()); features]; for row in inputs.row_iter() { - for (idx, (feature, min_max)) in row.into_iter().zip(input_min_max.iter_mut()).enumerate() { + for (idx, (feature, min_max)) in + row.into_iter().zip(input_min_max.iter_mut()).enumerate() + { if !feature.is_finite() { - return Err(Error::new(ErrorKind::InvalidData, - format!("Data point in column {} cannot be \ - processed", - idx))); + return Err(Error::new( + ErrorKind::InvalidData, + format!( + "Data point in column {} cannot be \ + processed", + idx + ), + )); } // Update min if *feature < min_max.0 { @@ -109,26 +115,32 @@ impl TransformFitter, MinMaxScaler> for MinMaxFitter { // We'll scale each feature by a * x + b. // Where scales holds `a` per column and consts // holds `b`. 
- let scales = try!(input_min_max.iter() - .map(|&(x, y)| { - let s = (self.scaled_max - self.scaled_min) / (y - x); - if s.is_finite() { - Ok(s) - } else { - Err(Error::new(ErrorKind::InvalidData, - "Constant feature columns not supported.")) - } - }) - .collect::, _>>()); - - let consts = input_min_max.iter() + let scales = try!( + input_min_max + .iter() + .map(|&(x, y)| { + let s = (self.scaled_max - self.scaled_min) / (y - x); + if s.is_finite() { + Ok(s) + } else { + Err(Error::new( + ErrorKind::InvalidData, + "Constant feature columns not supported.", + )) + } + }) + .collect::, _>>() + ); + + let consts = input_min_max + .iter() .zip(scales.iter()) .map(|(&(_, x), &s)| self.scaled_max - x * s) .collect::>(); - + Ok(MinMaxScaler { scale_factors: Vector::new(scales), - const_factors: Vector::new(consts) + const_factors: Vector::new(consts), }) } } @@ -148,21 +160,30 @@ pub struct MinMaxScaler { const_factors: Vector, } - impl Transformer> for MinMaxScaler { fn transform(&mut self, mut inputs: Matrix) -> Result, Error> { if self.scale_factors.size() != inputs.cols() { - Err(Error::new(ErrorKind::InvalidData, - "Input data has different number of columns than fitted data.")) + Err(Error::new( + ErrorKind::InvalidData, + "Input data has different number of columns than fitted data.", + )) } else { for mut row in inputs.row_iter_mut() { - utils::in_place_vec_bin_op(row.raw_slice_mut(), self.scale_factors.data(), |x, &y| { - *x = *x * y; - }); - - utils::in_place_vec_bin_op(row.raw_slice_mut(), self.const_factors.data(), |x, &y| { - *x = *x + y; - }); + utils::in_place_vec_bin_op( + row.raw_slice_mut(), + self.scale_factors.data(), + |x, &y| { + *x = *x * y; + }, + ); + + utils::in_place_vec_bin_op( + row.raw_slice_mut(), + self.const_factors.data(), + |x, &y| { + *x = *x + y; + }, + ); } Ok(inputs) } @@ -170,12 +191,13 @@ impl Transformer> for MinMaxScaler { } impl Invertible> for MinMaxScaler { - fn inv_transform(&self, mut inputs: Matrix) -> Result, Error> { 
let features = self.scale_factors.size(); if inputs.cols() != features { - return Err(Error::new(ErrorKind::InvalidData, - "Input data has different number of columns than fitted data.")); + return Err(Error::new( + ErrorKind::InvalidData, + "Input data has different number of columns than fitted data.", + )); } for mut row in inputs.row_iter_mut() { @@ -190,8 +212,8 @@ impl Invertible> for MinMaxScaler { #[cfg(test)] mod tests { + use super::super::{Invertible, TransformFitter, Transformer}; use super::*; - use super::super::{Transformer, TransformFitter, Invertible}; use linalg::Matrix; use std::f64; diff --git a/src/data/transforms/mod.rs b/src/data/transforms/mod.rs index 9e0212f4..7a0adf1a 100644 --- a/src/data/transforms/mod.rs +++ b/src/data/transforms/mod.rs @@ -27,8 +27,8 @@ pub mod minmax; pub mod normalize; -pub mod standardize; pub mod shuffle; +pub mod standardize; use learning::LearningResult; @@ -50,7 +50,7 @@ pub trait Transformer { } /// Trait for invertible data transformers -pub trait Invertible : Transformer { +pub trait Invertible: Transformer { /// Maps the inputs using the inverse of the fitted transform. fn inv_transform(&self, inputs: T) -> LearningResult; } diff --git a/src/data/transforms/normalize.rs b/src/data/transforms/normalize.rs index 9ca6cf7a..32f0aa92 100644 --- a/src/data/transforms/normalize.rs +++ b/src/data/transforms/normalize.rs @@ -27,8 +27,8 @@ //! ``` use learning::error::{Error, ErrorKind}; -use linalg::{Matrix, MatrixSlice, BaseMatrix, BaseMatrixMut}; -use rulinalg::norm::{MatrixNorm, Euclidean}; +use linalg::{BaseMatrix, BaseMatrixMut, Matrix, MatrixSlice}; +use rulinalg::norm::{Euclidean, MatrixNorm}; use super::Transformer; @@ -46,10 +46,11 @@ use std::marker::PhantomData; /// See the module description for more information. 
#[derive(Debug)] pub struct Normalizer - where for<'a> M: MatrixNorm> +where + for<'a> M: MatrixNorm>, { norm: M, - _marker: PhantomData + _marker: PhantomData, } /// Create a `Normalizer` with a Euclidean norm. @@ -63,7 +64,8 @@ impl Default for Normalizer { } impl Normalizer - where for<'a> M: MatrixNorm> +where + for<'a> M: MatrixNorm>, { /// Constructs a new `Normalizer` with given norm. /// @@ -79,21 +81,23 @@ impl Normalizer pub fn new(norm: M) -> Self { Normalizer { norm: norm, - _marker: PhantomData + _marker: PhantomData, } } } impl Transformer> for Normalizer - where for<'a> M: MatrixNorm> +where + for<'a> M: MatrixNorm>, { fn transform(&mut self, mut inputs: Matrix) -> Result, Error> { let dists: Vec = inputs.row_iter().map(|m| self.norm.norm(&*m)).collect(); for (mut row, &d) in inputs.row_iter_mut().zip(dists.iter()) { - if !d.is_finite() { - return Err(Error::new(ErrorKind::InvalidData, - "Some data point is non-finite.")); + return Err(Error::new( + ErrorKind::InvalidData, + "Some data point is non-finite.", + )); } else if d != T::zero() { // no change if distance is 0 *row /= d; @@ -103,11 +107,10 @@ impl Transformer> for Normalizer } } - #[cfg(test)] mod tests { - use super::*; use super::super::Transformer; + use super::*; use linalg::Matrix; use std::f64; diff --git a/src/data/transforms/shuffle.rs b/src/data/transforms/shuffle.rs index 9f29b7da..9460feec 100644 --- a/src/data/transforms/shuffle.rs +++ b/src/data/transforms/shuffle.rs @@ -23,11 +23,11 @@ //! println!("{}", shuffled_mat); //! 
``` -use learning::LearningResult; -use linalg::{Matrix, BaseMatrix, BaseMatrixMut}; use super::Transformer; +use learning::LearningResult; +use linalg::{BaseMatrix, BaseMatrixMut, Matrix}; -use rand::{Rng, thread_rng, ThreadRng}; +use rand::{thread_rng, Rng, ThreadRng}; /// The `Shuffler` /// @@ -90,11 +90,11 @@ impl Transformer> for Shuffler { #[cfg(test)] mod tests { - use linalg::Matrix; use super::super::Transformer; use super::Shuffler; + use linalg::Matrix; - use rand::{StdRng, SeedableRng}; + use rand::{SeedableRng, StdRng}; #[test] fn seeded_shuffle() { @@ -104,8 +104,10 @@ mod tests { let mat = Matrix::new(4, 2, vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]); let shuffled = shuffler.transform(mat).unwrap(); - assert_eq!(shuffled.into_vec(), - vec![3.0, 4.0, 1.0, 2.0, 7.0, 8.0, 5.0, 6.0]); + assert_eq!( + shuffled.into_vec(), + vec![3.0, 4.0, 1.0, 2.0, 7.0, 8.0, 5.0, 6.0] + ); } #[test] @@ -115,7 +117,9 @@ mod tests { let mat = Matrix::new(1, 8, vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]); let shuffled = shuffler.transform(mat).unwrap(); - assert_eq!(shuffled.into_vec(), - vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]); + assert_eq!( + shuffled.into_vec(), + vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0] + ); } -} \ No newline at end of file +} diff --git a/src/data/transforms/standardize.rs b/src/data/transforms/standardize.rs index 5ed92ce0..05f338f4 100644 --- a/src/data/transforms/standardize.rs +++ b/src/data/transforms/standardize.rs @@ -24,10 +24,10 @@ //! let transformed = transformer.transform(inputs).unwrap(); //! 
``` -use learning::LearningResult; +use super::{Invertible, TransformFitter, Transformer}; use learning::error::{Error, ErrorKind}; -use linalg::{Matrix, Vector, Axes, BaseMatrix, BaseMatrixMut}; -use super::{Invertible, Transformer, TransformFitter}; +use learning::LearningResult; +use linalg::{Axes, BaseMatrix, BaseMatrixMut, Matrix, Vector}; use rulinalg::utils; @@ -37,14 +37,14 @@ use libnum::{Float, FromPrimitive}; #[derive(Debug)] pub struct StandardizerFitter { scaled_mean: T, - scaled_stdev: T + scaled_stdev: T, } impl Default for StandardizerFitter { fn default() -> Self { StandardizerFitter { scaled_mean: T::zero(), - scaled_stdev: T::one() + scaled_stdev: T::one(), } } } @@ -73,31 +73,39 @@ impl StandardizerFitter { pub fn new(mean: T, stdev: T) -> StandardizerFitter { StandardizerFitter { scaled_mean: mean, - scaled_stdev: stdev + scaled_stdev: stdev, } } } -impl TransformFitter, Standardizer> for StandardizerFitter { +impl TransformFitter, Standardizer> + for StandardizerFitter +{ fn fit(self, inputs: &Matrix) -> LearningResult> { if inputs.rows() <= 1 { - Err(Error::new(ErrorKind::InvalidData, - "Cannot standardize data with only one row.")) + Err(Error::new( + ErrorKind::InvalidData, + "Cannot standardize data with only one row.", + )) } else { let mean = inputs.mean(Axes::Row); - let variance = try!(inputs.variance(Axes::Row).map_err(|_| { - Error::new(ErrorKind::InvalidData, "Cannot compute variance of data.") - })); + let variance = try!(inputs.variance(Axes::Row).map_err(|_| Error::new( + ErrorKind::InvalidData, + "Cannot compute variance of data." 
+ ))); if mean.data().iter().any(|x| !x.is_finite()) { - return Err(Error::new(ErrorKind::InvalidData, "Some data point is non-finite.")); + return Err(Error::new( + ErrorKind::InvalidData, + "Some data point is non-finite.", + )); } Ok(Standardizer { means: mean, variances: variance, scaled_mean: self.scaled_mean, - scaled_stdev: self.scaled_stdev + scaled_stdev: self.scaled_stdev, }) } } @@ -125,12 +133,16 @@ pub struct Standardizer { impl Transformer> for Standardizer { fn transform(&mut self, mut inputs: Matrix) -> LearningResult> { if self.means.size() != inputs.cols() { - Err(Error::new(ErrorKind::InvalidData, - "Input data has different number of columns from fitted data.")) + Err(Error::new( + ErrorKind::InvalidData, + "Input data has different number of columns from fitted data.", + )) } else { for mut row in inputs.row_iter_mut() { // Subtract the mean - utils::in_place_vec_bin_op(row.raw_slice_mut(), self.means.data(), |x, &y| *x = *x - y); + utils::in_place_vec_bin_op(row.raw_slice_mut(), self.means.data(), |x, &y| { + *x = *x - y + }); utils::in_place_vec_bin_op(row.raw_slice_mut(), self.variances.data(), |x, &y| { *x = (*x * self.scaled_stdev / y.sqrt()) + self.scaled_mean }); @@ -144,8 +156,10 @@ impl Invertible> for Standardizer { fn inv_transform(&self, mut inputs: Matrix) -> LearningResult> { let features = self.means.size(); if inputs.cols() != features { - return Err(Error::new(ErrorKind::InvalidData, - "Inputs have different feature count than transformer.")); + return Err(Error::new( + ErrorKind::InvalidData, + "Inputs have different feature count than transformer.", + )); } for mut row in inputs.row_iter_mut() { @@ -163,8 +177,8 @@ impl Invertible> for Standardizer { #[cfg(test)] mod tests { + use super::super::{Invertible, TransformFitter, Transformer}; use super::*; - use super::super::{Transformer, TransformFitter, Invertible}; use linalg::{Axes, Matrix}; use std::f64; diff --git a/src/datasets/iris.rs b/src/datasets/iris.rs index 
7fa4d91c..443f71db 100644 --- a/src/datasets/iris.rs +++ b/src/datasets/iris.rs @@ -179,13 +179,16 @@ pub fn load() -> Dataset, Vector> { 6.5, 3.0, 5.2, 2.0; 6.2, 3.4, 5.4, 2.3; 5.9, 3.0, 5.1, 1.8]; - let target: Vec = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]; + let target: Vec = vec![ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + ]; - Dataset{ data: data, - target: Vector::new(target) } -} \ No newline at end of file + Dataset { + data: data, + target: Vector::new(target), + } +} diff --git a/src/datasets/mod.rs b/src/datasets/mod.rs index a6e24036..20773138 100644 --- a/src/datasets/mod.rs +++ b/src/datasets/mod.rs @@ -7,14 +7,20 @@ pub mod trees; /// Dataset container #[derive(Clone, Debug)] -pub struct Dataset where D: Clone + Debug, T: Clone + Debug { - +pub struct Dataset +where + D: Clone + Debug, + T: Clone + Debug, +{ data: D, - target: T + target: T, } -impl Dataset where D: Clone + Debug, T: Clone + Debug { - +impl Dataset +where + D: Clone + Debug, + T: Clone + Debug, +{ /// Returns explanatory variable (features) pub fn data(&self) -> &D { &self.data diff --git a/src/datasets/trees.rs b/src/datasets/trees.rs index 
8d65f9f2..b84eee26 100644 --- a/src/datasets/trees.rs +++ b/src/datasets/trees.rs @@ -55,10 +55,13 @@ pub fn load() -> Dataset, Vector> { 18.0, 80.; 18.0, 80.; 20.6, 87.]; - let target = vec![10.3, 10.3, 10.2, 16.4, 18.8, 19.7, 15.6, 18.2, 22.6, 19.9, - 24.2, 21.0, 21.4, 21.3, 19.1, 22.2, 33.8, 27.4, 25.7, 24.9, - 34.5, 31.7, 36.3, 38.3, 42.6, 55.4, 55.7, 58.3, 51.5, 51.0, - 77.0]; - Dataset{ data: data, - target: Vector::new(target) } -} \ No newline at end of file + let target = vec![ + 10.3, 10.3, 10.2, 16.4, 18.8, 19.7, 15.6, 18.2, 22.6, 19.9, 24.2, 21.0, 21.4, 21.3, 19.1, + 22.2, 33.8, 27.4, 25.7, 24.9, 34.5, 31.7, 36.3, 38.3, 42.6, 55.4, 55.7, 58.3, 51.5, 51.0, + 77.0, + ]; + Dataset { + data: data, + target: Vector::new(target), + } +} diff --git a/src/learning/dbscan.rs b/src/learning/dbscan.rs index e7bbdee7..143edabf 100644 --- a/src/learning/dbscan.rs +++ b/src/learning/dbscan.rs @@ -36,12 +36,12 @@ //! let clustering = model.clusters().unwrap(); //! ``` -use learning::{LearningResult, UnSupModel}; use learning::error::{Error, ErrorKind}; +use learning::{LearningResult, UnSupModel}; -use linalg::{Matrix, Vector, BaseMatrix}; -use rulinalg::utils; +use linalg::{BaseMatrix, Matrix, Vector}; use rulinalg::matrix::Row; +use rulinalg::utils; /// DBSCAN Model /// @@ -105,16 +105,20 @@ impl UnSupModel, Vector>> for DBSCAN { fn predict(&self, inputs: &Matrix) -> LearningResult>> { if self.predictive { - if let (&Some(ref cluster_data), &Some(ref clusters)) = (&self._cluster_data, - &self.clusters) { + if let (&Some(ref cluster_data), &Some(ref clusters)) = + (&self._cluster_data, &self.clusters) + { let mut classes = Vec::with_capacity(inputs.rows()); for input_point in inputs.row_iter() { let mut distances = Vec::with_capacity(cluster_data.rows()); for cluster_point in cluster_data.row_iter() { - let point_distance = - utils::vec_bin_op(input_point.raw_slice(), cluster_point.raw_slice(), |x, y| x - y); + let point_distance = utils::vec_bin_op( + 
input_point.raw_slice(), + cluster_point.raw_slice(), + |x, y| x - y, + ); distances.push(utils::dot(&point_distance, &point_distance).sqrt()); } @@ -131,8 +135,10 @@ impl UnSupModel, Vector>> for DBSCAN { Err(Error::new_untrained()) } } else { - Err(Error::new(ErrorKind::InvalidState, - "Model must be set to predictive. Use `self.set_predictive(true)`.")) + Err(Error::new( + ErrorKind::InvalidState, + "Model must be set to predictive. Use `self.set_predictive(true)`.", + )) } } } @@ -167,17 +173,25 @@ impl DBSCAN { self.clusters.as_ref() } - fn expand_cluster(&mut self, - inputs: &Matrix, - point_idx: usize, - neighbour_pts: Vec, - cluster: usize) { - debug_assert!(point_idx < inputs.rows(), - "Point index too large for inputs"); - debug_assert!(neighbour_pts.iter().all(|x| *x < inputs.rows()), - "Neighbour indices too large for inputs"); - - self.clusters.as_mut().map(|x| x.mut_data()[point_idx] = Some(cluster)); + fn expand_cluster( + &mut self, + inputs: &Matrix, + point_idx: usize, + neighbour_pts: Vec, + cluster: usize, + ) { + debug_assert!( + point_idx < inputs.rows(), + "Point index too large for inputs" + ); + debug_assert!( + neighbour_pts.iter().all(|x| *x < inputs.rows()), + "Neighbour indices too large for inputs" + ); + + self.clusters + .as_mut() + .map(|x| x.mut_data()[point_idx] = Some(cluster)); for data_point_idx in &neighbour_pts { let visited = self._visited[*data_point_idx]; @@ -193,15 +207,17 @@ impl DBSCAN { } } - fn region_query(&self, point: Row, inputs: &Matrix) -> Vec { - debug_assert!(point.cols() == inputs.cols(), - "point must be of same dimension as inputs"); + debug_assert!( + point.cols() == inputs.cols(), + "point must be of same dimension as inputs" + ); let mut in_neighbourhood = Vec::new(); for (idx, data_point) in inputs.row_iter().enumerate() { //TODO: Use `MatrixMetric` when rulinalg#154 is fixed. 
- let point_distance = utils::vec_bin_op(data_point.raw_slice(), point.raw_slice(), |x, y| x - y); + let point_distance = + utils::vec_bin_op(data_point.raw_slice(), point.raw_slice(), |x, y| x - y); let dist = utils::dot(&point_distance, &point_distance).sqrt(); if dist < self.eps { @@ -229,7 +245,7 @@ impl DBSCAN { #[cfg(test)] mod tests { use super::DBSCAN; - use linalg::{Matrix, BaseMatrix}; + use linalg::{BaseMatrix, Matrix}; #[test] fn test_region_query() { diff --git a/src/learning/error.rs b/src/learning/error.rs index ebbb6559..caad7bdd 100644 --- a/src/learning/error.rs +++ b/src/learning/error.rs @@ -30,13 +30,14 @@ pub enum ErrorKind { /// The model has not been trained UntrainedModel, /// Linear algebra related error - LinearAlgebra + LinearAlgebra, } impl Error { /// Construct a new `Error` of a particular `ErrorKind`. pub fn new(kind: ErrorKind, error: E) -> Error - where E: Into> + where + E: Into>, { Error { kind: kind, @@ -59,7 +60,10 @@ impl Error { impl From for Error { fn from(e: rulinalg::error::Error) -> Error { - Error::new(ErrorKind::LinearAlgebra, ::description(&e)) + Error::new( + ErrorKind::LinearAlgebra, + ::description(&e), + ) } } diff --git a/src/learning/glm.rs b/src/learning/glm.rs index aa69a7e6..92baed2d 100644 --- a/src/learning/glm.rs +++ b/src/learning/glm.rs @@ -36,10 +36,10 @@ //! 
``` use linalg::Vector; -use linalg::{Matrix, BaseMatrix}; +use linalg::{BaseMatrix, Matrix}; -use learning::{LearningResult, SupModel}; use learning::error::{Error, ErrorKind}; +use learning::{LearningResult, SupModel}; /// The Generalized Linear Model /// @@ -94,8 +94,10 @@ impl SupModel, Vector> for GenLinearModel { let n = inputs.rows(); if n != targets.size() { - return Err(Error::new(ErrorKind::InvalidData, - "Training data do not have the same dimensions")); + return Err(Error::new( + ErrorKind::InvalidData, + "Training data do not have the same dimensions", + )); } // Construct initial estimate for mu @@ -119,8 +121,8 @@ impl SupModel, Vector> for GenLinearModel { let new_beta = (&x_t_w * &full_inputs) .inverse() - .expect("Could not compute input data inverse.") * - x_t_w * z; + .expect("Could not compute input data inverse.") * x_t_w + * z; let diff = (beta - &new_beta).apply(&|x| x.abs()).sum(); beta = new_beta; @@ -334,11 +336,7 @@ impl Criterion for Bernoulli { for m in mu { let var = self.model_variance(*m); - working_weights_vec.push(if var.abs() < 1e-5 { - 1e-5 - } else { - var - }); + working_weights_vec.push(if var.abs() < 1e-5 { 1e-5 } else { var }); } working_weights_vec @@ -378,7 +376,6 @@ impl Criterion for Binomial { } else { var } - } fn initialize_mu(&self, y: &[f64]) -> Vec { @@ -403,11 +400,7 @@ impl Criterion for Binomial { for (idx, m) in mu.iter().enumerate() { let var = self.model_variance(*m) / self.weights[idx]; - working_weights_vec.push(if var.abs() < 1e-5 { - 1e-5 - } else { - var - }); + working_weights_vec.push(if var.abs() < 1e-5 { 1e-5 } else { var }); } working_weights_vec @@ -459,11 +452,7 @@ impl Criterion for Poisson { let mut mu_data = Vec::with_capacity(y.len()); for y_val in y { - mu_data.push(if *y_val < 1e-10 { - 1e-10 - } else { - *y_val - }); + mu_data.push(if *y_val < 1e-10 { 1e-10 } else { *y_val }); } mu_data diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs index b4ec8c6b..929c7914 100644 --- 
a/src/learning/gmm.rs +++ b/src/learning/gmm.rs @@ -30,13 +30,13 @@ //! // Probabilities that each point comes from each Gaussian. //! println!("{:?}", post_probs.data()); //! ``` -use linalg::{Matrix, MatrixSlice, Vector, BaseMatrix, BaseMatrixMut, Axes}; +use linalg::{Axes, BaseMatrix, BaseMatrixMut, Matrix, MatrixSlice, Vector}; +use rulinalg::matrix::decomposition::PartialPivLu; use rulinalg::utils; -use rulinalg::matrix::decomposition::{PartialPivLu}; -use learning::{LearningResult, UnSupModel}; -use learning::toolkit::rand_utils; use learning::error::{Error, ErrorKind}; +use learning::toolkit::rand_utils; +use learning::{LearningResult, UnSupModel}; /// Covariance options for GMMs. /// @@ -53,7 +53,6 @@ pub enum CovOption { Diagonal, } - /// A Gaussian Mixture Model #[derive(Debug)] pub struct GaussianMixtureModel { @@ -73,7 +72,10 @@ impl UnSupModel, Matrix> for GaussianMixtureModel { let reg_value = if inputs.rows() > 1 { 1f64 / (inputs.rows() - 1) as f64 } else { - return Err(Error::new(ErrorKind::InvalidData, "Only one row of data provided.")); + return Err(Error::new( + ErrorKind::InvalidData, + "Only one row of data provided.", + )); }; // Initialization: @@ -112,7 +114,6 @@ impl UnSupModel, Matrix> for GaussianMixtureModel { } else { Err(Error::new_untrained()) } - } } @@ -162,11 +163,20 @@ impl GaussianMixtureModel { /// /// - Mixture weights do not have length k. /// - Mixture weights have a negative entry. 
- pub fn with_weights(k: usize, mixture_weights: Vector) -> LearningResult { + pub fn with_weights( + k: usize, + mixture_weights: Vector, + ) -> LearningResult { if mixture_weights.size() != k { - Err(Error::new(ErrorKind::InvalidParameters, "Mixture weights must have length k.")) + Err(Error::new( + ErrorKind::InvalidParameters, + "Mixture weights must have length k.", + )) } else if mixture_weights.data().iter().any(|&x| x < 0f64) { - Err(Error::new(ErrorKind::InvalidParameters, "Mixture weights must have only non-negative entries.")) + Err(Error::new( + ErrorKind::InvalidParameters, + "Mixture weights must have only non-negative entries.", + )) } else { let sum = mixture_weights.sum(); let normalized_weights = mixture_weights / sum; @@ -224,7 +234,11 @@ impl GaussianMixtureModel { self.max_iters = iters; } - fn initialize_covariances(&self, inputs: &Matrix, reg_value: f64) -> LearningResult> { + fn initialize_covariances( + &self, + inputs: &Matrix, + reg_value: f64, + ) -> LearningResult> { match self.cov_option { CovOption::Diagonal => { let variance = try!(inputs.variance(Axes::Row)); @@ -236,9 +250,10 @@ impl GaussianMixtureModel { let mut cov_mat = Matrix::zeros(inputs.cols(), inputs.cols()); for (j, mut row) in cov_mat.row_iter_mut().enumerate() { for (k, elem) in row.iter_mut().enumerate() { - *elem = inputs.row_iter().map(|r| { - (r[j] - means[j]) * (r[k] - means[k]) - }).sum::(); + *elem = inputs + .row_iter() + .map(|r| (r[j] - means[j]) * (r[k] - means[k])) + .sum::(); } } cov_mat *= reg_value; @@ -261,7 +276,8 @@ impl GaussianMixtureModel { if let Some(ref covars) = self.model_covars { for cov in covars { - let lup = PartialPivLu::decompose(cov.clone()).expect("Covariance could not be lup decomposed"); + let lup = PartialPivLu::decompose(cov.clone()) + .expect("Covariance could not be lup decomposed"); let covar_det = lup.det(); // TODO: We can probably remove this inverse for a more stable solve elsewhere. 
let covar_inv = try!(lup.inverse().map_err(Error::from)); @@ -283,8 +299,8 @@ impl GaussianMixtureModel { let mu_j = MatrixSlice::from_matrix(means, [j, 0], 1, means.cols()); let diff = x_i - mu_j; - let pdf = (&diff * &cov_invs[j] * diff.transpose() * -0.5).into_vec()[0] - .exp() / cov_sqrt_dets[j]; + let pdf = (&diff * &cov_invs[j] * diff.transpose() * -0.5).into_vec()[0].exp() + / cov_sqrt_dets[j]; pdfs.push(pdf); } @@ -298,7 +314,10 @@ impl GaussianMixtureModel { } } - Ok((Matrix::new(n, self.comp_count, member_weights_data), log_lik)) + Ok(( + Matrix::new(n, self.comp_count, member_weights_data), + log_lik, + )) } fn update_params(&mut self, inputs: &Matrix, membership_weights: Matrix) { @@ -332,7 +351,6 @@ impl GaussianMixtureModel { } new_covs.push(cov_mat / sum_weights[k]); - } self.model_means = Some(new_means); diff --git a/src/learning/gp.rs b/src/learning/gp.rs index 828e039d..608ba749 100644 --- a/src/learning/gp.rs +++ b/src/learning/gp.rs @@ -26,12 +26,11 @@ //! the predictive mean and covariance. However, this is likely to change in //! a future release. +use learning::error::{Error, ErrorKind}; use learning::toolkit::kernel::{Kernel, SquaredExp}; -use linalg::{Matrix, BaseMatrix}; -use linalg::Vector; use learning::{LearningResult, SupModel}; -use learning::error::{Error, ErrorKind}; - +use linalg::Vector; +use linalg::{BaseMatrix, Matrix}; /// Trait for GP mean functions. pub trait MeanFunc { @@ -124,8 +123,10 @@ impl GaussianProcess { /// Construct a kernel matrix fn ker_mat(&self, m1: &Matrix, m2: &Matrix) -> LearningResult> { if m1.cols() != m2.cols() { - Err(Error::new(ErrorKind::InvalidState, - "Inputs to kernel matrices have different column counts.")) + Err(Error::new( + ErrorKind::InvalidState, + "Inputs to kernel matrices have different column counts.", + )) } else { let dim1 = m1.rows(); let dim2 = m2.rows(); @@ -144,14 +145,16 @@ impl GaussianProcess { impl SupModel, Vector> for GaussianProcess { /// Predict output from inputs. 
fn predict(&self, inputs: &Matrix) -> LearningResult> { - // Messy referencing for succint syntax if let (&Some(ref alpha), &Some(ref t_data)) = (&self.alpha, &self.train_data) { let mean = self.mean.func(inputs.clone()); let post_mean = try!(self.ker_mat(inputs, t_data)) * alpha; Ok(mean + post_mean) } else { - Err(Error::new(ErrorKind::UntrainedModel, "The model has not been trained.")) + Err(Error::new( + ErrorKind::UntrainedModel, + "The model has not been trained.", + )) } } @@ -161,12 +164,14 @@ impl SupModel, Vector> for GaussianProc let ker_mat = self.ker_mat(inputs, inputs).unwrap(); - let train_mat = try!((ker_mat + noise_mat).cholesky().map_err(|_| { - Error::new(ErrorKind::InvalidState, - "Could not compute Cholesky decomposition.") - })); + let train_mat = try!((ker_mat + noise_mat).cholesky().map_err(|_| Error::new( + ErrorKind::InvalidState, + "Could not compute Cholesky decomposition." + ))); - let x = train_mat.solve_l_triangular(targets - self.mean.func(inputs.clone())).unwrap(); + let x = train_mat + .solve_l_triangular(targets - self.mean.func(inputs.clone())) + .unwrap(); let alpha = train_mat.transpose().solve_u_triangular(x).unwrap(); self.train_mat = Some(train_mat); @@ -183,12 +188,13 @@ impl GaussianProcess { /// Requires the model to be trained first. /// /// Outputs the posterior mean and covariance matrix. 
- pub fn get_posterior(&self, - inputs: &Matrix) - -> LearningResult<(Vector, Matrix)> { - if let (&Some(ref t_mat), &Some(ref alpha), &Some(ref t_data)) = (&self.train_mat, - &self.alpha, - &self.train_data) { + pub fn get_posterior( + &self, + inputs: &Matrix, + ) -> LearningResult<(Vector, Matrix)> { + if let (&Some(ref t_mat), &Some(ref alpha), &Some(ref t_data)) = + (&self.train_mat, &self.alpha, &self.train_data) + { let mean = self.mean.func(inputs.clone()); let post_mean = mean + try!(self.ker_mat(inputs, t_data)) * alpha; diff --git a/src/learning/k_means.rs b/src/learning/k_means.rs index 8b5cb416..296d1abe 100644 --- a/src/learning/k_means.rs +++ b/src/learning/k_means.rs @@ -42,12 +42,12 @@ //! //! The [k-means++](https://en.wikipedia.org/wiki/K-means%2B%2B) scheme. -use linalg::{Matrix, MatrixSlice, Axes, Vector, BaseMatrix}; -use learning::{LearningResult, UnSupModel}; use learning::error::{Error, ErrorKind}; +use learning::{LearningResult, UnSupModel}; +use linalg::{Axes, BaseMatrix, Matrix, MatrixSlice, Vector}; -use rand::{Rng, thread_rng}; use libnum::abs; +use rand::{thread_rng, Rng}; use std::fmt::Debug; @@ -185,26 +185,33 @@ impl KMeansClassifier { /// Used internally within model. 
fn init_centroids(&mut self, inputs: &Matrix) -> LearningResult<()> { if self.k > inputs.rows() { - Err(Error::new(ErrorKind::InvalidData, - format!("Number of clusters ({0}) exceeds number of data points \ - ({1}).", - self.k, - inputs.rows()))) + Err(Error::new( + ErrorKind::InvalidData, + format!( + "Number of clusters ({0}) exceeds number of data points \ + ({1}).", + self.k, + inputs.rows() + ), + )) } else { let centroids = try!(self.init_algorithm.init_centroids(self.k, inputs)); if centroids.rows() != self.k { - Err(Error::new(ErrorKind::InvalidState, - "Initial centroids must have exactly k rows.")) + Err(Error::new( + ErrorKind::InvalidState, + "Initial centroids must have exactly k rows.", + )) } else if centroids.cols() != inputs.cols() { - Err(Error::new(ErrorKind::InvalidState, - "Initial centroids must have the same column count as inputs.")) + Err(Error::new( + ErrorKind::InvalidState, + "Initial centroids must have the same column count as inputs.", + )) } else { self.centroids = Some(centroids); Ok(()) } } - } /// Updated the centroids by computing means of assigned classes. @@ -226,14 +233,20 @@ impl KMeansClassifier { self.centroids = Some(Matrix::new(self.k, inputs.cols(), new_centroids)); } - fn get_closest_centroids(&self, - inputs: &Matrix) - -> LearningResult<(Vector, Vector)> { + fn get_closest_centroids( + &self, + inputs: &Matrix, + ) -> LearningResult<(Vector, Vector)> { if let Some(ref c) = self.centroids { - Ok(KMeansClassifier::::find_closest_centroids(c.as_slice(), inputs)) + Ok(KMeansClassifier::::find_closest_centroids( + c.as_slice(), + inputs, + )) } else { - Err(Error::new(ErrorKind::InvalidState, - "Centroids not correctly initialized.")) + Err(Error::new( + ErrorKind::InvalidState, + "Centroids not correctly initialized.", + )) } } @@ -241,9 +254,10 @@ impl KMeansClassifier { /// /// Used internally within model. /// Returns the index of the closest centroid and the distance to it. 
- fn find_closest_centroids(centroids: MatrixSlice, - inputs: &Matrix) - -> (Vector, Vector) { + fn find_closest_centroids( + centroids: MatrixSlice, + inputs: &Matrix, + ) -> (Vector, Vector) { let mut idx = Vec::with_capacity(inputs.rows()); let mut distances = Vec::with_capacity(inputs.rows()); @@ -296,7 +310,6 @@ pub struct RandomPartition; impl Initializer for RandomPartition { fn init_centroids(&self, k: usize, inputs: &Matrix) -> LearningResult> { - // Populate so we have something in each class. let mut random_assignments = (0..k).map(|i| vec![i]).collect::>>(); let mut rng = thread_rng(); @@ -335,18 +348,22 @@ impl Initializer for KPlusPlus { for i in 1..k { unsafe { - let temp_centroids = MatrixSlice::from_raw_parts(init_centroids.as_ptr(), - i, - inputs.cols(), - inputs.cols()); + let temp_centroids = MatrixSlice::from_raw_parts( + init_centroids.as_ptr(), + i, + inputs.cols(), + inputs.cols(), + ); let (_, dist) = KMeansClassifier::::find_closest_centroids(temp_centroids, inputs); // A relatively cheap way to validate our input data if !dist.data().iter().all(|x| x.is_finite()) { - return Err(Error::new(ErrorKind::InvalidData, - "Input data led to invalid centroid distances during \ - initialization.")); + return Err(Error::new( + ErrorKind::InvalidData, + "Input data led to invalid centroid distances during \ + initialization.", + )); } let next_cen = sample_discretely(&dist); diff --git a/src/learning/knn/binary_tree.rs b/src/learning/knn/binary_tree.rs index eacce74d..e1b6e21c 100644 --- a/src/learning/knn/binary_tree.rs +++ b/src/learning/knn/binary_tree.rs @@ -2,10 +2,10 @@ use std::borrow::Borrow; use std::collections::VecDeque; -use linalg::{Matrix, BaseMatrix, Vector}; use learning::error::Error; +use linalg::{BaseMatrix, Matrix, Vector}; -use super::{KNearest, KNearestSearch, get_distances, dist}; +use super::{dist, get_distances, KNearest, KNearestSearch}; /// Binary tree #[derive(Debug)] @@ -15,7 +15,7 @@ pub struct BinaryTree { // Search 
data data: Option>, // Binary tree - root: Option> + root: Option>, } impl Default for BinaryTree { @@ -33,24 +33,28 @@ impl Default for BinaryTree { BinaryTree { leafsize: 30, data: None, - root: None + root: None, } } } /// Binary splittable pub trait BinarySplit: Sized { - /// Build branch from passed args - fn build(data: &Matrix, remains: Vec, - dim: usize, split: f64, min: Vector, max: Vector, - left: Node, right: Node) - -> Node; + fn build( + data: &Matrix, + remains: Vec, + dim: usize, + split: f64, + min: Vector, + max: Vector, + left: Node, + right: Node, + ) -> Node; /// Return a tuple of left and right node. First node is likely to be /// closer to the point - unsafe fn maybe_close<'s, 'p>(&'s self, point: &'p [f64]) - -> (&'s Node, &'s Node); + unsafe fn maybe_close<'s, 'p>(&'s self, point: &'p [f64]) -> (&'s Node, &'s Node); /// Return distance between the point and myself fn dist(&self, point: &[f64]) -> f64; @@ -106,25 +110,28 @@ pub type KDTree = BinaryTree; pub type BallTree = BinaryTree; impl BinarySplit for KDTreeBranch { - - fn build(_: &Matrix, _: Vec, - dim: usize, split: f64, min: Vector, max: Vector, - left: Node, right: Node) -> Node { - + fn build( + _: &Matrix, + _: Vec, + dim: usize, + split: f64, + min: Vector, + max: Vector, + left: Node, + right: Node, + ) -> Node { let b = KDTreeBranch { dim: dim, split: split, min: min, max: max, left: Box::new(left), - right: Box::new(right) + right: Box::new(right), }; Node::Branch(b) } - unsafe fn maybe_close<'s, 'p>(&'s self, point: &'p [f64]) - -> (&'s Node, &'s Node) { - + unsafe fn maybe_close<'s, 'p>(&'s self, point: &'p [f64]) -> (&'s Node, &'s Node) { if *point.get_unchecked(self.dim) < self.split { (&self.left, &self.right) } else { @@ -134,9 +141,7 @@ impl BinarySplit for KDTreeBranch { fn dist(&self, point: &[f64]) -> f64 { let mut d = 0.; - for ((&p, &mi), &ma) in point.iter() - .zip(self.min.iter()) - .zip(self.max.iter()) { + for ((&p, &mi), &ma) in 
point.iter().zip(self.min.iter()).zip(self.max.iter()) { if p < mi { d += (mi - p) * (mi - p); } else if ma < p { @@ -157,11 +162,16 @@ impl BinarySplit for KDTreeBranch { } impl BinarySplit for BallTreeBranch { - - fn build(data: &Matrix, remains: Vec, - dim: usize, split: f64, _: Vector, _: Vector, - left: Node, right: Node) -> Node { - + fn build( + data: &Matrix, + remains: Vec, + dim: usize, + split: f64, + _: Vector, + _: Vector, + left: Node, + right: Node, + ) -> Node { // calculate centroid (mean) // TODO: cleanup using .row() let mut center: Vec = vec![0.; data.cols()]; @@ -190,14 +200,12 @@ impl BinarySplit for BallTreeBranch { center: Vector::new(center), radius: radius, left: Box::new(left), - right: Box::new(right) + right: Box::new(right), }; Node::Branch(b) } - unsafe fn maybe_close<'s, 'p>(&'s self, point: &'p [f64]) - -> (&'s Node, &'s Node) { - + unsafe fn maybe_close<'s, 'p>(&'s self, point: &'p [f64]) -> (&'s Node, &'s Node) { if *point.get_unchecked(self.dim) < self.split { (&self.left, &self.right) } else { @@ -229,25 +237,22 @@ pub enum Node { /// Binary tree branch Branch(B), /// Binary tree leaf - Leaf(Leaf) + Leaf(Leaf), } /// Binary tree leaf #[derive(Debug)] pub struct Leaf { - children: Vec + children: Vec, } impl Leaf { fn new(children: Vec) -> Self { - Leaf { - children: children - } + Leaf { children: children } } } impl BinaryTree { - /// Constructs binary-tree (kd-tree or ball-tree) seach. /// Specify leafsize which is maximum number to be contained in each leaf. 
/// @@ -262,7 +267,7 @@ impl BinaryTree { BinaryTree { leafsize: leafsize, data: None, - root: None + root: None, } } @@ -273,10 +278,13 @@ impl BinaryTree { /// - remains for right node /// - updated max for left node /// - updated min for right node - fn select_split(&self, data: &Matrix, mut remains: Vec, - mut dmin: Vector, mut dmax: Vector) - -> (usize, f64, Vec, Vec, Vector, Vector){ - + fn select_split( + &self, + data: &Matrix, + mut remains: Vec, + mut dmin: Vector, + mut dmax: Vector, + ) -> (usize, f64, Vec, Vec, Vector, Vector) { // avoid recursive call loop { // split columns which has the widest range @@ -286,9 +294,7 @@ impl BinaryTree { // with Data Clustered in Lower Dimensions (Maneewongvatan and Mount, 1999)" // ToDo: use unsafe get (v0.4.0?) // https://github.com/AtheMathmo/rulinalg/pull/104 - let split = unsafe { - dmin.data().get_unchecked(dim) + d / 2.0 - }; + let split = unsafe { dmin.data().get_unchecked(dim) + d / 2.0 }; // split remains let mut l_remains: Vec = Vec::with_capacity(remains.len()); @@ -325,17 +331,20 @@ impl BinaryTree { return (dim, split, l_remains, r_remains, l_max, r_min); } - }; + } } /// find next binary split - fn split(&self, data: &Matrix, remains: Vec, - dmin: Vector, dmax: Vector) -> Node { - + fn split( + &self, + data: &Matrix, + remains: Vec, + dmin: Vector, + dmax: Vector, + ) -> Node { if remains.len() < self.leafsize { Node::Leaf(Leaf::new(remains)) } else { - // ToDo: avoid this clone let (dim, split, l_remains, r_remains, l_max, r_min) = self.select_split(data, remains.clone(), dmin.clone(), dmax.clone()); @@ -347,11 +356,12 @@ impl BinaryTree { } /// find leaf contains search point - fn search_leaf<'s, 'p>(&'s self, point: &'p [f64], k: usize) - -> Result<(KNearest, VecDeque<&'s Node>), Error> { - + fn search_leaf<'s, 'p>( + &'s self, + point: &'p [f64], + k: usize, + ) -> Result<(KNearest, VecDeque<&'s Node>), Error> { if let (&Some(ref root), &Some(ref data)) = (&self.root, &self.data) { - let mut 
queue: VecDeque<&Node> = VecDeque::new(); queue.push_front(root); @@ -363,14 +373,12 @@ impl BinaryTree { let distances = get_distances(data, point, &l.children); let kn = KNearest::new(k, l.children.clone(), distances); return Ok((kn, queue)); - }, + } Node::Branch(ref b) => { // the current branch must contains target point. // store the child branch which contains target point to // the front, put other side on the back. - let (close, far) = unsafe { - b.maybe_close(point) - }; + let (close, far) = unsafe { b.maybe_close(point) }; queue.push_front(close); queue.push_back(far); } @@ -384,7 +392,6 @@ impl BinaryTree { /// Can search k-nearest items impl KNearestSearch for BinaryTree { - /// build data structure for search optimization fn build(&mut self, data: Matrix) { let remains: Vec = (0..data.rows()).collect(); @@ -411,7 +418,7 @@ impl KNearestSearch for BinaryTree { current_dist = query.add(i, d); } } - }, + } Node::Branch(ref b) => { let d = b.dist(point); if d < query.dist() { @@ -428,7 +435,6 @@ impl KNearestSearch for BinaryTree { } } - /// min fn min(data: &Matrix) -> Vector { // ToDo: use rulinalg .min (v0.4.1?) 
@@ -468,17 +474,17 @@ fn max(data: &Matrix) -> Vector { #[cfg(test)] mod tests { - use linalg::{Vector, Matrix, BaseMatrix}; use super::super::KNearestSearch; - use super::{KDTree, BallTree, min, max}; + use super::{max, min, BallTree, KDTree}; + use linalg::{BaseMatrix, Matrix, Vector}; - use super::{Node, BinarySplit, Leaf}; + use super::{BinarySplit, Leaf, Node}; // return node's leaf reference, for testing purpose fn as_leaf(n: &Node) -> &Leaf { match n { &Node::Leaf(ref leaf) => leaf, - _ => panic!("Node is not leaf") + _ => panic!("Node is not leaf"), } } @@ -486,17 +492,13 @@ mod tests { fn as_branch(n: &Node) -> &B { match n { &Node::Branch(ref branch) => branch, - _ => panic!("Node is not branch") + _ => panic!("Node is not branch"), } } #[test] fn test_kdtree_construct() { - let m = Matrix::new(5, 2, vec![1., 2., - 8., 0., - 6., 10., - 3., 6., - 0., 3.]); + let m = Matrix::new(5, 2, vec![1., 2., 8., 0., 6., 10., 3., 6., 0., 3.]); let mut tree = KDTree::new(3); tree.build(m); @@ -525,17 +527,16 @@ mod tests { #[test] fn test_kdtree_search() { - let m = Matrix::new(5, 2, vec![1., 2., - 8., 0., - 6., 10., - 3., 6., - 0., 3.]); + let m = Matrix::new(5, 2, vec![1., 2., 8., 0., 6., 10., 3., 6., 0., 3.]); let mut tree = KDTree::new(3); tree.build(m); // search first leaf let (kn, _) = tree.search_leaf(&vec![3., 4.9], 1).unwrap(); - assert_eq!(kn.pairs, vec![(0, (2.0f64 * 2.0f64 + 2.9f64 * 2.9f64).sqrt())]); + assert_eq!( + kn.pairs, + vec![(0, (2.0f64 * 2.0f64 + 2.9f64 * 2.9f64).sqrt())] + ); // search tree let (ind, dist) = tree.search(&vec![3., 4.9], 1).unwrap(); @@ -544,12 +545,23 @@ mod tests { let (ind, dist) = tree.search(&vec![3., 4.9], 3).unwrap(); assert_eq!(ind, vec![3, 0, 4]); - assert_eq!(dist, vec![1.0999999999999996, 3.5227829907617076, 3.551056180912941]); + assert_eq!( + dist, + vec![1.0999999999999996, 3.5227829907617076, 3.551056180912941] + ); // search first leaf let (kn, _) = tree.search_leaf(&vec![3., 4.9], 2).unwrap(); - 
assert_eq!(kn.pairs, vec![(0, (2.0f64 * 2.0f64 + 2.9f64 * 2.9f64).sqrt()), - (4, (3.0f64 * 3.0f64 + (4.9f64 - 3.0f64) * (4.9f64 - 3.0f64)).sqrt())]); + assert_eq!( + kn.pairs, + vec![ + (0, (2.0f64 * 2.0f64 + 2.9f64 * 2.9f64).sqrt()), + ( + 4, + (3.0f64 * 3.0f64 + (4.9f64 - 3.0f64) * (4.9f64 - 3.0f64)).sqrt(), + ), + ] + ); // search tree let (ind, dist) = tree.search(&vec![3., 4.9], 2).unwrap(); assert_eq!(ind, vec![3, 0]); @@ -570,11 +582,27 @@ mod tests { // search tree let (ind, dist) = tree.search(&vec![5.8, 3.6], 4).unwrap(); assert_eq!(ind, vec![18, 85, 36, 14]); - assert_eq!(dist, vec![0.22360679774997858, 0.2828427124746193, 0.31622776601683783, 0.3999999999999999]); + assert_eq!( + dist, + vec![ + 0.22360679774997858, + 0.2828427124746193, + 0.31622776601683783, + 0.3999999999999999, + ] + ); let (ind, dist) = tree.search(&vec![7.0, 2.6], 4).unwrap(); assert_eq!(ind, vec![76, 108, 102, 107]); - assert_eq!(dist, vec![0.28284271247461895, 0.31622776601683783, 0.41231056256176585, 0.4242640687119283]); + assert_eq!( + dist, + vec![ + 0.28284271247461895, + 0.31622776601683783, + 0.41231056256176585, + 0.4242640687119283, + ] + ); } #[cfg(feature = "datasets")] @@ -591,25 +619,42 @@ mod tests { // search tree let (ind, dist) = tree.search(&vec![5.8, 3.1, 3.8, 1.2], 8).unwrap(); assert_eq!(ind, vec![64, 88, 82, 95, 99, 96, 71, 61]); - assert_eq!(dist, vec![0.360555127546399, 0.3872983346207417, 0.41231056256176596, - 0.4242640687119288, 0.4472135954999579, 0.4690415759823433, - 0.4795831523312721, 0.5196152422706636]); + assert_eq!( + dist, + vec![ + 0.360555127546399, + 0.3872983346207417, + 0.41231056256176596, + 0.4242640687119288, + 0.4472135954999579, + 0.4690415759823433, + 0.4795831523312721, + 0.5196152422706636, + ] + ); let (ind, dist) = tree.search(&vec![6.5, 3.5, 3.2, 1.3], 10).unwrap(); assert_eq!(ind, vec![71, 64, 74, 82, 79, 61, 65, 97, 75, 51]); - assert_eq!(dist, vec![1.1357816691600549, 1.1532562594670799, 1.2569805089976533, - 
1.2767145334803702, 1.2767145334803702, 1.284523257866513, - 1.2845232578665131, 1.2884098726725122, 1.3076696830622023, - 1.352774925846868]); + assert_eq!( + dist, + vec![ + 1.1357816691600549, + 1.1532562594670799, + 1.2569805089976533, + 1.2767145334803702, + 1.2767145334803702, + 1.284523257866513, + 1.2845232578665131, + 1.2884098726725122, + 1.3076696830622023, + 1.352774925846868, + ] + ); } #[test] fn test_kdtree_dim_selection() { - let m = Matrix::new(5, 2, vec![1., 2., - 3., 0., - 2., 10., - 3., 6., - 1., 3.]); + let m = Matrix::new(5, 2, vec![1., 2., 3., 0., 2., 10., 3., 6., 1., 3.]); let mut tree = KDTree::new(3); tree.build(m); @@ -639,11 +684,7 @@ mod tests { #[test] fn test_kdtree_dim_selection_biased() { - let m = Matrix::new(5, 2, vec![1., 0., - 3., 0., - 2., 20., - 3., 0., - 1., 0.]); + let m = Matrix::new(5, 2, vec![1., 0., 3., 0., 2., 20., 3., 0., 1., 0.]); let mut tree = KDTree::new(3); tree.build(m); @@ -684,11 +725,7 @@ mod tests { #[test] fn test_balltree_construct() { - let m = Matrix::new(5, 2, vec![1., 2., - 8., 0., - 6., 10., - 3., 6., - 0., 3.]); + let m = Matrix::new(5, 2, vec![1., 2., 8., 0., 6., 10., 3., 6., 0., 3.]); let mut tree = BallTree::new(3); tree.build(m); @@ -721,17 +758,16 @@ mod tests { #[test] fn test_balltree_search() { - let m = Matrix::new(5, 2, vec![1., 2., - 8., 0., - 6., 10., - 3., 6., - 0., 3.]); + let m = Matrix::new(5, 2, vec![1., 2., 8., 0., 6., 10., 3., 6., 0., 3.]); let mut tree = BallTree::new(3); tree.build(m); // search first leaf let (kn, _) = tree.search_leaf(&vec![3., 4.9], 1).unwrap(); - assert_eq!(kn.pairs, vec![(0, (2.0f64 * 2.0f64 + 2.9f64 * 2.9f64).sqrt())]); + assert_eq!( + kn.pairs, + vec![(0, (2.0f64 * 2.0f64 + 2.9f64 * 2.9f64).sqrt())] + ); // search tree let (ind, dist) = tree.search(&vec![3., 4.9], 1).unwrap(); @@ -740,12 +776,23 @@ mod tests { let (ind, dist) = tree.search(&vec![3., 4.9], 3).unwrap(); assert_eq!(ind, vec![3, 0, 4]); - assert_eq!(dist, vec![1.0999999999999996, 
3.5227829907617076, 3.551056180912941]); + assert_eq!( + dist, + vec![1.0999999999999996, 3.5227829907617076, 3.551056180912941] + ); // search first leaf let (kn, _) = tree.search_leaf(&vec![3., 4.9], 2).unwrap(); - assert_eq!(kn.pairs, vec![(0, (2.0f64 * 2.0f64 + 2.9f64 * 2.9f64).sqrt()), - (4, (3.0f64 * 3.0f64 + (4.9f64 - 3.0f64) * (4.9f64 - 3.0f64)).sqrt())]); + assert_eq!( + kn.pairs, + vec![ + (0, (2.0f64 * 2.0f64 + 2.9f64 * 2.9f64).sqrt()), + ( + 4, + (3.0f64 * 3.0f64 + (4.9f64 - 3.0f64) * (4.9f64 - 3.0f64)).sqrt(), + ), + ] + ); // search tree let (ind, dist) = tree.search(&vec![3., 4.9], 2).unwrap(); assert_eq!(ind, vec![3, 0]); @@ -766,25 +813,42 @@ mod tests { // search tree let (ind, dist) = tree.search(&vec![5.8, 3.1, 3.8, 1.2], 8).unwrap(); assert_eq!(ind, vec![64, 88, 82, 95, 99, 96, 71, 61]); - assert_eq!(dist, vec![0.360555127546399, 0.3872983346207417, 0.41231056256176596, - 0.4242640687119288, 0.4472135954999579, 0.4690415759823433, - 0.4795831523312721, 0.5196152422706636]); + assert_eq!( + dist, + vec![ + 0.360555127546399, + 0.3872983346207417, + 0.41231056256176596, + 0.4242640687119288, + 0.4472135954999579, + 0.4690415759823433, + 0.4795831523312721, + 0.5196152422706636, + ] + ); let (ind, dist) = tree.search(&vec![6.5, 3.5, 3.2, 1.3], 10).unwrap(); assert_eq!(ind, vec![71, 64, 74, 82, 79, 61, 65, 97, 75, 51]); - assert_eq!(dist, vec![1.1357816691600549, 1.1532562594670799, 1.2569805089976533, - 1.2767145334803702, 1.2767145334803702, 1.284523257866513, - 1.2845232578665131, 1.2884098726725122, 1.3076696830622023, - 1.352774925846868]); + assert_eq!( + dist, + vec![ + 1.1357816691600549, + 1.1532562594670799, + 1.2569805089976533, + 1.2767145334803702, + 1.2767145334803702, + 1.284523257866513, + 1.2845232578665131, + 1.2884098726725122, + 1.3076696830622023, + 1.352774925846868, + ] + ); } #[test] fn test_balltree_dim_selection_biased() { - let m = Matrix::new(5, 2, vec![1., 0., - 3., 0., - 2., 20., - 3., 0., - 1., 0.]); + let m = 
Matrix::new(5, 2, vec![1., 0., 3., 0., 2., 20., 3., 0., 1., 0.]); let mut tree = BallTree::new(3); tree.build(m); @@ -829,9 +893,7 @@ mod tests { #[test] fn test_min_max() { - let data = Matrix::new(3, 2, vec![1., 2., - 2., 4., - 3., 1.]); + let data = Matrix::new(3, 2, vec![1., 2., 2., 4., 3., 1.]); assert_eq!(min(&data), Vector::new(vec![1., 1.])); assert_eq!(max(&data), Vector::new(vec![3., 4.])); } diff --git a/src/learning/knn/brute_force.rs b/src/learning/knn/brute_force.rs index d200c09c..197a75ef 100644 --- a/src/learning/knn/brute_force.rs +++ b/src/learning/knn/brute_force.rs @@ -1,8 +1,8 @@ //! Bruteforce search implementations -use linalg::{Matrix, BaseMatrix}; use learning::error::Error; +use linalg::{BaseMatrix, Matrix}; -use super::{KNearest, KNearestSearch, get_distances, dist}; +use super::{dist, get_distances, KNearest, KNearestSearch}; /// Perform brute-force search #[derive(Debug)] @@ -20,9 +20,7 @@ impl Default for BruteForce { /// let _ = BruteForce::default(); /// ``` fn default() -> Self { - BruteForce { - data: None - } + BruteForce { data: None } } } @@ -43,7 +41,6 @@ impl BruteForce { /// Can search K-nearest items impl KNearestSearch for BruteForce { - /// initialize BruteForce Searcher fn build(&mut self, data: Matrix) { self.data = Some(data); @@ -76,17 +73,13 @@ impl KNearestSearch for BruteForce { #[cfg(test)] mod tests { - use linalg::Matrix; use super::super::KNearestSearch; use super::BruteForce; + use linalg::Matrix; #[test] fn test_bruteforce_search() { - let m = Matrix::new(5, 2, vec![1., 2., - 8., 0., - 6., 10., - 3., 6., - 0., 3.]); + let m = Matrix::new(5, 2, vec![1., 2., 8., 0., 6., 10., 3., 6., 0., 3.]); let mut b = BruteForce::new(); b.build(m); @@ -100,7 +93,10 @@ mod tests { let (ind, dist) = b.search(&vec![3., 4.9], 3).unwrap(); assert_eq!(ind, vec![3, 0, 4]); - assert_eq!(dist, vec![1.0999999999999996, 3.5227829907617076, 3.551056180912941]); + assert_eq!( + dist, + vec![1.0999999999999996, 3.5227829907617076, 
3.551056180912941] + ); } #[test] diff --git a/src/learning/knn/mod.rs b/src/learning/knn/mod.rs index 53be7468..1fe2e1ac 100644 --- a/src/learning/knn/mod.rs +++ b/src/learning/knn/mod.rs @@ -26,17 +26,17 @@ //! assert_eq!(res, Vector::new(vec![1, 0])); //! # } //! ``` -use std::f64; use std::collections::BTreeMap; +use std::f64; -use linalg::{Matrix, BaseMatrix, Vector}; -use learning::{LearningResult, SupModel}; use learning::error::{Error, ErrorKind}; +use learning::{LearningResult, SupModel}; +use linalg::{BaseMatrix, Matrix, Vector}; mod binary_tree; mod brute_force; -pub use self::binary_tree::{KDTree, BallTree}; +pub use self::binary_tree::{BallTree, KDTree}; pub use self::brute_force::BruteForce; /// k-Nearest Neighbor Classifier @@ -61,7 +61,7 @@ impl Default for KNNClassifier { KNNClassifier { k: 5, searcher: KDTree::default(), - target: None + target: None, } } } @@ -80,7 +80,7 @@ impl KNNClassifier { KNNClassifier { k: k, searcher: KDTree::default(), - target: None + target: None, } } } @@ -99,17 +99,15 @@ impl KNNClassifier { KNNClassifier { k: k, searcher: searcher, - target: None + target: None, } } } impl SupModel, Vector> for KNNClassifier { - fn predict(&self, inputs: &Matrix) -> LearningResult> { match self.target { Some(ref target) => { - let mut results: Vec = Vec::with_capacity(inputs.rows()); for row in inputs.row_iter() { let (idx, _) = try!(self.searcher.search(row.raw_slice(), self.k)); @@ -119,19 +117,23 @@ impl SupModel, Vector> for KNNClassifier Err(Error::new_untrained()) + } + _ => Err(Error::new_untrained()), } } fn train(&mut self, inputs: &Matrix, targets: &Vector) -> LearningResult<()> { if inputs.rows() != targets.size() { - return Err(Error::new(ErrorKind::InvalidData, - "inputs and targets must be the same length")); + return Err(Error::new( + ErrorKind::InvalidData, + "inputs and targets must be the same length", + )); } if inputs.rows() < self.k { - return Err(Error::new(ErrorKind::InvalidData, - "inputs number of rows must 
be equal or learger than k")); + return Err(Error::new( + ErrorKind::InvalidData, + "inputs number of rows must be equal or learger than k", + )); } self.searcher.build(inputs.clone()); self.target = Some(targets.clone()); @@ -148,23 +150,19 @@ struct KNearest { } impl KNearest { - fn new(k: usize, index: Vec, distances: Vec) -> Self { debug_assert!(!index.is_empty(), "index can't be empty"); - debug_assert!(index.len() == distances.len(), - "index and distance must have the same length"); + debug_assert!( + index.len() == distances.len(), + "index and distance must have the same length" + ); - let mut pairs: Vec<(usize, f64)> = index.into_iter() - .zip(distances.into_iter()) - .collect(); + let mut pairs: Vec<(usize, f64)> = index.into_iter().zip(distances.into_iter()).collect(); // sort by distance, take k elements pairs.sort_by(|x, y| x.1.partial_cmp(&y.1).unwrap()); pairs.truncate(k); - KNearest { - k: k, - pairs: pairs - } + KNearest { k: k, pairs: pairs } } /// Add new index and distances to the container, keeping first k elements which @@ -173,11 +171,7 @@ impl KNearest { // self.pairs can't be empty let len = self.pairs.len(); // index of the last element after the query - let last_index: usize = if len < self.k { - len - } else { - len - 1 - }; + let last_index: usize = if len < self.k { len } else { len - 1 }; unsafe { if self.pairs.get_unchecked(len - 1).1 < distance { @@ -232,8 +226,7 @@ impl KNearest { } /// Search K-nearest items -pub trait KNearestSearch: Default{ - +pub trait KNearestSearch: Default { /// build data structure for search optimization fn build(&mut self, data: Matrix); @@ -278,17 +271,17 @@ fn get_distances(data: &Matrix, point: &[f64], ids: &[usize]) -> Vec { fn dist(v1: &[f64], v2: &[f64]) -> f64 { // ToDo: use metrics let d: f64 = v1.iter() - .zip(v2.iter()) - .map(|(&x, &y)| (x - y) * (x - y)) - .fold(0., |s, v| s + v); + .zip(v2.iter()) + .map(|(&x, &y)| (x - y) * (x - y)) + .fold(0., |s, v| s + v); d.sqrt() } #[cfg(test)] mod 
tests { - use std::f64; use super::KNearest; + use std::f64; #[test] fn test_knearest() { diff --git a/src/learning/lin_reg.rs b/src/learning/lin_reg.rs index d376a20b..c32ceac1 100644 --- a/src/learning/lin_reg.rs +++ b/src/learning/lin_reg.rs @@ -30,14 +30,14 @@ //! assert!(output[0] > 17f64, "Our regressor isn't very good!"); //! ``` -use linalg::{Matrix, BaseMatrix}; -use linalg::Vector; -use learning::{LearningResult, SupModel}; -use learning::toolkit::cost_fn::CostFunc; -use learning::toolkit::cost_fn::MeanSqError; +use learning::error::Error; use learning::optim::grad_desc::GradientDesc; use learning::optim::{OptimAlgorithm, Optimizable}; -use learning::error::Error; +use learning::toolkit::cost_fn::CostFunc; +use learning::toolkit::cost_fn::MeanSqError; +use learning::{LearningResult, SupModel}; +use linalg::Vector; +use linalg::{BaseMatrix, Matrix}; /// Linear Regression Model. /// @@ -109,12 +109,12 @@ impl Optimizable for LinRegressor { type Inputs = Matrix; type Targets = Vector; - fn compute_grad(&self, - params: &[f64], - inputs: &Matrix, - targets: &Vector) - -> (f64, Vec) { - + fn compute_grad( + &self, + params: &[f64], + inputs: &Matrix, + targets: &Vector, + ) -> (f64, Vec) { let beta_vec = Vector::new(params.to_vec()); let outputs = inputs * beta_vec; diff --git a/src/learning/logistic_reg.rs b/src/learning/logistic_reg.rs index c49c4b4c..ce3a1546 100644 --- a/src/learning/logistic_reg.rs +++ b/src/learning/logistic_reg.rs @@ -34,21 +34,22 @@ //! by using the `new` constructor instead. This allows us to provide //! a `GradientDesc` object with custom parameters. 
-use linalg::{Matrix, BaseMatrix}; -use linalg::Vector; -use learning::{LearningResult, SupModel}; -use learning::toolkit::activ_fn::{ActivationFunc, Sigmoid}; -use learning::toolkit::cost_fn::{CostFunc, CrossEntropyError}; +use learning::error::Error; use learning::optim::grad_desc::GradientDesc; use learning::optim::{OptimAlgorithm, Optimizable}; -use learning::error::Error; +use learning::toolkit::activ_fn::{ActivationFunc, Sigmoid}; +use learning::toolkit::cost_fn::{CostFunc, CrossEntropyError}; +use learning::{LearningResult, SupModel}; +use linalg::Vector; +use linalg::{BaseMatrix, Matrix}; /// Logistic Regression Model. /// /// Contains option for optimized parameter. #[derive(Debug)] pub struct LogisticRegressor - where A: OptimAlgorithm +where + A: OptimAlgorithm, { base: BaseLogisticRegressor, alg: A, @@ -93,7 +94,8 @@ impl> LogisticRegressor { } impl SupModel, Vector> for LogisticRegressor - where A: OptimAlgorithm +where + A: OptimAlgorithm, { /// Train the logistic regression model. /// @@ -119,7 +121,8 @@ impl SupModel, Vector> for LogisticRegressor let initial_params = vec![0.5; full_inputs.cols()]; - let optimal_w = self.alg.optimize(&self.base, &initial_params[..], &full_inputs, targets); + let optimal_w = self.alg + .optimize(&self.base, &initial_params[..], &full_inputs, targets); self.base.set_parameters(Vector::new(optimal_w)); Ok(()) } @@ -178,12 +181,12 @@ impl Optimizable for BaseLogisticRegressor { type Inputs = Matrix; type Targets = Vector; - fn compute_grad(&self, - params: &[f64], - inputs: &Matrix, - targets: &Vector) - -> (f64, Vec) { - + fn compute_grad( + &self, + params: &[f64], + inputs: &Matrix, + targets: &Vector, + ) -> (f64, Vec) { let beta_vec = Vector::new(params.to_vec()); let outputs = (inputs * beta_vec).apply(&Sigmoid::func); diff --git a/src/learning/naive_bayes.rs b/src/learning/naive_bayes.rs index 8a08af2d..856099f8 100644 --- a/src/learning/naive_bayes.rs +++ b/src/learning/naive_bayes.rs @@ -40,9 +40,9 @@ //! 
println!("Final outputs --\n{}", outputs); //! ``` -use linalg::{Matrix, Axes, BaseMatrix, BaseMatrixMut}; -use learning::{LearningResult, SupModel}; use learning::error::{Error, ErrorKind}; +use learning::{LearningResult, SupModel}; +use linalg::{Axes, BaseMatrix, BaseMatrixMut, Matrix}; use rulinalg::utils; use std::f64::consts::PI; @@ -128,7 +128,10 @@ impl SupModel, Matrix> for NaiveBayes { Ok(Matrix::new(inputs.rows(), cluster_count, class_data)) } else { - Err(Error::new(ErrorKind::UntrainedModel, "The model has not been trained.")) + Err(Error::new( + ErrorKind::UntrainedModel, + "The model has not been trained.", + )) } } } @@ -136,7 +139,6 @@ impl SupModel, Matrix> for NaiveBayes { impl NaiveBayes { /// Get the log-probabilities per class for each input. pub fn get_log_probs(&self, inputs: &Matrix) -> LearningResult> { - if let (&Some(ref distr), &Some(ref prior)) = (&self.distr, &self.class_prior) { // Get the joint log likelihood from the distribution distr.joint_log_lik(inputs, prior) @@ -177,7 +179,11 @@ impl NaiveBayes { let mut class_prior = Vec::with_capacity(class_count); // Compute the prior as the proportion in each class - class_prior.extend(self.class_counts.iter().map(|c| *c as f64 / total_data as f64)); + class_prior.extend( + self.class_counts + .iter() + .map(|c| *c as f64 / total_data as f64), + ); self.class_prior = Some(class_prior); self.cluster_count = Some(class_count); @@ -192,8 +198,10 @@ impl NaiveBayes { } } - Err(Error::new(ErrorKind::InvalidState, - "No class found for entry in targets")) + Err(Error::new( + ErrorKind::InvalidState, + "No class found for entry in targets", + )) } fn get_classes(log_probs: Matrix) -> Vec { @@ -220,10 +228,8 @@ pub trait Distribution { /// Compute the joint log likelihood of the data. /// /// Returns a matrix with rows containing the probability that the input lies in each class. 
- fn joint_log_lik(&self, - data: &Matrix, - class_prior: &[f64]) - -> LearningResult>; + fn joint_log_lik(&self, data: &Matrix, class_prior: &[f64]) + -> LearningResult>; } /// The Gaussian Naive Bayes model distribution. @@ -265,11 +271,10 @@ impl Distribution for Gaussian { fn update_params(&mut self, data: &Matrix, class: usize) -> LearningResult<()> { // Compute mean and sample variance let mean = data.mean(Axes::Row).into_vec(); - let var = try!(data.variance(Axes::Row).map_err(|_| { - Error::new(ErrorKind::InvalidData, - "Cannot compute variance for Gaussian distribution.") - })) - .into_vec(); + let var = try!(data.variance(Axes::Row).map_err(|_| Error::new( + ErrorKind::InvalidData, + "Cannot compute variance for Gaussian distribution." + ))).into_vec(); let features = data.cols(); @@ -281,16 +286,19 @@ impl Distribution for Gaussian { Ok(()) } - fn joint_log_lik(&self, - data: &Matrix, - class_prior: &[f64]) - -> LearningResult> { + fn joint_log_lik( + &self, + data: &Matrix, + class_prior: &[f64], + ) -> LearningResult> { let class_count = class_prior.len(); let mut log_lik = Vec::with_capacity(class_count); for (i, item) in class_prior.into_iter().enumerate() { let joint_i = item.ln(); - let n_ij = -0.5 * (self.sigma.select_rows(&[i]) * 2.0 * PI).apply(&|x| x.ln()).sum(); + let n_ij = -0.5 * (self.sigma.select_rows(&[i]) * 2.0 * PI) + .apply(&|x| x.ln()) + .sum(); // NOTE: Here we are copying the row data which is inefficient let r_ij = (data - self.theta.select_rows(&vec![i; data.rows()])) @@ -350,13 +358,13 @@ impl Distribution for Bernoulli { } Ok(()) - } - fn joint_log_lik(&self, - data: &Matrix, - class_prior: &[f64]) - -> LearningResult> { + fn joint_log_lik( + &self, + data: &Matrix, + class_prior: &[f64], + ) -> LearningResult> { let class_count = class_prior.len(); let neg_prob = self.log_probs.clone().apply(&|x| (1f64 - x.exp()).ln()); @@ -424,10 +432,11 @@ impl Distribution for Multinomial { Ok(()) } - fn joint_log_lik(&self, - data: &Matrix, 
- class_prior: &[f64]) - -> LearningResult> { + fn joint_log_lik( + &self, + data: &Matrix, + class_prior: &[f64], + ) -> LearningResult> { let class_count = class_prior.len(); let res = data * self.log_probs.transpose(); @@ -445,10 +454,10 @@ impl Distribution for Multinomial { #[cfg(test)] mod tests { - use super::NaiveBayes; - use super::Gaussian; use super::Bernoulli; + use super::Gaussian; use super::Multinomial; + use super::NaiveBayes; use learning::SupModel; @@ -456,14 +465,20 @@ mod tests { #[test] fn test_gaussian() { - let inputs = Matrix::new(6, - 2, - vec![1.0, 1.1, 1.1, 0.9, 2.2, 2.3, 2.5, 2.7, 5.2, 4.3, 6.2, 7.3]); - - let targets = Matrix::new(6, - 3, - vec![1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, - 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0]); + let inputs = Matrix::new( + 6, + 2, + vec![1.0, 1.1, 1.1, 0.9, 2.2, 2.3, 2.5, 2.7, 5.2, 4.3, 6.2, 7.3], + ); + + let targets = Matrix::new( + 6, + 3, + vec![ + 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, + 0.0, 1.0, + ], + ); let mut model = NaiveBayes::::new(); model.train(&inputs, &targets).unwrap(); @@ -474,9 +489,11 @@ mod tests { #[test] fn test_bernoulli() { - let inputs = Matrix::new(4, - 3, - vec![1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0]); + let inputs = Matrix::new( + 4, + 3, + vec![1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0], + ); let targets = Matrix::new(4, 2, vec![1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0]); @@ -489,10 +506,13 @@ mod tests { #[test] fn test_multinomial() { - let inputs = Matrix::new(4, - 3, - vec![1.0, 0.0, 5.0, 0.0, 0.0, 11.0, 13.0, 1.0, 0.0, 12.0, 3.0, - 0.0]); + let inputs = Matrix::new( + 4, + 3, + vec![ + 1.0, 0.0, 5.0, 0.0, 0.0, 11.0, 13.0, 1.0, 0.0, 12.0, 3.0, 0.0, + ], + ); let targets = Matrix::new(4, 2, vec![1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0]); diff --git a/src/learning/nnet/mod.rs b/src/learning/nnet/mod.rs index 5d0356ff..65e536e3 100644 --- a/src/learning/nnet/mod.rs +++ 
b/src/learning/nnet/mod.rs @@ -42,21 +42,20 @@ //! You can define your own criterion by implementing the `Criterion` //! trait with a concrete `CostFunc`. - pub mod net_layer; use linalg::{Matrix, MatrixSlice}; use rulinalg::utils; -use learning::{LearningResult, SupModel}; use learning::error::{Error, ErrorKind}; +use learning::optim::grad_desc::StochasticGD; +use learning::optim::{OptimAlgorithm, Optimizable}; use learning::toolkit::activ_fn; use learning::toolkit::activ_fn::ActivationFunc; use learning::toolkit::cost_fn; use learning::toolkit::cost_fn::CostFunc; use learning::toolkit::regularization::Regularization; -use learning::optim::{Optimizable, OptimAlgorithm}; -use learning::optim::grad_desc::StochasticGD; +use learning::{LearningResult, SupModel}; use self::net_layer::NetLayer; @@ -66,8 +65,9 @@ use self::net_layer::NetLayer; /// a gradient descent algorithm. #[derive(Debug)] pub struct NeuralNet - where T: Criterion, - A: OptimAlgorithm> +where + T: Criterion, + A: OptimAlgorithm>, { base: BaseNeuralNet, alg: A, @@ -77,8 +77,9 @@ pub struct NeuralNet /// /// The model is trained using back propagation. impl SupModel, Matrix> for NeuralNet - where T: Criterion, - A: OptimAlgorithm> +where + T: Criterion, + A: OptimAlgorithm>, { /// Predict neural network output using forward propagation. fn predict(&self, inputs: &Matrix) -> LearningResult> { @@ -87,7 +88,8 @@ impl SupModel, Matrix> for NeuralNet /// Train the model using gradient optimization and back propagation. 
fn train(&mut self, inputs: &Matrix, targets: &Matrix) -> LearningResult<()> { - let optimal_w = self.alg.optimize(&self.base, &self.base.weights, inputs, targets); + let optimal_w = self.alg + .optimize(&self.base, &self.base.weights, inputs, targets); self.base.weights = optimal_w; Ok(()) } @@ -119,8 +121,9 @@ impl NeuralNet { } impl NeuralNet - where T: Criterion, - A: OptimAlgorithm> +where + T: Criterion, + A: OptimAlgorithm>, { /// Create a new neural network with no layers /// @@ -160,8 +163,10 @@ impl NeuralNet /// let layers = &[3; 4]; /// let mut net = NeuralNet::mlp(layers, BCECriterion::default(), StochasticGD::default(), Sigmoid); /// ``` - pub fn mlp(layer_sizes: &[usize], criterion: T, alg: A, activ_fn: U) -> NeuralNet - where U: ActivationFunc + 'static { + pub fn mlp(layer_sizes: &[usize], criterion: T, alg: A, activ_fn: U) -> NeuralNet + where + U: ActivationFunc + 'static, + { NeuralNet { base: BaseNeuralNet::mlp(layer_sizes, criterion, activ_fn), alg: alg, @@ -179,7 +184,7 @@ impl NeuralNet /// use rusty_machine::learning::nnet::net_layer::Linear; /// use rusty_machine::learning::optim::grad_desc::StochasticGD; /// - /// // Create a new neural net + /// // Create a new neural net /// let mut net = NeuralNet::new(BCECriterion::default(), StochasticGD::default()); /// /// // Give net an input layer of size 3, hidden layer of size 4, and output layer of size 5 @@ -204,7 +209,7 @@ impl NeuralNet /// use rusty_machine::learning::toolkit::activ_fn::Sigmoid; /// use rusty_machine::learning::optim::grad_desc::StochasticGD; /// - /// // Create a new neural net + /// // Create a new neural net /// let mut net = NeuralNet::new(BCECriterion::default(), StochasticGD::default()); /// /// let linear_sig: Vec> = vec![Box::new(Linear::new(5, 5)), Box::new(Sigmoid)]; @@ -213,9 +218,11 @@ impl NeuralNet /// net.add_layers(linear_sig); /// ``` pub fn add_layers<'a, U>(&'a mut self, layers: U) -> &'a mut NeuralNet - where U: IntoIterator> { - 
self.base.add_layers(layers); - self + where + U: IntoIterator>, + { + self.base.add_layers(layers); + self } /// Gets matrix of weights between specified layer and forward layer. @@ -251,33 +258,35 @@ pub struct BaseNeuralNet { criterion: T, } - impl BaseNeuralNet { /// Creates a base neural network with the specified layer sizes. fn default(layer_sizes: &[usize], activ_fn: U) -> BaseNeuralNet - where U: ActivationFunc + 'static { + where + U: ActivationFunc + 'static, + { BaseNeuralNet::mlp(layer_sizes, BCECriterion::default(), activ_fn) } } - impl BaseNeuralNet { /// Create a base neural network with no layers fn new(criterion: T) -> BaseNeuralNet { BaseNeuralNet { layers: Vec::new(), weights: Vec::new(), - criterion: criterion + criterion: criterion, } - } + } /// Create a multilayer perceptron with the specified layer sizes. - fn mlp(layer_sizes: &[usize], criterion: T, activ_fn: U) -> BaseNeuralNet - where U: ActivationFunc + 'static { + fn mlp(layer_sizes: &[usize], criterion: T, activ_fn: U) -> BaseNeuralNet + where + U: ActivationFunc + 'static, + { let mut mlp = BaseNeuralNet { - layers: Vec::with_capacity(2*(layer_sizes.len()-1)), + layers: Vec::with_capacity(2 * (layer_sizes.len() - 1)), weights: Vec::new(), - criterion: criterion + criterion: criterion, }; for shape in layer_sizes.windows(2) { mlp.add(Box::new(net_layer::Linear::new(shape[0], shape[1]))); @@ -295,7 +304,8 @@ impl BaseNeuralNet { /// Adds multiple layers to the end of the network fn add_layers<'a, U>(&'a mut self, layers: U) -> &'a mut BaseNeuralNet - where U: IntoIterator> + where + U: IntoIterator>, { for layer in layers { self.add(layer); @@ -316,19 +326,22 @@ impl BaseNeuralNet { let shape = self.layers[idx].param_shape(); unsafe { - MatrixSlice::from_raw_parts(weights.as_ptr().offset(start as isize), - shape.0, - shape.1, - shape.1) + MatrixSlice::from_raw_parts( + weights.as_ptr().offset(start as isize), + shape.0, + shape.1, + shape.1, + ) } } /// Compute the gradient using the 
back propagation algorithm. - fn compute_grad(&self, - weights: &[f64], - inputs: &Matrix, - targets: &Matrix) - -> (f64, Vec) { + fn compute_grad( + &self, + weights: &[f64], + inputs: &Matrix, + targets: &Matrix, + ) -> (f64, Vec) { let mut gradients = Vec::with_capacity(weights.len()); unsafe { gradients.set_len(weights.len()); @@ -339,16 +352,18 @@ impl BaseNeuralNet { let mut params = Vec::with_capacity(self.layers.len()); // Forward propagation - + let mut index = 0; for (i, layer) in self.layers.iter().enumerate() { let shape = layer.param_shape(); let slice = unsafe { - MatrixSlice::from_raw_parts(weights.as_ptr().offset(index as isize), - shape.0, - shape.1, - shape.1) + MatrixSlice::from_raw_parts( + weights.as_ptr().offset(index as isize), + shape.0, + shape.1, + shape.1, + ) }; let output = if i == 0 { @@ -364,29 +379,34 @@ impl BaseNeuralNet { let output = activations.last().unwrap(); // Backward propagation - + // The gradient with respect to the current layer's output let mut out_grad = self.criterion.cost_grad(output, targets); // at this point index == weights.len() for (i, layer) in self.layers.iter().enumerate().rev() { - let activation = if i == 0 {inputs} else {&activations[i-1]}; + let activation = if i == 0 { inputs } else { &activations[i - 1] }; let result = &activations[i]; index -= layer.num_params(); - let grad_params = &mut gradients[index..index+layer.num_params()]; - grad_params.copy_from_slice(layer.back_params(&out_grad, activation, result, params[i]).data()); - + let grad_params = &mut gradients[index..index + layer.num_params()]; + grad_params.copy_from_slice( + layer + .back_params(&out_grad, activation, result, params[i]) + .data(), + ); + out_grad = layer.back_input(&out_grad, activation, result, params[i]); } let mut cost = self.criterion.cost(output, targets); if self.criterion.is_regularized() { - let all_params = unsafe { - MatrixSlice::from_raw_parts(weights.as_ptr(), weights.len(), 1, 1) - }; - 
utils::in_place_vec_bin_op(&mut gradients, - self.criterion.reg_cost_grad(all_params).data(), - |x, &y| *x = *x + y); + let all_params = + unsafe { MatrixSlice::from_raw_parts(weights.as_ptr(), weights.len(), 1, 1) }; + utils::in_place_vec_bin_op( + &mut gradients, + self.criterion.reg_cost_grad(all_params).data(), + |x, &y| *x = *x + y, + ); cost += self.criterion.reg_cost(all_params); } (cost, gradients) @@ -400,10 +420,8 @@ impl BaseNeuralNet { let mut outputs = unsafe { let shape = self.layers[0].param_shape(); - let slice = MatrixSlice::from_raw_parts(self.weights.as_ptr(), - shape.0, - shape.1, - shape.1); + let slice = + MatrixSlice::from_raw_parts(self.weights.as_ptr(), shape.0, shape.1, shape.1); try!(self.layers[0].forward(inputs, slice)) }; @@ -412,16 +430,22 @@ impl BaseNeuralNet { let shape = layer.param_shape(); let slice = unsafe { - MatrixSlice::from_raw_parts(self.weights.as_ptr().offset(index as isize), - shape.0, - shape.1, - shape.1) + MatrixSlice::from_raw_parts( + self.weights.as_ptr().offset(index as isize), + shape.0, + shape.1, + shape.1, + ) }; - + outputs = match layer.forward(&outputs, slice) { Ok(act) => act, - Err(_) => {return Err(Error::new(ErrorKind::InvalidParameters, - "The network's layers do not line up correctly."))} + Err(_) => { + return Err(Error::new( + ErrorKind::InvalidParameters, + "The network's layers do not line up correctly.", + )) + } }; index += layer.num_params(); @@ -437,11 +461,12 @@ impl Optimizable for BaseNeuralNet { type Targets = Matrix; /// Compute the gradient of the neural network. - fn compute_grad(&self, - params: &[f64], - inputs: &Matrix, - targets: &Matrix) - -> (f64, Vec) { + fn compute_grad( + &self, + params: &[f64], + inputs: &Matrix, + targets: &Matrix, + ) -> (f64, Vec) { self.compute_grad(params, inputs, targets) } } @@ -525,7 +550,9 @@ impl Criterion for BCECriterion { /// Creates an MSE Criterion without any regularization. 
impl Default for BCECriterion { fn default() -> Self { - BCECriterion { regularization: Regularization::None } + BCECriterion { + regularization: Regularization::None, + } } } @@ -542,7 +569,9 @@ impl BCECriterion { /// let criterion = BCECriterion::new(Regularization::L2(0.3f64)); /// ``` pub fn new(regularization: Regularization) -> Self { - BCECriterion { regularization: regularization } + BCECriterion { + regularization: regularization, + } } } @@ -566,7 +595,9 @@ impl Criterion for MSECriterion { /// Creates an MSE Criterion without any regularization. impl Default for MSECriterion { fn default() -> Self { - MSECriterion { regularization: Regularization::None } + MSECriterion { + regularization: Regularization::None, + } } } @@ -583,6 +614,8 @@ impl MSECriterion { /// let criterion = MSECriterion::new(Regularization::L2(0.3f64)); /// ``` pub fn new(regularization: Regularization) -> Self { - MSECriterion { regularization: regularization } + MSECriterion { + regularization: regularization, + } } } diff --git a/src/learning/nnet/net_layer.rs b/src/learning/nnet/net_layer.rs index 8239d7ab..02887fa0 100644 --- a/src/learning/nnet/net_layer.rs +++ b/src/learning/nnet/net_layer.rs @@ -1,27 +1,40 @@ //! 
Neural Network Layers -use linalg::{Matrix, MatrixSlice, BaseMatrix}; +use linalg::{BaseMatrix, Matrix, MatrixSlice}; -use learning::LearningResult; use learning::error::{Error, ErrorKind}; use learning::toolkit::activ_fn::ActivationFunc; +use learning::LearningResult; -use rand::thread_rng; -use rand::distributions::Sample; use rand::distributions::normal::Normal; +use rand::distributions::Sample; +use rand::thread_rng; use std::fmt::Debug; /// Trait for neural net layers -pub trait NetLayer : Debug { +pub trait NetLayer: Debug { /// The result of propogating data forward through this layer - fn forward(&self, input: &Matrix, params: MatrixSlice) -> LearningResult>; + fn forward(&self, input: &Matrix, params: MatrixSlice) + -> LearningResult>; /// The gradient of the output of this layer with respect to its input - fn back_input(&self, out_grad: &Matrix, input: &Matrix, output: &Matrix, params: MatrixSlice) -> Matrix; - + fn back_input( + &self, + out_grad: &Matrix, + input: &Matrix, + output: &Matrix, + params: MatrixSlice, + ) -> Matrix; + /// The gradient of the output of this layer with respect to its parameters - fn back_params(&self, out_grad: &Matrix, input: &Matrix, output: &Matrix, params: MatrixSlice) -> Matrix; + fn back_params( + &self, + out_grad: &Matrix, + input: &Matrix, + output: &Matrix, + params: MatrixSlice, + ) -> Matrix; /// The default value of the parameters of this layer before training fn default_params(&self) -> Vec; @@ -43,7 +56,7 @@ pub trait NetLayer : Debug { /// The parameters are a matrix of weights of size I x N /// where N is the dimensionality of the output and I the dimensionality of the input #[derive(Debug, Clone, Copy)] -pub struct Linear { +pub struct Linear { /// The number of dimensions of the input input_size: usize, /// The number of dimensions of the output @@ -56,24 +69,23 @@ impl Linear { /// Construct a new Linear layer pub fn new(input_size: usize, output_size: usize) -> Linear { Linear { - input_size: input_size + 
1, + input_size: input_size + 1, output_size: output_size, - has_bias: true + has_bias: true, } } /// Construct a Linear layer without a bias term pub fn without_bias(input_size: usize, output_size: usize) -> Linear { Linear { - input_size: input_size, + input_size: input_size, output_size: output_size, - has_bias: false + has_bias: false, } } } -fn remove_first_col(mat: Matrix) -> Matrix -{ +fn remove_first_col(mat: Matrix) -> Matrix { let rows = mat.rows(); let cols = mat.cols(); let mut data = mat.into_vec(); @@ -102,23 +114,39 @@ impl NetLayer for Linear { /// /// input should have dimensions N x I /// where N is the number of samples and I is the dimensionality of the input - fn forward(&self, input: &Matrix, params: MatrixSlice) -> LearningResult> { + fn forward( + &self, + input: &Matrix, + params: MatrixSlice, + ) -> LearningResult> { if self.has_bias { - if input.cols()+1 != params.rows() { - Err(Error::new(ErrorKind::InvalidData, "The input had the wrong number of columns")) + if input.cols() + 1 != params.rows() { + Err(Error::new( + ErrorKind::InvalidData, + "The input had the wrong number of columns", + )) } else { Ok(&Matrix::ones(input.rows(), 1).hcat(input) * ¶ms) } } else { if input.cols() != params.rows() { - Err(Error::new(ErrorKind::InvalidData, "The input had the wrong number of columns")) + Err(Error::new( + ErrorKind::InvalidData, + "The input had the wrong number of columns", + )) } else { Ok(input * ¶ms) } } } - - fn back_input(&self, out_grad: &Matrix, _: &Matrix, _: &Matrix, params: MatrixSlice) -> Matrix { + + fn back_input( + &self, + out_grad: &Matrix, + _: &Matrix, + _: &Matrix, + params: MatrixSlice, + ) -> Matrix { debug_assert_eq!(out_grad.cols(), params.cols()); let gradient = out_grad * ¶ms.transpose(); if self.has_bias { @@ -127,8 +155,14 @@ impl NetLayer for Linear { gradient } } - - fn back_params(&self, out_grad: &Matrix, input: &Matrix, _: &Matrix, _: MatrixSlice) -> Matrix { + + fn back_params( + &self, + out_grad: &Matrix, 
+ input: &Matrix, + _: &Matrix, + _: MatrixSlice, + ) -> Matrix { debug_assert_eq!(input.rows(), out_grad.rows()); if self.has_bias { &Matrix::ones(input.rows(), 1).hcat(input).transpose() * out_grad @@ -141,11 +175,15 @@ impl NetLayer for Linear { /// /// weights drawn from gaussian distribution with 0 mean and variance 2/(input_size+output_size) fn default_params(&self) -> Vec { - let mut distro = Normal::new(0.0, (2.0/(self.input_size+self.output_size) as f64).sqrt()); + let mut distro = Normal::new( + 0.0, + (2.0 / (self.input_size + self.output_size) as f64).sqrt(), + ); let mut rng = thread_rng(); - (0..self.input_size*self.output_size).map(|_| distro.sample(&mut rng)) - .collect() + (0..self.input_size * self.output_size) + .map(|_| distro.sample(&mut rng)) + .collect() } fn param_shape(&self) -> (usize, usize) { @@ -156,22 +194,34 @@ impl NetLayer for Linear { impl NetLayer for T { /// Applies the activation function to each element of the input fn forward(&self, input: &Matrix, _: MatrixSlice) -> LearningResult> { - let mut output = Vec::with_capacity(input.rows()*input.cols()); + let mut output = Vec::with_capacity(input.rows() * input.cols()); for val in input.data() { output.push(T::func(*val)); } Ok(Matrix::new(input.rows(), input.cols(), output)) } - fn back_input(&self, out_grad: &Matrix, _: &Matrix, output: &Matrix, _: MatrixSlice) -> Matrix { - let mut in_grad = Vec::with_capacity(output.rows()*output.cols()); + fn back_input( + &self, + out_grad: &Matrix, + _: &Matrix, + output: &Matrix, + _: MatrixSlice, + ) -> Matrix { + let mut in_grad = Vec::with_capacity(output.rows() * output.cols()); for (y, g) in output.data().iter().zip(out_grad.data()) { in_grad.push(T::func_grad_from_output(*y) * g); } Matrix::new(output.rows(), output.cols(), in_grad) } - - fn back_params(&self, _: &Matrix, _: &Matrix, _: &Matrix, _: MatrixSlice) -> Matrix { + + fn back_params( + &self, + _: &Matrix, + _: &Matrix, + _: &Matrix, + _: MatrixSlice, + ) -> Matrix { 
Matrix::new(0, 0, Vec::new()) } diff --git a/src/learning/optim/fmincg.rs b/src/learning/optim/fmincg.rs index 5afb72bc..b5da7853 100644 --- a/src/learning/optim/fmincg.rs +++ b/src/learning/optim/fmincg.rs @@ -28,13 +28,12 @@ //! - Conversion to Rust. //! - Length hard defaults to the max iterations. -use learning::optim::{Optimizable, OptimAlgorithm}; +use learning::optim::{OptimAlgorithm, Optimizable}; use linalg::Vector; use std::cmp; use std::f64; - /// Conjugate Gradient Descent algorithm #[derive(Clone, Copy, Debug)] pub struct ConjugateGD { @@ -81,12 +80,13 @@ impl Default for ConjugateGD { } impl OptimAlgorithm for ConjugateGD { - fn optimize(&self, - model: &M, - start: &[f64], - inputs: &M::Inputs, - targets: &M::Targets) - -> Vec { + fn optimize( + &self, + model: &M, + start: &[f64], + inputs: &M::Inputs, + targets: &M::Targets, + ) -> Vec { let mut i = 0usize; let mut ls_failed = false; @@ -140,7 +140,6 @@ impl OptimAlgorithm for ConjugateGD { let mut z2: f64; while ((f2 > (f1 + z1 * self.rho * d1)) || (d2 > -self.sig * d1)) && (m > 0i32) { - limit = z1; if f2 > f1 { @@ -265,7 +264,6 @@ impl OptimAlgorithm for ConjugateGD { z1 = 1f64 / (1f64 - d1); ls_failed = true; } - } x.into_vec() } diff --git a/src/learning/optim/grad_desc.rs b/src/learning/optim/grad_desc.rs index 1e114877..ca0fc046 100644 --- a/src/learning/optim/grad_desc.rs +++ b/src/learning/optim/grad_desc.rs @@ -8,9 +8,9 @@ //! optimization algorithm but there is flexibility to introduce new //! algorithms and git them into the same scheme easily. 
-use learning::optim::{Optimizable, OptimAlgorithm}; +use learning::optim::{OptimAlgorithm, Optimizable}; use linalg::Vector; -use linalg::{Matrix, BaseMatrix}; +use linalg::{BaseMatrix, Matrix}; use rulinalg::utils; use learning::toolkit::rand_utils; @@ -55,8 +55,10 @@ impl GradientDesc { /// let gd = GradientDesc::new(0.3, 10000); /// ``` pub fn new(alpha: f64, iters: usize) -> GradientDesc { - assert!(alpha > 0f64, - "The step size (alpha) must be greater than 0."); + assert!( + alpha > 0f64, + "The step size (alpha) must be greater than 0." + ); GradientDesc { alpha: alpha, @@ -66,13 +68,13 @@ impl GradientDesc { } impl OptimAlgorithm for GradientDesc { - fn optimize(&self, - model: &M, - start: &[f64], - inputs: &M::Inputs, - targets: &M::Targets) - -> Vec { - + fn optimize( + &self, + model: &M, + start: &[f64], + inputs: &M::Inputs, + targets: &M::Targets, + ) -> Vec { // Create the initial optimal parameters let mut optimizing_val = Vector::new(start.to_vec()); // The cost at the start of each iteration @@ -131,7 +133,7 @@ impl StochasticGD { /// /// Requires the learning rate, momentum rate and iteration count /// to be specified. - /// + /// /// With Nesterov momentum by default. 
/// /// # Examples @@ -154,15 +156,16 @@ impl StochasticGD { } impl OptimAlgorithm for StochasticGD - where M: Optimizable, Targets = Matrix> +where + M: Optimizable, Targets = Matrix>, { - fn optimize(&self, - model: &M, - start: &[f64], - inputs: &M::Inputs, - targets: &M::Targets) - -> Vec { - + fn optimize( + &self, + model: &M, + start: &[f64], + inputs: &M::Inputs, + targets: &M::Targets, + ) -> Vec { // Create the initial optimal parameters let mut optimizing_val = Vector::new(start.to_vec()); // Create the momentum based gradient distance @@ -180,17 +183,19 @@ impl OptimAlgorithm for StochasticGD rand_utils::in_place_fisher_yates(&mut permutation); for i in &permutation { // Compute the cost and gradient for this data pair - let (cost, vec_data) = model.compute_grad(optimizing_val.data(), - &inputs.select_rows(&[*i]), - &targets.select_rows(&[*i])); + let (cost, vec_data) = model.compute_grad( + optimizing_val.data(), + &inputs.select_rows(&[*i]), + &targets.select_rows(&[*i]), + ); // Backup previous velocity let prev_w = delta_w.clone(); // Compute the difference in gradient using Nesterov momentum delta_w = Vector::new(vec_data) * self.mu + &delta_w * self.alpha; // Update the parameters - optimizing_val = &optimizing_val - - (&prev_w * (-self.alpha) + &delta_w * (1. + self.alpha)); + optimizing_val = + &optimizing_val - (&prev_w * (-self.alpha) + &delta_w * (1. + self.alpha)); // Set the end cost (this is only used after the last iteration) end_cost += cost; } @@ -232,10 +237,14 @@ impl AdaGrad { /// let gd = AdaGrad::new(0.5, 1.0, 100); /// ``` pub fn new(alpha: f64, tau: f64, iters: usize) -> AdaGrad { - assert!(alpha > 0f64, - "The step size (alpha) must be greater than 0."); - assert!(tau >= 0f64, - "The adaptive constant (tau) cannot be negative."); + assert!( + alpha > 0f64, + "The step size (alpha) must be greater than 0." + ); + assert!( + tau >= 0f64, + "The adaptive constant (tau) cannot be negative." 
+ ); AdaGrad { alpha: alpha, tau: tau, @@ -255,13 +264,13 @@ impl Default for AdaGrad { } impl, Targets = Matrix>> OptimAlgorithm for AdaGrad { - fn optimize(&self, - model: &M, - start: &[f64], - inputs: &M::Inputs, - targets: &M::Targets) - -> Vec { - + fn optimize( + &self, + model: &M, + start: &[f64], + inputs: &M::Inputs, + targets: &M::Targets, + ) -> Vec { // Initialize the adaptive scaling let mut ada_s = Vector::zeros(start.len()); // Initialize the optimal parameters @@ -279,9 +288,11 @@ impl, Targets = Matrix>> OptimAlgorithm rand_utils::in_place_fisher_yates(&mut permutation); for i in &permutation { // Compute the cost and gradient for this data pair - let (cost, mut vec_data) = model.compute_grad(optimizing_val.data(), - &inputs.select_rows(&[*i]), - &targets.select_rows(&[*i])); + let (cost, mut vec_data) = model.compute_grad( + optimizing_val.data(), + &inputs.select_rows(&[*i]), + &targets.select_rows(&[*i]), + ); // Update the adaptive scaling by adding the gradient squared utils::in_place_vec_bin_op(ada_s.mut_data(), &vec_data, |x, &y| *x += y * y); @@ -308,12 +319,12 @@ impl, Targets = Matrix>> OptimAlgorithm } } -/// RMSProp +/// RMSProp /// /// The RMSProp algorithm (Hinton et al. 2012). 
#[derive(Debug, Clone, Copy)] pub struct RMSProp { - /// The base step size of gradient descent steps + /// The base step size of gradient descent steps learning_rate: f64, /// Rate at which running total of average square gradients decays decay_rate: f64, @@ -337,7 +348,7 @@ impl Default for RMSProp { learning_rate: 0.01, decay_rate: 0.9, epsilon: 1.0e-5, - iters: 50 + iters: 50, } } } @@ -356,26 +367,32 @@ impl RMSProp { /// ``` pub fn new(learning_rate: f64, decay_rate: f64, epsilon: f64, iters: usize) -> RMSProp { assert!(0f64 < learning_rate, "The learning rate must be positive"); - assert!(0f64 < decay_rate && decay_rate < 1f64, "The decay rate must be between 0 and 1"); + assert!( + 0f64 < decay_rate && decay_rate < 1f64, + "The decay rate must be between 0 and 1" + ); assert!(0f64 < epsilon, "Epsilon must be positive"); RMSProp { decay_rate: decay_rate, learning_rate: learning_rate, epsilon: epsilon, - iters: iters + iters: iters, } } } impl OptimAlgorithm for RMSProp - where M: Optimizable, Targets = Matrix> { - fn optimize(&self, - model: &M, - start: &[f64], - inputs: &M::Inputs, - targets: &M::Targets) - -> Vec { +where + M: Optimizable, Targets = Matrix>, +{ + fn optimize( + &self, + model: &M, + start: &[f64], + inputs: &M::Inputs, + targets: &M::Targets, + ) -> Vec { // Initial parameters let mut params = Vector::new(start.to_vec()); // Running average of squared gradients @@ -392,15 +409,18 @@ impl OptimAlgorithm for RMSProp // Permute the vertices rand_utils::in_place_fisher_yates(&mut permutation); for i in &permutation { - let (cost, grad) = model.compute_grad(params.data(), - &inputs.select_rows(&[*i]), - &targets.select_rows(&[*i])); + let (cost, grad) = model.compute_grad( + params.data(), + &inputs.select_rows(&[*i]), + &targets.select_rows(&[*i]), + ); let mut grad = Vector::new(grad); - let grad_squared = grad.clone().apply(&|x| x*x); + let grad_squared = grad.clone().apply(&|x| x * x); // Update cached average of squared gradients - 
rmsprop_cache = &rmsprop_cache*self.decay_rate + &grad_squared*(1.0 - self.decay_rate); - // RMSProp update rule + rmsprop_cache = + &rmsprop_cache * self.decay_rate + &grad_squared * (1.0 - self.decay_rate); + // RMSProp update rule utils::in_place_vec_bin_op(grad.mut_data(), rmsprop_cache.data(), |x, &y| { *x = *x * self.learning_rate / (y + self.epsilon).sqrt(); }); @@ -424,7 +444,7 @@ impl OptimAlgorithm for RMSProp #[cfg(test)] mod tests { - use super::{GradientDesc, StochasticGD, AdaGrad, RMSProp}; + use super::{AdaGrad, GradientDesc, RMSProp, StochasticGD}; #[test] #[should_panic] diff --git a/src/learning/pca.rs b/src/learning/pca.rs index 743b9b72..e09bd885 100644 --- a/src/learning/pca.rs +++ b/src/learning/pca.rs @@ -24,11 +24,11 @@ //! assert_eq!(output, Matrix::new(1, 2, vec![-0.6686215718235227, 0.042826190364433595])); //! ``` -use linalg::{Matrix, BaseMatrix, Axes}; use linalg::Vector; +use linalg::{Axes, BaseMatrix, Matrix}; -use learning::{LearningResult, UnSupModel}; use learning::error::{Error, ErrorKind}; +use learning::{LearningResult, UnSupModel}; /// Principal Component Analysis /// @@ -47,11 +47,10 @@ pub struct PCA { // Principal components components: Option>, // Whether components is inversed (trained with number of rows < cols data) - inv: bool + inv: bool, } impl PCA { - /// Constructs untrained PCA model. 
/// /// # Parameters @@ -67,7 +66,6 @@ impl PCA { /// let model = PCA::new(3, true); /// ``` pub fn new(n: usize, center: bool) -> PCA { - PCA { // accept n as usize, user should know the number of columns n: Some(n), @@ -76,15 +74,15 @@ impl PCA { n_features: None, centers: None, components: None, - inv: false + inv: false, } } /// Returns principal components (matrix which contains eigenvectors as columns) - pub fn components(&self) -> LearningResult<&Matrix> { + pub fn components(&self) -> LearningResult<&Matrix> { match self.components { None => Err(Error::new_untrained()), - Some(ref rot) => { Ok(rot) } + Some(ref rot) => Ok(rot), } } } @@ -114,29 +112,33 @@ impl Default for PCA { n_features: None, centers: None, components: None, - inv: false + inv: false, } } } /// Train the model and predict the model output from new data. impl UnSupModel, Matrix> for PCA { - - fn predict(&self, inputs: &Matrix) -> LearningResult> { - + fn predict(&self, inputs: &Matrix) -> LearningResult> { match self.n_features { - None => { return Err(Error::new_untrained()); }, + None => { + return Err(Error::new_untrained()); + } Some(f) => { if f != inputs.cols() { - return Err(Error::new(ErrorKind::InvalidData, - "Input data must have the same number of columns as training data")); + return Err(Error::new( + ErrorKind::InvalidData, + "Input data must have the same number of columns as training data", + )); } } }; match self.components { // this can't happen - None => { return Err(Error::new_untrained()); }, + None => { + return Err(Error::new_untrained()); + } Some(ref comp) => { if self.center == true { match self.centers { @@ -164,11 +166,13 @@ impl UnSupModel, Matrix> for PCA { fn train(&mut self, inputs: &Matrix) -> LearningResult<()> { match self.n { - None => {}, + None => {} Some(n) => { if n > inputs.cols() { - return Err(Error::new(ErrorKind::InvalidData, - "Input data must have equal or larger number of columns than n")); + return Err(Error::new( + ErrorKind::InvalidData, +
"Input data must have equal or larger number of columns than n", + )); } } } @@ -191,8 +195,8 @@ impl UnSupModel, Matrix> for PCA { Some(c) => { let slicer: Vec = (0..c).collect(); Some(v.select_cols(&slicer)) - }, - None => Some(v) + } + None => Some(v), }; self.n_features = Some(inputs.cols()); Ok(()) @@ -202,26 +206,30 @@ impl UnSupModel, Matrix> for PCA { /// Subtract center Vector from each rows unsafe fn centering(inputs: &Matrix, centers: &Vector) -> Matrix { // Number of inputs columns and centers length must be the same - Matrix::from_fn(inputs.rows(), inputs.cols(), - |c, r| inputs.get_unchecked([r, c]) - centers.data().get_unchecked(c)) + Matrix::from_fn(inputs.rows(), inputs.cols(), |c, r| { + inputs.get_unchecked([r, c]) - centers.data().get_unchecked(c) + }) } #[cfg(test)] mod tests { - use linalg::{Matrix, Axes, Vector}; use super::centering; + use linalg::{Axes, Matrix, Vector}; #[test] fn test_centering() { - let m = Matrix::new(2, 3, vec![1., 2., 3., - 2., 4., 4.]); + let m = Matrix::new(2, 3, vec![1., 2., 3., 2., 4., 4.]); let centers = m.mean(Axes::Row); - assert_vector_eq!(centers, Vector::new(vec![1.5, 3., 3.5]), comp=abs, tol=1e-8); + assert_vector_eq!( + centers, + Vector::new(vec![1.5, 3., 3.5]), + comp = abs, + tol = 1e-8 + ); let centered = unsafe { centering(&m, &centers) }; - let exp = Matrix::new(2, 3, vec![-0.5, -1., -0.5, - 0.5, 1., 0.5]); - assert_matrix_eq!(centered, exp, comp=abs, tol=1e-8); + let exp = Matrix::new(2, 3, vec![-0.5, -1., -0.5, 0.5, 1., 0.5]); + assert_matrix_eq!(centered, exp, comp = abs, tol = 1e-8); } -} \ No newline at end of file +} diff --git a/src/learning/svm.rs b/src/learning/svm.rs index 6d5622a3..b4b449d8 100644 --- a/src/learning/svm.rs +++ b/src/learning/svm.rs @@ -32,13 +32,12 @@ //! assert!(output[0] == 1f64, "Our classifier isn't very good!"); //!
``` - -use linalg::{Matrix, BaseMatrix}; use linalg::Vector; +use linalg::{BaseMatrix, Matrix}; +use learning::error::{Error, ErrorKind}; use learning::toolkit::kernel::{Kernel, SquaredExp}; use learning::{LearningResult, SupModel}; -use learning::error::{Error, ErrorKind}; use rand; use rand::Rng; @@ -104,8 +103,10 @@ impl SVM { /// Construct a kernel matrix fn ker_mat(&self, m1: &Matrix, m2: &Matrix) -> LearningResult> { if m1.cols() != m2.cols() { - Err(Error::new(ErrorKind::InvalidState, - "Inputs to kernel matrices have different column counts.")) + Err(Error::new( + ErrorKind::InvalidState, + "Inputs to kernel matrices have different column counts.", + )) } else { let dim1 = m1.rows(); let dim2 = m2.rows(); @@ -129,7 +130,8 @@ impl SupModel, Vector> for SVM { let full_inputs = ones.hcat(inputs); if let (&Some(ref alpha), &Some(ref train_inputs), &Some(ref train_targets)) = - (&self.alpha, &self.train_inputs, &self.train_targets) { + (&self.alpha, &self.train_inputs, &self.train_targets) + { let ker_mat = try!(self.ker_mat(&full_inputs, train_inputs)); let weight_vec = alpha.elemul(train_targets) / self.lambda; @@ -154,9 +156,9 @@ impl SupModel, Vector> for SVM { for t in 0..self.optim_iters { let i = rng.gen_range(0, n); let row_i = full_inputs.select_rows(&[i]); - let sum = full_inputs.row_iter() - .fold(0f64, |sum, row| sum + self.ker.kernel(row_i.data(), row.raw_slice())) * - targets[i] / (self.lambda * (t as f64)); + let sum = full_inputs.row_iter().fold(0f64, |sum, row| { + sum + self.ker.kernel(row_i.data(), row.raw_slice()) + }) * targets[i] / (self.lambda * (t as f64)); if sum < 1f64 { alpha[i] += 1f64; diff --git a/src/learning/toolkit/activ_fn.rs b/src/learning/toolkit/activ_fn.rs index 8529898e..cda393f5 100644 --- a/src/learning/toolkit/activ_fn.rs +++ b/src/learning/toolkit/activ_fn.rs @@ -109,14 +109,14 @@ impl ActivationFunc for Tanh { fn func_grad(x: f64) -> f64 { let y = x.tanh(); - 1.0 - y*y + 1.0 - y * y } fn func_grad_from_output(y: f64) 
-> f64 { - 1.0 - y*y + 1.0 - y * y } fn func_inv(x: f64) -> f64 { - 0.5*((1.0+x)/(1.0-x)).ln() + 0.5 * ((1.0 + x) / (1.0 - x)).ln() } -} \ No newline at end of file +} diff --git a/src/learning/toolkit/cost_fn.rs b/src/learning/toolkit/cost_fn.rs index b59328fa..64adbe6c 100644 --- a/src/learning/toolkit/cost_fn.rs +++ b/src/learning/toolkit/cost_fn.rs @@ -8,8 +8,8 @@ //! You can also create your own custom cost functions for use in your models. //! Just create a struct implementing the `CostFunc` trait. -use linalg::{Matrix, BaseMatrix, BaseMatrixMut}; use linalg::Vector; +use linalg::{BaseMatrix, BaseMatrixMut, Matrix}; /// Trait for cost functions in models. pub trait CostFunc { diff --git a/src/learning/toolkit/kernel.rs b/src/learning/toolkit/kernel.rs index c13e543d..9e35adaf 100644 --- a/src/learning/toolkit/kernel.rs +++ b/src/learning/toolkit/kernel.rs @@ -4,8 +4,8 @@ use std::ops::{Add, Mul}; +use linalg::norm::{Euclidean, VectorMetric, VectorNorm}; use linalg::Vector; -use linalg::norm::{Euclidean, VectorNorm, VectorMetric}; use rulinalg::utils; /// The Kernel trait @@ -42,8 +42,9 @@ pub trait Kernel { /// ``` #[derive(Debug)] pub struct KernelSum - where T: Kernel, - U: Kernel +where + T: Kernel, + U: Kernel, { k1: T, k2: U, @@ -51,8 +52,9 @@ pub struct KernelSum /// Computes the sum of the two associated kernels. impl Kernel for KernelSum - where T: Kernel, - U: Kernel +where + T: Kernel, + U: Kernel, { fn kernel(&self, x1: &[f64], x2: &[f64]) -> f64 { self.k1.kernel(x1, x2) + self.k2.kernel(x1, x2) @@ -83,8 +85,9 @@ impl Kernel for KernelSum /// ``` #[derive(Debug)] pub struct KernelProd - where T: Kernel, - U: Kernel +where + T: Kernel, + U: Kernel, { k1: T, k2: U, @@ -92,8 +95,9 @@ pub struct KernelProd /// Computes the product of the two associated kernels. 
impl Kernel for KernelProd - where T: Kernel, - U: Kernel +where + T: Kernel, + U: Kernel, { fn kernel(&self, x1: &[f64], x2: &[f64]) -> f64 { self.k1.kernel(x1, x2) * self.k2.kernel(x1, x2) @@ -259,10 +263,7 @@ impl SquaredExp { /// println!("{0}", ker.kernel(&[1.,2.,3.], &[3.,4.,5.])); /// ``` pub fn new(ls: f64, ampl: f64) -> SquaredExp { - SquaredExp { - ls: ls, - ampl: ampl, - } + SquaredExp { ls: ls, ampl: ampl } } } @@ -321,10 +322,7 @@ impl Exponential { /// println!("{0}", ker.kernel(&[1.,2.,3.], &[3.,4.,5.])); /// ``` pub fn new(ls: f64, ampl: f64) -> Exponential { - Exponential { - ls: ls, - ampl: ampl, - } + Exponential { ls: ls, ampl: ampl } } } @@ -381,10 +379,7 @@ impl HyperTan { /// println!("{0}", ker.kernel(&[1.,2.,3.], &[3.,4.,5.])); /// ``` pub fn new(alpha: f64, c: f64) -> HyperTan { - HyperTan { - alpha: alpha, - c: c, - } + HyperTan { alpha: alpha, c: c } } } diff --git a/src/learning/toolkit/rand_utils.rs b/src/learning/toolkit/rand_utils.rs index 0145168b..528d41e4 100644 --- a/src/learning/toolkit/rand_utils.rs +++ b/src/learning/toolkit/rand_utils.rs @@ -3,7 +3,7 @@ //! This module provides sampling and shuffling which are used //! within the learning modules. -use rand::{Rng, thread_rng}; +use rand::{thread_rng, Rng}; /// ``` /// use rusty_machine::learning::toolkit::rand_utils; @@ -14,8 +14,10 @@ use rand::{Rng, thread_rng}; /// println!("{:?}", sample); /// ``` pub fn reservoir_sample(pool: &[T], reservoir_size: usize) -> Vec { - assert!(pool.len() >= reservoir_size, - "Sample size is greater than total."); + assert!( + pool.len() >= reservoir_size, + "Sample size is greater than total." + ); let mut pool_mut = &pool[..]; diff --git a/src/learning/toolkit/regularization.rs b/src/learning/toolkit/regularization.rs index 395d36bd..77634df0 100644 --- a/src/learning/toolkit/regularization.rs +++ b/src/learning/toolkit/regularization.rs @@ -14,9 +14,9 @@ //! let reg = Regularization::L1(0.5); //! 
``` +use libnum::{Float, FromPrimitive}; use linalg::norm::{Euclidean, Lp, MatrixNorm}; -use linalg::{Matrix, MatrixSlice, BaseMatrix}; -use libnum::{FromPrimitive, Float}; +use linalg::{BaseMatrix, Matrix, MatrixSlice}; /// Model Regularization #[derive(Debug, Clone, Copy)] @@ -76,7 +76,8 @@ impl Regularization { } fn l2_reg_cost(mat: &MatrixSlice, x: T) -> T { - Euclidean.norm(mat) * x / ((T::one() + T::one()) * FromPrimitive::from_usize(mat.rows()).unwrap()) + Euclidean.norm(mat) * x + / ((T::one() + T::one()) * FromPrimitive::from_usize(mat.rows()).unwrap()) } fn l2_reg_grad(mat: &MatrixSlice, x: T) -> Matrix { @@ -87,8 +88,8 @@ impl Regularization { #[cfg(test)] mod tests { use super::Regularization; - use linalg::{Matrix, BaseMatrix}; use linalg::norm::{Euclidean, MatrixNorm}; + use linalg::{BaseMatrix, Matrix}; #[test] fn test_no_reg() { @@ -156,15 +157,17 @@ mod tests { assert!(a - ((Euclidean.norm(&input_mat) / 24f64) + (42f64 / 12f64)) < 1e-18); - let l1_true_grad = Matrix::new(3, 4, + let l1_true_grad = Matrix::new( + 3, + 4, vec![-1., -1., -1., 1., 1., 1., 1., 1., 1., 1., 1., 1.] - .into_iter() - .map(|x| x / 12f64) - .collect::>()); + .into_iter() + .map(|x| x / 12f64) + .collect::>(), + ); let l2_true_grad = &input_mat / 12f64; - for eps in (b - l1_true_grad - l2_true_grad) - .into_vec() { + for eps in (b - l1_true_grad - l2_true_grad).into_vec() { // Slightly lower boundary than others - more numerical error as more ops. assert!(eps < 1e-12); } diff --git a/src/lib.rs b/src/lib.rs index e117551b..eb5a0832 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -118,9 +118,11 @@ pub mod prelude; /// /// This module contains reexports of common tools from the rulinalg crate. 
pub mod linalg { - pub use rulinalg::matrix::{Axes, Matrix, MatrixSlice, MatrixSliceMut, BaseMatrix, BaseMatrixMut}; - pub use rulinalg::vector::Vector; + pub use rulinalg::matrix::{ + Axes, BaseMatrix, BaseMatrixMut, Matrix, MatrixSlice, MatrixSliceMut, + }; pub use rulinalg::norm; + pub use rulinalg::vector::Vector; } /// Module for data handling @@ -133,15 +135,15 @@ pub mod learning { pub mod dbscan; pub mod glm; pub mod gmm; + pub mod gp; + pub mod k_means; + pub mod knn; pub mod lin_reg; pub mod logistic_reg; - pub mod k_means; - pub mod nnet; - pub mod gp; - pub mod svm; pub mod naive_bayes; - pub mod knn; + pub mod nnet; pub mod pca; + pub mod svm; pub mod error; @@ -177,11 +179,12 @@ pub mod learning { type Targets; /// Compute the gradient for the model. - fn compute_grad(&self, - params: &[f64], - inputs: &Self::Inputs, - targets: &Self::Targets) - -> (f64, Vec); + fn compute_grad( + &self, + params: &[f64], + inputs: &Self::Inputs, + targets: &Self::Targets, + ) -> (f64, Vec); } /// Trait for optimization algorithms. @@ -189,16 +192,17 @@ pub mod learning { /// Return the optimized parameter using gradient optimization. /// /// Takes in a set of starting parameters and related model data. - fn optimize(&self, - model: &M, - start: &[f64], - inputs: &M::Inputs, - targets: &M::Targets) - -> Vec; + fn optimize( + &self, + model: &M, + start: &[f64], + inputs: &M::Inputs, + targets: &M::Targets, + ) -> Vec; } - pub mod grad_desc; pub mod fmincg; + pub mod grad_desc; } /// Module for learning tools. diff --git a/src/prelude.rs b/src/prelude.rs index b5a7df09..b970258c 100644 --- a/src/prelude.rs +++ b/src/prelude.rs @@ -2,9 +2,9 @@ //! //! This module alleviates some common imports used within rusty-machine. 
-pub use linalg::{Matrix, MatrixSlice, MatrixSliceMut, BaseMatrix, BaseMatrixMut}; -pub use linalg::Vector; pub use linalg::Axes; +pub use linalg::Vector; +pub use linalg::{BaseMatrix, BaseMatrixMut, Matrix, MatrixSlice, MatrixSliceMut}; pub use learning::SupModel; pub use learning::UnSupModel; @@ -15,6 +15,6 @@ mod tests { #[test] fn create_mat_from_prelude() { - let _ = Matrix::new(2, 2, vec![4.0;4]); + let _ = Matrix::new(2, 2, vec![4.0; 4]); } } diff --git a/src/stats/dist/exponential.rs b/src/stats/dist/exponential.rs index 03a6ff77..542149a8 100644 --- a/src/stats/dist/exponential.rs +++ b/src/stats/dist/exponential.rs @@ -4,10 +4,10 @@ //! found in the rand crate. This is provided through //! traits added within the containing stats module. -use stats::dist::Distribution; -use rand::Rng; -use rand::distributions::{Sample, IndependentSample}; use rand::distributions::exponential::Exp1; +use rand::distributions::{IndependentSample, Sample}; +use rand::Rng; +use stats::dist::Distribution; /// An Exponential random variable. #[derive(Debug, Clone, Copy)] @@ -76,8 +76,10 @@ impl Distribution for Exponential { /// assert!((log_pdf - exp.lambda().ln() + exp.lambda() * 3f64).abs() < 1e-20); /// ``` fn logpdf(&self, x: f64) -> f64 { - assert!(x >= 0., - "Input to log pdf must be positive for exponential."); + assert!( + x >= 0., + "Input to log pdf must be positive for exponential." + ); self.lambda.ln() - (x * self.lambda) } diff --git a/src/stats/dist/gaussian.rs b/src/stats/dist/gaussian.rs index 8ba7f2fe..a384f887 100644 --- a/src/stats/dist/gaussian.rs +++ b/src/stats/dist/gaussian.rs @@ -4,11 +4,11 @@ //! found in the rand crate. This is provided through //! traits added within the containing stats module. 
-use stats::dist::Distribution; -use rand::Rng; -use rand::distributions::{Sample, IndependentSample}; -use rand::distributions::normal::StandardNormal; use super::consts as stat_consts; +use rand::distributions::normal::StandardNormal; +use rand::distributions::{IndependentSample, Sample}; +use rand::Rng; +use stats::dist::Distribution; use std::f64::consts as float_consts; /// A Gaussian random variable. @@ -86,8 +86,8 @@ impl Distribution for Gaussian { /// assert!((lpdf_zero - (1f64/consts::SQRT_2_PI).abs()) < 1e-20); /// ``` fn pdf(&self, x: f64) -> f64 { - (-(x - self.mean) * (x - self.mean) / (2.0 * self.variance)).exp() / - (stat_consts::SQRT_2_PI * self._std_dev) + (-(x - self.mean) * (x - self.mean) / (2.0 * self.variance)).exp() + / (stat_consts::SQRT_2_PI * self._std_dev) } /// The log pdf of the normal distribution. @@ -107,8 +107,9 @@ impl Distribution for Gaussian { /// assert!((lpdf_zero + 0.5*consts::LN_2_PI).abs() < 1e-20); /// ``` fn logpdf(&self, x: f64) -> f64 { - -self._std_dev.ln() - (stat_consts::LN_2_PI / 2.0) - - ((x - self.mean) * (x - self.mean) / (2.0 * self.variance)) + -self._std_dev.ln() + - (stat_consts::LN_2_PI / 2.0) + - ((x - self.mean) * (x - self.mean) / (2.0 * self.variance)) } /// Rough estimate for the cdf of the gaussian distribution. 
@@ -138,12 +139,11 @@ impl Distribution for Gaussian { /// assert!((0.5*(1f64 - 0.382924922548) - cdf).abs() < 0.004); /// ``` fn cdf(&self, x: f64) -> f64 { - 0.5 * - (1f64 + - (x - self.mean).signum() * - (1f64 - - (-float_consts::FRAC_2_PI * (x - self.mean) * (x - self.mean) / self.variance).exp()) - .sqrt()) + 0.5 * (1f64 + (x - self.mean).signum() + * (1f64 - (-float_consts::FRAC_2_PI * (x - self.mean) * (x - self.mean) + / self.variance) + .exp()) + .sqrt()) } } diff --git a/src/stats/dist/mod.rs b/src/stats/dist/mod.rs index 57d8ab1e..587dadc9 100644 --- a/src/stats/dist/mod.rs +++ b/src/stats/dist/mod.rs @@ -1,8 +1,8 @@ pub mod exponential; pub mod gaussian; -pub use self::gaussian::Gaussian; pub use self::exponential::Exponential; +pub use self::gaussian::Gaussian; /// Statistical constants /// diff --git a/tests/learning/dbscan.rs b/tests/learning/dbscan.rs index 5761e193..e37f366a 100644 --- a/tests/learning/dbscan.rs +++ b/tests/learning/dbscan.rs @@ -5,12 +5,11 @@ use rm::learning::UnSupModel; #[test] fn test_basic_clusters() { - let inputs = Matrix::new(6, 2, vec![1.0, 2.0, - 1.1, 2.2, - 0.9, 1.9, - 1.0, 2.1, - -2.0, 3.0, - -2.2, 3.1]); + let inputs = Matrix::new( + 6, + 2, + vec![1.0, 2.0, 1.1, 2.2, 0.9, 1.9, 1.0, 2.1, -2.0, 3.0, -2.2, 3.1], + ); let mut model = DBSCAN::new(0.5, 2); model.train(&inputs).unwrap(); @@ -21,21 +20,19 @@ fn test_basic_clusters() { assert!(clustering.data().iter().skip(4).all(|x| *x == Some(1))); } - #[test] fn test_basic_prediction() { - let inputs = Matrix::new(6, 2, vec![1.0, 2.0, - 1.1, 2.2, - 0.9, 1.9, - 1.0, 2.1, - -2.0, 3.0, - -2.2, 3.1]); + let inputs = Matrix::new( + 6, + 2, + vec![1.0, 2.0, 1.1, 2.2, 0.9, 1.9, 1.0, 2.1, -2.0, 3.0, -2.2, 3.1], + ); let mut model = DBSCAN::new(0.5, 2); model.set_predictive(true); model.train(&inputs).unwrap(); - let new_points = Matrix::new(2,2, vec![1.0, 2.0, 4.0, 4.0]); + let new_points = Matrix::new(2, 2, vec![1.0, 2.0, 4.0, 4.0]); let classes = 
model.predict(&new_points).unwrap(); assert!(classes[0] == Some(0)); diff --git a/tests/learning/gp.rs b/tests/learning/gp.rs index 34c92993..85da37ac 100644 --- a/tests/learning/gp.rs +++ b/tests/learning/gp.rs @@ -1,19 +1,19 @@ +use rm::learning::gp::GaussianProcess; +use rm::learning::SupModel; use rm::linalg::Matrix; use rm::linalg::Vector; -use rm::learning::SupModel; -use rm::learning::gp::GaussianProcess; #[test] fn test_default_gp() { - let mut gp = GaussianProcess::default(); - gp.noise = 10f64; + let mut gp = GaussianProcess::default(); + gp.noise = 10f64; - let inputs = Matrix::new(10,1,vec![0.,1.,2.,3.,4.,5.,6.,7.,8.,9.]); - let targets = Vector::new(vec![0.,1.,2.,3.,4.,4.,3.,2.,1.,0.]); + let inputs = Matrix::new(10, 1, vec![0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]); + let targets = Vector::new(vec![0., 1., 2., 3., 4., 4., 3., 2., 1., 0.]); - gp.train(&inputs, &targets).unwrap(); + gp.train(&inputs, &targets).unwrap(); - let test_inputs = Matrix::new(5,1,vec![2.3,4.4,5.1,6.2,7.1]); + let test_inputs = Matrix::new(5, 1, vec![2.3, 4.4, 5.1, 6.2, 7.1]); - let _outputs = gp.predict(&test_inputs).unwrap(); + let _outputs = gp.predict(&test_inputs).unwrap(); } diff --git a/tests/learning/k_means.rs b/tests/learning/k_means.rs index 168f2597..3e18b05c 100644 --- a/tests/learning/k_means.rs +++ b/tests/learning/k_means.rs @@ -1,13 +1,13 @@ -use rm::linalg::Matrix; -use rm::learning::UnSupModel; use rm::learning::k_means::KMeansClassifier; -use rm::learning::k_means::{Forgy, RandomPartition, KPlusPlus}; +use rm::learning::k_means::{Forgy, KPlusPlus, RandomPartition}; +use rm::learning::UnSupModel; +use rm::linalg::Matrix; #[test] fn test_model_default() { let mut model = KMeansClassifier::::new(3); let inputs = Matrix::new(3, 2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); - let targets = Matrix::new(3,2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); + let targets = Matrix::new(3, 2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); model.train(&inputs).unwrap(); @@ -20,7 +20,7 @@ fn 
test_model_default() { fn test_model_iter() { let mut model = KMeansClassifier::::new(3); let inputs = Matrix::new(3, 2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); - let targets = Matrix::new(3,2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); + let targets = Matrix::new(3, 2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); model.set_iters(1000); model.train(&inputs).unwrap(); @@ -34,7 +34,7 @@ fn test_model_iter() { fn test_model_forgy() { let mut model = KMeansClassifier::new_specified(3, 100, Forgy); let inputs = Matrix::new(3, 2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); - let targets = Matrix::new(3,2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); + let targets = Matrix::new(3, 2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); model.train(&inputs).unwrap(); @@ -47,7 +47,7 @@ fn test_model_forgy() { fn test_model_ran_partition() { let mut model = KMeansClassifier::new_specified(3, 100, RandomPartition); let inputs = Matrix::new(3, 2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); - let targets = Matrix::new(3,2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); + let targets = Matrix::new(3, 2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); model.train(&inputs).unwrap(); @@ -60,7 +60,7 @@ fn test_model_ran_partition() { fn test_model_kplusplus() { let mut model = KMeansClassifier::new_specified(3, 100, KPlusPlus); let inputs = Matrix::new(3, 2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); - let targets = Matrix::new(3,2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); + let targets = Matrix::new(3, 2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); model.train(&inputs).unwrap(); @@ -76,18 +76,19 @@ fn test_no_train_predict() { let inputs = Matrix::new(3, 2, vec![1.0, 2.0, 1.0, 3.0, 1.0, 4.0]); model.predict(&inputs).unwrap(); - } #[test] fn test_two_centroids() { let mut model = KMeansClassifier::new(2); - let inputs = Matrix::new(6, 2, vec![59.59375, 270.6875, - 51.59375, 307.6875, - 86.59375, 286.6875, - 319.59375, 145.6875, - 314.59375, 174.6875, - 350.59375, 161.6875]); + let inputs = Matrix::new( + 6, + 2, + vec![ + 59.59375, 270.6875, 51.59375, 307.6875, 86.59375, 
286.6875, 319.59375, 145.6875, + 314.59375, 174.6875, 350.59375, 161.6875, + ], + ); model.train(&inputs).unwrap(); diff --git a/tests/learning/knn.rs b/tests/learning/knn.rs index beafa5e1..10d61298 100644 --- a/tests/learning/knn.rs +++ b/tests/learning/knn.rs @@ -1,6 +1,6 @@ -use rm::linalg::{Matrix, Vector}; -use rm::learning::SupModel; use rm::learning::knn::KNNClassifier; +use rm::learning::SupModel; +use rm::linalg::{Matrix, Vector}; #[test] fn test_knn() { @@ -44,10 +44,10 @@ fn test_knn_long() { #[cfg(feature = "datasets")] pub mod tests_datasets { - use rm::linalg::{BaseMatrix, Vector}; - use rm::learning::SupModel; - use rm::learning::knn::{KNNClassifier, KDTree, BallTree, BruteForce}; use rm::datasets::iris; + use rm::learning::knn::{BallTree, BruteForce, KDTree, KNNClassifier}; + use rm::learning::SupModel; + use rm::linalg::{BaseMatrix, Vector}; #[test] fn test_knn_iris_2cols() { @@ -84,14 +84,14 @@ pub mod tests_datasets { let _ = knn.train(&dataset.data(), &dataset.target()).unwrap(); let res = knn.predict(&dataset.data()).unwrap(); - let exp = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]; + let exp = vec![ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, + 2, 2, 2, 2, 2, + ]; assert_eq!(res, Vector::new(exp)); } @@ -103,28 +103,28 @@ pub mod tests_datasets { let _ = knn.train(&dataset.data(), &dataset.target()).unwrap(); let res = knn.predict(&dataset.data()).unwrap(); - let exp = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]; + let exp = vec![ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, + ]; assert_eq!(res, Vector::new(exp)); let mut knn = KNNClassifier::new(10); let _ = knn.train(&dataset.data(), &dataset.target()).unwrap(); let res = knn.predict(&dataset.data()).unwrap(); - let exp = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]; + let exp = vec![ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, + ]; assert_eq!(res, Vector::new(exp)); } @@ -136,14 +136,14 @@ pub mod tests_datasets { let _ = knn.train(&dataset.data(), &dataset.target()).unwrap(); let res = knn.predict(&dataset.data()).unwrap(); - let exp = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]; + let exp = vec![ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, + ]; let expv = Vector::new(exp); assert_eq!(res, expv); @@ -157,4 +157,4 @@ pub mod tests_datasets { let res = knn.predict(&dataset.data()).unwrap(); assert_eq!(res, expv); } -} \ No newline at end of file +} diff --git a/tests/learning/lin_reg.rs b/tests/learning/lin_reg.rs index 1a00469c..16817c87 100644 --- a/tests/learning/lin_reg.rs +++ b/tests/learning/lin_reg.rs @@ -1,8 +1,8 @@ +use libnum::abs; +use rm::learning::lin_reg::LinRegressor; +use rm::learning::SupModel; use 
rm::linalg::Matrix; use rm::linalg::Vector; -use rm::learning::SupModel; -use rm::learning::lin_reg::LinRegressor; -use libnum::abs; #[test] fn test_optimized_regression() { @@ -58,17 +58,49 @@ fn test_regression_datasets_trees() { let mut lin_mod = LinRegressor::default(); lin_mod.train(&trees.data(), &trees.target()).unwrap(); let params = lin_mod.parameters().unwrap(); - assert_eq!(params, &Vector::new(vec![-57.98765891838409, 4.708160503017506, 0.3392512342447438])); + assert_eq!( + params, + &Vector::new(vec![ + -57.98765891838409, + 4.708160503017506, + 0.3392512342447438, + ]) + ); let predicted = lin_mod.predict(&trees.data()).unwrap(); - let expected = vec![4.837659653793278, 4.55385163347481, 4.816981265588826, 15.874115228921276, - 19.869008437727473, 21.018326956518717, 16.192688074961563, 19.245949183164257, - 21.413021404689726, 20.187581283767756, 22.015402271048487, 21.468464618616007, - 21.468464618616007, 20.50615412980805, 23.954109686181766, 27.852202904652785, - 31.583966481344966, 33.806481916796706, 30.60097760433255, 28.697035014921106, - 34.388184394951004, 36.008318964043994, 35.38525970948079, 41.76899799551756, - 44.87770231764652, 50.942867757643015, 52.223751092491256, 53.42851282520877, - 53.899328875510534, 53.899328875510534, 68.51530482306926]; + let expected = vec![ + 4.837659653793278, + 4.55385163347481, + 4.816981265588826, + 15.874115228921276, + 19.869008437727473, + 21.018326956518717, + 16.192688074961563, + 19.245949183164257, + 21.413021404689726, + 20.187581283767756, + 22.015402271048487, + 21.468464618616007, + 21.468464618616007, + 20.50615412980805, + 23.954109686181766, + 27.852202904652785, + 31.583966481344966, + 33.806481916796706, + 30.60097760433255, + 28.697035014921106, + 34.388184394951004, + 36.008318964043994, + 35.38525970948079, + 41.76899799551756, + 44.87770231764652, + 50.942867757643015, + 52.223751092491256, + 53.42851282520877, + 53.899328875510534, + 53.899328875510534, + 68.51530482306926, + ]; 
assert_eq!(predicted, Vector::new(expected)); } diff --git a/tests/learning/optim/grad_desc.rs b/tests/learning/optim/grad_desc.rs index 9dd1281a..a1290e97 100644 --- a/tests/learning/optim/grad_desc.rs +++ b/tests/learning/optim/grad_desc.rs @@ -1,7 +1,7 @@ -use rm::learning::optim::Optimizable; use rm::learning::optim::fmincg::ConjugateGD; -use rm::learning::optim::grad_desc::{GradientDesc, StochasticGD, AdaGrad, RMSProp}; +use rm::learning::optim::grad_desc::{AdaGrad, GradientDesc, RMSProp, StochasticGD}; use rm::learning::optim::OptimAlgorithm; +use rm::learning::optim::Optimizable; use rm::linalg::Matrix; @@ -15,12 +15,13 @@ struct XSqModel { impl Optimizable for XSqModel { type Inputs = Matrix; - type Targets = Matrix; + type Targets = Matrix; fn compute_grad(&self, params: &[f64], _: &Matrix, _: &Matrix) -> (f64, Vec) { - - ((params[0] - self.c) * (params[0] - self.c), - vec![2f64 * (params[0] - self.c)]) + ( + (params[0] - self.c) * (params[0] - self.c), + vec![2f64 * (params[0] - self.c)], + ) } } @@ -30,13 +31,18 @@ fn convex_fmincg_training() { let cgd = ConjugateGD::default(); let test_data = vec![500f64]; - let params = cgd.optimize(&x_sq, - &test_data[..], - &Matrix::zeros(1, 1), - &Matrix::zeros(1, 1)); + let params = cgd.optimize( + &x_sq, + &test_data[..], + &Matrix::zeros(1, 1), + &Matrix::zeros(1, 1), + ); assert!(params[0] - 20f64 < 1e-10); - assert!(x_sq.compute_grad(&params, &Matrix::zeros(1, 1), &Matrix::zeros(1, 1)).0 < 1e-10); + assert!( + x_sq.compute_grad(&params, &Matrix::zeros(1, 1), &Matrix::zeros(1, 1)) + .0 < 1e-10 + ); } #[test] @@ -45,13 +51,18 @@ fn convex_gd_training() { let gd = GradientDesc::default(); let test_data = vec![500f64]; - let params = gd.optimize(&x_sq, - &test_data[..], - &Matrix::zeros(1, 1), - &Matrix::zeros(1, 1)); + let params = gd.optimize( + &x_sq, + &test_data[..], + &Matrix::zeros(1, 1), + &Matrix::zeros(1, 1), + ); assert!(params[0] - 20f64 < 1e-10); - assert!(x_sq.compute_grad(&params, &Matrix::zeros(1, 1),
&Matrix::zeros(1, 1)).0 < 1e-10); + assert!( + x_sq.compute_grad(¶ms, &Matrix::zeros(1, 1), &Matrix::zeros(1, 1)) + .0 < 1e-10 + ); } #[test] @@ -60,13 +71,18 @@ fn convex_stochastic_gd_training() { let gd = StochasticGD::new(0.9f64, 0.1f64, 100); let test_data = vec![100f64]; - let params = gd.optimize(&x_sq, - &test_data[..], - &Matrix::zeros(100, 1), - &Matrix::zeros(100, 1)); + let params = gd.optimize( + &x_sq, + &test_data[..], + &Matrix::zeros(100, 1), + &Matrix::zeros(100, 1), + ); assert!(params[0] - 20f64 < 1e-10); - assert!(x_sq.compute_grad(¶ms, &Matrix::zeros(1, 1), &Matrix::zeros(1, 1)).0 < 1e-10); + assert!( + x_sq.compute_grad(¶ms, &Matrix::zeros(1, 1), &Matrix::zeros(1, 1)) + .0 < 1e-10 + ); } #[test] @@ -75,26 +91,36 @@ fn convex_adagrad_training() { let gd = AdaGrad::new(5f64, 1f64, 100); let test_data = vec![100f64]; - let params = gd.optimize(&x_sq, - &test_data[..], - &Matrix::zeros(100, 1), - &Matrix::zeros(100, 1)); + let params = gd.optimize( + &x_sq, + &test_data[..], + &Matrix::zeros(100, 1), + &Matrix::zeros(100, 1), + ); assert!(params[0] - 20f64 < 1e-10); - assert!(x_sq.compute_grad(¶ms, &Matrix::zeros(1, 1), &Matrix::zeros(1, 1)).0 < 1e-10); + assert!( + x_sq.compute_grad(¶ms, &Matrix::zeros(1, 1), &Matrix::zeros(1, 1)) + .0 < 1e-10 + ); } #[test] fn convex_rmsprop_training() { - let x_sq = XSqModel { c: 20f64 }; - - let rms = RMSProp::new(0.05, 0.9, 1e-5, 50); - let test_data = vec![100f64]; - let params = rms.optimize(&x_sq, - &test_data[..], - &Matrix::zeros(100, 1), - &Matrix::zeros(100, 1)); - - assert!(params[0] - 20f64 < 1e-10); - assert!(x_sq.compute_grad(¶ms, &Matrix::zeros(1, 1), &Matrix::zeros(1, 1)).0 < 1e-10); -} \ No newline at end of file + let x_sq = XSqModel { c: 20f64 }; + + let rms = RMSProp::new(0.05, 0.9, 1e-5, 50); + let test_data = vec![100f64]; + let params = rms.optimize( + &x_sq, + &test_data[..], + &Matrix::zeros(100, 1), + &Matrix::zeros(100, 1), + ); + + assert!(params[0] - 20f64 < 1e-10); + assert!( + 
x_sq.compute_grad(¶ms, &Matrix::zeros(1, 1), &Matrix::zeros(1, 1)) + .0 < 1e-10 + ); +} diff --git a/tests/learning/pca.rs b/tests/learning/pca.rs index f603a768..8ab52931 100644 --- a/tests/learning/pca.rs +++ b/tests/learning/pca.rs @@ -1,96 +1,141 @@ -use rm::linalg::Matrix; -use rm::learning::UnSupModel; use rm::learning::pca::PCA; +use rm::learning::UnSupModel; +use rm::linalg::Matrix; #[test] fn test_default() { let mut model = PCA::default(); - let inputs = Matrix::new(7, 3, vec![8.3, 50., 23., - 10.2, 55., 21., - 11.1, 57., 22., - 12.5, 60., 15., - 11.3, 59., 20., - 12.4, 61., 11., - 11.2, 58., 23.]); + let inputs = Matrix::new( + 7, + 3, + vec![ + 8.3, 50., 23., 10.2, 55., 21., 11.1, 57., 22., 12.5, 60., 15., 11.3, 59., 20., 12.4, + 61., 11., 11.2, 58., 23., + ], + ); model.train(&inputs).unwrap(); - let cexp = Matrix::new(3, 3, vec![0.2304196717022202, 0.2504639278931734, -0.9403055863478447, - 0.5897383434061588, 0.7326863014098074, 0.3396755364211204, - -0.7740254913174374, 0.6328021843757651, -0.021117155112842168]); + let cexp = Matrix::new( + 3, + 3, + vec![ + 0.2304196717022202, + 0.2504639278931734, + -0.9403055863478447, + 0.5897383434061588, + 0.7326863014098074, + 0.3396755364211204, + -0.7740254913174374, + 0.6328021843757651, + -0.021117155112842168, + ], + ); let cmp = model.components().unwrap(); - assert_matrix_eq!(cmp, cexp, comp=abs, tol=1e-8); + assert_matrix_eq!(cmp, cexp, comp = abs, tol = 1e-8); let new_data = Matrix::new(1, 3, vec![9., 45., 22.]); let outputs = model.predict(&new_data).unwrap(); - let exp = Matrix::new(1, 3, vec![-9.72287413262656, -7.680227015314077, -2.301338333438487]); - assert_matrix_eq!(outputs, exp, comp=abs, tol=1e-8); + let exp = Matrix::new( + 1, + 3, + vec![-9.72287413262656, -7.680227015314077, -2.301338333438487], + ); + assert_matrix_eq!(outputs, exp, comp = abs, tol = 1e-8); } #[test] fn test_not_centering() { let mut model = PCA::new(3, false); - let inputs = Matrix::new(7, 3, vec![8.3, 50., 23., - 
10.2, 55., 21., - 11.1, 57., 22., - 12.5, 60., 15., - 11.3, 59., 20., - 12.4, 61., 11., - 11.2, 58., 23.]); + let inputs = Matrix::new( + 7, + 3, + vec![ + 8.3, 50., 23., 10.2, 55., 21., 11.1, 57., 22., 12.5, 60., 15., 11.3, 59., 20., 12.4, + 61., 11., 11.2, 58., 23., + ], + ); model.train(&inputs).unwrap(); - let cexp = Matrix::new(3, 3, vec![0.17994480617740657, -0.16908609066166264, 0.9690354795746806, - 0.9326216647416523, -0.2839205184846983, -0.2227239763426676, - 0.3127885822473139, 0.9438215049087068, 0.10660332868901998]); + let cexp = Matrix::new( + 3, + 3, + vec![ + 0.17994480617740657, + -0.16908609066166264, + 0.9690354795746806, + 0.9326216647416523, + -0.2839205184846983, + -0.2227239763426676, + 0.3127885822473139, + 0.9438215049087068, + 0.10660332868901998, + ], + ); let cmp = model.components().unwrap(); - assert_matrix_eq!(cmp, cexp, comp=abs, tol=1e-8); + assert_matrix_eq!(cmp, cexp, comp = abs, tol = 1e-8); let new_data = Matrix::new(1, 3, vec![9., 45., 22.]); let outputs = model.predict(&new_data).unwrap(); - let exp = Matrix::new(1, 3, vec![50.468826978411926, 6.465874960225161, 1.0440136119105228]); - assert_matrix_eq!(outputs, exp, comp=abs, tol=1e-8); + let exp = Matrix::new( + 1, + 3, + vec![50.468826978411926, 6.465874960225161, 1.0440136119105228], + ); + assert_matrix_eq!(outputs, exp, comp = abs, tol = 1e-8); } #[test] fn test_filter_component() { let mut model = PCA::new(2, false); - let inputs = Matrix::new(7, 3, vec![8.3, 50., 23., - 10.2, 55., 21., - 11.1, 57., 22., - 12.5, 60., 15., - 11.3, 59., 20., - 12.4, 61., 11., - 11.2, 58., 23.]); + let inputs = Matrix::new( + 7, + 3, + vec![ + 8.3, 50., 23., 10.2, 55., 21., 11.1, 57., 22., 12.5, 60., 15., 11.3, 59., 20., 12.4, + 61., 11., 11.2, 58., 23., + ], + ); model.train(&inputs).unwrap(); - let cexp = Matrix::new(3, 2, vec![0.17994480617740657, -0.16908609066166264, - 0.9326216647416523, -0.2839205184846983, - 0.3127885822473139, 0.9438215049087068]); + let cexp = Matrix::new( + 3, 
+ 2, + vec![ + 0.17994480617740657, + -0.16908609066166264, + 0.9326216647416523, + -0.2839205184846983, + 0.3127885822473139, + 0.9438215049087068, + ], + ); let cmp = model.components().unwrap(); - assert_matrix_eq!(cmp, cexp, comp=abs, tol=1e-8); + assert_matrix_eq!(cmp, cexp, comp = abs, tol = 1e-8); let new_data = Matrix::new(1, 3, vec![9., 45., 22.]); let outputs = model.predict(&new_data).unwrap(); let exp = Matrix::new(1, 2, vec![50.468826978411926, 6.465874960225161]); - assert_matrix_eq!(outputs, exp, comp=abs, tol=1e-8); + assert_matrix_eq!(outputs, exp, comp = abs, tol = 1e-8); } #[test] fn test_predict_different_dimension() { let mut model = PCA::new(2, false); - let inputs = Matrix::new(7, 3, vec![8.3, 50., 23., - 10.2, 55., 21., - 11.1, 57., 22., - 12.5, 60., 15., - 11.3, 59., 20., - 12.4, 61., 11., - 11.2, 58., 23.]); + let inputs = Matrix::new( + 7, + 3, + vec![ + 8.3, 50., 23., 10.2, 55., 21., 11.1, 57., 22., 12.5, 60., 15., 11.3, 59., 20., 12.4, + 61., 11., 11.2, 58., 23., + ], + ); model.train(&inputs).unwrap(); let new_data = Matrix::new(1, 2, vec![1., 2.]); @@ -110,18 +155,29 @@ fn test_predict_different_dimension() { fn test_wide() { let mut model = PCA::default(); - let inputs = Matrix::new(2, 4, vec![8.3, 50., 23., 2., - 10.2, 55., 21., 3.]); + let inputs = Matrix::new(2, 4, vec![8.3, 50., 23., 2., 10.2, 55., 21., 3.]); model.train(&inputs).unwrap(); - let cexp = Matrix::new(2, 4, vec![0.3277323746171723, 0.8624536174136117, -0.3449814469654447, 0.17249072348272235, - 0.933710591152088, -0.23345540994181946, 0.23959824886246414, -0.1275765757549414]); + let cexp = Matrix::new( + 2, + 4, + vec![ + 0.3277323746171723, + 0.8624536174136117, + -0.3449814469654447, + 0.17249072348272235, + 0.933710591152088, + -0.23345540994181946, + 0.23959824886246414, + -0.1275765757549414, + ], + ); let cmp = model.components().unwrap(); - assert_matrix_eq!(cmp, cexp, comp=abs, tol=1e-8); + assert_matrix_eq!(cmp, cexp, comp = abs, tol = 1e-8); let new_data = 
Matrix::new(1, 4, vec![9., 45., 22., 2.5]); let outputs = model.predict(&new_data).unwrap(); let exp = Matrix::new(1, 2, vec![-6.550335224256381, 1.517487926775624]); - assert_matrix_eq!(outputs, exp, comp=abs, tol=1e-8); -} \ No newline at end of file + assert_matrix_eq!(outputs, exp, comp = abs, tol = 1e-8); +} diff --git a/tests/lib.rs b/tests/lib.rs index 6df4f3da..797179ee 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -1,18 +1,18 @@ #[macro_use] extern crate rulinalg; -extern crate rusty_machine as rm; extern crate num as libnum; +extern crate rusty_machine as rm; pub mod learning { mod dbscan; - mod lin_reg; - mod k_means; mod gp; + mod k_means; mod knn; + mod lin_reg; mod pca; pub mod optim { - mod grad_desc; + mod grad_desc; } }