diff --git a/Cargo.toml b/Cargo.toml
index 2ef44396..bc352753 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -16,4 +16,4 @@ stats = []
 [dependencies]
 num = { version = "0.1.35", default-features = false }
 rand = "0.3.14"
-rulinalg = "0.3.3"
+rulinalg = "0.3.4"
diff --git a/examples/gmm_simulation.rs b/examples/gmm_simulation.rs
new file mode 100644
index 00000000..7e686663
--- /dev/null
+++ b/examples/gmm_simulation.rs
@@ -0,0 +1,81 @@
+extern crate rusty_machine;
+extern crate rand;
+
+use rand::distributions::{Normal, IndependentSample};
+use rand::{Rng, thread_rng};
+
+use rusty_machine::learning::gmm::{GaussianMixtureModel, Random, CholeskyFull};
+use rusty_machine::learning::UnSupModel;
+use rusty_machine::linalg::Matrix;
+
+// Simulate some data from Gaussian clusters.
+fn simulate_gmm_1d_data(count: usize,
+                        means: Vec<f64>,
+                        vars: Vec<f64>,
+                        mixture_weights: Vec<f64>)
+                        -> Vec<f64> {
+    assert_eq!(means.len(), vars.len());
+    assert_eq!(means.len(), mixture_weights.len());
+
+    let gmm_count = means.len();
+
+    let mut gaussians = Vec::with_capacity(gmm_count);
+
+    for i in 0..gmm_count {
+        // Create a Gaussian with the given mean and variance
+        gaussians.push(Normal::new(means[i], vars[i].sqrt()));
+    }
+
+    let mut rng = thread_rng();
+    let mut out_samples = Vec::with_capacity(count);
+
+    for _ in 0..count {
+        // Pick a Gaussian according to the mixture weights
+        let chosen_gaussian = gaussians[pick_gaussian(&mixture_weights, &mut rng)];
+
+        // Draw a sample from it
+        let sample = chosen_gaussian.ind_sample(&mut rng);
+
+        // Add it to the data
+        out_samples.push(sample);
+    }
+
+    out_samples
+}
+
+// A utility function which chooses an index from some unnormalized mixture weights.
+fn pick_gaussian<R: Rng>(unnorm_dist: &[f64], rng: &mut R) -> usize {
+    assert!(unnorm_dist.len() > 0);
+    let sum = unnorm_dist.iter().fold(0f64, |acc, &x| acc + x);
+    let rand = rng.gen_range(0.0f64, sum);
+
+    let mut tempsum = 0.0;
+    for (i, p) in unnorm_dist.iter().enumerate() {
+        tempsum += *p;
+        if rand < tempsum {
+            return i;
+        }
+    }
+
+    panic!("No random value was sampled!");
+}
+
+
+fn main() {
+    let gmm_count = 3;
+    let count = 1000;
+
+    let means = vec![-3f64, 0., 3.];
+    let vars = vec![1f64, 0.5, 0.25];
+    let weights = vec![0.5, 0.25, 0.25];
+
+    let data = simulate_gmm_1d_data(count, means, vars, weights);
+
+    let mut gmm: GaussianMixtureModel<Random, CholeskyFull> = GaussianMixtureModel::new(gmm_count);
+    gmm.set_max_iters(300);
+    gmm.train(&Matrix::new(count, 1, data)).unwrap();
+
+    println!("Means = {:?}", gmm.means());
+    println!("Covs = {:?}", gmm.covariances());
+    println!("Mix Weights = {:?}", gmm.mixture_weights());
+}
\ No newline at end of file
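The draw in `pick_gaussian` above is the usual inverse-CDF walk over cumulative weights: take a uniform draw on `[0, sum)` and return the first index whose running total passes it. As a quick, hedged sanity check of that idea, here is a standalone sketch against the same rand 0.3 API the example uses (`pick_index` is a hypothetical rename, not part of this PR):

```rust
extern crate rand;

use rand::{Rng, thread_rng};

// Same cumulative-weight walk as pick_gaussian in the example above.
fn pick_index<R: Rng>(weights: &[f64], rng: &mut R) -> usize {
    let sum: f64 = weights.iter().fold(0.0, |acc, &w| acc + w);
    let r = rng.gen_range(0.0, sum);
    let mut acc = 0.0;
    for (i, &w) in weights.iter().enumerate() {
        acc += w;
        if r < acc {
            return i;
        }
    }
    weights.len() - 1
}

fn main() {
    let weights = [0.5, 0.25, 0.25];
    let mut counts = [0usize; 3];
    let mut rng = thread_rng();
    for _ in 0..100_000 {
        counts[pick_index(&weights, &mut rng)] += 1;
    }
    // Empirical frequencies should sit near 0.5 / 0.25 / 0.25.
    println!("{:?}", counts);
}
```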
diff --git a/src/learning/gmm.rs b/src/learning/gmm.rs
index 4c88e9ce..60587f11 100644
--- a/src/learning/gmm.rs
+++ b/src/learning/gmm.rs
@@ -6,16 +6,15 @@
 //!
 //! ```
 //! use rusty_machine::linalg::Matrix;
-//! use rusty_machine::learning::gmm::{CovOption, GaussianMixtureModel};
+//! use rusty_machine::learning::gmm::{CovOption, GaussianMixtureModel, Random, CholeskyFull, Diagonal};
 //! use rusty_machine::learning::UnSupModel;
 //!
 //! let inputs = Matrix::new(4, 2, vec![1.0, 2.0, -3.0, -3.0, 0.1, 1.5, -5.0, -2.5]);
 //! let test_inputs = Matrix::new(3, 2, vec![1.0, 2.0, 3.0, 2.9, -4.4, -2.5]);
 //!
 //! // Create gmm with k(=2) classes.
-//! let mut model = GaussianMixtureModel::new(2);
+//! let mut model: GaussianMixtureModel<Random, Diagonal> = GaussianMixtureModel::new(2);
 //! model.set_max_iters(10);
-//! model.cov_option = CovOption::Diagonal;
 //!
 //! // Where inputs is a Matrix with features in columns.
 //! model.train(&inputs).unwrap();
@@ -30,75 +29,82 @@
 //! // Probabilities that each point comes from each Gaussian.
 //! println!("{:?}", post_probs.data());
 //! ```
-use linalg::{Matrix, MatrixSlice, Vector, BaseMatrix, BaseMatrixMut, Axes};
-use rulinalg::utils;
+extern crate rand;
+
+use linalg::{Matrix, Vector, BaseMatrix, BaseMatrixMut, Axes};
 use learning::{LearningResult, UnSupModel};
-use learning::toolkit::rand_utils;
 use learning::error::{Error, ErrorKind};
 
-/// Covariance options for GMMs.
-///
-/// - Full : The full covariance structure.
-/// - Regularized : Adds a regularization constant to the covariance diagonal.
-/// - Diagonal : Only the diagonal covariance structure.
-#[derive(Clone, Copy, Debug)]
-pub enum CovOption {
-    /// The full covariance structure.
-    Full,
-    /// Adds a regularization constant to the covariance diagonal.
-    Regularized(f64),
-    /// Only the diagonal covariance structure.
-    Diagonal,
-}
+use std::f64::consts::PI;
+use std::f64::EPSILON;
+use std::mem;
+use std::marker::PhantomData;
 
+const CONVERGENCE_EPS: f64 = 1.0e-15;
 
 /// A Gaussian Mixture Model
 #[derive(Debug)]
-pub struct GaussianMixtureModel {
+pub struct GaussianMixtureModel<T: Initializer, C: CovOption> {
     comp_count: usize,
+    // [n_components]
     mix_weights: Vector<f64>,
+    // [n_components, n_features]
     model_means: Option<Matrix<f64>>,
+    // n_components elements, each [n_features, n_features]
     model_covars: Option<Vec<Matrix<f64>>>,
+    // n_components elements, each [n_features, n_features]
+    precisions: Option<Vec<Matrix<f64>>>,
     log_lik: f64,
     max_iters: usize,
-    /// The covariance options for the GMM.
-    pub cov_option: CovOption,
+    phantom: PhantomData<T>,
+    /// Struct that calculates covariance and Gaussian parameters
+    pub cov_option: C,
}
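The `phantom: PhantomData<T>` field is there because the chosen `Initializer` is pure type-level strategy: it contributes only an associated function, never runtime state, so the struct needs something to mention `T`. A minimal sketch of that pattern in isolation (hypothetical names, not part of the crate):

```rust
use std::marker::PhantomData;

trait Strategy {
    fn run() -> u32;
}

struct Double;

impl Strategy for Double {
    fn run() -> u32 { 2 }
}

// Carries S only in the type; the struct itself stays zero-sized at runtime.
struct Holder<S: Strategy> {
    phantom: PhantomData<S>,
}

impl<S: Strategy> Holder<S> {
    fn new() -> Self {
        Holder { phantom: PhantomData }
    }

    fn go(&self) -> u32 {
        S::run()
    }
}

fn main() {
    let h: Holder<Double> = Holder::new();
    assert_eq!(h.go(), 2);
}
```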
 
-impl UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel {
+impl<T, C> UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel<T, C>
+    where T: Initializer,
+          C: CovOption + Default
+{
     /// Train the model using inputs.
     fn train(&mut self, inputs: &Matrix<f64>) -> LearningResult<()> {
-        let reg_value = if inputs.rows() > 1 {
-            1f64 / (inputs.rows() - 1) as f64
-        } else {
-            return Err(Error::new(ErrorKind::InvalidData, "Only one row of data provided."));
-        };
-
-        // Initialization:
+        // Move k to the stack to appease the borrow checker
         let k = self.comp_count;
 
-        self.model_covars = {
-            let cov_mat = try!(self.initialize_covariances(inputs, reg_value));
-            Some(vec![cov_mat; k])
-        };
+        // Initialize empty matrices for the covariances and means
+        self.model_covars = Some(
+            vec![Matrix::zeros(inputs.cols(), inputs.cols()); k]);
+        self.model_means = Some(Matrix::zeros(inputs.cols(), k));
+
+        // Initialize the responsibilities and calculate the parameters
+        self.update_gaussian_parameters(inputs, try!(T::init_resp(k, inputs)));
+        self.precisions =
+            Some(try!(self.cov_option.compute_precision(
+                self.model_covars.as_ref().unwrap())));
 
-        let random_rows: Vec<usize> =
-            rand_utils::reservoir_sample(&(0..inputs.rows()).collect::<Vec<usize>>(), k);
-        self.model_means = Some(inputs.select_rows(&random_rows));
+        self.log_lik = 0.;
 
         for _ in 0..self.max_iters {
             let log_lik_0 = self.log_lik;
-            let (weights, log_lik_1) = try!(self.membership_weights(inputs));
-
-            if (log_lik_1 - log_lik_0).abs() < 1e-15 {
+            // E-step
+            let (log_prob_norm, mut resp) = self.estimate_log_prob_resp(inputs);
+            // Return to normal space
+            for v in resp.iter_mut() { *v = v.exp(); }
+
+            // M-step
+            self.update_gaussian_parameters(inputs, resp);
+            self.precisions =
+                Some(try!(self.cov_option.compute_precision(
+                    self.model_covars.as_ref().unwrap())));
+
+            // Check for convergence
+            let log_lik_1 = log_prob_norm.mean();
+            if (log_lik_0 - log_lik_1).abs() < CONVERGENCE_EPS {
                 break;
             }
 
             self.log_lik = log_lik_1;
-
-            self.update_params(inputs, weights);
         }
 
         Ok(())
@@ -107,7 +113,7 @@ impl UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel {
     /// Predict output from inputs.
     fn predict(&self, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
         if let (&Some(_), &Some(_)) = (&self.model_means, &self.model_covars) {
-            Ok(try!(self.membership_weights(inputs)).0)
+            Ok(self.estimate_weighted_log_prob(inputs))
         } else {
             Err(Error::new_untrained())
         }
@@ -115,7 +121,10 @@ impl UnSupModel<Matrix<f64>, Matrix<f64>> for GaussianMixtureModel {
     }
 }
 
-impl GaussianMixtureModel {
+impl<T, C> GaussianMixtureModel<T, C>
+    where T: Initializer,
+          C: CovOption + Default
+{
     /// Constructs a new Gaussian Mixture Model
     ///
     /// Defaults to 100 maximum iterations and uniform mixture weights.
     ///
     /// # Examples
     /// ```
-    /// use rusty_machine::learning::gmm::GaussianMixtureModel;
+    /// use rusty_machine::learning::gmm::{GaussianMixtureModel, Random, CholeskyFull};
     ///
-    /// let gmm = GaussianMixtureModel::new(3);
+    /// let gmm: GaussianMixtureModel<Random, CholeskyFull> = GaussianMixtureModel::new(3);
     /// ```
-    pub fn new(k: usize) -> GaussianMixtureModel {
-        GaussianMixtureModel {
-            comp_count: k,
-            mix_weights: Vector::ones(k) / (k as f64),
-            model_means: None,
-            model_covars: None,
-            log_lik: 0f64,
-            max_iters: 100,
-            cov_option: CovOption::Full,
-        }
+    pub fn new(k: usize) -> Self {
+        Self::with_weights(k, Vector::ones(k) / k as f64).unwrap()
     }
 
     /// Constructs a new GMM with the specified prior mixture weights.
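One consequence of the `predict` change above: it now returns `estimate_weighted_log_prob`, i.e. unnormalized *log* probabilities, while the module doc still speaks of posterior probabilities. If normalized posteriors are wanted, a hedged post-processing sketch (assuming only the public `Matrix` row iteration already used in this diff):

```rust
use rusty_machine::linalg::{Matrix, BaseMatrixMut};

// Exponentiate each row of weighted log probabilities and normalize it to sum to 1.
fn to_posteriors(mut weighted_log_prob: Matrix<f64>) -> Matrix<f64> {
    for row in weighted_log_prob.iter_rows_mut() {
        let mut sum = 0.0;
        for v in row.iter_mut() {
            *v = v.exp();
            sum += *v;
        }
        for v in row.iter_mut() {
            *v /= sum;
        }
    }
    weighted_log_prob
}
```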
@@ -147,12 +148,12 @@ impl GaussianMixtureModel {
     /// # Examples
     ///
     /// ```
-    /// use rusty_machine::learning::gmm::GaussianMixtureModel;
+    /// use rusty_machine::learning::gmm::{GaussianMixtureModel, Random, CholeskyFull};
     /// use rusty_machine::linalg::Vector;
     ///
     /// let mix_weights = Vector::new(vec![0.25, 0.25, 0.5]);
     ///
-    /// let gmm = GaussianMixtureModel::with_weights(3, mix_weights).unwrap();
+    /// let gmm: GaussianMixtureModel<Random, CholeskyFull> = GaussianMixtureModel::with_weights(3, mix_weights).unwrap();
     /// ```
     ///
     /// # Failures
@@ -161,7 +162,7 @@ impl GaussianMixtureModel {
     ///
     /// - Mixture weights do not have length k.
     /// - Mixture weights have a negative entry.
-    pub fn with_weights(k: usize, mixture_weights: Vector<f64>) -> LearningResult<GaussianMixtureModel> {
+    pub fn with_weights(k: usize, mixture_weights: Vector<f64>) -> LearningResult<Self> {
         if mixture_weights.size() != k {
             Err(Error::new(ErrorKind::InvalidParameters, "Mixture weights must have length k."))
         } else if mixture_weights.data().iter().any(|&x| x < 0f64) {
@@ -175,9 +176,11 @@ impl GaussianMixtureModel {
                 mix_weights: normalized_weights,
                 model_means: None,
                 model_covars: None,
-                log_lik: 0f64,
+                precisions: None,
+                log_lik: 0.,
                 max_iters: 100,
-                cov_option: CovOption::Full,
+                cov_option: C::default(),
+                phantom: PhantomData,
             })
         }
     }
@@ -209,159 +212,389 @@ impl GaussianMixtureModel {
         &self.mix_weights
     }
 
+    /// The model's mean log-likelihood
+    ///
+    /// Returns the mean log-likelihood computed during training
+    pub fn log_lik(&self) -> f64 {
+        self.log_lik
+    }
+
     /// Sets the max number of iterations for the EM algorithm.
     ///
     /// # Examples
     ///
     /// ```
-    /// use rusty_machine::learning::gmm::GaussianMixtureModel;
+    /// use rusty_machine::learning::gmm::{GaussianMixtureModel, Random, CholeskyFull};
     ///
-    /// let mut gmm = GaussianMixtureModel::new(2);
+    /// let mut gmm: GaussianMixtureModel<Random, CholeskyFull> = GaussianMixtureModel::new(2);
     /// gmm.set_max_iters(5);
     /// ```
     pub fn set_max_iters(&mut self, iters: usize) {
         self.max_iters = iters;
     }
 
-    fn initialize_covariances(&self, inputs: &Matrix<f64>, reg_value: f64) -> LearningResult<Matrix<f64>> {
-        match self.cov_option {
-            CovOption::Diagonal => {
-                let variance = try!(inputs.variance(Axes::Row));
-                Ok(Matrix::from_diag(&variance.data()) * reg_value.sqrt())
-            }
+    // == Parameters
+    // inputs : [n_samples, n_features]
+    //
+    // == Returns
+    // weighted_log_prob : [n_samples, n_components]
+    //
+    fn estimate_weighted_log_prob(&self, inputs: &Matrix<f64>) -> Matrix<f64> {
+        let mut log_prob = self.estimate_log_prob(inputs);
+        // log_prob is row-major [n_samples, n_components], so cycling the k log
+        // weights lines each one up with its component column in every row.
+        let log_weights: Vec<f64> = self.mix_weights.iter().map(|w| w.ln()).collect();
+        for (lp, lw) in log_prob.iter_mut().zip(log_weights.iter().cycle()) {
+            *lp += *lw;
+        }
+        log_prob
+    }
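For intuition, `estimate_weighted_log_prob` computes `ln w_k + ln N(x; mu_k, Sigma_k)` for every sample/component pair. A hedged one-dimensional illustration of that quantity, using plain f64 math rather than the crate's matrix types:

```rust
use std::f64::consts::PI;

// ln N(x; mean, var) for a univariate Gaussian.
fn log_normal_pdf(x: f64, mean: f64, var: f64) -> f64 {
    let d = x - mean;
    -0.5 * ((2.0 * PI * var).ln() + d * d / var)
}

fn main() {
    let (weights, means, vars) = (vec![0.5, 0.5], vec![-1.0, 1.0], vec![1.0, 1.0]);
    let x = 0.3;
    for k in 0..2 {
        // The per-component weighted log probability for sample x.
        let wlp = weights[k].ln() + log_normal_pdf(x, means[k], vars[k]);
        println!("component {}: {:.4}", k, wlp);
    }
}
```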
println!("log_det: {:?}", log_det); + + let mut log_prob = Matrix::zeros(inputs.rows(), self.comp_count); + for k in 0..self.comp_count { + let prec = &precisions[k]; + // y is a matrix of shape [n_samples, n_features] + let mut y = inputs * prec; + // println!("y: \n{:.4}", &y.select_rows(&[0, 1, 2, 3, 4, 5, 6])); + // Matrix of shape [1, n_features] + let z: Matrix = model_means.select_rows(&[k]) * prec; + // println!("z: \n{:.4}", &z); + + // Subtract the mean of each column from the matrix y + for col in 0..y.cols() { + for row in 0..y.rows() { + y[[row, col]] -= z[[0, col]]; } - cov_mat *= reg_value; - if let CovOption::Regularized(eps) = self.cov_option { - cov_mat += Matrix::::identity(cov_mat.cols()) * eps; + } + // println!("y: \n{:.4}", &y.select_rows(&[0, 1, 2, 3, 4, 5, 6])); + + for (i, row) in y.iter_rows().enumerate() { + let sum_of_squares = row.iter().map(|v| v.powi(2)).sum(); + log_prob[[i, k]] = sum_of_squares; + } + } + + log_prob = (log_prob + inputs.cols() as f64 * (2.0 * PI).ln()) * -0.5; + for (row, det) in log_prob.iter_rows_mut().zip(log_det) { + for v in row { + *v += det; + } + } + log_prob + } + + fn estimate_log_prob_resp(&self, inputs: &Matrix) -> (Vector, Matrix) { + let mut weighted_log_prob: Matrix = self.estimate_weighted_log_prob(inputs); + // length of n_samples + let log_prob_norm: Vector = + Vector::new(weighted_log_prob.iter_rows().map(|row: &[f64]| { + let a: f64 = row.iter().map(|v| v.exp()).sum(); + a.ln() + }).collect::>()); + + for row in 0..log_prob_norm.size() { + for col in 0..weighted_log_prob.cols() { + weighted_log_prob[[row, col]] -= log_prob_norm[row]; + } + } + + (log_prob_norm, weighted_log_prob) + } + + fn update_gaussian_parameters(&mut self, inputs: &Matrix, resp: Matrix) { + self.mix_weights = resp.iter_rows() + .fold(Vector::new(vec![0.0; resp.cols()]), |mut acc, row| { + for (a, r) in acc.iter_mut().zip(row.iter()) { + *a += *r; } - Ok(cov_mat) + acc + }) + + 10.0 * EPSILON; + + let mut model_means = self.model_means.as_mut().unwrap(); + *model_means = resp.transpose() * inputs; + + for (nk, row) in self.mix_weights.iter().zip(model_means.iter_rows_mut()) { + for v in row { + *v /= *nk; } } + + // Delegate updating the covariance matrices to the cov_option + let mut model_covars = self.model_covars.as_mut().unwrap(); + self.cov_option.update_covars(model_covars, resp, inputs, + model_means, &self.mix_weights); + + self.mix_weights /= inputs.rows() as f64; + } +} + +/// Contains various methods for calculating covariances +pub trait CovOption { + /// Calculate a precision matrix from a covariance matrix + fn compute_precision(&self, covariances: &Vec>) -> + LearningResult>>; + + /// Update covariance matrices + fn update_covars(&self, model_covars: &mut Vec>, resp: Matrix, + inputs: &Matrix, model_means: &Matrix, + mix_weights: &Vector); + + /// Regularization constant added along the diagonal + fn reg_covar(&self) -> f64 { + 0.0 } +} - fn membership_weights(&self, inputs: &Matrix) -> LearningResult<(Matrix, f64)> { - let n = inputs.rows(); +/// Performs calculations on a full covariance matrix using Cholesky factorization, +/// adding a constant regularization factor along the diagonal to ensure stability. +#[derive(Clone, Copy, Debug)] +pub struct CholeskyFull { + /// Regularization constant + pub reg_covar: f64 +} - let mut member_weights_data = Vec::with_capacity(n * self.comp_count); +/// Only calculates the diagonal (the variance). This assumes that all parameters +/// are uncorrelated. 
 
+/// Performs calculations on a full covariance matrix using Cholesky factorization,
+/// adding a constant regularization factor along the diagonal to ensure stability.
+#[derive(Clone, Copy, Debug)]
+pub struct CholeskyFull {
+    /// Regularization constant
+    pub reg_covar: f64
+}
 
-    fn membership_weights(&self, inputs: &Matrix<f64>) -> LearningResult<(Matrix<f64>, f64)> {
-        let n = inputs.rows();
+/// Only calculates the diagonal (the variances). This assumes that all features
+/// are uncorrelated. Adds a constant regularization factor (generally unnecessary).
+#[derive(Clone, Copy, Debug)]
+pub struct Diagonal {
+    /// Regularization constant
+    pub reg_covar: f64
+}
 
-        let mut member_weights_data = Vec::with_capacity(n * self.comp_count);
+impl CholeskyFull {
+    /// Initialize with a covariance regularization constant
+    pub fn new(reg_covar: f64) -> Self {
+        CholeskyFull {
+            reg_covar: reg_covar
+        }
+    }
 
-        // We compute the determinants and inverses now
-        let mut cov_sqrt_dets = Vec::with_capacity(self.comp_count);
-        let mut cov_invs = Vec::with_capacity(self.comp_count);
-
-        if let Some(ref covars) = self.model_covars {
-            for cov in covars {
-                // TODO: combine these. We compute det to get the inverse.
-                let covar_det = cov.det();
-                let covar_inv = try!(cov.inverse().map_err(Error::from));
-
-                cov_sqrt_dets.push(covar_det.sqrt());
-                cov_invs.push(covar_inv);
-            }
-        }
-
-        let mut log_lik = 0f64;
+    /// Solves a system given the lower-triangular Cholesky factor of the covariance matrix
+    pub fn solve_cholesky_decomposition(mut cov_chol: Matrix<f64>) -> Matrix<f64> {
+        let half = (cov_chol.rows() as f64 / 2.0).floor() as usize;
+        let lower_rows = cov_chol.rows() - half - 1;
+        let n_col = cov_chol.cols() - 1;
+
+        let det: f64 = cov_chol.det();
+        cov_chol /= det;
+
+        for idx in 0..half {
+            // Mirror along the diagonal
+            let (mut upper, mut lower) = cov_chol.split_at_mut(half, Axes::Row);
+            {
+                let swap_row = lower_rows - idx;
+                let swap_col = n_col - idx;
+                mem::swap(&mut upper[[idx, idx]], &mut lower[[swap_row, swap_col]]);
+            }
+
+            // Transpose and negate all other values
+            for j in (idx + 1)..(n_col + 1) {
+                let swap_row = lower_rows - idx;
+                let swap_col = n_col - j;
+                mem::swap(&mut upper[[idx, j]], &mut lower[[swap_row, swap_col]]);
+                upper[[idx, j]] *= -1.0;
+            }
+        }
+        cov_chol
+    }
+}
 
+impl Default for CholeskyFull {
+    /// Initialize with a covariance regularization constant of 0.0
+    fn default() -> Self {
+        Self::new(0.0)
+    }
+}
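The `solve_cholesky_decomposition` routine above works in place with a mirror-and-negate scheme. For comparison, the textbook route is forward substitution on `L X = I`, one column at a time. A hedged, plain-slice sketch (row-major layout, hypothetical helper, not the rulinalg API):

```rust
// Invert an n x n lower-triangular matrix stored row-major, by solving L X = I
// column by column with forward substitution.
fn invert_lower_triangular(l: &[f64], n: usize) -> Vec<f64> {
    let mut inv = vec![0.0; n * n];
    for j in 0..n {
        inv[j * n + j] = 1.0 / l[j * n + j];
        for i in (j + 1)..n {
            let mut s = 0.0;
            for k in j..i {
                s += l[i * n + k] * inv[k * n + j];
            }
            inv[i * n + j] = -s / l[i * n + i];
        }
    }
    inv
}

fn main() {
    // L = [[2, 0], [1, 3]]  =>  L^-1 = [[0.5, 0], [-1/6, 1/3]]
    println!("{:?}", invert_lower_triangular(&[2.0, 0.0, 1.0, 3.0], 2));
}
```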
-        // Now we compute the membership weights
-        if let Some(ref means) = self.model_means {
-            for i in 0..n {
-                let mut pdfs = Vec::with_capacity(self.comp_count);
-                let x_i = MatrixSlice::from_matrix(inputs, [i, 0], 1, inputs.cols());
-
-                for j in 0..self.comp_count {
-                    let mu_j = MatrixSlice::from_matrix(means, [j, 0], 1, means.cols());
-                    let diff = x_i - mu_j;
-
-                    let pdf = (&diff * &cov_invs[j] * diff.transpose() * -0.5).into_vec()[0]
-                        .exp() / cov_sqrt_dets[j];
-                    pdfs.push(pdf);
-                }
-
-                let weighted_pdf_sum = utils::dot(&pdfs, self.mix_weights.data());
-
-                for (idx, pdf) in pdfs.iter().enumerate() {
-                    member_weights_data.push(self.mix_weights[idx] * pdf / (weighted_pdf_sum));
-                }
-
-                log_lik += weighted_pdf_sum.ln();
-            }
-        }
-
-        Ok((Matrix::new(n, self.comp_count, member_weights_data), log_lik))
-    }
+impl CovOption for CholeskyFull {
+    fn compute_precision(&self, covariances: &Vec<Matrix<f64>>) -> LearningResult<Vec<Matrix<f64>>> {
+        let n_components = covariances.len();
+        let mut precisions_chol = Vec::<Matrix<f64>>::with_capacity(n_components);
+
+        for covariance in covariances {
+            let cov_chol: Matrix<f64> = try!(covariance.cholesky());
+            precisions_chol.push(Self::solve_cholesky_decomposition(cov_chol));
+        }
+
+        Ok(precisions_chol)
+    }
+
+    fn update_covars(&self, model_covars: &mut Vec<Matrix<f64>>, resp: Matrix<f64>,
+                     inputs: &Matrix<f64>, model_means: &Matrix<f64>,
+                     mix_weights: &Vector<f64>) {
+        for (k, covariance) in model_covars.iter_mut().enumerate() {
+            // Center the inputs on the component mean
+            let mut diff: Matrix<f64> = inputs.clone();
+            for row in diff.iter_rows_mut() {
+                for (i, v) in row.iter_mut().enumerate() {
+                    *v -= model_means[[k, i]];
+                }
+            }
+
+            // Weight each centered sample by its responsibility for component k
+            let mut diff_transpose = diff.transpose();
+            let resp_k = resp.select_cols(&[k]);
+            for row in diff_transpose.iter_rows_mut() {
+                for (v, x) in row.iter_mut().zip(resp_k.iter()) {
+                    *v *= *x;
+                }
+            }
+
+            *covariance = (diff_transpose * diff) / mix_weights[k];
+
+            // Add the regularization value
+            for idx in 0..covariance.rows() {
+                covariance[[idx, idx]] += self.reg_covar;
+            }
+        }
+    }
+
+    fn reg_covar(&self) -> f64 {
+        self.reg_covar
+    }
+}
 
-    fn update_params(&mut self, inputs: &Matrix<f64>, membership_weights: Matrix<f64>) {
-        let n = membership_weights.rows();
-        let d = inputs.cols();
-
-        let sum_weights = membership_weights.sum_rows();
-
-        self.mix_weights = &sum_weights / (n as f64);
-
-        let mut new_means = membership_weights.transpose() * inputs;
-
-        for (mean, w) in new_means.iter_rows_mut().zip(sum_weights.data().iter()) {
-            for m in mean.iter_mut() {
-                *m /= *w;
-            }
-        }
+impl Default for Diagonal {
+    /// Initialize with a covariance regularization constant of 0.0
+    fn default() -> Self {
+        Diagonal {
+            reg_covar: 0.0
+        }
+    }
+}
 
-        let mut new_covs = Vec::with_capacity(self.comp_count);
-
-        for k in 0..self.comp_count {
-            let mut cov_mat = Matrix::zeros(d, d);
-            let new_means_k = MatrixSlice::from_matrix(&new_means, [k, 0], 1, d);
-
-            for i in 0..n {
-                let inputs_i = MatrixSlice::from_matrix(inputs, [i, 0], 1, d);
-                let diff = inputs_i - new_means_k;
-                cov_mat += self.compute_cov(diff, membership_weights[[i, k]]);
-            }
-
-            if let CovOption::Regularized(eps) = self.cov_option {
-                cov_mat += Matrix::<f64>::identity(cov_mat.cols()) * eps;
-            }
-
-            new_covs.push(cov_mat / sum_weights[k]);
-        }
-
-        self.model_means = Some(new_means);
-        self.model_covars = Some(new_covs);
-    }
+impl CovOption for Diagonal {
+    fn compute_precision(&self, covariances: &Vec<Matrix<f64>>) -> LearningResult<Vec<Matrix<f64>>> {
+        let n_components = covariances.len();
+        let n_features = covariances[0].cols();
+
+        for covariance in covariances {
+            for i in 0..covariance.cols() {
+                if covariance[[i, i]] < EPSILON {
+                    return Err(Error::new(
+                        ErrorKind::InvalidState,
+                        "Mixture model had zeros along the diagonals."));
+                }
+
+                if covariance[[i, i]].is_nan() {
+                    return Err(Error::new(
+                        ErrorKind::InvalidState,
+                        "Mixture model had NaN along the diagonals."));
+                }
+            }
+        }
+
+        let mut precisions_chol = Vec::<Matrix<f64>>::with_capacity(n_components);
+        for covariance in covariances {
+            let v: Vec<f64> = covariance.iter().map(|v| {
+                if *v != 0.0 {
+                    1.0 / v.sqrt()
+                } else {
+                    0.0
+                }
+            }).collect();
+            precisions_chol.push(Matrix::<f64>::new(n_features, n_features, v));
+        }
+
+        Ok(precisions_chol)
+    }
+
+    fn update_covars(&self, model_covars: &mut Vec<Matrix<f64>>, _resp: Matrix<f64>,
+                     inputs: &Matrix<f64>, model_means: &Matrix<f64>,
+                     mix_weights: &Vector<f64>) {
+        for (k, covariance) in model_covars.iter_mut().enumerate() {
+            let mut diff: Matrix<f64> = inputs.clone();
+            for row in diff.iter_rows_mut() {
+                for (i, v) in row.iter_mut().enumerate() {
+                    *v -= model_means[[k, i]];
+                }
+            }
+
+            *covariance = Matrix::from_diag(
+                &((diff.variance(Axes::Row).unwrap() / mix_weights[k])
+                  + self.reg_covar).data()[..]);
+        }
+    }
+
+    fn reg_covar(&self) -> f64 {
+        self.reg_covar
+    }
+}
 
-    fn compute_cov(&self, diff: Matrix<f64>, weight: f64) -> Matrix<f64> {
-        match self.cov_option {
-            CovOption::Full | CovOption::Regularized(_) => (diff.transpose() * diff) * weight,
-            CovOption::Diagonal => Matrix::from_diag(&diff.elemul(&diff).into_vec()) * weight,
-        }
-    }
-}
+/// Trait for possible methods of initializing the responsibilities matrix
+pub trait Initializer {
+    /// Should return the responsibilities matrix:
+    /// shape is [n_samples, n_components]
+    fn init_resp(k: usize, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>>;
+}
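Since `Initializer` only asks for an associated `init_resp`, downstream code can plug in its own seeding scheme alongside the two implementations that follow. A hedged in-module sketch (a hypothetical `Uniform` type, relying on the same `linalg`/`learning` imports as this file):

```rust
/// Give every sample equal responsibility for every component.
#[derive(Debug)]
pub struct Uniform;

impl Initializer for Uniform {
    fn init_resp(k: usize, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
        let n = inputs.rows();
        // Each row sums to one: 1/k responsibility per component.
        Ok(Matrix::new(n, k, vec![1.0 / k as f64; n * k]))
    }
}
```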
 
+/// Initialize the responsibilities matrix using randomly generated values
+#[derive(Debug)]
+pub struct Random;
+
+impl Initializer for Random {
+    fn init_resp(k: usize, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
+        use rand::distributions::{IndependentSample, Range};
+
+        let between = Range::new(0.0f64, 1.);
+        let mut rng = rand::thread_rng();
+        let random_numbers: Vec<f64> =
+            (0..(inputs.rows() * k)).map(|_| between.ind_sample(&mut rng).exp()).collect();
+        let mut resp = Matrix::new(inputs.rows(), k, random_numbers);
+
+        // Normalize each row so the responsibilities for a sample sum to one
+        let sums = resp.sum_cols();
+        for (row, s) in resp.iter_rows_mut().zip(sums.iter()) {
+            for v in row.iter_mut() {
+                *v /= *s;
+            }
+        }
+        Ok(resp)
+    }
+}
+
+/// Initialize the responsibilities matrix using k-means clustering
+#[derive(Debug)]
+pub struct KMeans;
+
+impl Initializer for KMeans {
+    fn init_resp(k: usize, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
+        use learning::k_means::KMeansClassifier;
+
+        let mut model = KMeansClassifier::new(k);
+        try!(model.train(inputs));
+        let classes = try!(model.predict(inputs));
+
+        let mut resp: Matrix<f64> = Matrix::zeros(inputs.rows(), k);
+        for (row, col) in classes.iter().enumerate() {
+            resp[[row, *col]] = 1.;
+        }
+        Ok(resp)
+    }
+}
 
 #[cfg(test)]
 mod tests {
-    use super::GaussianMixtureModel;
+    use super::{GaussianMixtureModel, Random, CholeskyFull};
     use linalg::Vector;
 
     #[test]
     fn test_means_none() {
-        let model = GaussianMixtureModel::new(5);
+        let model = GaussianMixtureModel::<Random, CholeskyFull>::new(5);
 
         assert_eq!(model.means(), None);
     }
 
     #[test]
     fn test_covars_none() {
-        let model = GaussianMixtureModel::new(5);
+        let model = GaussianMixtureModel::<Random, CholeskyFull>::new(5);
 
         assert_eq!(model.covariances(), None);
     }
 
@@ -369,14 +602,14 @@ mod tests {
     #[test]
     fn test_negative_mixtures() {
         let mix_weights = Vector::new(vec![-0.25, 0.75, 0.5]);
-        let gmm_res = GaussianMixtureModel::with_weights(3, mix_weights);
+        let gmm_res = GaussianMixtureModel::<Random, CholeskyFull>::with_weights(3, mix_weights);
 
         assert!(gmm_res.is_err());
     }
 
     #[test]
     fn test_wrong_length_mixtures() {
         let mix_weights = Vector::new(vec![0.1, 0.25, 0.75, 0.5]);
-        let gmm_res = GaussianMixtureModel::with_weights(3, mix_weights);
+        let gmm_res = GaussianMixtureModel::<Random, CholeskyFull>::with_weights(3, mix_weights);
 
         assert!(gmm_res.is_err());
     }
 }
diff --git a/tests/learning/gmm.rs b/tests/learning/gmm.rs
new file mode 100644
index 00000000..8a5eeaba
--- /dev/null
+++ b/tests/learning/gmm.rs
@@ -0,0 +1,62 @@
+extern crate rand;
+
+use rm::linalg::{Matrix, BaseMatrix};
+use rm::learning::gmm::{GaussianMixtureModel, KMeans, CholeskyFull};
+use rm::learning::UnSupModel;
+
+use self::rand::thread_rng;
+use self::rand::distributions::IndependentSample;
+use self::rand::distributions::normal::Normal;
+
+fn generate_data(centroids: &Matrix<f64>,
+                 points_per_centroid: usize,
+                 noise: f64)
+                 -> Matrix<f64> {
+    assert!(centroids.cols() > 0, "Centroids cannot be empty.");
+    assert!(centroids.rows() > 0, "Centroids cannot be empty.");
+    assert!(noise >= 0f64, "Noise must be non-negative.");
+
+    let mut raw_cluster_data = Vec::with_capacity(centroids.rows() * points_per_centroid *
+                                                  centroids.cols());
+
+    let mut rng = thread_rng();
+    let normal_rv = Normal::new(0f64, noise);
+
+    for _ in 0..points_per_centroid {
+        // Generate points from each centroid
+        for centroid in centroids.iter_rows() {
+            // Generate a point randomly around the centroid
+            let mut point = Vec::with_capacity(centroids.cols());
+            for feature in centroid {
+                point.push(feature + normal_rv.ind_sample(&mut rng));
+            }
+
+            // Push the point into raw_cluster_data
+            raw_cluster_data.extend(point);
+        }
+    }
+
+    Matrix::new(centroids.rows() * points_per_centroid,
+                centroids.cols(),
+                raw_cluster_data)
+}
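`generate_data` draws from `thread_rng()`, so the integration test below is nondeterministic from run to run. If reproducibility ever matters, rand 0.3 allows seeding a `StdRng`; a hedged sketch (API from memory, worth double-checking against the rand 0.3 docs):

```rust
extern crate rand;

use rand::{SeedableRng, StdRng};
use rand::distributions::{IndependentSample, Normal};

fn main() {
    // A fixed seed makes every run draw the same sequence.
    let seed: &[usize] = &[42];
    let mut rng: StdRng = SeedableRng::from_seed(seed);
    let noise = Normal::new(0.0, 0.2);
    println!("{}", noise.ind_sample(&mut rng));
}
```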
+#[test]
+fn gmm_train() {
+    const SAMPLES_PER_CENTROID: usize = 500;
+    // Cluster centres, one per row of `centroids`.
+    let centroids = Matrix::new(2, 3, vec![-0.4, -0.6, 0.1, 0.6, 0.7, -0.3]);
+
+    // Generate some data randomly around the centroids
+    let samples = generate_data(&centroids, SAMPLES_PER_CENTROID, 0.2);
+
+    let mut model = GaussianMixtureModel::<KMeans, CholeskyFull>::new(100);
+    model.cov_option.reg_covar = 0.0;
+    model.set_max_iters(100);
+
+    match model.train(&samples) {
+        Ok(_) => {
+            println!("means: \n{:.4}", model.means().unwrap());
+            println!("log_lik: \n{:.4}", model.log_lik());
+            for cov in model.covariances().as_ref().unwrap().iter() {
+                println!("cov: \n{:.4}", cov);
+            }
+        },
+        Err(e) => println!("error: {}", e)
+    }
+}
diff --git a/tests/lib.rs b/tests/lib.rs
index f1261809..c536cd82 100644
--- a/tests/lib.rs
+++ b/tests/lib.rs
@@ -6,8 +6,9 @@ pub mod learning {
     mod lin_reg;
     mod k_means;
     mod gp;
+    mod gmm;
 
     pub mod optim {
         mod grad_desc;
     }
-}
\ No newline at end of file
+}