From 248ce54c01282d428a9c31b862280166e52c6eb9 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 15 Aug 2017 21:24:12 -0700 Subject: [PATCH 01/55] C code for one row of M --- selectiveInference/R/funs.fixed.R | 19 +++ selectiveInference/src/debiasing_matrix.c | 151 ++++++++++++++++++++++ 2 files changed, 170 insertions(+) create mode 100644 selectiveInference/src/debiasing_matrix.c diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 7073e2e..da701e5 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -334,6 +334,25 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold return(M) } +InverseLinftyOneRowC <- function ( sigma, i, mu, maxiter=50, threshold=1e-2 ) { + + p = nrow(sigma) + theta = rep(0, p) + + val = .C("find_one_row", + Sigma=as.double(sigma), + nrow=as.integer(p), + bound=as.double(mu), + theta=as.double(theta), + maxiter=as.integer(maxiter), + row=as.integer(i-1), + coord=as.integer(i-1), + dup=FALSE, + package="selectiveInference") + + return(val$theta) +} + InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-2 ) { p <- nrow(sigma); rho <- max(abs(sigma[i,-i])) / sigma[i,i]; diff --git a/selectiveInference/src/debiasing_matrix.c b/selectiveInference/src/debiasing_matrix.c new file mode 100644 index 0000000..0a626ea --- /dev/null +++ b/selectiveInference/src/debiasing_matrix.c @@ -0,0 +1,151 @@ +#include +#include // for fabs + +// Find an approximate row of \hat{Sigma}^{-1} + +// Problem (4) of ???? + +// Update one coordinate + +double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n */ + int nrow, /* How many rows in Sigma */ + double bound, /* feasibility parameter */ + double *theta, /* current value */ + int row, /* which row: 0-based */ + int coord) /* which coordinate to update: 0-based */ +{ + + double linear_term = 0; + double quadratic_term = 0; + double value = 0; + double *Sigma_ptr; + double *theta_ptr = theta; + int icol = 0; + + Sigma_ptr = ((double *) Sigma + nrow * coord); + + for (icol=0; icol < nrow; icol++) { + if (icol != coord) { + linear_term += (*Sigma_ptr) * (*theta_ptr); + } + else { + quadratic_term = (*Sigma_ptr); + } + Sigma_ptr += 1; + theta_ptr += 1; + } + + if (row == coord) { + linear_term += 1; + } + + // Now soft-threshold the coord entry of theta + + // Objective is t \mapsto q/2 * t^2 + l * t + bound |t| + // with q=quadratic_term and l=linear_term + + if (linear_term < -bound) { + value = - (-linear_term - bound) / quadratic_term; + } + else if (linear_term > bound) { + value = (linear_term - bound) / quadratic_term; + } + + theta_ptr = ((double *) theta + coord); + *theta_ptr = value; + return(value); + +} + +double objective(double *Sigma, /* A covariance matrix: X^TX/n */ + int nrow, /* how many rows in Sigma */ + int row, /* which row: 0-based */ + double bound, /* Lagrange multipler for \ell_1 */ + double *theta) /* current value */ +{ + int irow, icol; + double value = 0; + double *Sigma_ptr = Sigma; + double *theta_row_ptr, *theta_col_ptr; + + theta_row_ptr = theta; + theta_col_ptr = theta; + + for (irow=0; irow 3)) { + break; + } + old_value = new_value; + } + + *nrow_ptr = iter-1; +} + From 4d6719990d1f49059e8fd672f27f784e4e3f2fbe Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 15 Aug 2017 21:51:39 -0700 Subject: [PATCH 02/55] added a warning about feasibility -- but it didn't print in gist? --- selectiveInference/R/funs.fixed.R | 15 ++++++++++++--- selectiveInference/src/debiasing_matrix.c | 15 +++++++++++---- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index da701e5..1bf216e 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -334,13 +334,15 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold return(M) } -InverseLinftyOneRowC <- function ( sigma, i, mu, maxiter=50, threshold=1e-2 ) { +InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50, threshold=1e-2 ) { - p = nrow(sigma) + p = nrow(Sigma) + basis_vector = rep(0, p) + basis_vector[i] = 1. theta = rep(0, p) val = .C("find_one_row", - Sigma=as.double(sigma), + Sigma=as.double(Sigma), nrow=as.integer(p), bound=as.double(mu), theta=as.double(theta), @@ -350,6 +352,13 @@ InverseLinftyOneRowC <- function ( sigma, i, mu, maxiter=50, threshold=1e-2 ) { dup=FALSE, package="selectiveInference") + # Check feasibility + + if (max(abs(Sigma %*% val$theta - basis_vector)) > 1.01 * mu) { + print("Solution for row of M does not seem to be feasible") + warning("Solution for row of M does not seem to be feasible") + } + return(val$theta) } diff --git a/selectiveInference/src/debiasing_matrix.c b/selectiveInference/src/debiasing_matrix.c index 0a626ea..6309df3 100644 --- a/selectiveInference/src/debiasing_matrix.c +++ b/selectiveInference/src/debiasing_matrix.c @@ -3,8 +3,12 @@ // Find an approximate row of \hat{Sigma}^{-1} -// Problem (4) of ???? +// Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf +// Dual problem: \text{min}_{\theta} 1/2 \theta^T \Sigma \theta - e_i^T\theta + \mu \|\theta\|_1 + +// This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf +// Therefore we don't have to negate the answer to get theta. // Update one coordinate double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n */ @@ -36,7 +40,7 @@ double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n */ } if (row == coord) { - linear_term += 1; + linear_term -= 1; } // Now soft-threshold the coord entry of theta @@ -44,11 +48,14 @@ double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n */ // Objective is t \mapsto q/2 * t^2 + l * t + bound |t| // with q=quadratic_term and l=linear_term + // With a negative linear term, solution should be + // positive + if (linear_term < -bound) { - value = - (-linear_term - bound) / quadratic_term; + value = (-linear_term - bound) / quadratic_term; } else if (linear_term > bound) { - value = (linear_term - bound) / quadratic_term; + value = -(linear_term - bound) / quadratic_term; } theta_ptr = ((double *) theta + coord); From e87d12dbcddd960e8d5856915818a251969deb49 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 15 Aug 2017 21:54:45 -0700 Subject: [PATCH 03/55] removed print statement --- selectiveInference/R/funs.fixed.R | 1 - 1 file changed, 1 deletion(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 1bf216e..53dc61f 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -355,7 +355,6 @@ InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50, threshold=1e-2 ) { # Check feasibility if (max(abs(Sigma %*% val$theta - basis_vector)) > 1.01 * mu) { - print("Solution for row of M does not seem to be feasible") warning("Solution for row of M does not seem to be feasible") } From 210d0feff9932686e8210454138e577342520e21 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Tue, 15 Aug 2017 22:00:19 -0700 Subject: [PATCH 04/55] using C for each row --- selectiveInference/R/funs.fixed.R | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 53dc61f..b69dabb 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -159,7 +159,8 @@ sigma=NULL, alpha=0.1, hsigmaSinv <- solve(hsigmaS) # pinv(hsigmaS) # Approximate inverse covariance matrix for when (n < p) from lasso_Inference.R - htheta <- InverseLinfty(hsigma, n, length(S), verbose=FALSE) + useC = TRUE + htheta <- InverseLinfty(hsigma, n, length(S), verbose=FALSE, useC=useC) # htheta <- InverseLinfty(hsigma, n, verbose=FALSE) FS = rbind(diag(length(S)),matrix(0,pp-length(S),length(S))) @@ -269,7 +270,7 @@ fixedLasso.poly= ### Functions borrowed and slightly modified from lasso_inference.R ## Approximates inverse covariance matrix theta -InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE) { +InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE, useC = FALSE) { # InverseLinfty <- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE) { isgiven <- 1; if (is.null(mu)){ @@ -294,7 +295,11 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold incr <- 0; while ((mu.stop != 1)&&(try.no<10)){ last.beta <- beta - output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, threshold=threshold) + if (useC == FALSE) { + output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, threshold=threshold) + } else { + output <- InverseLinftyOneRowC(sigma, i, mu, maxiter=maxiter) + } beta <- output$optsol iter <- output$iter if (isgiven==1){ @@ -334,7 +339,7 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold return(M) } -InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50, threshold=1e-2 ) { +InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50) { p = nrow(Sigma) basis_vector = rep(0, p) From c7c41e417693578173e272e2653c46b3a3edaba1 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Aug 2017 08:32:39 -0700 Subject: [PATCH 05/55] optimized the C code a bit -- still has debug statements --- selectiveInference/R/funs.fixed.R | 20 ++- selectiveInference/src/debiasing_matrix.c | 154 +++++++++++++++------- 2 files changed, 119 insertions(+), 55 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index b69dabb..584018f 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -348,10 +348,12 @@ InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50) { val = .C("find_one_row", Sigma=as.double(Sigma), + Sigma_diag=as.double(diag(Sigma)), + Sigma_theta=as.double(rep(0, p)), nrow=as.integer(p), bound=as.double(mu), theta=as.double(theta), - maxiter=as.integer(maxiter), + maxiter=as.integer(50), row=as.integer(i-1), coord=as.integer(i-1), dup=FALSE, @@ -359,6 +361,12 @@ InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50) { # Check feasibility + # DEBUG statements + #print(diag(Sigma)) + #print(0.5 * sum(val$theta * (Sigma %*% val$theta)) - val$theta[i] + mu * sum(abs(val$theta))) + #print(Sigma %*% val$theta - val$Sigma_theta) + #print(val$nrow) # number of iterations + if (max(abs(Sigma %*% val$theta - basis_vector)) > 1.01 * mu) { warning("Solution for row of M does not seem to be feasible") } @@ -372,11 +380,11 @@ InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-2 ) { mu0 <- rho/(1+rho); beta <- rep(0,p); - if (mu >= mu0){ - beta[i] <- (1-mu0)/sigma[i,i]; - returnlist <- list("optsol" = beta, "iter" = 0); - return(returnlist); - } + #if (mu >= mu0){ + # beta[i] <- (1-mu0)/sigma[i,i]; + # returnlist <- list("optsol" = beta, "iter" = 0); + # return(returnlist); + #} diff.norm2 <- 1; last.norm2 <- 1; diff --git a/selectiveInference/src/debiasing_matrix.c b/selectiveInference/src/debiasing_matrix.c index 6309df3..7dfeb1e 100644 --- a/selectiveInference/src/debiasing_matrix.c +++ b/selectiveInference/src/debiasing_matrix.c @@ -11,34 +11,72 @@ // Therefore we don't have to negate the answer to get theta. // Update one coordinate -double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n */ - int nrow, /* How many rows in Sigma */ - double bound, /* feasibility parameter */ - double *theta, /* current value */ - int row, /* which row: 0-based */ - int coord) /* which coordinate to update: 0-based */ +double objective(double *Sigma, /* A covariance matrix: X^TX/n */ + int nrow, /* how many rows in Sigma */ + int row, /* which row: 0-based */ + double bound, /* Lagrange multipler for \ell_1 */ + double *theta) /* current value */ { + int irow, icol; + double value = 0; + double *Sigma_ptr = Sigma; + double *theta_row_ptr, *theta_col_ptr; + theta_row_ptr = theta; + theta_col_ptr = theta; + + for (irow=0; irow 1.e-6 * (fabs(value) + fabs(old_value))) { // Update the linear term + + delta = value - old_value; + Sigma_ptr = ((double *) Sigma + coord * nrow); + Sigma_theta_ptr = ((double *) Sigma_theta); + + for (icol=0; icol before) { + fprintf(stderr, "not a descent step!!!!!!!!!!!!!!!!!!!!!\n"); + } - theta_row_ptr = theta; - theta_col_ptr = theta; - for (irow=0; irow 3)) { + if (((old_value - new_value) < tol * fabs(new_value)) && (iter > 5)) { break; } + + fprintf(stderr, "%f %f value\n", old_value, new_value); old_value = new_value; } From f6ae738bad048aca71a22c94ba4fe8cdb9b1f3d7 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Aug 2017 08:50:20 -0700 Subject: [PATCH 06/55] removing debug statements, making objective computation faster --- selectiveInference/R/funs.fixed.R | 6 --- selectiveInference/src/debiasing_matrix.c | 46 +++++------------------ 2 files changed, 9 insertions(+), 43 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 584018f..aa8eac2 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -361,12 +361,6 @@ InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50) { # Check feasibility - # DEBUG statements - #print(diag(Sigma)) - #print(0.5 * sum(val$theta * (Sigma %*% val$theta)) - val$theta[i] + mu * sum(abs(val$theta))) - #print(Sigma %*% val$theta - val$Sigma_theta) - #print(val$nrow) # number of iterations - if (max(abs(Sigma %*% val$theta - basis_vector)) > 1.01 * mu) { warning("Solution for row of M does not seem to be feasible") } diff --git a/selectiveInference/src/debiasing_matrix.c b/selectiveInference/src/debiasing_matrix.c index 7dfeb1e..0849f38 100644 --- a/selectiveInference/src/debiasing_matrix.c +++ b/selectiveInference/src/debiasing_matrix.c @@ -27,15 +27,16 @@ double objective(double *Sigma, /* A covariance matrix: X^TX/n */ for (irow=0; irow before) { - fprintf(stderr, "not a descent step!!!!!!!!!!!!!!!!!!!!!\n"); - } - + theta_ptr = ((double *) theta + coord); + *theta_ptr = value; } - Sigma_ptr = ((double *) Sigma + coord * nrow); - Sigma_theta_ptr = ((double *) Sigma_theta); - for (icol=0; icol 5)) { + if (((old_value - new_value) < tol * fabs(new_value)) && (iter > 0)) { break; } - fprintf(stderr, "%f %f value\n", old_value, new_value); old_value = new_value; } From 1a8e2ad351c08f7eab632d6f7184651d1640e89e Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Aug 2017 09:23:01 -0700 Subject: [PATCH 07/55] slightly lower tolerance --- selectiveInference/src/debiasing_matrix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selectiveInference/src/debiasing_matrix.c b/selectiveInference/src/debiasing_matrix.c index 0849f38..420ecb9 100644 --- a/selectiveInference/src/debiasing_matrix.c +++ b/selectiveInference/src/debiasing_matrix.c @@ -141,7 +141,7 @@ void find_one_row(double *Sigma, /* A covariance matrix: X^TX/n */ bound, theta); double new_value; - double tol=1.e-10; + double tol=1.e-6; for (iter=0; iter Date: Wed, 16 Aug 2017 10:16:24 -0700 Subject: [PATCH 08/55] using active sets --- selectiveInference/R/funs.fixed.R | 2 + selectiveInference/src/debiasing_matrix.c | 144 ++++++++++++++++++++-- 2 files changed, 133 insertions(+), 13 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index aa8eac2..a30fc0b 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -350,6 +350,8 @@ InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50) { Sigma=as.double(Sigma), Sigma_diag=as.double(diag(Sigma)), Sigma_theta=as.double(rep(0, p)), + ever_active=as.integer(i), + nactive_ptr=as.integer(1), nrow=as.integer(p), bound=as.double(mu), theta=as.double(theta), diff --git a/selectiveInference/src/debiasing_matrix.c b/selectiveInference/src/debiasing_matrix.c index 420ecb9..e3ef415 100644 --- a/selectiveInference/src/debiasing_matrix.c +++ b/selectiveInference/src/debiasing_matrix.c @@ -12,6 +12,8 @@ // Update one coordinate double objective(double *Sigma, /* A covariance matrix: X^TX/n */ + int *ever_active, /* Ever active set */ + int *nactive_ptr, /* Size of ever active set */ int nrow, /* how many rows in Sigma */ int row, /* which row: 0-based */ double bound, /* Lagrange multipler for \ell_1 */ @@ -21,33 +23,106 @@ double objective(double *Sigma, /* A covariance matrix: X^TX/n */ double value = 0; double *Sigma_ptr = Sigma; double *theta_row_ptr, *theta_col_ptr; + int *active_row_ptr, *active_col_ptr; + int active_row, active_col; + int nactive = *nactive_ptr; theta_row_ptr = theta; theta_col_ptr = theta; + for (irow=0; irow 0) && (fabs(gradient + bound) > (1. + tol) * bound)) { + fail += 1; + } + else if ((*theta_ptr < 0) && (fabs(gradient - bound) > (1. + tol) * bound)) { + fail += 1; } } - if (irow == row) { - value -= (*theta_row_ptr); // the elementary basis vector term + else { + if (fabs(gradient) > (1. + tol) * bound) { + fail += 1; + } } - value = value + bound * fabs((*theta_row_ptr)); // the \ell_1 term - theta_row_ptr++; } - return(value); + return(fail == 0); } double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n */ double *Sigma_diag, /* Diagonal entries of Sigma */ double *Sigma_theta, /* Sigma times theta */ + int *ever_active, /* Ever active set */ + int *nactive_ptr, /* Size of ever active set */ int nrow, /* How many rows in Sigma */ double bound, /* feasibility parameter */ double *theta, /* current value */ @@ -67,6 +142,8 @@ double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n double *quadratic_ptr = ((double *) Sigma_diag + coord); double quadratic_term = *quadratic_ptr; + int *ever_active_ptr; + Sigma_theta_ptr = ((double *) Sigma_theta + coord); linear_term = *Sigma_theta_ptr; @@ -97,7 +174,17 @@ double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n value = -(linear_term - bound) / quadratic_term; } - if (fabs(old_value - value) > 1.e-6 * (fabs(value) + fabs(old_value))) { // Update the linear term + // Add to active set if necessary + + if ((value != 0) && (is_active(coord, ever_active, nactive_ptr) == 0)) { + ever_active_ptr = ((int *) ever_active + *nactive_ptr); + *ever_active_ptr = coord; + *nactive_ptr += 1; + } + + // Update the linear term + + if (fabs(old_value - value) > 1.e-6 * (fabs(value) + fabs(old_value))) { delta = value - old_value; Sigma_ptr = ((double *) Sigma + coord * nrow); @@ -121,6 +208,8 @@ double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n void find_one_row(double *Sigma, /* A covariance matrix: X^TX/n */ double *Sigma_diag, /* Diagonal entry of covariance matrix */ double *Sigma_theta, /* Sigma times theta */ + int *ever_active, /* Ever active set */ + int *nactive_ptr, /* Size of ever active set */ int *nrow_ptr, /* How many rows in Sigma */ double *bound_ptr, /* feasibility parameter */ double *theta, /* current value */ @@ -135,13 +224,17 @@ void find_one_row(double *Sigma, /* A covariance matrix: X^TX/n */ double bound = *bound_ptr; int nrow = *nrow_ptr; + fprintf(stderr, "starting now\n"); + double old_value = objective(Sigma, + ever_active, + nactive_ptr, nrow, row, bound, theta); double new_value; - double tol=1.e-6; + double tol=1.e-5; for (iter=0; iter 0)) { + fprintf(stderr, "ending in objective value check\n"); break; } From 096b0b8f9f92bb360f8d4f59bc9cbd6f5e4437c1 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Aug 2017 17:02:26 -0700 Subject: [PATCH 09/55] begun the Rcpp extension --- selectiveInference/src/Makevars | 14 ++++++ selectiveInference/src/Rcpp-debias.cpp | 45 +++++++++++++++++++ .../src/{debiasing_matrix.c => debias.c} | 6 +-- selectiveInference/src/debias.h | 10 +++++ 4 files changed, 72 insertions(+), 3 deletions(-) create mode 100644 selectiveInference/src/Makevars create mode 100644 selectiveInference/src/Rcpp-debias.cpp rename selectiveInference/src/{debiasing_matrix.c => debias.c} (97%) create mode 100644 selectiveInference/src/debias.h diff --git a/selectiveInference/src/Makevars b/selectiveInference/src/Makevars new file mode 100644 index 0000000..191c774 --- /dev/null +++ b/selectiveInference/src/Makevars @@ -0,0 +1,14 @@ +PKG_CFLAGS=-DR_PACKAGE=1 -I. +PKG_CPPFLAGS=-DR_PACKAGE=1 -I. +PKG_LIBS=-L. -ldebias + +$(SHLIB): Rcpp-debias.o RcppExports.o + +Rcpp-debias.o: debias.ts +RcppExports.o: debias.ts + +debias.ts: + gcc -shared -fPIC selectiveInference/src/debiasing_matrix.c -o debias.so + +clean: + rm -f debias.so diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp new file mode 100644 index 0000000..b154dcd --- /dev/null +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -0,0 +1,45 @@ +#include // need to include the main Rcpp header file +#include // where find_one_row is defined + +// [[Rcpp::export]] +Rcpp::NumericVector find_one_row(Rcpp::NumericMatrix Sigma, + int row, // 0-based + double bound, + int maxiter) { + + int nrow = Sigma.nrow(); // number of features + int nactive = 1; + Rcpp::IntegerVector ever_active(1); + Rcpp::NumericVector Sigma_diag(nrow); + Rcpp::NumericVector Sigma_theta(nrow); + +} + Rcpp::NumericVector xUL, + int maxEval, double absErr, double tol, int vectorInterface, unsigned norm) { + + count = 0; /* Zero count */ + fun = f; + + Rcpp::NumericVector integral(fDim); + Rcpp::NumericVector errVals(fDim); + int retCode; + + // Rcpp::Rcout<<"Call Integrator" < Date: Wed, 16 Aug 2017 17:44:22 -0700 Subject: [PATCH 10/55] don't lose this --- selectiveInference/src/Rcpp-debias.cpp | 64 +++++++++++++------------- selectiveInference/src/debias.c | 20 ++++---- selectiveInference/src/debias.h | 28 +++++++---- 3 files changed, 61 insertions(+), 51 deletions(-) diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index b154dcd..384c105 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -1,5 +1,5 @@ #include // need to include the main Rcpp header file -#include // where find_one_row is defined +#include // where find_one_row_void is defined // [[Rcpp::export]] Rcpp::NumericVector find_one_row(Rcpp::NumericMatrix Sigma, @@ -8,38 +8,40 @@ Rcpp::NumericVector find_one_row(Rcpp::NumericMatrix Sigma, int maxiter) { int nrow = Sigma.nrow(); // number of features - int nactive = 1; + + // Active set + + int irow; + Rcpp::IntegerVector nactive(1); // An array so we can easily modify it Rcpp::IntegerVector ever_active(1); + int *ever_active_p = ever_active.begin(); + *ever_active_p = row; + + // Extract the diagonal Rcpp::NumericVector Sigma_diag(nrow); - Rcpp::NumericVector Sigma_theta(nrow); + double *sigma_p = Sigma_diag.begin(); -} - Rcpp::NumericVector xUL, - int maxEval, double absErr, double tol, int vectorInterface, unsigned norm) { - - count = 0; /* Zero count */ - fun = f; - - Rcpp::NumericVector integral(fDim); - Rcpp::NumericVector errVals(fDim); - int retCode; - - // Rcpp::Rcout<<"Call Integrator" < Date: Wed, 16 Aug 2017 17:52:02 -0700 Subject: [PATCH 11/55] segfaulting on the test_big.R :( --- selectiveInference/DESCRIPTION | 2 ++ selectiveInference/NAMESPACE | 2 +- selectiveInference/R/funs.fixed.R | 51 ++++++++++++++++--------------- selectiveInference/src/Makevars | 16 +++------- 4 files changed, 35 insertions(+), 36 deletions(-) diff --git a/selectiveInference/DESCRIPTION b/selectiveInference/DESCRIPTION index d0740ec..d902622 100644 --- a/selectiveInference/DESCRIPTION +++ b/selectiveInference/DESCRIPTION @@ -18,3 +18,5 @@ Description: New tools for post-selection inference, for use with forward models. License: GPL-2 RoxygenNote: 5.0.1 +LinkingTo: Rcpp +Imports: Rcpp diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index 099fdc5..ab2a111 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -44,5 +44,5 @@ importFrom("stats", dnorm, lsfit, pexp, pnorm, predict, qnorm, rnorm, sd, uniroot, dchisq, model.matrix, pchisq) importFrom("stats", "coef", "df", "lm", "pf") importFrom("stats", "glm", "residuals", "vcov") - +importFrom("Rcpp", "sourceCpp") diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index a30fc0b..f4b5a26 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -341,34 +341,37 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50) { - p = nrow(Sigma) - basis_vector = rep(0, p) - basis_vector[i] = 1. - theta = rep(0, p) + theta = find_one_row(Sigma, i, mu, maxiter) + return(theta) +} +# p = nrow(Sigma) +# basis_vector = rep(0, p) +# basis_vector[i] = 1. +# theta = rep(0, p) - val = .C("find_one_row", - Sigma=as.double(Sigma), - Sigma_diag=as.double(diag(Sigma)), - Sigma_theta=as.double(rep(0, p)), - ever_active=as.integer(i), - nactive_ptr=as.integer(1), - nrow=as.integer(p), - bound=as.double(mu), - theta=as.double(theta), - maxiter=as.integer(50), - row=as.integer(i-1), - coord=as.integer(i-1), - dup=FALSE, - package="selectiveInference") +# val = .C("find_one_row", +# Sigma=as.double(Sigma), +# Sigma_diag=as.double(diag(Sigma)), +# Sigma_theta=as.double(rep(0, p)), +# ever_active=as.integer(i), +# nactive_ptr=as.integer(1), +# nrow=as.integer(p), +# bound=as.double(mu), +# theta=as.double(theta), +# maxiter=as.integer(50), +# row=as.integer(i-1), +# coord=as.integer(i-1), +# dup=FALSE, +# package="selectiveInference") - # Check feasibility +# # Check feasibility - if (max(abs(Sigma %*% val$theta - basis_vector)) > 1.01 * mu) { - warning("Solution for row of M does not seem to be feasible") - } +# if (max(abs(Sigma %*% val$theta - basis_vector)) > 1.01 * mu) { +# warning("Solution for row of M does not seem to be feasible") +# } - return(val$theta) -} +# return(val$theta) +# } InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-2 ) { p <- nrow(sigma); diff --git a/selectiveInference/src/Makevars b/selectiveInference/src/Makevars index 191c774..c68d0bd 100644 --- a/selectiveInference/src/Makevars +++ b/selectiveInference/src/Makevars @@ -1,14 +1,8 @@ -PKG_CFLAGS=-DR_PACKAGE=1 -I. -PKG_CPPFLAGS=-DR_PACKAGE=1 -I. -PKG_LIBS=-L. -ldebias +PKG_CFLAGS= -I. +PKG_CPPFLAGS= -I. +PKG_LIBS=-L. -$(SHLIB): Rcpp-debias.o RcppExports.o - -Rcpp-debias.o: debias.ts -RcppExports.o: debias.ts - -debias.ts: - gcc -shared -fPIC selectiveInference/src/debiasing_matrix.c -o debias.so +$(SHLIB): Rcpp-debias.o RcppExports.o debias.o clean: - rm -f debias.so + rm -f *o From f53af23587372422cfcbe4e5d9105e2a384edbc6 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Aug 2017 18:41:19 -0700 Subject: [PATCH 12/55] success -- about 2ms now -- python still faster (as timed through rpy2) :) --- README.md | 9 +++++++ selectiveInference/R/funs.fixed.R | 35 ++++++-------------------- selectiveInference/src/Rcpp-debias.cpp | 8 +++--- selectiveInference/src/debias.c | 13 +++------- selectiveInference/src/debias.h | 8 +++--- 5 files changed, 29 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index 962151e..cf855e1 100644 --- a/README.md +++ b/README.md @@ -21,3 +21,12 @@ The latest release of the package can be installed through CRAN: install.packages("selectiveInference") ``` Code in repo is under development and may be unstable. + +## For development + +You will have to run + +```R +library(Rcpp) +Rcpp::compileAttributes('selectiveInference') +``` \ No newline at end of file diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index f4b5a26..7314511 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -295,6 +295,7 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold incr <- 0; while ((mu.stop != 1)&&(try.no<10)){ last.beta <- beta + useC = TRUE if (useC == FALSE) { output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, threshold=threshold) } else { @@ -341,37 +342,17 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50) { - theta = find_one_row(Sigma, i, mu, maxiter) - return(theta) -} -# p = nrow(Sigma) -# basis_vector = rep(0, p) -# basis_vector[i] = 1. -# theta = rep(0, p) + theta = find_one_row(Sigma, i-1, mu, maxiter) -# val = .C("find_one_row", -# Sigma=as.double(Sigma), -# Sigma_diag=as.double(diag(Sigma)), -# Sigma_theta=as.double(rep(0, p)), -# ever_active=as.integer(i), -# nactive_ptr=as.integer(1), -# nrow=as.integer(p), -# bound=as.double(mu), -# theta=as.double(theta), -# maxiter=as.integer(50), -# row=as.integer(i-1), -# coord=as.integer(i-1), -# dup=FALSE, -# package="selectiveInference") + # Check feasibility -# # Check feasibility + if (max(abs(Sigma %*% val$theta - basis_vector)) > 1.01 * mu) { + warning("Solution for row of M does not seem to be feasible") + } -# if (max(abs(Sigma %*% val$theta - basis_vector)) > 1.01 * mu) { -# warning("Solution for row of M does not seem to be feasible") -# } + return(theta) -# return(val$theta) -# } +} InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-2 ) { p <- nrow(sigma); diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index 384c105..d8cd729 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -37,11 +37,11 @@ Rcpp::NumericVector find_one_row(Rcpp::NumericMatrix Sigma, (double *) Sigma_theta.begin(), (int *) ever_active.begin(), (int *) nactive.begin(), - (int *) &nrow, - (double *) &bound, + nrow, + bound, (double *) theta.begin(), - (int *) &maxiter, - (int *) &row); + maxiter, + row); return theta; } diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 7932b2e..f8ff6e3 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -210,19 +210,15 @@ void find_one_row_void(double *Sigma, /* A covariance matrix: X^TX/n */ double *Sigma_theta, /* Sigma times theta */ int *ever_active, /* Ever active set: 0-based */ int *nactive_ptr, /* Size of ever active set */ - int *nrow_ptr, /* How many rows in Sigma */ - double *bound_ptr, /* feasibility parameter */ + int nrow, /* How many rows in Sigma */ + double bound, /* feasibility parameter */ double *theta, /* current value */ - int *maxiter_ptr, /* how many iterations */ - int *row_ptr) /* which coordinate to update: 0-based */ + int maxiter, /* how many iterations */ + int row) /* which coordinate to update: 0-based */ { - int maxiter = *maxiter_ptr; int iter = 0; int icoord = 0; - int row = *row_ptr; - double bound = *bound_ptr; - int nrow = *nrow_ptr; fprintf(stderr, "starting now\n"); @@ -299,6 +295,5 @@ void find_one_row_void(double *Sigma, /* A covariance matrix: X^TX/n */ old_value = new_value; } - *nrow_ptr = iter-1; } diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index 28d2a88..107d931 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -8,11 +8,11 @@ void find_one_row_void(double *Sigma, /* A covariance matrix: X^TX/n */ double *Sigma_theta, /* Sigma times theta */ int *ever_active, /* Ever active set: 0-based */ int *nactive_ptr, /* Size of ever active set */ - int *nrow_ptr, /* How many rows in Sigma */ - double *bound_ptr, /* feasibility parameter */ + int nrow, /* How many rows in Sigma */ + double bound, /* feasibility parameter */ double *theta, /* current value */ - int *maxiter_ptr, /* how many iterations */ - int *row_ptr); /* which coordinate to update: 0-based */ + int maxiter, /* how many iterations */ + int row); /* which coordinate to update: 0-based */ #ifdef __cplusplus } /* extern "C" */ #endif /* __cplusplus */ From 7920c8c5fd8564e5410104ac993303ceddb1205b Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Aug 2017 23:27:06 -0700 Subject: [PATCH 13/55] BF: column major, removing print statements --- selectiveInference/src/debias.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index f8ff6e3..fd9be79 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -1,4 +1,3 @@ -#include #include // for fabs // Find an approximate row of \hat{Sigma}^{-1} @@ -41,10 +40,8 @@ double objective(double *Sigma, /* A covariance matrix: X^TX/n */ active_col_ptr = ((int *) ever_active + icol); active_col = *active_col_ptr; theta_col_ptr = ((double *) theta + active_col); - - fprintf(stderr, "%d %d \n", active_row, active_col); - Sigma_ptr = ((double *) Sigma + nrow * active_row + active_col); + Sigma_ptr = ((double *) Sigma + nrow * active_col + active_row); // Matrices are column-major order value += 0.5 * (*Sigma_ptr) * (*theta_row_ptr) * (*theta_col_ptr); } @@ -220,8 +217,6 @@ void find_one_row_void(double *Sigma, /* A covariance matrix: X^TX/n */ int iter = 0; int icoord = 0; - fprintf(stderr, "starting now\n"); - double old_value = objective(Sigma, ever_active, nactive_ptr, @@ -252,7 +247,6 @@ void find_one_row_void(double *Sigma, /* A covariance matrix: X^TX/n */ nrow, row, bound) == 1) { - fprintf(stderr, "ending in first KKT check\n"); break; } @@ -275,7 +269,6 @@ void find_one_row_void(double *Sigma, /* A covariance matrix: X^TX/n */ nrow, row, bound) == 1) { - fprintf(stderr, "ending in second KKT check\n"); break; } @@ -288,7 +281,6 @@ void find_one_row_void(double *Sigma, /* A covariance matrix: X^TX/n */ theta); if (((old_value - new_value) < tol * fabs(new_value)) && (iter > 0)) { - fprintf(stderr, "ending in objective value check\n"); break; } From 6756ab2935c0dd93ab4bde228db31e09066b14a6 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Aug 2017 23:28:14 -0700 Subject: [PATCH 14/55] changing name of function, evaluation of feasibility in Rcpp --- Makefile | 4 ++++ README.md | 5 ++--- selectiveInference/R/funs.fixed.R | 8 +++++--- selectiveInference/src/Rcpp-debias.cpp | 25 ++++++++++++++++++++----- 4 files changed, 31 insertions(+), 11 deletions(-) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..dfe6002 --- /dev/null +++ b/Makefile @@ -0,0 +1,4 @@ +Rcpp: + rm selectiveInference/src/RcppExports.cpp + rm selectiveInference/R/RcppExports.R + Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')" \ No newline at end of file diff --git a/README.md b/README.md index cf855e1..0a60855 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,6 @@ Code in repo is under development and may be unstable. You will have to run -```R -library(Rcpp) -Rcpp::compileAttributes('selectiveInference') +``` +make Rcpp ``` \ No newline at end of file diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 7314511..f2b9838 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -342,11 +342,13 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50) { - theta = find_one_row(Sigma, i-1, mu, maxiter) + result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter) # C function uses 0-based indexing + theta = result$theta + feasible_val = result$feasible_val - # Check feasibility + # Check feasibility - if (max(abs(Sigma %*% val$theta - basis_vector)) > 1.01 * mu) { + if (feasible_val > 1.01 * mu) { warning("Solution for row of M does not seem to be feasible") } diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index d8cd729..48f484f 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -2,10 +2,10 @@ #include // where find_one_row_void is defined // [[Rcpp::export]] -Rcpp::NumericVector find_one_row(Rcpp::NumericMatrix Sigma, - int row, // 0-based - double bound, - int maxiter) { +Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, + int row, // 0-based + double bound, + int maxiter) { int nrow = Sigma.nrow(); // number of features @@ -42,6 +42,21 @@ Rcpp::NumericVector find_one_row(Rcpp::NumericMatrix Sigma, (double *) theta.begin(), maxiter, row); + + // Check whether feasible + + double feasible_val = 0; + double val; + for (irow=0; irow feasible_val) { + feasible_val = fabs(val); + } + } - return theta; + return(Rcpp::List::create(Rcpp::Named("theta") = theta, + Rcpp::Named("feasible_val") = feasible_val)); } From 0e584354e49c2fb68039761a0d6b040c1e37c1b9 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Aug 2017 23:37:00 -0700 Subject: [PATCH 15/55] RF: renamed C function, removing R solver for one row --- selectiveInference/R/funs.fixed.R | 82 ++------------------------ selectiveInference/src/Rcpp-debias.cpp | 25 ++++---- selectiveInference/src/debias.c | 22 +++---- selectiveInference/src/debias.h | 20 +++---- 4 files changed, 40 insertions(+), 109 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index f2b9838..d0bc861 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -159,8 +159,8 @@ sigma=NULL, alpha=0.1, hsigmaSinv <- solve(hsigmaS) # pinv(hsigmaS) # Approximate inverse covariance matrix for when (n < p) from lasso_Inference.R - useC = TRUE - htheta <- InverseLinfty(hsigma, n, length(S), verbose=FALSE, useC=useC) + + htheta <- InverseLinfty(hsigma, n, length(S), verbose=FALSE) # htheta <- InverseLinfty(hsigma, n, verbose=FALSE) FS = rbind(diag(length(S)),matrix(0,pp-length(S),length(S))) @@ -270,7 +270,7 @@ fixedLasso.poly= ### Functions borrowed and slightly modified from lasso_inference.R ## Approximates inverse covariance matrix theta -InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE, useC = FALSE) { +InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE) { # InverseLinfty <- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE) { isgiven <- 1; if (is.null(mu)){ @@ -295,12 +295,7 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold incr <- 0; while ((mu.stop != 1)&&(try.no<10)){ last.beta <- beta - useC = TRUE - if (useC == FALSE) { - output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, threshold=threshold) - } else { - output <- InverseLinftyOneRowC(sigma, i, mu, maxiter=maxiter) - } + output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter) beta <- output$optsol iter <- output$iter if (isgiven==1){ @@ -340,7 +335,7 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold return(M) } -InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50) { +InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50) { result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter) # C function uses 0-based indexing theta = result$theta @@ -352,73 +347,8 @@ InverseLinftyOneRowC <- function (Sigma, i, mu, maxiter=50) { warning("Solution for row of M does not seem to be feasible") } - return(theta) - -} + return(result) -InverseLinftyOneRow <- function ( sigma, i, mu, maxiter=50, threshold=1e-2 ) { - p <- nrow(sigma); - rho <- max(abs(sigma[i,-i])) / sigma[i,i]; - mu0 <- rho/(1+rho); - beta <- rep(0,p); - - #if (mu >= mu0){ - # beta[i] <- (1-mu0)/sigma[i,i]; - # returnlist <- list("optsol" = beta, "iter" = 0); - # return(returnlist); - #} - - diff.norm2 <- 1; - last.norm2 <- 1; - iter <- 1; - iter.old <- 1; - beta[i] <- (1-mu0)/sigma[i,i]; - beta.old <- beta; - sigma.tilde <- sigma; - diag(sigma.tilde) <- 0; - vs <- -sigma.tilde%*%beta; - - while ((iter <= maxiter) && (diff.norm2 >= threshold*last.norm2)){ - - for (j in 1:p){ - oldval <- beta[j]; - v <- vs[j]; - if (j==i) - v <- v+1; - beta[j] <- SoftThreshold(v,mu)/sigma[j,j]; - if (oldval != beta[j]){ - vs <- vs + (oldval-beta[j])*sigma.tilde[,j]; - } - } - - iter <- iter + 1; - if (iter==2*iter.old){ - d <- beta - beta.old; - diff.norm2 <- sqrt(sum(d*d)); - last.norm2 <-sqrt(sum(beta*beta)); - iter.old <- iter; - beta.old <- beta; - if (iter>10) - vs <- -sigma.tilde%*%beta; - } - } - - returnlist <- list("optsol" = beta, "iter" = iter) - return(returnlist) -} - -SoftThreshold <- function( x, lambda ) { - # - # Standard soft thresholding - # - if (x>lambda){ - return (x-lambda);} - else { - if (x< (-lambda)){ - return (x+lambda);} - else { - return (0); } - } } ############################## diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index 48f484f..bb8b5cd 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -32,16 +32,16 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, // Now call our C function - find_one_row_void((double *) Sigma.begin(), - (double *) Sigma_diag.begin(), - (double *) Sigma_theta.begin(), - (int *) ever_active.begin(), - (int *) nactive.begin(), - nrow, - bound, - (double *) theta.begin(), - maxiter, - row); + int iter = find_one_row_((double *) Sigma.begin(), + (double *) Sigma_diag.begin(), + (double *) Sigma_theta.begin(), + (int *) ever_active.begin(), + (int *) nactive.begin(), + nrow, + bound, + (double *) theta.begin(), + maxiter, + row); // Check whether feasible @@ -57,6 +57,7 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, } } - return(Rcpp::List::create(Rcpp::Named("theta") = theta, - Rcpp::Named("feasible_val") = feasible_val)); + return(Rcpp::List::create(Rcpp::Named("optsol") = theta, + Rcpp::Named("feasible_val") = feasible_val, + Rcpp::Named("iter") = iter)); } diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index fd9be79..d2f671d 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -202,16 +202,16 @@ double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n } -void find_one_row_void(double *Sigma, /* A covariance matrix: X^TX/n */ - double *Sigma_diag, /* Diagonal entry of covariance matrix */ - double *Sigma_theta, /* Sigma times theta */ - int *ever_active, /* Ever active set: 0-based */ - int *nactive_ptr, /* Size of ever active set */ - int nrow, /* How many rows in Sigma */ - double bound, /* feasibility parameter */ - double *theta, /* current value */ - int maxiter, /* how many iterations */ - int row) /* which coordinate to update: 0-based */ +int find_one_row_(double *Sigma, /* A covariance matrix: X^TX/n */ + double *Sigma_diag, /* Diagonal entry of covariance matrix */ + double *Sigma_theta, /* Sigma times theta */ + int *ever_active, /* Ever active set: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* How many rows in Sigma */ + double bound, /* feasibility parameter */ + double *theta, /* current value */ + int maxiter, /* how many iterations */ + int row) /* which coordinate to update: 0-based */ { int iter = 0; @@ -286,6 +286,6 @@ void find_one_row_void(double *Sigma, /* A covariance matrix: X^TX/n */ old_value = new_value; } - + return(iter); } diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index 107d931..48f9753 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -3,16 +3,16 @@ extern "C" { #endif /* __cplusplus */ -void find_one_row_void(double *Sigma, /* A covariance matrix: X^TX/n */ - double *Sigma_diag, /* Diagonal entry of covariance matrix */ - double *Sigma_theta, /* Sigma times theta */ - int *ever_active, /* Ever active set: 0-based */ - int *nactive_ptr, /* Size of ever active set */ - int nrow, /* How many rows in Sigma */ - double bound, /* feasibility parameter */ - double *theta, /* current value */ - int maxiter, /* how many iterations */ - int row); /* which coordinate to update: 0-based */ +int find_one_row_(double *Sigma, /* A covariance matrix: X^TX/n */ + double *Sigma_diag, /* Diagonal entry of covariance matrix */ + double *Sigma_theta, /* Sigma times theta */ + int *ever_active, /* Ever active set: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* How many rows in Sigma */ + double bound, /* feasibility parameter */ + double *theta, /* current value */ + int maxiter, /* how many iterations */ + int row); /* which coordinate to update: 0-based */ #ifdef __cplusplus } /* extern "C" */ #endif /* __cplusplus */ From 06143059f7732e98bd30f18976775a7a5954ba9b Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Aug 2017 23:40:55 -0700 Subject: [PATCH 16/55] need to make Rcpp exports --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index aa61233..a3fdd1f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,8 +7,9 @@ r: - devel addons: apt: - packages: libmpfr-dev + packages: libmpfr-dev warnings_are_errors: true before_install: - tlmgr install index # for texlive and vignette? + - make Rcpp - cd selectiveInference From ca60bc90d631c8723c031b6d7f8a2c748816a82a Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Aug 2017 23:44:51 -0700 Subject: [PATCH 17/55] ignore first two lines if status not OK? --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index dfe6002..5c55bb8 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ Rcpp: - rm selectiveInference/src/RcppExports.cpp - rm selectiveInference/R/RcppExports.R + - rm -f selectiveInference/src/RcppExports.cpp + - rm -f selectiveInference/R/RcppExports.R Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')" \ No newline at end of file From 60f6cdea4e355d70b305bd61062371dedc4b7e06 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Wed, 16 Aug 2017 23:50:24 -0700 Subject: [PATCH 18/55] install Rcpp earlier --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index a3fdd1f..21270e0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,5 +11,6 @@ addons: warnings_are_errors: true before_install: - tlmgr install index # for texlive and vignette? + - R -e 'install.packages("Rcpp", repos="http://cloud.r-project.org")' - make Rcpp - cd selectiveInference From c1aef63975841d882e0a68ec4e84382b0b4c7cd6 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 17 Aug 2017 00:26:42 -0700 Subject: [PATCH 19/55] using Rcpp for matrixcomps as well -- update1 and downdate1 -- examples now OK on Rcmd check --- selectiveInference/R/funs.common.R | 8 +---- selectiveInference/R/funs.lar.R | 9 +----- selectiveInference/src/Rcpp-matrixcomps.cpp | 34 ++++++++++++++++++++ selectiveInference/src/debias.h | 1 + selectiveInference/src/matrixcomps.c | 13 +++----- selectiveInference/src/matrixcomps.h | 12 +++++++ selectiveInference/src/symbols.rds | Bin 367 -> 0 bytes 7 files changed, 53 insertions(+), 24 deletions(-) create mode 100644 selectiveInference/src/Rcpp-matrixcomps.cpp create mode 100644 selectiveInference/src/matrixcomps.h delete mode 100644 selectiveInference/src/symbols.rds diff --git a/selectiveInference/R/funs.common.R b/selectiveInference/R/funs.common.R index 042e074..678d736 100644 --- a/selectiveInference/R/funs.common.R +++ b/selectiveInference/R/funs.common.R @@ -152,13 +152,7 @@ updateQR <- function(Q1,Q2,R,col) { n = ncol(Q1) k = ncol(Q2) - a = .C("update1", - Q2=as.double(Q2), - w=as.double(t(Q2)%*%col), - m=as.integer(m), - k=as.integer(k), - dup=FALSE, - package="selectiveInference") + a = update1_(as.matrix(Q2), t(Q2)%*%col, m, k) # Rcpp call Q2 = matrix(a$Q2,nrow=m) w = c(t(Q1)%*%col,a$w) diff --git a/selectiveInference/R/funs.lar.R b/selectiveInference/R/funs.lar.R index eb31b4e..a01b2d3 100644 --- a/selectiveInference/R/funs.lar.R +++ b/selectiveInference/R/funs.lar.R @@ -254,14 +254,7 @@ downdateQR <- function(Q1,Q2,R,col) { m = nrow(Q1) n = ncol(Q1) - a = .C("downdate1", - Q1=as.double(Q1), - R=as.double(R), - col=as.integer(col-1), - m=as.integer(m), - n=as.integer(n), - dup=FALSE, - package="selectiveInference") + a = downdate1_(as.matrix(Q1), R, col, m, n) # Rcpp call Q1 = matrix(a$Q1,nrow=m) R = matrix(a$R,nrow=n) diff --git a/selectiveInference/src/Rcpp-matrixcomps.cpp b/selectiveInference/src/Rcpp-matrixcomps.cpp new file mode 100644 index 0000000..045b4b7 --- /dev/null +++ b/selectiveInference/src/Rcpp-matrixcomps.cpp @@ -0,0 +1,34 @@ +#include // need to include the main Rcpp header file +#include // where update1, downdate1 are defined + +// [[Rcpp::export]] +Rcpp::List update1_(Rcpp::NumericMatrix Q2, + Rcpp::NumericVector w, + int m, + int k) { + + update1(Q2.begin(), + w.begin(), + m, + k); + + return(Rcpp::List::create(Rcpp::Named("Q2") = Q2, + Rcpp::Named("w") = w)); +} + +// [[Rcpp::export]] +Rcpp::List downdate1_(Rcpp::NumericMatrix Q1, + Rcpp::NumericMatrix R, + int j0, + int m, + int n) { + + downdate1(Q1.begin(), + R.begin(), + j0, + m, + n); + + return(Rcpp::List::create(Rcpp::Named("Q1") = Q1, + Rcpp::Named("R") = R)); +} diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index 48f9753..ed14cf5 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -13,6 +13,7 @@ int find_one_row_(double *Sigma, /* A covariance matrix: X^TX/n */ double *theta, /* current value */ int maxiter, /* how many iterations */ int row); /* which coordinate to update: 0-based */ + #ifdef __cplusplus } /* extern "C" */ #endif /* __cplusplus */ diff --git a/selectiveInference/src/matrixcomps.c b/selectiveInference/src/matrixcomps.c index 4a516a1..bec3506 100644 --- a/selectiveInference/src/matrixcomps.c +++ b/selectiveInference/src/matrixcomps.c @@ -53,11 +53,8 @@ void colrot(double *A, int j1, int j2, int m, int n, int i1, int i2, double c, d // where Q1 is m x n and R is n x n. The other part of // the Q matrix, Q2 m x (m-n), isn't needed so it isn't // passed for efficiency -void downdate1(double *Q1, double *R, int *j0p, int *mp, int *np) { - int j0,m,n,j; - j0 = *j0p; - m = *mp; - n = *np; +void downdate1(double *Q1, double *R, int j0, int m, int n) { + int j; double c,s; for (j=j0+1; j=1; j--) { diff --git a/selectiveInference/src/matrixcomps.h b/selectiveInference/src/matrixcomps.h new file mode 100644 index 0000000..8632e3e --- /dev/null +++ b/selectiveInference/src/matrixcomps.h @@ -0,0 +1,12 @@ +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + +void update1(double *Q2, double *w, int m, int k); + +void downdate1(double *Q1, double *R, int j0, int m, int n); + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ diff --git a/selectiveInference/src/symbols.rds b/selectiveInference/src/symbols.rds deleted file mode 100644 index 06b0e85e927ffbb186121204a3be110d68965ede..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 367 zcmV-#0g(P5iwFP!000001HF`8PlGTNhRes4%`92sg}43!BMkib8)hcnHDxQak_6JIPWlgp?6ULIF7c6t)_l2M@%=qRk*KdBZ0#Ng2zV>P#g%js+AFbJLu%b;B1>Aj(q=rLtRA?|{WKk$T84 z()a@`zE^&?C?~o(^2)7UXU|@{XM1 Date: Thu, 17 Aug 2017 12:20:11 -0700 Subject: [PATCH 20/55] using a warm start if available --- selectiveInference/R/funs.fixed.R | 28 ++++++++++++++++------ selectiveInference/src/Rcpp-debias.cpp | 33 ++++++++++---------------- selectiveInference/src/debias.h | 15 ++++++++---- 3 files changed, 45 insertions(+), 31 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index d0bc861..592d61d 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -293,10 +293,13 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold mu.stop <- 0; try.no <- 1; incr <- 0; + + output = NULL + while ((mu.stop != 1)&&(try.no<10)){ last.beta <- beta - output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter) - beta <- output$optsol + output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, soln_result=output) # uses a warm start + beta <- output$soln iter <- output$iter if (isgiven==1){ mu.stop <- 1 @@ -335,15 +338,26 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold return(M) } -InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50) { +InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL) { + + # If soln_result is not Null, it is used as a warm start. + # It should be a list + # with entries "soln" and "Sigma_soln" + + if (is.null(soln_result)) { + soln = rep(0, nrow(Sigma)) + Sigma_soln = rep(0, nrow(Sigma)) + } + else { + soln = soln_result$soln + Sigma_soln = soln_result$Sigma_soln + } - result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter) # C function uses 0-based indexing - theta = result$theta - feasible_val = result$feasible_val + result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter, soln, Sigma_soln) # C function uses 0-based indexing # Check feasibility - if (feasible_val > 1.01 * mu) { + if (!result$kkt_check) { warning("Solution for row of M does not seem to be feasible") } diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index bb8b5cd..b529c4a 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -5,7 +5,9 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, int row, // 0-based double bound, - int maxiter) { + int maxiter, + Rcpp::NumericVector theta, + Rcpp::NumericVector Sigma_theta) { int nrow = Sigma.nrow(); // number of features @@ -25,11 +27,6 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, sigma_p[irow] = Sigma(irow, irow); } - // The solution and its product with Sigma - - Rcpp::NumericVector theta(nrow); - Rcpp::NumericVector Sigma_theta(nrow); - // Now call our C function int iter = find_one_row_((double *) Sigma.begin(), @@ -45,19 +42,15 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, // Check whether feasible - double feasible_val = 0; - double val; - for (irow=0; irow feasible_val) { - feasible_val = fabs(val); - } - } + int kkt_check = check_KKT(theta.begin(), + Sigma_theta.begin(), + nrow, + row, + bound); + + return(Rcpp::List::create(Rcpp::Named("soln") = theta, + Rcpp::Named("Sigma_soln") = Sigma_theta, + Rcpp::Named("iter") = iter, + Rcpp::Named("kkt_check") = kkt_check)); - return(Rcpp::List::create(Rcpp::Named("optsol") = theta, - Rcpp::Named("feasible_val") = feasible_val, - Rcpp::Named("iter") = iter)); } diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index ed14cf5..958df0e 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -8,11 +8,18 @@ int find_one_row_(double *Sigma, /* A covariance matrix: X^TX/n */ double *Sigma_theta, /* Sigma times theta */ int *ever_active, /* Ever active set: 0-based */ int *nactive_ptr, /* Size of ever active set */ - int nrow, /* How many rows in Sigma */ - double bound, /* feasibility parameter */ + int nrow, /* How many rows in Sigma */ + double bound, /* feasibility parameter */ double *theta, /* current value */ - int maxiter, /* how many iterations */ - int row); /* which coordinate to update: 0-based */ + int maxiter, /* how many iterations */ + int row); /* which coordinate to update: 0-based */ + +int check_KKT(double *theta, /* current theta */ + double *Sigma_theta, /* Sigma times theta */ + int nrow, /* how many rows in Sigma */ + int row, /* which row: 0-based */ + double bound); /* Lagrange multipler for \ell_1 */ + #ifdef __cplusplus } /* extern "C" */ From a8805b8e0685773ca49f44832cff6a6d1db88906 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Thu, 17 Aug 2017 17:24:20 -0700 Subject: [PATCH 21/55] trying to find segfault --- selectiveInference/R/funs.fixed.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 592d61d..4724f08 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -298,6 +298,10 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold while ((mu.stop != 1)&&(try.no<10)){ last.beta <- beta + if (!is.null(output)) { + print(c('at least second try', try.no)) + } + print(output) output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, soln_result=output) # uses a warm start beta <- output$soln iter <- output$iter From abf03513c56d897078132d8922b1d03c95ff43db Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 08:14:31 -0700 Subject: [PATCH 22/55] BF: order of arguments to is_active, renamed the function as well --- selectiveInference/src/debias.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index d2f671d..5701b4c 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -1,3 +1,4 @@ +#include #include // for fabs // Find an approximate row of \hat{Sigma}^{-1} @@ -41,6 +42,7 @@ double objective(double *Sigma, /* A covariance matrix: X^TX/n */ active_col = *active_col_ptr; theta_col_ptr = ((double *) theta + active_col); + fprintf(stderr, "%d %d %d\n", active_row, active_col, nactive); Sigma_ptr = ((double *) Sigma + nrow * active_col + active_row); // Matrices are column-major order value += 0.5 * (*Sigma_ptr) * (*theta_row_ptr) * (*theta_col_ptr); @@ -54,9 +56,11 @@ double objective(double *Sigma, /* A covariance matrix: X^TX/n */ return(value); } -int is_active(int coord, - int *nactive_ptr, - int *ever_active) { +// Check if active and add it to active list if necessary + +int update_ever_active(int coord, + int *ever_active, + int *nactive_ptr) { int iactive; int active_var; int nactive = *nactive_ptr; @@ -65,9 +69,17 @@ int is_active(int coord, for (iactive=0; iactive Date: Fri, 18 Aug 2017 08:24:49 -0700 Subject: [PATCH 23/55] removing debug statements --- selectiveInference/R/funs.fixed.R | 4 --- selectiveInference/src/debias.c | 50 ++++++++++++++++++------------- 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 4724f08..592d61d 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -298,10 +298,6 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold while ((mu.stop != 1)&&(try.no<10)){ last.beta <- beta - if (!is.null(output)) { - print(c('at least second try', try.no)) - } - print(output) output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, soln_result=output) # uses a warm start beta <- output$soln iter <- output$iter diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 5701b4c..08c46ff 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -1,4 +1,3 @@ -#include #include // for fabs // Find an approximate row of \hat{Sigma}^{-1} @@ -42,7 +41,6 @@ double objective(double *Sigma, /* A covariance matrix: X^TX/n */ active_col = *active_col_ptr; theta_col_ptr = ((double *) theta + active_col); - fprintf(stderr, "%d %d %d\n", active_row, active_col, nactive); Sigma_ptr = ((double *) Sigma + nrow * active_col + active_row); // Matrices are column-major order value += 0.5 * (*Sigma_ptr) * (*theta_row_ptr) * (*theta_col_ptr); @@ -70,11 +68,12 @@ int update_ever_active(int coord, active_var = (*ever_active_ptr); if (active_var == coord) { - fprintf(stderr, "%d %d before\n", *nactive_ptr); + // Add it to the active set and increment the + // number of active variables + ever_active_ptr = ((int *) ever_active + *nactive_ptr); *ever_active_ptr = coord; *nactive_ptr += 1; - fprintf(stderr, "%d %d after\n", *nactive_ptr, coord); return(1); } @@ -126,7 +125,6 @@ int check_KKT(double *theta, /* current theta */ return(fail == 0); } - double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n */ double *Sigma_diag, /* Diagonal entries of Sigma */ double *Sigma_theta, /* Sigma times theta */ @@ -136,7 +134,8 @@ double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n double bound, /* feasibility parameter */ double *theta, /* current value */ int row, /* which row: 0-based */ - int coord) /* which coordinate to update: 0-based */ + int coord, /* which coordinate to update: 0-based */ + int is_active) /* Is this part of ever_active */ { double delta; @@ -185,7 +184,9 @@ double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n // Add to active set if necessary - update_ever_active(coord, ever_active, nactive_ptr); + if (!is_active) { + update_ever_active(coord, ever_active, nactive_ptr); + } // Update the linear term @@ -224,6 +225,8 @@ int find_one_row_(double *Sigma, /* A covariance matrix: X^TX/n */ int iter = 0; int icoord = 0; + int iactive = 0; + int *active_ptr; double old_value = objective(Sigma, ever_active, @@ -237,19 +240,24 @@ int find_one_row_(double *Sigma, /* A covariance matrix: X^TX/n */ for (iter=0; iter Date: Fri, 18 Aug 2017 08:43:12 -0700 Subject: [PATCH 24/55] BF: percent signs in example --- selectiveInference/man/fixedLassoInf.Rd | 35 +++++++++++++++++++++---- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/selectiveInference/man/fixedLassoInf.Rd b/selectiveInference/man/fixedLassoInf.Rd index d675c3f..3633524 100644 --- a/selectiveInference/man/fixedLassoInf.Rd +++ b/selectiveInference/man/fixedLassoInf.Rd @@ -145,7 +145,7 @@ p = 10 sigma = 1 x = matrix(rnorm(n*p),n,p) -x=scale(x,TRUE,TRUE) +x = scale(x,TRUE,TRUE) beta = c(3,2,rep(0,p-2)) y = x\%*\%beta + sigma*rnorm(n) @@ -165,10 +165,10 @@ out ## as above, but use lar function instead to get initial ## lasso fit (should get same results) - lfit = lar(x,y,normalize=FALSE) - beta = coef(lfit,s=lambda,mode="lambda") - out2 = fixedLassoInf(x,y,beta,lambda,sigma=sigma) - out2 +lfit = lar(x,y,normalize=FALSE) +beta = coef(lfit,s=lambda,mode="lambda") +out2 = fixedLassoInf(x,y,beta,lambda,sigma=sigma) +out2 ## mimic different penalty factors by first scaling x set.seed(43) @@ -249,5 +249,30 @@ status=sample(c(0,1),size=n,replace=TRUE) # compute fixed lambda p-values and selection intervals out = fixedLassoInf(x,tim,beta_hat,lambda,status=status,family="cox") out + +# Debiased lasso or "full" + +n = 50 +p = 100 +sigma = 1 + +x = matrix(rnorm(n*p),n,p) +x = scale(x,TRUE,TRUE) + +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# first run glmnet +gfit = glmnet(x, y, standardize=FALSE, intercept=FALSE) + +# extract coef for a given lambda; note the 1/n factor! +# (and we don't save the intercept term) +lambda = 2.8 +beta = coef(gfit, s=lambda/n, exact=TRUE)[-1] + +# compute fixed lambda p-values and selection intervals +out = fixedLassoInf(x, y, beta, lambda, sigma=sigma, type='full', intercept=FALSE) +out + } \ No newline at end of file From 7360386212cb047287211545c3eca4f8e8b477f4 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 09:02:24 -0700 Subject: [PATCH 25/55] smaller changes in mu per step in line search --- selectiveInference/R/funs.fixed.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 592d61d..db5b915 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -270,8 +270,7 @@ fixedLasso.poly= ### Functions borrowed and slightly modified from lasso_inference.R ## Approximates inverse covariance matrix theta -InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE) { - # InverseLinfty <- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE) { +InverseLinfty <- function(sigma, n, e, resol=1.2, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE) { isgiven <- 1; if (is.null(mu)){ isgiven <- 0; From a9a8bbbd4208086426a29ea3cd7dbaaacb807426 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 09:40:54 -0700 Subject: [PATCH 26/55] renaming Sigma_theta to gradient, making sure we loop through active set --- selectiveInference/src/debias.c | 35 +++++++++++++++++---------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 08c46ff..5b1592e 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -65,6 +65,7 @@ int update_ever_active(int coord, int *ever_active_ptr = ever_active; for (iactive=0; iactive Date: Fri, 18 Aug 2017 09:50:51 -0700 Subject: [PATCH 27/55] more renaming --- selectiveInference/src/debias.c | 36 ++++++++++++++++----------------- selectiveInference/src/debias.h | 14 ++++++------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 5b1592e..e043934 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -11,7 +11,7 @@ // Update one coordinate double objective(double *Sigma, /* A covariance matrix: X^TX/n */ - int *ever_active, /* Ever active set: 0-based */ + int *ever_active_ptr, /* Ever active set: 0-based */ int *nactive_ptr, /* Size of ever active set */ int nrow, /* how many rows in Sigma */ int row, /* which row: 0-based */ @@ -31,13 +31,13 @@ double objective(double *Sigma, /* A covariance matrix: X^TX/n */ for (irow=0; irow Date: Fri, 18 Aug 2017 10:11:38 -0700 Subject: [PATCH 28/55] BF: forgot to rename in Rcpp file, passing active and ever_active from R --- selectiveInference/R/funs.fixed.R | 14 ++++++++++---- selectiveInference/src/Rcpp-debias.cpp | 23 ++++++++++++----------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index db5b915..460cd0b 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -341,18 +341,24 @@ InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL) { # If soln_result is not Null, it is used as a warm start. # It should be a list - # with entries "soln" and "Sigma_soln" + # with entries "soln", "gradient", "ever_active", "nactive" if (is.null(soln_result)) { soln = rep(0, nrow(Sigma)) - Sigma_soln = rep(0, nrow(Sigma)) + gradient = rep(0, nrow(Sigma)) + ever_active = rep(0, nrow(Sigma)) + ever_active[1] = i-1 # 0-based + ever_active = as.integer(ever_active) + nactive = as.integer(1) } else { soln = soln_result$soln - Sigma_soln = soln_result$Sigma_soln + gradient = soln_result$gradient + ever_active = as.integer(soln_result$ever_active) + nactive = as.integer(soln_result$nactive) } - result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter, soln, Sigma_soln) # C function uses 0-based indexing + result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter, soln, gradient, ever_active, nactive) # C function uses 0-based indexing # Check feasibility diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index b529c4a..b6c80d2 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -7,31 +7,30 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, double bound, int maxiter, Rcpp::NumericVector theta, - Rcpp::NumericVector Sigma_theta) { + Rcpp::NumericVector gradient, + Rcpp::IntegerVector ever_active, + Rcpp::IntegerVector nactive + ) { int nrow = Sigma.nrow(); // number of features // Active set int irow; - Rcpp::IntegerVector nactive(1); // An array so we can easily modify it - Rcpp::IntegerVector ever_active(1); - int *ever_active_p = ever_active.begin(); - *ever_active_p = row; // Extract the diagonal Rcpp::NumericVector Sigma_diag(nrow); - double *sigma_p = Sigma_diag.begin(); + double *sigma_diag_p = Sigma_diag.begin(); for (irow=0; irow Date: Fri, 18 Aug 2017 10:46:39 -0700 Subject: [PATCH 29/55] more renaming --- selectiveInference/src/debias.c | 44 +++++++++++++++++++-------------- selectiveInference/src/debias.h | 18 +++++++------- 2 files changed, 34 insertions(+), 28 deletions(-) diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index e043934..1b841e0 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -10,7 +10,7 @@ // Therefore we don't have to negate the answer to get theta. // Update one coordinate -double objective(double *Sigma, /* A covariance matrix: X^TX/n */ +double objective(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ int *ever_active_ptr, /* Ever active set: 0-based */ int *nactive_ptr, /* Size of ever active set */ int nrow, /* how many rows in Sigma */ @@ -20,7 +20,7 @@ double objective(double *Sigma, /* A covariance matrix: X^TX/n */ { int irow, icol; double value = 0; - double *Sigma_ptr = Sigma; + double *Sigma_ptr_tmp = Sigma_ptr; double *theta_row_ptr, *theta_col_ptr; int *active_row_ptr, *active_col_ptr; int active_row, active_col; @@ -41,9 +41,9 @@ double objective(double *Sigma, /* A covariance matrix: X^TX/n */ active_col = *active_col_ptr; theta_col_ptr = ((double *) theta + active_col); - Sigma_ptr = ((double *) Sigma + nrow * active_col + active_row); // Matrices are column-major order + Sigma_ptr_tmp = ((double *) Sigma_ptr + nrow * active_col + active_row); // Matrices are column-major order - value += 0.5 * (*Sigma_ptr) * (*theta_row_ptr) * (*theta_col_ptr); + value += 0.5 * (*Sigma_ptr_tmp) * (*theta_row_ptr) * (*theta_col_ptr); } value = value + bound * fabs((*theta_row_ptr)); // the \ell_1 term } @@ -126,8 +126,8 @@ int check_KKT(double *theta, /* current theta */ return(fail == 0); } -double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n */ - double *Sigma_diag, /* Diagonal entries of Sigma */ +double update_one_coord(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ + double *Sigma_diag_ptr, /* Diagonal entries of Sigma */ double *gradient_ptr, /* Sigma times theta */ int *ever_active_ptr, /* Ever active set: 0-based */ int *nactive_ptr, /* Size of ever active set */ @@ -143,12 +143,12 @@ double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n double linear_term = 0; double value = 0; double old_value; - double *Sigma_ptr; + double *Sigma_ptr_tmp; double *gradient_ptr_tmp; double *theta_ptr; int icol = 0; - double *quadratic_ptr = ((double *) Sigma_diag + coord); + double *quadratic_ptr = ((double *) Sigma_diag_ptr + coord); double quadratic_term = *quadratic_ptr; // int *ever_active_ptr_tmp; @@ -194,13 +194,13 @@ double update_one_coord(double *Sigma, /* A covariance matrix: X^TX/n if (fabs(old_value - value) > 1.e-6 * (fabs(value) + fabs(old_value))) { delta = value - old_value; - Sigma_ptr = ((double *) Sigma + coord * nrow); + Sigma_ptr_tmp = ((double *) Sigma_ptr + coord * nrow); gradient_ptr_tmp = ((double *) gradient_ptr); for (icol=0; icol Date: Fri, 18 Aug 2017 11:28:31 -0700 Subject: [PATCH 30/55] trying to get arbitrary linfunc --- selectiveInference/R/funs.fixed.R | 12 +++- selectiveInference/src/Rcpp-debias.cpp | 2 + selectiveInference/src/debias.c | 87 +++++++++++++++----------- selectiveInference/src/debias.h | 3 +- 4 files changed, 65 insertions(+), 39 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 460cd0b..0b6c018 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -297,6 +297,7 @@ InverseLinfty <- function(sigma, n, e, resol=1.2, mu=NULL, maxiter=50, threshold while ((mu.stop != 1)&&(try.no<10)){ last.beta <- beta + #print(c("trying ", try.no)) output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, soln_result=output) # uses a warm start beta <- output$soln iter <- output$iter @@ -358,7 +359,16 @@ InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL) { nactive = as.integer(soln_result$nactive) } - result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter, soln, gradient, ever_active, nactive) # C function uses 0-based indexing + linear_func = rep(0, p) + linear_func[i] = -1 + linear_func = as.numeric(linear_func) + + #print(c(soln, "soln")) + #print(c(gradient, "grad")) + #print(c(ever_active, "ever_active")) + #print(c(nactive, "nactive")) + #print(c(linear_func, "linear_func")) + result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter, soln, linear_func, gradient, ever_active, nactive) # C function uses 0-based indexing # Check feasibility diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index b6c80d2..83dd09c 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -7,6 +7,7 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, double bound, int maxiter, Rcpp::NumericVector theta, + Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive @@ -29,6 +30,7 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, // Now call our C function int iter = find_one_row_((double *) Sigma.begin(), + (double *) linear_func.begin(), (double *) Sigma_diag.begin(), (double *) gradient.begin(), (int *) ever_active.begin(), diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 1b841e0..0b34436 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -1,3 +1,4 @@ +#include #include // for fabs // Find an approximate row of \hat{Sigma}^{-1} @@ -11,16 +12,18 @@ // Update one coordinate double objective(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ + double *linear_func_ptr, /* Linear term in objective */ int *ever_active_ptr, /* Ever active set: 0-based */ - int *nactive_ptr, /* Size of ever active set */ - int nrow, /* how many rows in Sigma */ - int row, /* which row: 0-based */ - double bound, /* Lagrange multipler for \ell_1 */ - double *theta) /* current value */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* how many rows in Sigma */ + int row, /* which row: 0-based */ + double bound, /* Lagrange multipler for \ell_1 */ + double *theta) /* current value */ { int irow, icol; double value = 0; double *Sigma_ptr_tmp = Sigma_ptr; + double *linear_func_ptr_tmp = linear_func_ptr; double *theta_row_ptr, *theta_col_ptr; int *active_row_ptr, *active_col_ptr; int active_row, active_col; @@ -45,12 +48,15 @@ double objective(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ value += 0.5 * (*Sigma_ptr_tmp) * (*theta_row_ptr) * (*theta_col_ptr); } - value = value + bound * fabs((*theta_row_ptr)); // the \ell_1 term - } + value += bound * fabs((*theta_row_ptr)); // the \ell_1 term + + // The linear term in the objective - theta_row_ptr = ((double *) theta + row); - value -= (*theta_row_ptr); // the elementary basis vector term + value += (*linear_func_ptr_tmp) * (*theta_row_ptr); + linear_func_ptr_tmp++; + } + return(value); } @@ -66,19 +72,22 @@ int update_ever_active(int coord, for (iactive=0; iactive Date: Fri, 18 Aug 2017 12:09:13 -0700 Subject: [PATCH 31/55] can solve arbitrary LASSO now --- selectiveInference/R/funs.fixed.R | 11 +++++---- selectiveInference/src/Rcpp-debias.cpp | 1 + selectiveInference/src/debias.c | 32 +++++++------------------- selectiveInference/src/debias.h | 6 ++--- 4 files changed, 18 insertions(+), 32 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 0b6c018..06c7a7d 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -346,28 +346,29 @@ InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL) { if (is.null(soln_result)) { soln = rep(0, nrow(Sigma)) - gradient = rep(0, nrow(Sigma)) ever_active = rep(0, nrow(Sigma)) ever_active[1] = i-1 # 0-based ever_active = as.integer(ever_active) nactive = as.integer(1) + linear_func = rep(0, p) + linear_func[i] = -1 + linear_func = as.numeric(linear_func) + gradient = 1. * linear_func } else { soln = soln_result$soln gradient = soln_result$gradient ever_active = as.integer(soln_result$ever_active) nactive = as.integer(soln_result$nactive) + linear_func = soln_result$linear_func } - linear_func = rep(0, p) - linear_func[i] = -1 - linear_func = as.numeric(linear_func) - #print(c(soln, "soln")) #print(c(gradient, "grad")) #print(c(ever_active, "ever_active")) #print(c(nactive, "nactive")) #print(c(linear_func, "linear_func")) + result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter, soln, linear_func, gradient, ever_active, nactive) # C function uses 0-based indexing # Check feasibility diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index 83dd09c..be02abd 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -51,6 +51,7 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, return(Rcpp::List::create(Rcpp::Named("soln") = theta, Rcpp::Named("gradient") = gradient, + Rcpp::Named("linear_func") = linear_func, Rcpp::Named("iter") = iter, Rcpp::Named("kkt_check") = kkt_check, Rcpp::Named("ever_active") = ever_active, diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 0b34436..b0669f8 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -5,7 +5,8 @@ // Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf -// Dual problem: \text{min}_{\theta} 1/2 \theta^T \Sigma \theta - e_i^T\theta + \mu \|\theta\|_1 +// Dual problem: \text{min}_{\theta} 1/2 \theta^T \Sigma \theta - l^T\theta + \mu \|\theta\|_1 +// where l is `linear_func` below // This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf // Therefore we don't have to negate the answer to get theta. @@ -16,7 +17,6 @@ double objective(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ int *ever_active_ptr, /* Ever active set: 0-based */ int *nactive_ptr, /* Size of ever active set */ int nrow, /* how many rows in Sigma */ - int row, /* which row: 0-based */ double bound, /* Lagrange multipler for \ell_1 */ double *theta) /* current value */ { @@ -52,8 +52,8 @@ double objective(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ // The linear term in the objective + linear_func_ptr_tmp = ((double *) linear_func_ptr + active_row); value += (*linear_func_ptr_tmp) * (*theta_row_ptr); - linear_func_ptr_tmp++; } @@ -74,7 +74,6 @@ int update_ever_active(int coord, ever_active_ptr_tmp = ((int *) ever_active_ptr + iactive); active_var = *ever_active_ptr_tmp; if (active_var == coord) { - // fprintf(stderr, "blah %d %d %d\n", coord, active_var, *nactive_ptr); return(1); } } @@ -92,11 +91,10 @@ int update_ever_active(int coord, return(0); } -int check_KKT(double *theta, /* current theta */ +int check_KKT(double *theta, /* current theta */ double *gradient_ptr, /* Sigma times theta */ - int nrow, /* how many rows in Sigma */ - int row, /* which row: 0-based */ - double bound) /* Lagrange multipler for \ell_1 */ + int nrow, /* how many rows in Sigma */ + double bound) /* Lagrange multipler for \ell_1 */ { // First check inactive @@ -113,9 +111,6 @@ int check_KKT(double *theta, /* current theta */ // Compute this coordinate of the gradient gradient = *gradient_ptr_tmp; - if (row == irow) { - gradient -= 1; - } if (*theta_ptr != 0) { // these coordinates of gradients should be equal to -bound if ((*theta_ptr > 0) && (fabs(gradient + bound) > (1. + tol) * bound)) { @@ -144,7 +139,6 @@ double update_one_coord(double *Sigma_ptr, /* A covariance matrix: X^T int nrow, /* How many rows in Sigma */ double bound, /* feasibility parameter */ double *theta, /* current value */ - int row, /* which row: 0-based */ int coord, /* which coordinate to update: 0-based */ int is_active) /* Is this part of ever_active */ { @@ -161,8 +155,6 @@ double update_one_coord(double *Sigma_ptr, /* A covariance matrix: X^T double *quadratic_ptr = ((double *) Sigma_diag_ptr + coord); double quadratic_term = *quadratic_ptr; - // int *ever_active_ptr_tmp; - gradient_ptr_tmp = ((double *) gradient_ptr + coord); linear_term = *gradient_ptr_tmp; @@ -172,9 +164,8 @@ double update_one_coord(double *Sigma_ptr, /* A covariance matrix: X^T // The coord entry of gradient_ptr term has a diagonal term in it: // Sigma[coord, coord] * theta[coord] // This removes it. - linear_term -= quadratic_term * old_value; - linear_term += *((double *) linear_func_ptr + coord); + linear_term -= quadratic_term * old_value; // Now soft-threshold the coord entry of theta @@ -229,8 +220,7 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ int nrow, /* How many rows in Sigma */ double bound, /* feasibility parameter */ double *theta, /* current value */ - int maxiter, /* how many iterations */ - int row) /* which coordinate to update: 0-based */ + int maxiter) { int iter = 0; @@ -243,7 +233,6 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ ever_active_ptr, nactive_ptr, nrow, - row, bound, theta); double new_value; @@ -265,7 +254,6 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ nrow, bound, theta, - row, *active_ptr, 1); active_ptr++; @@ -276,7 +264,6 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ if (check_KKT(theta, gradient_ptr, nrow, - row, bound) == 1) { break; } @@ -294,7 +281,6 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ nrow, bound, theta, - row, icoord, 0); } @@ -304,7 +290,6 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ if (check_KKT(theta, gradient_ptr, nrow, - row, bound) == 1) { break; } @@ -314,7 +299,6 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ ever_active_ptr, nactive_ptr, nrow, - row, bound, theta); diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index c20195c..bb9f6d6 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -12,13 +12,13 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ int nrow, /* How many rows in Sigma */ double bound, /* feasibility parameter */ double *theta, /* current value */ - int maxiter, /* how many iterations */ - int row); /* which coordinate to update: 0-based */ + int maxiter) //, /* how many iterations */ +// int row); /* which coordinate to update: 0-based */ int check_KKT(double *theta, /* current theta */ double *gradient_ptr, /* Current gradient of quadratic loss */ int nrow, /* how many rows in Sigma */ - int row, /* which row: 0-based */ + // int row, /* which row: 0-based */ double bound); /* Lagrange multipler for \ell_1 */ From d248a8f88c2e4e7e12a94369ae02edc95838a3f8 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 12:17:44 -0700 Subject: [PATCH 32/55] BF: updating active set --- selectiveInference/src/debias.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 1b841e0..107ae5d 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -69,17 +69,20 @@ int update_ever_active(int coord, active_var = (*ever_active_ptr_tmp); if (active_var == coord) { - // Add it to the active set and increment the - // number of active variables - - ever_active_ptr_tmp = ((int *) ever_active_ptr + *nactive_ptr); - *ever_active_ptr_tmp = coord; - *nactive_ptr += 1; - return(1); } } + // If we have not returned yet, this variable + // was not in ever_active + + // Add it to the active set and increment the + // number of active variables + + ever_active_ptr_tmp = ((int *) ever_active_ptr + *nactive_ptr); + *ever_active_ptr_tmp = coord; + *nactive_ptr += 1; + return(0); } From aa515c972c8e1f918d6572338eae25efc28af16f Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 12:24:22 -0700 Subject: [PATCH 33/55] cosmetic change --- selectiveInference/src/debias.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 107ae5d..532dbef 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -68,7 +68,6 @@ int update_ever_active(int coord, ever_active_ptr_tmp = ((int *) ever_active_ptr + iactive); active_var = (*ever_active_ptr_tmp); if (active_var == coord) { - return(1); } } @@ -188,7 +187,7 @@ double update_one_coord(double *Sigma_ptr, /* A covariance matrix: X^T // Add to active set if necessary - if (!is_active) { + if (is_active == 0) { update_ever_active(coord, ever_active_ptr, nactive_ptr); } From 5d5e8f3c4a0d1c0ed70097d3480f778a208c36f9 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 12:30:59 -0700 Subject: [PATCH 34/55] BF: fixing function signatures --- selectiveInference/src/debias.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index bb9f6d6..730b1fb 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -12,13 +12,11 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ int nrow, /* How many rows in Sigma */ double bound, /* feasibility parameter */ double *theta, /* current value */ - int maxiter) //, /* how many iterations */ -// int row); /* which coordinate to update: 0-based */ + int maxiter); int check_KKT(double *theta, /* current theta */ double *gradient_ptr, /* Current gradient of quadratic loss */ int nrow, /* how many rows in Sigma */ - // int row, /* which row: 0-based */ double bound); /* Lagrange multipler for \ell_1 */ From d1debcd5bcd7932fdff213b882d83a8b47c1a875 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 12:31:30 -0700 Subject: [PATCH 35/55] BF: fixing function signatures --- selectiveInference/src/Rcpp-debias.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index be02abd..db6e77e 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -38,15 +38,13 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, nrow, bound, (double *) theta.begin(), - maxiter, - row); + maxiter); // Check whether feasible int kkt_check = check_KKT(theta.begin(), gradient.begin(), nrow, - row, bound); return(Rcpp::List::create(Rcpp::Named("soln") = theta, From 15762af11f53be618e21c83af9db194db8f9a537 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 16:12:02 -0700 Subject: [PATCH 36/55] BF: KKT wrong for active conditions, still have debug statements --- selectiveInference/R/funs.fixed.R | 8 +--- selectiveInference/src/Rcpp-debias.cpp | 1 - selectiveInference/src/debias.c | 52 ++++++++++++++++++-------- 3 files changed, 37 insertions(+), 24 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 06c7a7d..e6e0c5f 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -363,13 +363,7 @@ InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL) { linear_func = soln_result$linear_func } - #print(c(soln, "soln")) - #print(c(gradient, "grad")) - #print(c(ever_active, "ever_active")) - #print(c(nactive, "nactive")) - #print(c(linear_func, "linear_func")) - - result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter, soln, linear_func, gradient, ever_active, nactive) # C function uses 0-based indexing + result = find_one_row_debiasingM(Sigma, mu, maxiter, soln, linear_func, gradient, ever_active, nactive) # C function uses 0-based indexing # Check feasibility diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index db6e77e..1095c29 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -3,7 +3,6 @@ // [[Rcpp::export]] Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, - int row, // 0-based double bound, int maxiter, Rcpp::NumericVector theta, diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index b0669f8..6af42c4 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -100,34 +100,52 @@ int check_KKT(double *theta, /* current theta */ int irow; int fail = 0; - double tol = 1.e-4; - double *theta_ptr, *gradient_ptr_tmp; + double tol = 1.e-5; + double *theta_ptr = theta; + double *gradient_ptr_tmp = gradient_ptr; double gradient; for (irow=0; irow 0) && (fabs(gradient + bound) > (1. + tol) * bound)) { - fail += 1; + fprintf(stderr, "how does it look %d %f %f\n", irow, *theta_ptr, gradient); + + // Compute this coordinate of the gradient + + if (fabs(*theta_ptr) > tol) { // these coordinates of gradients should be equal to \pm bound + fprintf(stderr, "active %f %f\n", fabs(fabs(gradient) - bound), bound); + if (fabs(fabs(gradient) - bound) > tol * bound) { + fprintf(stderr, "here1 %d %f %f\n", irow, *theta_ptr, gradient); + return(0); + // fail += 1; } - else if ((*theta_ptr < 0) && (fabs(gradient - bound) > (1. + tol) * bound)) { - fail += 1; + else if ((*theta_ptr > 0) && (gradient > 0)) { + fprintf(stderr, "here2 %d %f %f\n", irow, *theta_ptr, gradient); + return(0); + // fail += 1; + } + else if ((*theta_ptr < 0) && (gradient < 0)) { + fprintf(stderr, "here3 %d %f %f\n", irow, *theta_ptr, gradient); + return(0); + // fail += 1; } } else { + fprintf(stderr, "before4 %d %f %f\n", irow, *theta_ptr, gradient); if (fabs(gradient) > (1. + tol) * bound) { - fail += 1; + fprintf(stderr, "here4 %d %f %f\n", irow, *theta_ptr, gradient); + return(0); + // fail += 1; } } + theta_ptr++; + gradient_ptr_tmp++; } + fprintf(stderr, "OK now\n"); return(fail == 0); + } double update_one_coord(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ @@ -236,7 +254,7 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ bound, theta); double new_value; - double tol=1.e-5; + double tol=1.e-8; for (iter=0; iter 0)) { - break; - } +/* if (((old_value - new_value) < tol * fabs(new_value)) && (iter > 0)) { */ +/* break; */ +/* } */ old_value = new_value; } From 9872b45ae05b79358c17effeaf7b7b6cca395fe7 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 16:19:21 -0700 Subject: [PATCH 37/55] removing debug statements --- selectiveInference/src/debias.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 6af42c4..c9809e3 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -1,4 +1,3 @@ -#include #include // for fabs // Find an approximate row of \hat{Sigma}^{-1} @@ -109,41 +108,28 @@ int check_KKT(double *theta, /* current theta */ gradient = *gradient_ptr_tmp; - fprintf(stderr, "how does it look %d %f %f\n", irow, *theta_ptr, gradient); - // Compute this coordinate of the gradient if (fabs(*theta_ptr) > tol) { // these coordinates of gradients should be equal to \pm bound - fprintf(stderr, "active %f %f\n", fabs(fabs(gradient) - bound), bound); if (fabs(fabs(gradient) - bound) > tol * bound) { - fprintf(stderr, "here1 %d %f %f\n", irow, *theta_ptr, gradient); return(0); - // fail += 1; } else if ((*theta_ptr > 0) && (gradient > 0)) { - fprintf(stderr, "here2 %d %f %f\n", irow, *theta_ptr, gradient); return(0); - // fail += 1; } else if ((*theta_ptr < 0) && (gradient < 0)) { - fprintf(stderr, "here3 %d %f %f\n", irow, *theta_ptr, gradient); return(0); - // fail += 1; } } else { - fprintf(stderr, "before4 %d %f %f\n", irow, *theta_ptr, gradient); if (fabs(gradient) > (1. + tol) * bound) { - fprintf(stderr, "here4 %d %f %f\n", irow, *theta_ptr, gradient); return(0); - // fail += 1; } } theta_ptr++; gradient_ptr_tmp++; } - fprintf(stderr, "OK now\n"); return(fail == 0); } @@ -283,7 +269,6 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ gradient_ptr, nrow, bound) == 1) { - fprintf(stderr, "here5 \n"); break; } @@ -310,7 +295,6 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ gradient_ptr, nrow, bound) == 1) { - fprintf(stderr, "here6 \n"); break; } @@ -322,9 +306,9 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ bound, theta); -/* if (((old_value - new_value) < tol * fabs(new_value)) && (iter > 0)) { */ -/* break; */ -/* } */ + if (((old_value - new_value) < tol * fabs(new_value)) && (iter > 0)) { + break; + } old_value = new_value; } From be84df2690dc8a62f543693d80ff44dd2a751418 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 16:23:09 -0700 Subject: [PATCH 38/55] BF: KKT condition was wrong --- selectiveInference/src/debias.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 532dbef..0d71e49 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -95,7 +95,7 @@ int check_KKT(double *theta, /* current theta */ int irow; int fail = 0; - double tol = 1.e-4; + double tol = 1.e-6; double *theta_ptr, *gradient_ptr_tmp; double gradient; @@ -111,10 +111,10 @@ int check_KKT(double *theta, /* current theta */ } if (*theta_ptr != 0) { // these coordinates of gradients should be equal to -bound - if ((*theta_ptr > 0) && (fabs(gradient + bound) > (1. + tol) * bound)) { + if ((*theta_ptr > 0) && (fabs(gradient + bound) > tol * bound)) { fail += 1; } - else if ((*theta_ptr < 0) && (fabs(gradient - bound) > (1. + tol) * bound)) { + else if ((*theta_ptr < 0) && (fabs(gradient - bound) > tol * bound)) { fail += 1; } } From 0de59af0c8f90410ae621d981b5458e3b38e1b99 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 16:28:35 -0700 Subject: [PATCH 39/55] lenient tolerance --- selectiveInference/src/debias.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index c9809e3..52daea9 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -99,7 +99,7 @@ int check_KKT(double *theta, /* current theta */ int irow; int fail = 0; - double tol = 1.e-5; + double tol = 1.e-4; double *theta_ptr = theta; double *gradient_ptr_tmp = gradient_ptr; double gradient; From 190c0a0c1a491a265b203c258247b288e9bbaff7 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 16:35:18 -0700 Subject: [PATCH 40/55] added a few targets to makefile --- Makefile | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5c55bb8..91a89a4 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,14 @@ Rcpp: - rm -f selectiveInference/src/RcppExports.cpp - rm -f selectiveInference/R/RcppExports.R - Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')" \ No newline at end of file + Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')" + +install: Rcpp + R CMD install selectiveInference + +build: + R CMD build selectiveInference + +check: Rcpp build + R CMD build selectiveInference + R CMD check selectiveInference_1.2.2.tar.gz # fix this to be a script variable \ No newline at end of file From 2c4cf3a19487cdcfcc7ad9912894c3238d35130b Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 16:43:41 -0700 Subject: [PATCH 41/55] not checking objective --- selectiveInference/src/debias.c | 71 +++++++++++++++------------------ 1 file changed, 33 insertions(+), 38 deletions(-) diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 52daea9..92e2fb4 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -90,34 +90,32 @@ int update_ever_active(int coord, return(0); } -int check_KKT(double *theta, /* current theta */ +int check_KKT(double *theta, /* current theta */ double *gradient_ptr, /* Sigma times theta */ - int nrow, /* how many rows in Sigma */ - double bound) /* Lagrange multipler for \ell_1 */ + int nrow, /* how many rows in Sigma */ + double bound) /* Lagrange multipler for \ell_1 */ { // First check inactive int irow; int fail = 0; - double tol = 1.e-4; - double *theta_ptr = theta; - double *gradient_ptr_tmp = gradient_ptr; + double tol = 1.e-6; + double *theta_ptr, *gradient_ptr_tmp; double gradient; for (irow=0; irow tol) { // these coordinates of gradients should be equal to \pm bound - if (fabs(fabs(gradient) - bound) > tol * bound) { - return(0); - } - else if ((*theta_ptr > 0) && (gradient > 0)) { + gradient = *gradient_ptr_tmp; + + if (*theta_ptr != 0) { // these coordinates of gradients should be equal to -bound + if ((*theta_ptr > 0) && (fabs(gradient + bound) > tol * bound)) { return(0); } - else if ((*theta_ptr < 0) && (gradient < 0)) { + else if ((*theta_ptr < 0) && (fabs(gradient - bound) > tol * bound)) { return(0); } } @@ -126,12 +124,9 @@ int check_KKT(double *theta, /* current theta */ return(0); } } - theta_ptr++; - gradient_ptr_tmp++; } - return(fail == 0); - + return(1); } double update_one_coord(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ @@ -232,13 +227,13 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ int iactive = 0; int *active_ptr; - double old_value = objective(Sigma_ptr, - linear_func_ptr, - ever_active_ptr, - nactive_ptr, - nrow, - bound, - theta); +/* double old_value = objective(Sigma_ptr, */ +/* linear_func_ptr, */ +/* ever_active_ptr, */ +/* nactive_ptr, */ +/* nrow, */ +/* bound, */ +/* theta); */ double new_value; double tol=1.e-8; @@ -298,19 +293,19 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ break; } - new_value = objective(Sigma_ptr, - linear_func_ptr, - ever_active_ptr, - nactive_ptr, - nrow, - bound, - theta); - - if (((old_value - new_value) < tol * fabs(new_value)) && (iter > 0)) { - break; - } - - old_value = new_value; +/* new_value = objective(Sigma_ptr, */ +/* linear_func_ptr, */ +/* ever_active_ptr, */ +/* nactive_ptr, */ +/* nrow, */ +/* bound, */ +/* theta); */ + +/* if (((old_value - new_value) < tol * fabs(new_value)) && (iter > 0)) { */ +/* break; */ +/* } */ + +// old_value = new_value; } return(iter); } From 9ebcaabfce17a0cb7e4d48506b0695678ec495d8 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 18:00:52 -0700 Subject: [PATCH 42/55] making Rcpp in Makevars now --- selectiveInference/src/Makevars | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/selectiveInference/src/Makevars b/selectiveInference/src/Makevars index c68d0bd..cde36c9 100644 --- a/selectiveInference/src/Makevars +++ b/selectiveInference/src/Makevars @@ -2,7 +2,10 @@ PKG_CFLAGS= -I. PKG_CPPFLAGS= -I. PKG_LIBS=-L. -$(SHLIB): Rcpp-debias.o RcppExports.o debias.o +$(SHLIB): Rcpp Rcpp-matrixcomps.o Rcpp-debias.o RcppExports.o debias.o clean: rm -f *o + +Rcpp: + Rscript -e "library(Rcpp); Rcpp::compileAttributes('..')" \ No newline at end of file From 735c4d6742b8782c7f1e8af328c0ab3bf3fca2e5 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 18:01:44 -0700 Subject: [PATCH 43/55] trying without making Rcpp --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 21270e0..1120a44 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,5 +12,5 @@ warnings_are_errors: true before_install: - tlmgr install index # for texlive and vignette? - R -e 'install.packages("Rcpp", repos="http://cloud.r-project.org")' - - make Rcpp + #- make Rcpp - cd selectiveInference From 0865d09eebbbf1e2f8533f1badf09bb0ba04acd6 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 18:26:02 -0700 Subject: [PATCH 44/55] making new quadratic_program C file --- selectiveInference/src/Rcpp-debias.cpp | 20 +- selectiveInference/src/debias.h | 21 +- selectiveInference/src/quadratic_program.c | 319 +++++++++++++++++++++ 3 files changed, 339 insertions(+), 21 deletions(-) create mode 100644 selectiveInference/src/quadratic_program.c diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index 1095c29..c1b5e91 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -28,16 +28,16 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, // Now call our C function - int iter = find_one_row_((double *) Sigma.begin(), - (double *) linear_func.begin(), - (double *) Sigma_diag.begin(), - (double *) gradient.begin(), - (int *) ever_active.begin(), - (int *) nactive.begin(), - nrow, - bound, - (double *) theta.begin(), - maxiter); + int iter = solve_qp((double *) Sigma.begin(), + (double *) linear_func.begin(), + (double *) Sigma_diag.begin(), + (double *) gradient.begin(), + (int *) ever_active.begin(), + (int *) nactive.begin(), + nrow, + bound, + (double *) theta.begin(), + maxiter); // Check whether feasible diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index 730b1fb..1284659 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -3,23 +3,22 @@ extern "C" { #endif /* __cplusplus */ -int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ - double *linear_func_ptr, /* Linear term in objective */ - double *Sigma_diag_ptr, /* Diagonal entry of covariance matrix */ - double *gradient_ptr, /* Current gradient of quadratic loss */ - int *ever_active_ptr, /* Ever active set: 0-based */ - int *nactive_ptr, /* Size of ever active set */ - int nrow, /* How many rows in Sigma */ - double bound, /* feasibility parameter */ - double *theta, /* current value */ - int maxiter); +int solve_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ + double *linear_func_ptr, /* Linear term in objective */ + double *Sigma_diag_ptr, /* Diagonal entry of covariance matrix */ + double *gradient_ptr, /* Current gradient of quadratic loss */ + int *ever_active_ptr, /* Ever active set: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* How many rows in Sigma */ + double bound, /* feasibility parameter */ + double *theta, /* current value */ + int maxiter); int check_KKT(double *theta, /* current theta */ double *gradient_ptr, /* Current gradient of quadratic loss */ int nrow, /* how many rows in Sigma */ double bound); /* Lagrange multipler for \ell_1 */ - #ifdef __cplusplus } /* extern "C" */ #endif /* __cplusplus */ diff --git a/selectiveInference/src/quadratic_program.c b/selectiveInference/src/quadratic_program.c new file mode 100644 index 0000000..cf4d650 --- /dev/null +++ b/selectiveInference/src/quadratic_program.c @@ -0,0 +1,319 @@ +#include // for fabs + +// Find an approximate row of \hat{Sigma}^{-1} + +// Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf + +// Dual problem: \text{min}_{\theta} 1/2 \theta^T \Sigma \theta - l^T\theta + \mu \|\theta\|_1 +// where l is `linear_func` below + +// This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf +// Therefore we don't have to negate the answer to get theta. +// Update one coordinate + +double objective_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ + double *linear_func_ptr, /* Linear term in objective */ + int *ever_active_ptr, /* Ever active set: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* how many rows in Sigma */ + double bound, /* Lagrange multipler for \ell_1 */ + double *theta) /* current value */ +{ + int irow, icol; + double value = 0; + double *Sigma_ptr_tmp = Sigma_ptr; + double *linear_func_ptr_tmp = linear_func_ptr; + double *theta_row_ptr, *theta_col_ptr; + int *active_row_ptr, *active_col_ptr; + int active_row, active_col; + int nactive = *nactive_ptr; + + theta_row_ptr = theta; + theta_col_ptr = theta; + + for (irow=0; irow 0) && (fabs(gradient + bound) > tol * bound)) { + return(0); + } + else if ((*theta_ptr < 0) && (fabs(gradient - bound) > tol * bound)) { + return(0); + } + } + else { + if (fabs(gradient) > (1. + tol) * bound) { + return(0); + } + } + } + + return(1); +} + +double update_one_coord_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ + double *linear_func_ptr, /* Linear term in objective */ + double *Sigma_diag_ptr, /* Diagonal entries of Sigma */ + double *gradient_ptr, /* Sigma times theta */ + int *ever_active_ptr, /* Ever active set: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* How many rows in Sigma */ + double bound, /* feasibility parameter */ + double *theta, /* current value */ + int coord, /* which coordinate to update: 0-based */ + int is_active) /* Is this part of ever_active */ +{ + + double delta; + double linear_term = 0; + double value = 0; + double old_value; + double *Sigma_ptr_tmp; + double *gradient_ptr_tmp; + double *theta_ptr; + int icol = 0; + + double *quadratic_ptr = ((double *) Sigma_diag_ptr + coord); + double quadratic_term = *quadratic_ptr; + + gradient_ptr_tmp = ((double *) gradient_ptr + coord); + linear_term = *gradient_ptr_tmp; + + theta_ptr = ((double *) theta + coord); + old_value = *theta_ptr; + + // The coord entry of gradient_ptr term has a diagonal term in it: + // Sigma[coord, coord] * theta[coord] + // This removes it. + + linear_term -= quadratic_term * old_value; + + // Now soft-threshold the coord entry of theta + + // Objective is t \mapsto q/2 * t^2 + l * t + bound |t| + // with q=quadratic_term and l=linear_term + + // With a negative linear term, solution should be + // positive + + if (linear_term < -bound) { + value = (-linear_term - bound) / quadratic_term; + } + else if (linear_term > bound) { + value = -(linear_term - bound) / quadratic_term; + } + + // Add to active set if necessary + + if (is_active == 0) { + update_ever_active_qp(coord, ever_active_ptr, nactive_ptr); + } + + // Update the linear term + + if (fabs(old_value - value) > 1.e-6 * (fabs(value) + fabs(old_value))) { + + delta = value - old_value; + Sigma_ptr_tmp = ((double *) Sigma_ptr + coord * nrow); + gradient_ptr_tmp = ((double *) gradient_ptr); + + for (icol=0; icol 0)) { + break; + } + old_value = new_value; + } + } + return(iter); +} + From bfad5057a3b91d80ae3477e74f66e59f24e8db3d Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 18:27:05 -0700 Subject: [PATCH 45/55] need to make Rcpp for travis --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 1120a44..21270e0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,5 +12,5 @@ warnings_are_errors: true before_install: - tlmgr install index # for texlive and vignette? - R -e 'install.packages("Rcpp", repos="http://cloud.r-project.org")' - #- make Rcpp + - make Rcpp - cd selectiveInference From 1237c78d74ca87edec5b9e6496288618eca6d7d5 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Fri, 18 Aug 2017 22:29:06 -0700 Subject: [PATCH 46/55] both solvers working --- selectiveInference/R/funs.fixed.R | 11 ++-- selectiveInference/src/Rcpp-debias.cpp | 75 +++++++++++++++++++--- selectiveInference/src/debias.c | 62 ++++++++++-------- selectiveInference/src/debias.h | 18 ++++++ selectiveInference/src/quadratic_program.c | 9 +-- 5 files changed, 123 insertions(+), 52 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index df87b5e..de609e7 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -297,7 +297,7 @@ InverseLinfty <- function(sigma, n, e, resol=1.2, mu=NULL, maxiter=50, threshold while ((mu.stop != 1)&&(try.no<10)){ last.beta <- beta - print(c("#######################trying ", try.no)) + #print(c("#######################trying ", try.no)) output <- InverseLinftyOneRow(sigma, i, mu, maxiter=maxiter, soln_result=output) # uses a warm start beta <- output$soln iter <- output$iter @@ -365,11 +365,10 @@ InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL) { linear_func = soln_result$linear_func } - result = solve_QP(Sigma, mu, maxiter, soln, linear_func, gradient, ever_active, nactive) # C function uses 0-based indexing - result2 = find_one_row_debiasingM(Sigma, i, mu, maxiter, soln, gradient, ever_active, nactive) # C function uses 0-based indexing - - print('close?') - print(c(sqrt(sum((result$soln-result2$soln)^2)/sum(result$soln^2)), sqrt(sum(result$soln^2)), result2$nactive)) + result = find_one_row_debiasingM(Sigma, i, mu, maxiter, 0 * soln, gradient, ever_active, nactive) # C function uses 0-based indexing + #result1 = solve_QP(Sigma, mu, maxiter, soln, linear_func, gradient, ever_active, nactive) + #print("close?") + #print(c(sqrt(sum((result1$soln-result$soln)^2)/sum(result$soln^2)), sum(result$soln^2))) # Check feasibility diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index c1b5e91..ce33854 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -2,15 +2,15 @@ #include // where find_one_row_void is defined // [[Rcpp::export]] -Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, - double bound, - int maxiter, - Rcpp::NumericVector theta, - Rcpp::NumericVector linear_func, - Rcpp::NumericVector gradient, - Rcpp::IntegerVector ever_active, - Rcpp::IntegerVector nactive - ) { +Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, + double bound, + int maxiter, + Rcpp::NumericVector theta, + Rcpp::NumericVector linear_func, + Rcpp::NumericVector gradient, + Rcpp::IntegerVector ever_active, + Rcpp::IntegerVector nactive + ) { int nrow = Sigma.nrow(); // number of features @@ -41,14 +41,69 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, // Check whether feasible + int kkt_check = check_KKT_qp(theta.begin(), + gradient.begin(), + nrow, + bound); + + return(Rcpp::List::create(Rcpp::Named("soln") = theta, + Rcpp::Named("gradient") = gradient, + Rcpp::Named("linear_func") = linear_func, + Rcpp::Named("iter") = iter, + Rcpp::Named("kkt_check") = kkt_check, + Rcpp::Named("ever_active") = ever_active, + Rcpp::Named("nactive") = nactive)); + +} + +// [[Rcpp::export]] +Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, + int row, // 0-based + double bound, + int maxiter, + Rcpp::NumericVector theta, + Rcpp::NumericVector gradient, + Rcpp::IntegerVector ever_active, + Rcpp::IntegerVector nactive + ) { + + int nrow = Sigma.nrow(); // number of features + + // Active set + + int irow; + + // Extract the diagonal + Rcpp::NumericVector Sigma_diag(nrow); + double *sigma_diag_p = Sigma_diag.begin(); + + for (irow=0; irow 0) && (fabs(gradient + bound) > tol * bound)) { - fail += 1; + return(0); } else if ((*theta_ptr < 0) && (fabs(gradient - bound) > tol * bound)) { - fail += 1; + return(0); } } else { if (fabs(gradient) > (1. + tol) * bound) { - fail += 1; + return(0); } } } - return(fail == 0); + return(1); } double update_one_coord(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ @@ -230,16 +229,21 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ int icoord = 0; int iactive = 0; int *active_ptr; + double old_value, new_value, tol=1.e-5; + + int check_objective = 1; + + if (check_objective) { + + old_value = objective(Sigma_ptr, + ever_active_ptr, + nactive_ptr, + nrow, + row, + bound, + theta); + } - double old_value = objective(Sigma_ptr, - ever_active_ptr, - nactive_ptr, - nrow, - row, - bound, - theta); - double new_value; - double tol=1.e-5; for (iter=0; iter 0)) { + break; + } - if (((old_value - new_value) < tol * fabs(new_value)) && (iter > 0)) { - break; + old_value = new_value; } - - old_value = new_value; } return(iter); } diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index 1284659..5e9d621 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -14,11 +14,29 @@ int solve_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ double *theta, /* current value */ int maxiter); +int check_KKT_qp(double *theta, /* current theta */ + double *gradient_ptr, /* Current gradient of quadratic loss */ + int nrow, /* how many rows in Sigma */ + double bound); /* Lagrange multipler for \ell_1 */ + +int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ + double *Sigma_ptr_ptr, /* Diagonal entry of covariance matrix */ + double *gradient_ptr, /* Current gradient of quadratic loss */ + int *ever_active_ptr, /* Ever active set: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* How many rows in Sigma */ + double bound, /* feasibility parameter */ + double *theta, /* current value */ + int maxiter, /* how many iterations */ + int row); /* which coordinate to update: 0-based */ + int check_KKT(double *theta, /* current theta */ double *gradient_ptr, /* Current gradient of quadratic loss */ int nrow, /* how many rows in Sigma */ + int row, /* which row: 0-based */ double bound); /* Lagrange multipler for \ell_1 */ + #ifdef __cplusplus } /* extern "C" */ #endif /* __cplusplus */ diff --git a/selectiveInference/src/quadratic_program.c b/selectiveInference/src/quadratic_program.c index c923398..50d6514 100644 --- a/selectiveInference/src/quadratic_program.c +++ b/selectiveInference/src/quadratic_program.c @@ -1,5 +1,4 @@ #include // for fabs -#include // Find an approximate row of \hat{Sigma}^{-1} @@ -82,8 +81,6 @@ int update_ever_active_qp(int coord, // Add it to the active set and increment the // number of active variables - fprintf(stderr, "adding %d\n", coord); - ever_active_ptr_tmp = ((int *) ever_active_ptr + *nactive_ptr); *ever_active_ptr_tmp = coord; *nactive_ptr += 1; @@ -99,7 +96,6 @@ int check_KKT_qp(double *theta, /* current theta */ // First check inactive int irow; - int fail = 0; double tol = 1.e-6; double *theta_ptr, *gradient_ptr_tmp; double gradient; @@ -228,13 +224,11 @@ int solve_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ int iactive = 0; int *active_ptr; - int check_objective = 0; + int check_objective = 1; double old_value, new_value; double tol=1.e-8; - fprintf(stderr, "%d nactive start\n", *nactive_ptr); - if (check_objective) { old_value = objective_qp(Sigma_ptr, @@ -250,7 +244,6 @@ int solve_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ for (iter=0; iter Date: Sun, 20 Aug 2017 15:16:24 -0700 Subject: [PATCH 47/55] passing tol as argument; both solvers agree --- selectiveInference/R/funs.fixed.R | 18 ++++-- selectiveInference/src/Rcpp-debias.cpp | 22 ++++++-- selectiveInference/src/debias.c | 64 +++++++++++++--------- selectiveInference/src/debias.h | 18 ++++-- selectiveInference/src/quadratic_program.c | 21 ++++--- 5 files changed, 90 insertions(+), 53 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index de609e7..c8f5352 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -338,7 +338,7 @@ InverseLinfty <- function(sigma, n, e, resol=1.2, mu=NULL, maxiter=50, threshold return(M) } -InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL) { +InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL, kkt_tol=1.e-6, objective_tol=1.e-6) { # If soln_result is not Null, it is used as a warm start. # It should be a list @@ -365,10 +365,18 @@ InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL) { linear_func = soln_result$linear_func } - result = find_one_row_debiasingM(Sigma, i, mu, maxiter, 0 * soln, gradient, ever_active, nactive) # C function uses 0-based indexing - #result1 = solve_QP(Sigma, mu, maxiter, soln, linear_func, gradient, ever_active, nactive) - #print("close?") - #print(c(sqrt(sum((result1$soln-result$soln)^2)/sum(result$soln^2)), sum(result$soln^2))) + soln1 = rep(0, p) + gradient1 = rep(0, p) + ever_active1 = rep(0, p) + ever_active1[1] = i-1 + nactive1 = as.integer(1) + result1 = find_one_row_debiasingM(Sigma, i-1, mu, maxiter, soln1, gradient1, ever_active1, nactive1, kkt_tol, objective_tol) # C function uses 0-based indexing + result = solve_QP(Sigma, mu, maxiter, soln, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol) + print("close?") + print(c(sqrt(sum((result1$soln-result$soln)^2)/sum(result$soln^2)), sum(result$soln^2))) + print(c(result1$iter, result$iter, sum(result1$soln^2))) + + #result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter, soln, gradient, ever_active, nactive, kkt_tol, objective_tol) # C function uses 0-based indexing # Check feasibility diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp index ce33854..a78ae81 100644 --- a/selectiveInference/src/Rcpp-debias.cpp +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -9,7 +9,9 @@ Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::IntegerVector ever_active, - Rcpp::IntegerVector nactive + Rcpp::IntegerVector nactive, + double kkt_tol, + double objective_tol ) { int nrow = Sigma.nrow(); // number of features @@ -37,14 +39,17 @@ Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, nrow, bound, (double *) theta.begin(), - maxiter); + maxiter, + kkt_tol, + objective_tol); // Check whether feasible int kkt_check = check_KKT_qp(theta.begin(), gradient.begin(), nrow, - bound); + bound, + kkt_tol); return(Rcpp::List::create(Rcpp::Named("soln") = theta, Rcpp::Named("gradient") = gradient, @@ -64,7 +69,9 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, Rcpp::NumericVector theta, Rcpp::NumericVector gradient, Rcpp::IntegerVector ever_active, - Rcpp::IntegerVector nactive + Rcpp::IntegerVector nactive, + double kkt_tol, + double objective_tol ) { int nrow = Sigma.nrow(); // number of features @@ -92,7 +99,9 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, bound, (double *) theta.begin(), maxiter, - row); + row, + kkt_tol, + objective_tol); // Check whether feasible @@ -100,7 +109,8 @@ Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, gradient.begin(), nrow, row, - bound); + bound, + kkt_tol); return(Rcpp::List::create(Rcpp::Named("soln") = theta, Rcpp::Named("gradient") = gradient, diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index e083ccc..4d36056 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -1,3 +1,4 @@ +#include #include // for fabs // Find an approximate row of \hat{Sigma}^{-1} @@ -67,6 +68,7 @@ int update_ever_active(int coord, for (iactive=0; iactive 0) && (fabs(gradient + bound) > tol * bound)) { @@ -122,6 +127,7 @@ int check_KKT(double *theta, /* current theta */ return(0); } } + } return(1); @@ -129,15 +135,15 @@ int check_KKT(double *theta, /* current theta */ double update_one_coord(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ double *Sigma_diag_ptr, /* Diagonal entries of Sigma */ - double *gradient_ptr, /* Sigma times theta */ + double *gradient_ptr, /* Sigma times theta */ int *ever_active_ptr, /* Ever active set: 0-based */ - int *nactive_ptr, /* Size of ever active set */ - int nrow, /* How many rows in Sigma */ - double bound, /* feasibility parameter */ - double *theta, /* current value */ - int row, /* which row: 0-based */ - int coord, /* which coordinate to update: 0-based */ - int is_active) /* Is this part of ever_active */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* How many rows in Sigma */ + double bound, /* feasibility parameter */ + double *theta, /* current value */ + int row, /* which row: 0-based */ + int coord, /* which coordinate to update: 0-based */ + int is_active) /* Is this part of ever_active */ { double delta; @@ -215,21 +221,23 @@ double update_one_coord(double *Sigma_ptr, /* A covariance matrix: X^T int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ double *Sigma_diag_ptr, /* Diagonal entry of covariance matrix */ - double *gradient_ptr, /* Sigma times theta */ + double *gradient_ptr, /* Sigma times theta */ int *ever_active_ptr, /* Ever active set: 0-based */ - int *nactive_ptr, /* Size of ever active set */ - int nrow, /* How many rows in Sigma */ - double bound, /* feasibility parameter */ - double *theta, /* current value */ - int maxiter, /* how many iterations */ - int row) /* which coordinate to update: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* How many rows in Sigma */ + double bound, /* feasibility parameter */ + double *theta, /* current value */ + int maxiter, /* how many iterations */ + int row, /* which coordinate to update: 0-based */ + double kkt_tol, /* precision for checking KKT conditions */ + double objective_tol) /* precision for checking relative decrease in objective value */ { int iter = 0; int icoord = 0; int iactive = 0; int *active_ptr; - double old_value, new_value, tol=1.e-5; + double old_value, new_value; int check_objective = 1; @@ -272,7 +280,8 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ gradient_ptr, nrow, row, - bound) == 1) { + bound, + kkt_tol) == 1) { break; } @@ -299,7 +308,8 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ gradient_ptr, nrow, row, - bound) == 1) { + bound, + kkt_tol) == 1) { break; } @@ -312,7 +322,7 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ bound, theta); - if (((old_value - new_value) < tol * fabs(new_value)) && (iter > 0)) { + if (((old_value - new_value) < objective_tol * fabs(new_value)) && (iter > 0)) { break; } diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h index 5e9d621..5510adf 100644 --- a/selectiveInference/src/debias.h +++ b/selectiveInference/src/debias.h @@ -12,15 +12,19 @@ int solve_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ int nrow, /* How many rows in Sigma */ double bound, /* feasibility parameter */ double *theta, /* current value */ - int maxiter); + int maxiter, /* how many iterations */ + double kkt_tol, /* precision for checking KKT conditions */ + double objective_tol); /* precision for checking relative decrease in objective value */ + int check_KKT_qp(double *theta, /* current theta */ double *gradient_ptr, /* Current gradient of quadratic loss */ int nrow, /* how many rows in Sigma */ - double bound); /* Lagrange multipler for \ell_1 */ + double bound, /* Lagrange multipler for \ell_1 */ + double tol); /* precision for checking KKT conditions */ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ - double *Sigma_ptr_ptr, /* Diagonal entry of covariance matrix */ + double *Sigma_diag_ptr, /* Diagonal entry of covariance matrix */ double *gradient_ptr, /* Current gradient of quadratic loss */ int *ever_active_ptr, /* Ever active set: 0-based */ int *nactive_ptr, /* Size of ever active set */ @@ -28,14 +32,16 @@ int find_one_row_(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ double bound, /* feasibility parameter */ double *theta, /* current value */ int maxiter, /* how many iterations */ - int row); /* which coordinate to update: 0-based */ + int row, /* which coordinate to update: 0-based */ + double kkt_tol, /* precision for checking KKT conditions */ + double objective_tol); /* precision for checking relative decrease in objective value */ int check_KKT(double *theta, /* current theta */ double *gradient_ptr, /* Current gradient of quadratic loss */ int nrow, /* how many rows in Sigma */ int row, /* which row: 0-based */ - double bound); /* Lagrange multipler for \ell_1 */ - + double bound, /* Lagrange multipler for \ell_1 */ + double kkt_tol); /* precision for checking KKT conditions */ #ifdef __cplusplus } /* extern "C" */ diff --git a/selectiveInference/src/quadratic_program.c b/selectiveInference/src/quadratic_program.c index 50d6514..0cefac4 100644 --- a/selectiveInference/src/quadratic_program.c +++ b/selectiveInference/src/quadratic_program.c @@ -88,15 +88,15 @@ int update_ever_active_qp(int coord, return(0); } -int check_KKT_qp(double *theta, /* current theta */ +int check_KKT_qp(double *theta, /* current theta */ double *gradient_ptr, /* Sigma times theta */ - int nrow, /* how many rows in Sigma */ - double bound) /* Lagrange multipler for \ell_1 */ + int nrow, /* how many rows in Sigma */ + double bound, /* Lagrange multipler for \ell_1 */ + double tol) /* precision for checking KKT conditions */ { // First check inactive int irow; - double tol = 1.e-6; double *theta_ptr, *gradient_ptr_tmp; double gradient; @@ -216,7 +216,9 @@ int solve_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ int nrow, /* How many rows in Sigma */ double bound, /* feasibility parameter */ double *theta, /* current value */ - int maxiter) + int maxiter, /* max number of iterations */ + double kkt_tol, /* precision for checking KKT conditions */ + double objective_tol) /* precision for checking relative decrease in objective value */ { int iter = 0; @@ -227,7 +229,6 @@ int solve_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ int check_objective = 1; double old_value, new_value; - double tol=1.e-8; if (check_objective) { @@ -268,7 +269,8 @@ int solve_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ if (check_KKT_qp(theta, gradient_ptr, nrow, - bound) == 1) { + bound, + kkt_tol) == 1) { break; } @@ -294,7 +296,8 @@ int solve_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ if (check_KKT_qp(theta, gradient_ptr, nrow, - bound) == 1) { + bound, + kkt_tol) == 1) { break; } @@ -307,7 +310,7 @@ int solve_qp(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ bound, theta); - if (((old_value - new_value) < tol * fabs(new_value)) && (iter > 0)) { + if ((fabs(old_value - new_value) < objective_tol * fabs(new_value)) && (iter > 0)) { break; } old_value = new_value; From 8c956f84949acc42e65af548bd1c6b18ca31ddac Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Sun, 20 Aug 2017 15:46:37 -0700 Subject: [PATCH 48/55] making a choice for solver --- selectiveInference/R/funs.fixed.R | 38 +++++++++++++++---------------- selectiveInference/src/debias.c | 2 -- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index c8f5352..2ddc3d5 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -26,8 +26,6 @@ sigma=NULL, alpha=0.1, else{ - - checkargs.xy(x,y) if (missing(beta) || is.null(beta)) stop("Must supply the solution beta") if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda") @@ -338,7 +336,8 @@ InverseLinfty <- function(sigma, n, e, resol=1.2, mu=NULL, maxiter=50, threshold return(M) } -InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL, kkt_tol=1.e-6, objective_tol=1.e-6) { +InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL, kkt_tol=1.e-6, objective_tol=1.e-6, + use_QP=TRUE) { # If soln_result is not Null, it is used as a warm start. # It should be a list @@ -352,31 +351,30 @@ InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL, kkt ever_active[1] = i-1 # 0-based ever_active = as.integer(ever_active) nactive = as.integer(1) - linear_func = rep(0, p) - linear_func[i] = -1 - linear_func = as.numeric(linear_func) - gradient = 1. * linear_func + if (use_QP) { + linear_func = rep(0, p) + linear_func[i] = -1 + linear_func = as.numeric(linear_func) + gradient = 1. * linear_func + } else { + gradient = rep(0, p) + } } else { soln = soln_result$soln gradient = soln_result$gradient ever_active = as.integer(soln_result$ever_active) nactive = as.integer(soln_result$nactive) - linear_func = soln_result$linear_func + if (use_QP) { + linear_func = soln_result$linear_func + } } - soln1 = rep(0, p) - gradient1 = rep(0, p) - ever_active1 = rep(0, p) - ever_active1[1] = i-1 - nactive1 = as.integer(1) - result1 = find_one_row_debiasingM(Sigma, i-1, mu, maxiter, soln1, gradient1, ever_active1, nactive1, kkt_tol, objective_tol) # C function uses 0-based indexing - result = solve_QP(Sigma, mu, maxiter, soln, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol) - print("close?") - print(c(sqrt(sum((result1$soln-result$soln)^2)/sum(result$soln^2)), sum(result$soln^2))) - print(c(result1$iter, result$iter, sum(result1$soln^2))) - - #result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter, soln, gradient, ever_active, nactive, kkt_tol, objective_tol) # C function uses 0-based indexing + if (use_QP) { + result = solve_QP(Sigma, mu, maxiter, soln, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol) + } else { + result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter, soln, gradient, ever_active, nactive, kkt_tol, objective_tol) # C function uses 0-based indexing + } # Check feasibility diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index 4d36056..df9d0ea 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -1,4 +1,3 @@ -#include #include // for fabs // Find an approximate row of \hat{Sigma}^{-1} @@ -68,7 +67,6 @@ int update_ever_active(int coord, for (iactive=0; iactive Date: Sun, 20 Aug 2017 16:05:30 -0700 Subject: [PATCH 49/55] RF: ever_active is now 1-based indices to be more R like --- selectiveInference/R/funs.fixed.R | 4 +-- selectiveInference/src/debias.c | 42 +++++++++++----------- selectiveInference/src/quadratic_program.c | 18 +++++----- 3 files changed, 33 insertions(+), 31 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 2ddc3d5..ae9fdc6 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -348,7 +348,7 @@ InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL, kkt if (is.null(soln_result)) { soln = rep(0, p) ever_active = rep(0, p) - ever_active[1] = i-1 # 0-based + ever_active[1] = i # 1-based ever_active = as.integer(ever_active) nactive = as.integer(1) if (use_QP) { @@ -373,7 +373,7 @@ InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL, kkt if (use_QP) { result = solve_QP(Sigma, mu, maxiter, soln, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol) } else { - result = find_one_row_debiasingM(Sigma, i-1, mu, maxiter, soln, gradient, ever_active, nactive, kkt_tol, objective_tol) # C function uses 0-based indexing + result = find_one_row_debiasingM(Sigma, i, mu, maxiter, soln, gradient, ever_active, nactive, kkt_tol, objective_tol) # C function uses 1-based indexing for active set } # Check feasibility diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c index df9d0ea..3e3efb5 100644 --- a/selectiveInference/src/debias.c +++ b/selectiveInference/src/debias.c @@ -11,12 +11,12 @@ // Update one coordinate double objective(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ - int *ever_active_ptr, /* Ever active set: 0-based */ - int *nactive_ptr, /* Size of ever active set */ - int nrow, /* how many rows in Sigma */ - int row, /* which row: 0-based */ - double bound, /* Lagrange multipler for \ell_1 */ - double *theta) /* current value */ + int *ever_active_ptr, /* Ever active set: 1-based */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* how many rows in Sigma */ + int row, /* which row: 1-based */ + double bound, /* Lagrange multipler for \ell_1 */ + double *theta) /* current value */ { int irow, icol; double value = 0; @@ -32,13 +32,13 @@ double objective(double *Sigma_ptr, /* A covariance matrix: X^TX/n */ for (irow=0; irow Date: Sun, 20 Aug 2017 16:39:12 -0700 Subject: [PATCH 50/55] added an argument to limit the number of times the linesearch runs --- selectiveInference/R/funs.fixed.R | 15 +++++++-------- selectiveInference/man/fixedLassoInf.Rd | 10 +++++++--- selectiveInference/src/Rcpp-debias.cpp | 3 +++ 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index ae9fdc6..0ca60a3 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -3,9 +3,9 @@ # min 1/2 || y - \beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1 fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","cox"),intercept=TRUE, add.targets=NULL, status=NULL, -sigma=NULL, alpha=0.1, - type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, - gridrange=c(-100,100), bits=NULL, verbose=FALSE) { + sigma=NULL, alpha=0.1, + type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, + gridrange=c(-100,100), bits=NULL, verbose=FALSE, linesearch.try=5) { family = match.arg(family) this.call = match.call() @@ -158,7 +158,7 @@ sigma=NULL, alpha=0.1, # Approximate inverse covariance matrix for when (n < p) from lasso_Inference.R - htheta <- InverseLinfty(hsigma, n, length(S), verbose=FALSE) + htheta <- InverseLinfty(hsigma, n, length(S), verbose=FALSE, max.try=linesearch.try) # htheta <- InverseLinfty(hsigma, n, verbose=FALSE) FS = rbind(diag(length(S)),matrix(0,pp-length(S),length(S))) @@ -268,7 +268,7 @@ fixedLasso.poly= ### Functions borrowed and slightly modified from lasso_inference.R ## Approximates inverse covariance matrix theta -InverseLinfty <- function(sigma, n, e, resol=1.2, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE) { +InverseLinfty <- function(sigma, n, e, resol=1.2, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE, max.try=10) { isgiven <- 1; if (is.null(mu)){ isgiven <- 0; @@ -293,13 +293,12 @@ InverseLinfty <- function(sigma, n, e, resol=1.2, mu=NULL, maxiter=50, threshold output = NULL - while ((mu.stop != 1)&&(try.no<10)){ + while ((mu.stop != 1) && (try.no // need to include the main Rcpp header file #include // where find_one_row_void is defined +// Below, the gradient should be equal to Sigma * theta + linear_func!! +// No check is done on this. + // [[Rcpp::export]] Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, double bound, From acbf82cf120bdfbf2c55c2ff3898425f44d234c6 Mon Sep 17 00:00:00 2001 From: Jonathan Taylor Date: Sun, 20 Aug 2017 16:43:30 -0700 Subject: [PATCH 51/55] making it 10 tries as before --- selectiveInference/R/funs.fixed.R | 2 +- selectiveInference/man/fixedLassoInf.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 0ca60a3..b03fd4d 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -5,7 +5,7 @@ fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","cox"),intercept=TRUE, add.targets=NULL, status=NULL, sigma=NULL, alpha=0.1, type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, - gridrange=c(-100,100), bits=NULL, verbose=FALSE, linesearch.try=5) { + gridrange=c(-100,100), bits=NULL, verbose=FALSE, linesearch.try=10) { family = match.arg(family) this.call = match.call() diff --git a/selectiveInference/man/fixedLassoInf.Rd b/selectiveInference/man/fixedLassoInf.Rd index 2f8517d..bd740f8 100644 --- a/selectiveInference/man/fixedLassoInf.Rd +++ b/selectiveInference/man/fixedLassoInf.Rd @@ -12,7 +12,7 @@ fixed value of the tuning parameter lambda fixedLassoInf(x, y, beta, lambda, family = c("gaussian", "binomial", "cox"),intercept=TRUE, add.targets=NULL, status=NULL, sigma=NULL, alpha=0.1, type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, - gridrange=c(-100,100), bits=NULL, verbose=FALSE, linesearch.try=5) + gridrange=c(-100,100), bits=NULL, verbose=FALSE, linesearch.try=10) } \arguments{ \item{x}{ From 1cc094ef7ad900c71cd376ecd65cf9e4b2df87ac Mon Sep 17 00:00:00 2001 From: kevinbfry Date: Tue, 24 Oct 2017 21:54:18 -0700 Subject: [PATCH 52/55] Tried different eestimates of covariance --- selectiveInference/R/RcppExports.R | 19 ++ selectiveInference/R/funs.fixed.R | 360 ++++++++++++++----------- selectiveInference/R/funs.fixedLogit.R | 278 +++++++++++++------ selectiveInference/src/RcppExports.cpp | 89 ++++++ 4 files changed, 512 insertions(+), 234 deletions(-) create mode 100644 selectiveInference/R/RcppExports.R create mode 100644 selectiveInference/src/RcppExports.cpp diff --git a/selectiveInference/R/RcppExports.R b/selectiveInference/R/RcppExports.R new file mode 100644 index 0000000..a65dc0b --- /dev/null +++ b/selectiveInference/R/RcppExports.R @@ -0,0 +1,19 @@ +# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +solve_QP <- function(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol) { + .Call('_selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol) +} + +find_one_row_debiasingM <- function(Sigma, row, bound, maxiter, theta, gradient, ever_active, nactive, kkt_tol, objective_tol) { + .Call('_selectiveInference_find_one_row_debiasingM', PACKAGE = 'selectiveInference', Sigma, row, bound, maxiter, theta, gradient, ever_active, nactive, kkt_tol, objective_tol) +} + +update1_ <- function(Q2, w, m, k) { + .Call('_selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) +} + +downdate1_ <- function(Q1, R, j0, m, n) { + .Call('_selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) +} + diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index b03fd4d..72cefdc 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -2,18 +2,18 @@ # for the solution of # min 1/2 || y - \beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1 -fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","cox"),intercept=TRUE, add.targets=NULL, status=NULL, +fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","cox"),intercept=TRUE, status=NULL, sigma=NULL, alpha=0.1, type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, - gridrange=c(-100,100), bits=NULL, verbose=FALSE, linesearch.try=10) { - + gridrange=c(-100,100), bits=NULL, verbose=FALSE) { + family = match.arg(family) this.call = match.call() type = match.arg(type) if(family=="binomial") { - if(type!="partial") stop("Only type= partial allowed with binomial family") - out=fixedLogitLassoInf(x,y,beta,lambda,alpha=alpha, type="partial", tol.beta=tol.beta, tol.kkt=tol.kkt, + # if(type!="partial") stop("Only type= partial allowed with binomial family") + out=fixedLogitLassoInf(x,y,beta,lambda,alpha=alpha, type=type, tol.beta=tol.beta, tol.kkt=tol.kkt, gridrange=gridrange, bits=bits, verbose=verbose,this.call=this.call) return(out) } @@ -26,31 +26,11 @@ fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","co else{ + + checkargs.xy(x,y) if (missing(beta) || is.null(beta)) stop("Must supply the solution beta") if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda") - - n = nrow(x) - p = ncol(x) - beta = as.numeric(beta) - if (type == "full") { - if (p > n) { - # need intercept (if there is one) for debiased lasso - hbeta = beta - if (intercept == T) { - if (length(beta) != p + 1) { - stop("Since type='full', p > n, and intercept=TRUE, beta must have length equal to ncol(x)+1") - } - # remove intercept if included - beta = beta[-1] - } else if (length(beta) != p) { - stop("Since family='gaussian', type='full' and intercept=FALSE, beta must have length equal to ncol(x)") - } - } - } else if (length(beta) != p) { - stop("Since family='gaussian' and type='partial', beta must have length equal to ncol(x)") - } - checkargs.misc(beta=beta,lambda=lambda,sigma=sigma,alpha=alpha, gridrange=gridrange,tol.beta=tol.beta,tol.kkt=tol.kkt) if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { @@ -58,12 +38,53 @@ fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","co bits = NULL } - if (!is.null(add.targets) && (!is.vector(add.targets) - || !all(is.numeric(add.targets)) || !all(add.targets==floor(add.targets)) - || !all(add.targets >= 1 && add.targets <= p))) { - stop("'add.targets' must be a vector of integers between 1 and p") + # Estimate sigma + if (is.null(sigma)) { + if (n >= 2*p) { + oo = intercept + sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo)) + } + else { + sigma = sd(y) + warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma), + "you may want to use the estimateSigma function")) + } + } + + n = nrow(x) + p = ncol(x) + beta = as.numeric(beta) + if (intercept == F & length(beta) != p) stop("Since intercept=FALSE, beta must have length equal to ncol(x)") + if (intercept == T & length(beta) != p+1) stop("Since intercept=TRUE, beta must have length equal to ncol(x)+1") + + + + if (intercept == T) { + bbeta = beta[-1] + # m=beta[-1]!=0 #active set + vars = which(abs(bbeta) > tol.beta * sqrt(n / colSums(x^2))) + xm=cbind(1,x[,vars]) + bhat=c(beta[1],bbeta[vars]) # intcpt plus active vars + } else { + bbeta = beta + # m=beta!=0 #active set + vars = which(abs(bbeta) > tol.beta * sqrt(n / colSums(x^2))) + bhat=bbeta[vars] # active vars + xm=x[,vars] + } + xnotm=x[,-vars] + + # vars = which(abs(bbeta) > tol.beta / sqrt(colSums(x^2))) + nvar = length(vars) + if(nvar==0){ + cat("Empty model",fill=T) + return() } + pv=vlo=vup=sd=rep(NA, nvar) + vmat = matrix(0,nvar,n) + ci=tailarea=matrix(NA,nvar,2) + # If glmnet was run with an intercept term, center x and y if (intercept==TRUE) { obj = standardize(x,y,TRUE,FALSE) @@ -71,36 +92,139 @@ fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","co y = obj$y } - # Check the KKT conditions - g = t(x)%*%(y-x%*%beta) / lambda - if (any(abs(g) > 1+tol.kkt * sqrt(sum(y^2)))) + s2=sign(bhat) + + #check KKT + g = t(x)%*%(y-xm%*%bhat)/lambda # negative gradient scaled by lambda + # print(g[which(abs(g)>1)]) + if (any(abs(g) > 1+tol.kkt) ) warning(paste("Solution beta does not satisfy the KKT conditions", "(to within specified tolerances)")) - tol.coef = tol.beta * sqrt(n^2 / colSums(x^2)) - # print(tol.coef) - vars = which(abs(beta) > tol.coef) - # print(beta) - # print(vars) - if(length(vars)==0){ - cat("Empty model",fill=T) - return() - } - if (any(sign(g[vars]) != sign(beta[vars]))) + if (any(sign(g[vars]) != sign(bbeta[vars]))) warning(paste("Solution beta does not satisfy the KKT conditions", "(to within specified tolerances). You might try rerunning", "glmnet with a lower setting of the", "'thresh' parameter, for a more accurate convergence.")) - # Get lasso polyhedral region, of form Gy >= u - if (type == 'full' & p > n) out = fixedLasso.poly(x,y,beta,lambda,vars,inactive=TRUE) - else out = fixedLasso.poly(x,y,beta,lambda,vars) - G = out$G - u = out$u + MM = pinv(crossprod(xm))*sigma^2 + # gradient at LASSO solution, first entry is 0 because intercept is unpenalized + # at exact LASSO solution it should be s2[-1] + if (intercept == T) gm = c(0,-g[vars]*lambda) + else gm = -g[vars]*lambda + + dbeta = MM%*%gm + + bbar = bhat - dbeta + + # print(length(bhat)) + + if (intercept == T) { + A1 = -(mydiag(s2))[-1,] + b1 = (s2*dbeta)[-1] + V = diag(length(bbar))[-1,] + } else { + A1 = -(mydiag(s2)) + b1 = (s2*dbeta) + V = diag(length(bbar)) + } + + null_value = rep(0,nvar) + + # if (p > n) { + if (type=="full") { + + + # Reorder so that active set is first + Xordered = cbind(xm,xnotm) + + hsigma <- 1/n*(t(Xordered)%*%Xordered) + + M <- InverseLinfty(hsigma,n,dim(xm)[2],verbose=F)[-1,] # remove intercept row + I <- diag(dim(xm)[2])[-1,] + if (intercept) Msubset <- M[,-c(1,vars+1)] + else Msubset <- M[,-vars] + B <- cbind(I,Msubset/sqrt(n)) + V = B + + c <- matrix(c(gm,t(xnotm)%*%xm%*%(-dbeta)),ncol=1) + d <- -dbeta[-1] # remove intercept + + null_value = -(M%*%c/sqrt(n) - d) + + A0 = matrix(0,ncol(xnotm),ncol(A1)) + A0 = cbind(A0,diag(nrow(A0))) + fill = matrix(0,nrow(A1),nrow(A0)) + A1 = cbind(A1,fill) + A1 = rbind(A1,A0,-A0) + + b1 = matrix(c(b1,rep(lambda,2*nrow(A0))),ncol=1) + + # full covariance + MMbr = (crossprod(xnotm) - t(xnotm)%*%xm%*%pinv(crossprod(xm))%*%t(xm)%*%xnotm)*sigma^2 + MM = cbind(MM,matrix(0,nrow(MM),ncol(MMbr))) + MMbr = cbind(matrix(0,nrow(MMbr),nrow(MM)),MMbr) + MM = rbind(MM,MMbr) + + # # pairs bootstrap estimate of full covariance + # boot.vec <- function(data, indices, bbar) { + # sample=data[indices,-1] + # y=data[indices,1] + # xa=sample[,1:length(bbar)] + # xnota=sample[,-c(1:length(bbar))] + # + # return(c(bbar,t(xnota)%*%(y-xa%*%bbar))) + # } + # + # R=100 + # boot.obj=boot(cbind(y,Xordered),boot.vec,R,parallel="multicore",bbar=bbar) + # boot.est=boot.obj$t + # boot.mean=colMeans(boot.est) + # boot.diff=t(boot.est-boot.mean) + # temp=apply(boot.diff,2,function(vec){vec=matrix(vec,ncol=1);return(vec%*%t(vec))}) + # term=array(0,dim=c(p+intercept,p+intercept,R)) + # for(i in 1:R) { + # term[,,i]=matrix(temp[,i],p+intercept,p+intercept) + # } + # boot.cov = apply(term,1:2,function(x){Reduce("+",x)})/(R-1) + # boot.cov[1:dim(MM)[1],1:dim(MM)[2]]=MM + # MM=boot.cov + + # # jacknife estimate of covariance + # jk.vec <- function(idx, Xordered, bbar) { + # sample=Xordered[-idx,] + # y=y[-idx] + # xa=sample[,1:length(bbar)] + # xnota=sample[,-c(1:length(bbar))] + # + # return(c(bbar,t(xnota)%*%(y-xa%*%bbar))) + # } + # + # jk.est = matrix(0,p+intercept,p+intercept) + # for(i in 1:n) { + # jk.est[i,]=jk.vec(i,Xordered,bbar) + # } + # jk.mean=colMeans(jk.est) + # jk.diff=t(jk.est-jk.mean) + # temp=apply(jk.diff,2,function(vec){vec=matrix(vec,ncol=1);return(vec%*%t(vec))}) + # term=array(0,dim=c(p+intercept,p+intercept,n)) + # for(i in 1:n) { + # term[,,i]=matrix(temp[,i],p+intercept,p+intercept) + # } + # jk.cov = (n-1)*apply(term,1:2,function(x){Reduce("+",x)})/n + # jk.cov[1:dim(MM)[1],1:dim(MM)[2]]=MM + # MM=jk.cov + + + + gnotm = g[-vars]*lambda + bbar = matrix(c(bbar,gnotm),ncol=1) + } + # } # Check polyhedral region tol.poly = 0.01 - if (min(G %*% y - u) < -tol.poly * sqrt(sum(y^2))) + if (max((A1 %*% bbar) - b1) > tol.poly) stop(paste("Polyhedral constraints not satisfied; you must recompute beta", "more accurately. With glmnet, make sure to use exact=TRUE in coef(),", "and check whether the specified value of lambda is too small", @@ -108,115 +232,45 @@ fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","co "You might also try rerunning glmnet with a lower setting of the", "'thresh' parameter, for a more accurate convergence.")) - # Estimate sigma - if (is.null(sigma)) { - if (n >= 2*p) { - oo = intercept - sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo)) - } - else { - sigma = sd(y) - warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma), - "you may want to use the estimateSigma function")) - } - } - - # add additional targets for inference if provided - if (!is.null(add.targets)) vars = sort(unique(c(vars,add.targets,recursive=T))) + sign = numeric(nvar) + coef0 = numeric(nvar) - k = length(vars) - pv = vlo = vup = numeric(k) - vmat = matrix(0,k,n) - ci = tailarea = matrix(0,k,2) - sign = numeric(k) - - if (type=="full" & p > n) { - if (intercept == T) { - pp=p+1 - Xint <- cbind(rep(1,n),x) - # indices of selected predictors - S = c(1,vars + 1) - } else { - pp=p - Xint <- x - # indices of selected predictors - S = vars - # notS = which(abs(beta) <= tol.coef) - } + for (j in 1:nvar) { + if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) - notS = setdiff(1:pp,S) - - XS = Xint[,S] - hbetaS = hbeta[S] - - # Reorder so that active set S is first - Xordered = Xint[,c(S,notS,recursive=T)] - - hsigma <- 1/n*(t(Xordered)%*%Xordered) - hsigmaS <- 1/n*(t(XS)%*%XS) # hsigma[S,S] - hsigmaSinv <- solve(hsigmaS) # pinv(hsigmaS) - - # Approximate inverse covariance matrix for when (n < p) from lasso_Inference.R - - htheta <- InverseLinfty(hsigma, n, length(S), verbose=FALSE, max.try=linesearch.try) - # htheta <- InverseLinfty(hsigma, n, verbose=FALSE) + vj = V[j,] + # mj = sqrt(sum(vj^2)) + # vj = vj/mj + coef0[j] = sum(vj * bbar) + sign[j] = sign(coef0[j]) + vj = vj * sign[j] - FS = rbind(diag(length(S)),matrix(0,pp-length(S),length(S))) - GS = cbind(diag(length(S)),matrix(0,length(S),pp-length(S))) - ithetasigma = (GS-(htheta%*%hsigma)) - # ithetasigma = (diag(pp) - (htheta%*%hsigma)) + limits.info = TG.limits(bbar, A1, b1, vj, Sigma=MM) + if(is.null(limits.info)) return(list(pv=NULL,MM=MM,eta=vj)) + a = TG.pvalue.base(limits.info, null_value=null_value[j], bits=bits) + pv[j] = a$pv + vlo[j] = a$vlo # * mj # Unstandardize (mult by norm of vj) + vup[j] = a$vup # * mj # Unstandardize (mult by norm of vj) + sd[j] = a$sd # * mj # Unstandardize (mult by norm of vj) - M <- (((htheta%*%t(Xordered))+ithetasigma%*%FS%*%hsigmaSinv%*%t(XS))/n) - # vector which is offset for testing debiased beta's - null_value <- (((ithetasigma%*%FS%*%hsigmaSinv)%*%sign(hbetaS))*lambda/n) - if (intercept == T) { - M = M[-1,] # remove intercept row - null_value = null_value[-1] # remove intercept element - } - } else if (type=="partial" || p > n) { - xa = x[,vars,drop=F] - M = pinv(crossprod(xa)) %*% t(xa) - null_value = rep(0,k) - } else { - M = pinv(crossprod(x)) %*% t(x) - M = M[vars,,drop=F] - null_value = rep(0,k) + a = TG.interval.base(limits.info, + alpha=alpha, + gridrange=gridrange, + flip=(sign[j]==-1), + bits=bits) + ci[j,] = (a$int-null_value[j]) # * mj # Unstandardize (mult by norm of vj) + tailarea[j,] = a$tailarea } - - for (j in 1:k) { - if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) - - vj = M[j,] - mj = sqrt(sum(vj^2)) - vj = vj / mj # Standardize (divide by norm of vj) - sign[j] = sign(sum(vj*y)) - vj = sign[j] * vj - - limits.info = TG.limits(y, -G, -u, vj, Sigma=diag(rep(sigma^2, n))) - a = TG.pvalue.base(limits.info, null_value=null_value[j], bits=bits) - pv[j] = a$pv - vlo[j] = a$vlo * mj # Unstandardize (mult by norm of vj) - vup[j] = a$vup * mj # Unstandardize (mult by norm of vj) - vmat[j,] = vj * mj * sign[j] # Unstandardize (mult by norm of vj) - - a = TG.interval.base(limits.info, - alpha=alpha, - gridrange=gridrange, - flip=(sign[j]==-1), - bits=bits) - ci[j,] = (a$int-null_value[j]) * mj # Unstandardize (mult by norm of vj) - tailarea[j,] = a$tailarea + + out = list(type=type,lambda=lambda,pv=pv,ci=ci, + tailarea=tailarea,vlo=vlo,vup=vup,y=y, + vars=vars,sign=sign,sigma=sigma,alpha=alpha, + sd=sd, + coef0=coef0, + call=this.call) + class(out) = "fixedLassoInf" + return(out) } - - out = list(type=type,lambda=lambda,pv=pv,ci=ci, - tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, - vars=vars,sign=sign,sigma=sigma,alpha=alpha, - sd=sigma*sqrt(rowSums(vmat^2)), - coef0=vmat%*%y, - call=this.call) - class(out) = "fixedLassoInf" - return(out) -} } ############################# diff --git a/selectiveInference/R/funs.fixedLogit.R b/selectiveInference/R/funs.fixedLogit.R index 19936b0..90bce1c 100644 --- a/selectiveInference/R/funs.fixedLogit.R +++ b/selectiveInference/R/funs.fixedLogit.R @@ -1,8 +1,8 @@ -fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.beta=1e-5, tol.kkt=0.1, - gridrange=c(-100,100), bits=NULL, verbose=FALSE,this.call=NULL){ - - +fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, + gridrange=c(-100,100), bits=NULL, verbose=FALSE,this.call=NULL){ + + type = match.arg(type) checkargs.xy(x,y) if (missing(beta) || is.null(beta)) stop("Must supply the solution beta") @@ -13,68 +13,171 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet warning("Package Rmpfr is not installed, reverting to standard precision") bits = NULL } - - - n=length(y) - p=ncol(x) - # I assume that intcpt was used - if(length(beta)!=p+1) stop("Since family='binomial', beta must be of length ncol(x)+1, that is, it should include an intercept") - nvar=sum(beta[-1]!=0) - pv=vlo=vup=sd=rep(NA, nvar) - ci=tailarea=matrix(NA,nvar,2) - -#do we need to worry about standardization? - -# obj = standardize(x,y,TRUE,FALSE) + + + n=length(y) + p=ncol(x) + # I assume that intcpt was used + if(length(beta)!=p+1) stop("Since family='binomial', beta must be of length ncol(x)+1, that is, it should include an intercept") + vars = which(abs(beta[-1]) > tol.beta / sqrt(colSums(x^2))) + nvar=length(vars) + pv=vlo=vup=sd=rep(NA, nvar) + ci=tailarea=matrix(NA,nvar,2) + + #do we need to worry about standardization? + + # obj = standardize(x,y,TRUE,FALSE) # x = obj$x # y = obj$y - m=beta[-1]!=0 #active set - - bhat=c(beta[1],beta[-1][beta[-1]!=0]) # intcpt plus active vars - s2=sign(bhat) - lam2m=diag(c(0,rep(lambda,sum(m)))) - - - xxm=cbind(1,x[,m]) - - etahat = xxm %*% bhat - prhat = as.vector(exp(etahat) / (1 + exp(etahat))) - ww=prhat*(1-prhat) - # w=diag(ww) - -#check KKT - z=etahat+(y-prhat)/ww + # m=beta[-1]!=0 #active set + + bhat=c(beta[1],beta[-1][vars]) # intcpt plus active vars + s2=sign(bhat) + lam2m=diag(c(0,rep(lambda,nvar))) + + + xm=cbind(1,x[,vars]) + xnotm=x[,-vars] + + etahat = xm %*% bhat + prhat = as.vector(exp(etahat) / (1 + exp(etahat))) + ww=prhat*(1-prhat) + w=diag(ww) + + #check KKT + z=etahat+(y-prhat)/ww # g= t(x)%*%w%*%(z-etahat)/lambda # negative gradient scaled by lambda g=scale(t(x),FALSE,1/ww)%*%(z-etahat)/lambda # negative gradient scaled by lambda - if (any(abs(g) > 1+tol.kkt) ) + if (any(abs(g) > 1+tol.kkt) ) warning(paste("Solution beta does not satisfy the KKT conditions", "(to within specified tolerances)")) - - vars = which(abs(beta[-1]) > tol.beta / sqrt(colSums(x^2))) + if(length(vars)==0){ - cat("Empty model",fill=T) - return() + cat("Empty model",fill=T) + return() } if (any(sign(g[vars]) != sign(beta[-1][vars]))) warning(paste("Solution beta does not satisfy the KKT conditions", "(to within specified tolerances). You might try rerunning", "glmnet with a lower setting of the", "'thresh' parameter, for a more accurate convergence.")) + # warnings(paste(g[vars],beta[-1][vars])) - #constraints for active variables - # MM=solve(t(xxm)%*%w%*%xxm) - MM=solve(scale(t(xxm),F,1/ww)%*%xxm) + #constraints for active variables + # MM=solve(t(xxm)%*%w%*%xxm) + MM=solve(scale(t(xm),F,1/ww)%*%xm) gm = c(0,-g[vars]*lambda) # gradient at LASSO solution, first entry is 0 because intercept is unpenalized - # at exact LASSO solution it should be s2[-1] + # at exact LASSO solution it should be s2[-1] dbeta = MM %*% gm - + # bbar=(bhat+lam2m%*%MM%*%s2) # JT: this is wrong, shouldn't use sign of intercept anywhere... bbar = bhat - dbeta - - A1=-(mydiag(s2))[-1,] + + A1= matrix(-(mydiag(s2))[-1,],nrow=length(s2)-1) b1= (s2 * dbeta)[-1] + V = diag(length(bbar))[-1,] + null_value = rep(0,nvar) + + if (type=='full') { + Xordered = cbind(xm,xnotm) + + hsigma <- 1/n*(t(Xordered)%*%w%*%Xordered) + + M <- matrix(InverseLinfty(hsigma,n,dim(xm)[2],verbose=F,max.try=10),ncol=p+1)[-1,] # remove intercept row + I <- matrix(diag(dim(xm)[2])[-1,],nrow=dim(xm)[2]-1) + if (is.null(dim(M))) Msubset <- M[-c(1,vars+1)] + else Msubset <- M[,-c(1,vars+1)] + Msubset = matrix(Msubset,nrow=dim(xm)[2]-1) + # print("*********") + # print(Msubset) + # print(I) + # print("*********") + V <- cbind(I,Msubset/sqrt(n)) # matrix(cbind(I,Msubset/sqrt(n)),nrow=dim(xm)[2]-1) + + c <- matrix(c(gm,t(xnotm)%*%w%*%xm%*%(-dbeta)),ncol=1) + d <- -dbeta[-1] # remove intercept + + null_value = -(M%*%c/sqrt(n) - d) + + A0 = matrix(0,ncol(xnotm),ncol(A1)) + A0 = cbind(A0,diag(nrow(A0))) + fill = matrix(0,nrow(A1),nrow(A0)) + A1 = cbind(A1,fill) + A1 = rbind(A1,A0,-A0) + + b1 = matrix(c(b1,rep(lambda,2*nrow(A0))),ncol=1) + + # full covariance + MMbr = t(xnotm)%*%w%*%xnotm - t(xnotm)%*%w%*%xm%*%MM%*%t(xm)%*%w%*%xnotm + MM = cbind(MM,matrix(0,nrow(MM),ncol(MMbr))) + MMbr = cbind(matrix(0,nrow(MMbr),nrow(MM)),MMbr) + MM = rbind(MM,MMbr) + # # pairs bootstrap estimate of full covariance + # boot.vec <- function(data, indices, bbar) { + # sample=data[indices,-1] + # y=data[indices,1] + # xa=sample[,1:length(bbar)] + # xnota=sample[,-c(1:length(bbar))] + # # print(dim(xnota)) + # + # return(c(bbar,t(xnota)%*%(y-xa%*%bbar))) + # # return(c(t(xnota)%*%(y-xa%*%bbar))) + # } + # + # R=100 + # boot.obj=boot(cbind(y,Xordered),boot.vec,R,parallel="multicore",bbar=bbar) + # boot.est=boot.obj$t + # # print(dim(boot.est)) + # boot.mean=colMeans(boot.est) + # boot.diff=t(boot.est-boot.mean) + # temp=apply(boot.diff,2,function(vec){vec=matrix(vec,ncol=1);return(vec%*%t(vec))}) + # term=array(0,dim=c(p+1,p+1,R)) + # for(i in 1:R) { + # term[,,i]=matrix(temp[,i],p+1,p+1) + # } + # boot.cov = apply(term,1:2,function(x){Reduce("+",x)})/(R-1) + # boot.cov[1:dim(MM)[1],1:dim(MM)[2]]=MM + # MM=boot.cov + # print(dim(boot.cov)) + + # MM = cbind(MM,matrix(0,nrow(MM),ncol(boot.cov))) + # boot.cov = cbind(matrix(0,nrow(boot.cov),nrow(MM)),boot.cov) + # MM = rbind(MM,boot.cov) + + # # jacknife estimate of covariance + # jk.vec <- function(idx, Xordered, bbar) { + # sample=Xordered[-idx,] + # y=y[-idx] + # xa=sample[,1:length(bbar)] + # xnota=sample[,-c(1:length(bbar))] + # + # return(c(bbar,t(xnota)%*%(y-xa%*%bbar))) + # } + # + # jk.est = matrix(0,p+1,p+1) + # for(i in 1:n) { + # jk.est[i,]=jk.vec(i,Xordered,bbar) + # } + # jk.mean=colMeans(jk.est) + # jk.diff=t(jk.est-jk.mean) + # temp=apply(jk.diff,2,function(vec){vec=matrix(vec,ncol=1);return(vec%*%t(vec))}) + # term=array(0,dim=c(p+1,p+1,n)) + # for(i in 1:n) { + # term[,,i]=matrix(temp[,i],p+1,p+1) + # } + # jk.cov = (n-1)*apply(term,1:2,function(x){Reduce("+",x)})/n + # jk.cov[1:dim(MM)[1],1:dim(MM)[2]]=MM + # MM=jk.cov + + gnotm = g[-vars]*lambda + bbar = matrix(c(bbar,gnotm),ncol=1) + } + + + if (is.null(dim(V))) V=matrix(V,nrow=1) + tol.poly = 0.01 if (max((A1 %*% bbar) - b1) > tol.poly) stop(paste("Polyhedral constraints not satisfied; you must recompute beta", @@ -83,57 +186,70 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet "(beyond the grid of values visited by glmnet).", "You might also try rerunning glmnet with a lower setting of the", "'thresh' parameter, for a more accurate convergence.")) - - - for(jj in 1:sum(m)){ - vj=c(rep(0,sum(m)+1));vj[jj+1]=s2[jj+1] - # compute p-values - junk=TG.pvalue(bbar, A1, b1, vj, MM) - pv[jj] = junk$pv - - vlo[jj]=junk$vlo - vup[jj]=junk$vup - sd[jj]=junk$sd - - junk2=TG.interval(bbar, A1, b1, vj, MM,alpha=alpha) - - ci[jj,]=junk2$int - tailarea[jj,] = junk2$tailarea - } - - # JT: these are not the one step estimators but they are close - fit0=glm(y~x[,m],family="binomial") - sfit0=summary(fit0) - coef0=bbar[-1] #fit0$coef[-1] - se0=sqrt(diag(MM)[-1]) # sfit0$cov.scaled)[-1]) - zscore0=coef0/se0 - + + sign=numeric(nvar) + coef0=numeric(nvar) + + + for(j in 1:nvar){ + if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) + + if (is.null(dim(V))) vj = V + else vj = matrix(V[j,],nrow=1) + coef0[j] = vj%*%bbar # sum(vj * bbar) + sign[j] = sign(coef0[j]) + vj = vj * sign[j] + # print(dim(MM)) + # print(dim(vj)) + # print(nvar) + + # compute p-values + limits.info = TG.limits(bbar, A1, b1, vj, Sigma=MM) + # if(is.null(limits.info)) return(list(pv=NULL,MM=MM,eta=vj)) + a = TG.pvalue.base(limits.info, null_value=null_value[j], bits=bits) + pv[j] = a$pv + vlo[j] = a$vlo # * mj # Unstandardize (mult by norm of vj) + vup[j] = a$vup # * mj # Unstandardize (mult by norm of vj) + sd[j] = a$sd # * mj # Unstandardize (mult by norm of vj) + + a = TG.interval.base(limits.info, + alpha=alpha, + gridrange=gridrange, + flip=(sign[j]==-1), + bits=bits) + ci[j,] = (a$int-null_value[j]) # * mj # Unstandardize (mult by norm of vj) + tailarea[j,] = a$tailarea + } + + se0 = sqrt(diag(V%*%MM%*%t(V))) + zscore0 = (coef0+null_value)/se0 + out = list(type=type,lambda=lambda,pv=pv,ci=ci, - tailarea=tailarea,vlo=vlo,vup=vup,sd=sd, - vars=vars,alpha=alpha,coef0=coef0,zscore0=zscore0, - call=this.call, - info.matrix=MM) # info.matrix is output just for debugging purposes at the moment + tailarea=tailarea,vlo=vlo,vup=vup,sd=sd, + vars=vars,alpha=alpha,coef0=coef0,zscore0=zscore0, + call=this.call, + info.matrix=MM) # info.matrix is output just for debugging purposes at the moment class(out) = "fixedLogitLassoInf" return(out) - - } + +} print.fixedLogitLassoInf <- function(x, tailarea=TRUE, ...) { cat("\nCall:\n") dput(x$call) - + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", x$sigma)) cat(sprintf("\nTesting results at lambda = %0.3f, with alpha = %0.3f\n",x$lambda,x$alpha)) cat("",fill=T) tab = cbind(x$vars, - round(x$coef0,3), - round(x$zscore0,3), - round(x$pv,3),round(x$ci,3)) + round(x$coef0,3), + round(x$zscore0,3), + round(x$pv,3),round(x$ci,3)) colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") if (tailarea) { tab = cbind(tab,round(x$tailarea,3)) @@ -141,7 +257,7 @@ print.fixedLogitLassoInf <- function(x, tailarea=TRUE, ...) { } rownames(tab) = rep("",nrow(tab)) print(tab) - + cat(sprintf("\nNote: coefficients shown are %s regression coefficients\n", ifelse(x$type=="partial","partial","full"))) invisible() diff --git a/selectiveInference/src/RcppExports.cpp b/selectiveInference/src/RcppExports.cpp new file mode 100644 index 0000000..c05f594 --- /dev/null +++ b/selectiveInference/src/RcppExports.cpp @@ -0,0 +1,89 @@ +// Generated by using Rcpp::compileAttributes() -> do not edit by hand +// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +#include + +using namespace Rcpp; + +// solve_QP +Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol); +RcppExport SEXP _selectiveInference_solve_QP(SEXP SigmaSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Sigma(SigmaSEXP); + Rcpp::traits::input_parameter< double >::type bound(boundSEXP); + Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericVector >::type theta(thetaSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericVector >::type linear_func(linear_funcSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericVector >::type gradient(gradientSEXP); + Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type ever_active(ever_activeSEXP); + Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type nactive(nactiveSEXP); + Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); + Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); + rcpp_result_gen = Rcpp::wrap(solve_QP(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol)); + return rcpp_result_gen; +END_RCPP +} +// find_one_row_debiasingM +Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, int row, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector gradient, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol); +RcppExport SEXP _selectiveInference_find_one_row_debiasingM(SEXP SigmaSEXP, SEXP rowSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Sigma(SigmaSEXP); + Rcpp::traits::input_parameter< int >::type row(rowSEXP); + Rcpp::traits::input_parameter< double >::type bound(boundSEXP); + Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericVector >::type theta(thetaSEXP); + Rcpp::traits::input_parameter< Rcpp::NumericVector >::type gradient(gradientSEXP); + Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type ever_active(ever_activeSEXP); + Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type nactive(nactiveSEXP); + Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP); + Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP); + rcpp_result_gen = Rcpp::wrap(find_one_row_debiasingM(Sigma, row, bound, maxiter, theta, gradient, ever_active, nactive, kkt_tol, objective_tol)); + return rcpp_result_gen; +END_RCPP +} +// update1_ +Rcpp::List update1_(Rcpp::NumericMatrix Q2, Rcpp::NumericVector w, int m, int k); +RcppExport SEXP _selectiveInference_update1_(SEXP Q2SEXP, SEXP wSEXP, SEXP mSEXP, SEXP kSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q2(Q2SEXP); + Rcpp::traits::input_parameter< Rcpp::NumericVector >::type w(wSEXP); + Rcpp::traits::input_parameter< int >::type m(mSEXP); + Rcpp::traits::input_parameter< int >::type k(kSEXP); + rcpp_result_gen = Rcpp::wrap(update1_(Q2, w, m, k)); + return rcpp_result_gen; +END_RCPP +} +// downdate1_ +Rcpp::List downdate1_(Rcpp::NumericMatrix Q1, Rcpp::NumericMatrix R, int j0, int m, int n); +RcppExport SEXP _selectiveInference_downdate1_(SEXP Q1SEXP, SEXP RSEXP, SEXP j0SEXP, SEXP mSEXP, SEXP nSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q1(Q1SEXP); + Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type R(RSEXP); + Rcpp::traits::input_parameter< int >::type j0(j0SEXP); + Rcpp::traits::input_parameter< int >::type m(mSEXP); + Rcpp::traits::input_parameter< int >::type n(nSEXP); + rcpp_result_gen = Rcpp::wrap(downdate1_(Q1, R, j0, m, n)); + return rcpp_result_gen; +END_RCPP +} + +static const R_CallMethodDef CallEntries[] = { + {"_selectiveInference_solve_QP", (DL_FUNC) &_selectiveInference_solve_QP, 10}, + {"_selectiveInference_find_one_row_debiasingM", (DL_FUNC) &_selectiveInference_find_one_row_debiasingM, 10}, + {"_selectiveInference_update1_", (DL_FUNC) &_selectiveInference_update1_, 4}, + {"_selectiveInference_downdate1_", (DL_FUNC) &_selectiveInference_downdate1_, 5}, + {NULL, NULL, 0} +}; + +RcppExport void R_init_selectiveInference(DllInfo *dll) { + R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} From 7de4f282d95671eaae3b714c04abe18f8d545596 Mon Sep 17 00:00:00 2001 From: kevinbfry Date: Tue, 3 Apr 2018 20:35:53 -0700 Subject: [PATCH 53/55] Trying stuff with full target logistic --- C-software/README.md | 3 + C-software/src/debias.h | 79 ++++ C-software/src/matrixcomps.c | 261 +++++++++++ C-software/src/matrixcomps.h | 12 + C-software/src/quadratic_program.c | 441 +++++++++++++++++ C-software/src/quadratic_program_wide.c | 597 ++++++++++++++++++++++++ C-software/src/randomized_lasso.c | 190 ++++++++ C-software/src/randomized_lasso.h | 42 ++ C-software/src/truncnorm.c | 188 ++++++++ selectiveInference/R/funs.fixedLogit.R | 101 ++-- 10 files changed, 1854 insertions(+), 60 deletions(-) create mode 100755 C-software/README.md create mode 100755 C-software/src/debias.h create mode 100755 C-software/src/matrixcomps.c create mode 100755 C-software/src/matrixcomps.h create mode 100755 C-software/src/quadratic_program.c create mode 100755 C-software/src/quadratic_program_wide.c create mode 100755 C-software/src/randomized_lasso.c create mode 100755 C-software/src/randomized_lasso.h create mode 100755 C-software/src/truncnorm.c diff --git a/C-software/README.md b/C-software/README.md new file mode 100755 index 0000000..f8644cc --- /dev/null +++ b/C-software/README.md @@ -0,0 +1,3 @@ +This repo contains C code relevant for selective inference: http://github.com/selective-inference + +This code is meant to be used by R, python, etc. for the basic operations needed for selective inference. \ No newline at end of file diff --git a/C-software/src/debias.h b/C-software/src/debias.h new file mode 100755 index 0000000..da27467 --- /dev/null +++ b/C-software/src/debias.h @@ -0,0 +1,79 @@ +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + +void multiply_by_2(double *X, int nval); + +int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ + double *linear_func_ptr, /* Linear term in objective */ + double *nndef_diag_ptr, /* Diagonal of nndef */ + double *gradient_ptr, /* nndef times theta */ + int *ever_active_ptr, /* Ever active set: 1-based */ + int *nactive_ptr, /* Size of ever active set */ + int nfeature, /* How many features in nndef */ + double bound, /* feasibility parameter */ + double *theta, /* current value */ + double *theta_old, /* previous value */ + int maxiter, /* max number of iterations */ + double kkt_tol, /* precision for checking KKT conditions */ + double objective_tol, /* precision for checking relative decrease in objective value */ + double parameter_tol, /* precision for checking relative convergence of parameter */ + int max_active, /* Upper limit for size of active set -- otherwise break */ + int kkt_stop, /* Break based on KKT? */ + int objective_stop, /* Break based on convergence of objective value? */ + int param_stop); /* Break based on parameter convergence? */ + +int check_KKT_qp(double *theta, /* current theta */ + double *gradient_ptr, /* nndef times theta + linear_func */ + int nrow, /* how many rows in nndef */ + double bound, /* Lagrange multipler for \ell_1 */ + double tol); /* precision for checking KKT conditions */ + +int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX/ncase = nndef */ + double *X_theta_ptr, /* Fitted values */ + double *linear_func_ptr, /* Linear term in objective */ + double *nndef_diag_ptr, /* Diagonal entries of non-neg def matrix */ + double *gradient_ptr, /* X^TX/ncase times theta + linear_func*/ + int *need_update_ptr, /* Keeps track of updated gradient coords */ + int *ever_active_ptr, /* Ever active set: 1-based */ + int *nactive_ptr, /* Size of ever active set */ + int ncase, /* How many rows in X */ + int nfeature, /* How many columns in X */ + double *bound_ptr, /* Lagrange multipliers */ + double ridge_term, /* Ridge / ENet term */ + double *theta_ptr, /* current value */ + double *theta_old_ptr, /* previous value */ + int maxiter, /* max number of iterations */ + double kkt_tol, /* precision for checking KKT conditions */ + double objective_tol, /* precision for checking relative decrease in objective value */ + double parameter_tol, /* precision for checking relative convergence of parameter */ + int max_active, /* Upper limit for size of active set -- otherwise break */ + int kkt_stop, /* Break based on KKT? */ + int objective_stop, /* Break based on convergence of objective value? */ + int param_stop); /* Break based on parameter convergence? */ + +int check_KKT_wide(double *theta_ptr, /* current theta */ + double *gradient_ptr, /* X^TX/ncase times theta + linear_func*/ + double *X_theta_ptr, /* Current fitted values */ + double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX/ncase = nndef */ + double *linear_func_ptr, /* Linear term in objective */ + int *need_update_ptr, /* Which coordinates need to be updated? */ + int nfeature, /* how many columns in X */ + int ncase, /* how many rows in X */ + double *bound_ptr, /* Lagrange multiplers for \ell_1 */ + double ridge_term, /* Ridge / ENet term */ + double tol); /* precision for checking KKT conditions */ + +void update_gradient_wide(double *gradient_ptr, /* X^TX/ncase times theta + linear_func */ + double *X_theta_ptr, /* Current fitted values */ + double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX/ncase = nndef */ + double *linear_func_ptr, /* Linear term in objective */ + int *need_update_ptr, /* Which coordinates need to be updated? */ + int nfeature, /* how many columns in X */ + int ncase); /* how many rows in X */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ diff --git a/C-software/src/matrixcomps.c b/C-software/src/matrixcomps.c new file mode 100755 index 0000000..bec3506 --- /dev/null +++ b/C-software/src/matrixcomps.c @@ -0,0 +1,261 @@ +#include +#include + +// Matrices are stored as vectors, in column-major order + +// Givens rotation of a and b, stored in c and s +void givens(double a, double b, double *c, double *s) { + if (b==0) { + *c = 1; + *s = 0; + } + else { + if (fabs(b)>fabs(a)) { + double t = -a/b; + *s = 1/sqrt(1+t*t); + *c = (*s)*t; + } + else { + double t = -b/a; + *c = 1/sqrt(1+t*t); + *s = (*c)*t; + } + } +} + +// Givens rotation applied to rows i1 and i2 of the m x n +// matrix A, on the subset of columns j1 through j2 +void rowrot(double *A, int i1, int i2, int m, int n, int j1, int j2, double c, double s) { + int j; + double t1,t2; + for (j=j1; j<=j2; j++) { + t1 = A[i1+j*m]; + t2 = A[i2+j*m]; + A[i1+j*m] = c*t1-s*t2; + A[i2+j*m] = s*t1+c*t2; + } +} + +// Givens rotation applied to columns j1 and j2 of the m x n +// matrix A, on the subset of rows i1 through i2 +void colrot(double *A, int j1, int j2, int m, int n, int i1, int i2, double c, double s) { + int i; + double t1,t2; + for (i=i1; i<=i2; i++) { + t1 = A[i+j1*m]; + t2 = A[i+j2*m]; + A[i+j1*m] = c*t1-s*t2; + A[i+j2*m] = s*t1+c*t2; + } +} + +// Downdate the QR factorization after deleting column j0, +// where Q1 is m x n and R is n x n. The other part of +// the Q matrix, Q2 m x (m-n), isn't needed so it isn't +// passed for efficiency +void downdate1(double *Q1, double *R, int j0, int m, int n) { + int j; + + double c,s; + for (j=j0+1; j=1; j--) { + // Compute the appropriate c and s + givens(w[j-1],w[j],&c,&s); + + // Pre-multiply w + rowrot(w,j-1,j,k,1,0,0,c,s); + + // Post-multiply Q2 + colrot(Q2,j-1,j,m,k,0,m-1,c,s); + } +} + +// Downdate the QR factorization after deleting the first row, +// where Q is m x m and R is m x n +void downdate2(double *Q, double *R, int *mp, int *np) { + int m,n,i; + m = *mp; + n = *np; + + double c,s; + for (i=m-1; i>=1; i--) { + // Compute the appropriate c and s + givens(Q[(i-1)*m],Q[i*m],&c,&s); + + // Post-mutiply Q + colrot(Q,i-1,i,m,m,0,m-1,c,s); + + // Pre-multiply R + if (i<=n) rowrot(R,i-1,i,m,n,i-1,n-1,c,s); + } +} + +// Update the QR factorization after adding the last row, +// where Q is m x m and R is m x n. For efficiency, Q is not +// passed, and only the first row of R is passed. Not counting +// its first row, the first q columns of R are zero +void update2(double *y, double *D, double *r, int *mp, int *np, int *qp) { + int m,n,q,j; + m = *mp; + n = *np; + q = *qp; + + double c,s; + for (j=0; j=0; i--) { + for (j=i; j=0; i--) { + for (j=i; j=0; i--) { + // Compute the appropriate c and s + givens(R[i+(i+q+1)*m2],R[i+(i+q)*m2],&c,&s); + + // Post-multiply R + colrot(R,i+q+1,i+q,m2,n,0,i,c,s); + + // Post-multiply D + colrot(A,i+q+1,i+q,m1,n,0,m1-1,c,s); + + // Pre-multiply y + rowrot(y,i+q+1,i+q,n,1,0,0,c,s); + } +} + +// Make the R factor upper triangular, by Givens rotating +// its columns and rows, appropriately. Here A is m1 x n, +// Q is m2 x m2, and R is m2 x n with rank(R) = n-q-1. The +// first q columns of R are zero. The kth row of R is the +// last row with a zero element on the diagonal +void maketri4(double *y, double *A, double *Q, double *R, int *m1p, int *m2p, int *np, int *qp, int *kp) { + int m1,m2,n,q,k,i,j; + m1 = *m1p; + m2 = *m2p; + n = *np; + q = *qp; + k = *kp; + + double c,s; + + // First rotate the columns + for (i=k-1; i>=0; i--) { + // Compute the appropriate c and s + givens(R[i+(i+q+1)*m2],R[i+(i+q)*m2],&c,&s); + + // Post-multiply R + colrot(R,i+q+1,i+q,m2,n,0,i,c,s); + + // Post-multiply D + colrot(A,i+q+1,i+q,m1,n,0,m1-1,c,s); + + // Pre-multiply y + rowrot(y,i+q+1,i+q,n,1,0,0,c,s); + } + + // Next rotate the rows + for (j=k+q+1; j // for fabs + +// Find an approximate row of \hat{nndef}^{-1} + +// Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf + +// Dual problem: \text{min}_{\theta} 1/2 \theta^T \Sigma \theta - l^T\theta + \mu \|\theta\|_1 +// where l is `linear_func` below + +// This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf +// Therefore we don't have to negate the answer to get theta. +// Update one coordinate + +double objective_qp(double *nndef_ptr, /* A non-negative definite matrix */ + double *linear_func_ptr, /* Linear term in objective */ + int *ever_active_ptr, /* Ever active set: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* how many rows in nndef */ + double bound, /* Lagrange multipler for \ell_1 */ + double *theta_ptr) /* current value */ +{ + int irow, icol; + double value = 0; + double *nndef_ptr_tmp = nndef_ptr; + double *linear_func_ptr_tmp = linear_func_ptr; + double *theta_row_ptr, *theta_col_ptr; + int *active_row_ptr, *active_col_ptr; + int active_row, active_col; + int nactive = *nactive_ptr; + + theta_row_ptr = theta_ptr; + theta_col_ptr = theta_ptr; + + for (irow=0; irow 0) && (fabs(gradient + bound) > tol * bound)) { + return(0); + } + else if ((*theta_ptr_tmp < 0) && (fabs(gradient - bound) > tol * bound)) { + return(0); + } + } + else { + if (fabs(gradient) > (1. + tol) * bound) { + return(0); + } + } + } + + return(1); +} + +int check_KKT_qp_active(int *ever_active_ptr, /* Ever active set: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + double *theta_ptr, /* current theta */ + double *gradient_ptr, /* nndef times theta + linear_func */ + int nfeature, /* how many features in nndef */ + double bound, /* Lagrange multipler for \ell_1 */ + double tol) /* precision for checking KKT conditions */ +{ + // First check inactive + + int iactive; + double *theta_ptr_tmp; + double gradient; + double *gradient_ptr_tmp; + int nactive = *nactive_ptr; + int active_feature; + int *active_feature_ptr; + + for (iactive=0; iactive 0) && (fabs(gradient + bound) > tol * bound)) { + return(0); + } + else if ((*theta_ptr_tmp < 0) && (fabs(gradient - bound) > tol * bound)) { + return(0); + } + + } + else { + if (fabs(gradient) > (1. + tol) * bound) { + return(0); + } + } + } + + return(1); +} + + +double update_one_coord_qp(double *nndef_ptr, /* A non-negative definite matrix */ + double *linear_func_ptr, /* Linear term in objective */ + double *nndef_diag_ptr, /* Diagonal of nndef */ + double *gradient_ptr, /* nndef times theta + linear_func */ + int *ever_active_ptr, /* Ever active set: 1-based */ + int *nactive_ptr, /* Size of ever active set */ + int nfeature, /* How many features in nndef */ + double bound, /* feasibility parameter */ + double *theta_ptr, /* current value */ + int coord, /* which coordinate to update: 0-based */ + int is_active) /* Is this coord in ever_active */ +{ + + double delta; + double linear_term = 0; + double value = 0; + double old_value; + double *nndef_ptr_tmp; + double *gradient_ptr_tmp; + double *theta_ptr_tmp; + int icol = 0; + + double *quadratic_ptr = ((double *) nndef_diag_ptr + coord); + double quadratic_term = *quadratic_ptr; + + gradient_ptr_tmp = ((double *) gradient_ptr + coord); + linear_term = *gradient_ptr_tmp; + + theta_ptr_tmp = ((double *) theta_ptr + coord); + old_value = *theta_ptr_tmp; + + // The coord entry of gradient_ptr term has a diagonal term in it: + // nndef[coord, coord] * theta[coord] + // This removes it. + + linear_term -= quadratic_term * old_value; + + // Now soft-threshold the coord entry of theta + + // Objective is t \mapsto q/2 * t^2 + l * t + bound |t| + // with q=quadratic_term and l=linear_term + + // With a negative linear term, solution should be + // positive + + if (linear_term < -bound) { + value = (-linear_term - bound) / quadratic_term; + } + else if (linear_term > bound) { + value = -(linear_term - bound) / quadratic_term; + } + + // Add to active set if necessary + + if ((is_active == 0) && (value != 0)) { + update_ever_active_qp(coord, ever_active_ptr, nactive_ptr); + } + + // Update the linear term + + if (fabs(old_value - value) > 1.e-6 * (fabs(value) + fabs(old_value))) { + + delta = value - old_value; + nndef_ptr_tmp = ((double *) nndef_ptr + coord * nfeature); + gradient_ptr_tmp = ((double *) gradient_ptr); + + for (icol=0; icol 0)) { + break; + } + old_value = new_value; + } + + } + + // Check size of active set + + if (*nactive_ptr >= max_active) { + break; + } + + } + return(iter); +} + diff --git a/C-software/src/quadratic_program_wide.c b/C-software/src/quadratic_program_wide.c new file mode 100755 index 0000000..ce7284b --- /dev/null +++ b/C-software/src/quadratic_program_wide.c @@ -0,0 +1,597 @@ +#include // for fabs + +// Find an approximate row of \hat{nndef}^{-1} + +// Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf + +// Dual problem: \text{min}_{\theta} 1/2 \|X\theta\|^2/n + l^T\theta + \mu \|\theta\|_1 + \frac{\epsilon}{2} \|\theta\|^2_2 +// where l is `linear_func` below + +// This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf +// Therefore we don't have to negate the answer to get theta. +// Update one coordinate + +// Throughout X is a design matrix + +double objective_wide(double *X_theta_ptr, /* Fitted values */ + double *linear_func_ptr, /* Linear term in objective */ + int *ever_active_ptr, /* Ever active set: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + int ncase, /* how many rows in X */ + int nfeature, /* how many columns in X */ + double *bound_ptr, /* Lagrange multiplers for \ell_1 */ + double ridge_term, /* Ridge / ENet term */ + double *theta_ptr) /* current value */ +{ + int icase, iactive; + double value = 0; + double *bound_ptr_tmp; + double *X_theta_ptr_tmp = X_theta_ptr; + double *linear_func_ptr_tmp = linear_func_ptr; + double *theta_ptr_tmp; + int *active_feature_ptr; + int active_feature; + int nactive = *nactive_ptr; + + // The term \|X\theta\|^2_2/n, with n=ncase + + for (icase=0; icase 0) && (fabs(gradient + ridge_term * (*theta_ptr_tmp) + bound) > tol * bound)) { + return(0); + } + else if ((*theta_ptr_tmp < 0) && (fabs(gradient + ridge_term * (*theta_ptr_tmp) - bound) > tol * bound)) { + return(0); + } + + } + else if (bound != 0) { + if (fabs(gradient) > (1. + tol) * bound) { + return(0); + } + } + } + + return(1); +} + +int check_KKT_wide_active(int *ever_active_ptr, /* Ever active set: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + double *theta_ptr, /* current theta */ + double *gradient_ptr, /* X^TX/ncase times theta + linear_func*/ + double *X_theta_ptr, /* Current fitted values */ + double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX/ncase = nndef */ + double *linear_func_ptr, /* Linear term in objective */ + int *need_update_ptr, /* Which coordinates need to be updated? */ + int ncase, /* how many rows in X */ + int nfeature, /* how many columns in X */ + double *bound_ptr, /* Lagrange multipliers for \ell_1 */ + double ridge_term, /* Ridge / ENet term */ + double tol) /* precision for checking KKT conditions */ +{ + // First check inactive + + int iactive; + double *theta_ptr_tmp; + double *bound_ptr_tmp; + double bound; + double gradient; + int nactive = *nactive_ptr; + int active_feature; + int *active_feature_ptr; + + for (iactive=0; iactive 0) && (fabs(gradient + ridge_term * (*theta_ptr_tmp) + bound) > tol * bound)) { + return(0); + } + else if ((*theta_ptr_tmp < 0) && (fabs(gradient + ridge_term * (*theta_ptr_tmp) - bound) > tol * bound)) { + return(0); + } + + } + else if (bound != 0) { + if (fabs(gradient) > (1. + tol) * bound) { + return(0); + } + } + } + + return(1); +} + +double update_one_coord_wide(double *X_ptr, /* A design matrix*/ + double *linear_func_ptr, /* Linear term in objective */ + double *nndef_diag_ptr, /* Diagonal of nndef */ + double *gradient_ptr, /* X^TX/ncase times theta + linear_func*/ + int *ever_active_ptr, /* Ever active set: 1-based */ + int *nactive_ptr, /* Size of ever active set */ + double *X_theta_ptr, /* X\theta -- fitted values */ + int *need_update_ptr, /* Whether a gradient coordinate needs update or not */ + int ncase, /* How many rows in X */ + int nfeature, /* How many rows in X */ + double *bound_ptr, /* Lagrange multipliers */ + double ridge_term, /* Ridge / ENet term */ + double *theta_ptr, /* current value */ + int coord, /* which coordinate to update: 0-based */ + int is_active) /* Is this coord in ever_active */ +{ + + double delta; + double linear_term = 0; + double value = 0; + double old_value; + double *X_ptr_tmp; + double *X_theta_ptr_tmp; + int *need_update_ptr_tmp; + double *theta_ptr_tmp; + double *bound_ptr_tmp; + double bound; + int ifeature, icase; + + double *diagonal_ptr = ((double *) nndef_diag_ptr + coord); + double diagonal_entry = *diagonal_ptr; + + linear_term = compute_gradient_coord(gradient_ptr, X_theta_ptr, X_ptr, linear_func_ptr, need_update_ptr, coord, ncase, nfeature); + + theta_ptr_tmp = ((double *) theta_ptr + coord); + old_value = *theta_ptr_tmp; + + bound_ptr_tmp = ((double *) bound_ptr + coord); + bound = *bound_ptr_tmp; + + // The coord entry of gradient_ptr term has a diagonal term in it: + // (X^TX)[coord, coord] * theta[coord] / ncase + // This removes it. + + linear_term -= diagonal_entry * old_value; + + // Now soft-threshold the coord entry of theta + + // Objective is t \mapsto (q+eps)/2 * t^2 + l * t + bound |t| + + // with q=diagonal_entry and l=linear_term and eps=ridge_Term + + // With a negative linear term, solution should be + // positive + + if (linear_term < -bound) { + value = (-linear_term - bound) / (diagonal_entry + ridge_term); + } + else if (linear_term > bound) { + value = -(linear_term - bound) / (diagonal_entry + ridge_term); + } + + // Add to active set if necessary + + if ((is_active == 0) && (value != 0)) { + update_ever_active_wide(coord, ever_active_ptr, nactive_ptr); + } + + // Update X\theta if changed + + if (fabs(old_value - value) > 1.e-6 * (fabs(value) + fabs(old_value))) { + + // Set the update_gradient_ptr to 1 + + need_update_ptr_tmp = need_update_ptr; + for (ifeature=0; ifeature 0)) { + break; + } + old_value = new_value; + } + } + + // Check size of active set + + if (*nactive_ptr >= max_active) { + break; + } + + } + return(iter); +} + diff --git a/C-software/src/randomized_lasso.c b/C-software/src/randomized_lasso.c new file mode 100755 index 0000000..1f396b1 --- /dev/null +++ b/C-software/src/randomized_lasso.c @@ -0,0 +1,190 @@ +#include // for fabs + +// Augmented density for randomized LASSO after +// Gaussian randomization + +// Described in https://arxiv.org/abs/1609.05609 + +// Gaussian is product of IID N(0, noise_scale^2) density +// Evaluated at A_D D + A_O O + h + +// Laplace is product of IID Laplace with scale noise_scale +// Also evaluated at A_D D + A_O O + h + +// Matrices are assumed in column major order! + +double log_density_gaussian(double noise_scale, // Scale of randomization + int ndim, // Number of features -- "p" + int ninternal, // Dimension of internal data representation often 1 + int noptimization, // Dimension of optimization variables -- "p" + double *internal_linear, // A_D -- linear part for data + double *internal_state, // D -- data state + double *optimization_linear, // A_O -- linear part for optimization variables + double *optimization_state, // O -- optimization state + double *offset) // h -- offset in affine transform -- "p" dimensional +{ + int irow, icol; + double denom = 2 * noise_scale * noise_scale; + double value = 0; + double reconstruction = 0; + double *offset_ptr; + double *internal_linear_ptr; + double *internal_state_ptr; + double *optimization_linear_ptr; + double *optimization_state_ptr; + + for (irow=0; irow +#include + +// Take a Gibbs hit and run step along a given direction + +// Assumes the covariance is identity + +void gibbs_step(double *state, /* state has law N(0,I) constrained to polyhedral set \{y:Ay \leq b\}*/ + double *direction, /* direction we will take Gibbs step */ + double *U, /* A %*% state - b */ + double *alpha, /* A %*% direction */ + int nconstraint, /* number of rows of A */ + int nstate) /* dimension of state */ +{ + + int istate; + double value = 0; + + /* Compute V=\eta^Ty */ + + for (istate = 0; istate < nstate; istate++) { + value += direction[istate] * state[istate]; + } + + /* Compute upper and lower bounds */ + + double lower_bound = -1e12; + double upper_bound = 1e12; + double bound_val = 0; + double tol=1.e-7; + int iconstraint; + + for (iconstraint = 0; iconstraint < nconstraint; iconstraint++) { + + bound_val = -U[iconstraint] / alpha[iconstraint] + value; + + if ((alpha[iconstraint] > tol) && + (bound_val < upper_bound)) { + upper_bound = bound_val; + } + else if ((alpha[iconstraint] < -tol) && + (bound_val > lower_bound)) { + lower_bound = bound_val; + } + + } + + /* Ensure constraints are satisfied */ + + if (lower_bound > value) { + lower_bound = value - tol; + } + else if (upper_bound < value) { + upper_bound = value + tol; + } + + /* Check to see if constraints are satisfied */ + + /* if (lower_bound > upper_bound) { + + }*/ + + /* Now, take a step */ + + double tnorm; /* the 1D gaussian variable */ + double cdfU, cdfL, unif; /* temp variables */ + + if (upper_bound < -10) { + + /* use Exp approximation */ + /* the approximation is that */ + /* Z | lower_bound < Z < upper_bound */ + /* is fabs(upper_bound) * (upper_bound - Z) = E approx Exp(1) */ + /* so Z = upper_bound - E / fabs(upper_bound) */ + /* and the truncation of the exponential is */ + /* E < fabs(upper_bound - lower_bound) * fabs(upper_bound) = D */ + + /* this has distribution function (1 - exp(-x)) / (1 - exp(-D)) */ + /* so to draw from this distribution */ + /* we set E = - log(1 - U * (1 - exp(-D))) where U is Unif(0,1) */ + /* and Z (= tnorm below) is as stated */ + + unif = runif(0., 1.) * (1 - exp(-fabs((lower_bound - upper_bound) * upper_bound))); + tnorm = (upper_bound + log(1 - unif) / fabs(upper_bound)); + } + else if (lower_bound > 10) { + + /* here Z = lower_bound + E / fabs(lower_bound) (though lower_bound is positive) */ + /* and D = fabs((upper_bound - lower_bound) * lower_bound) */ + + unif = runif(0., 1.) * (1 - exp(-fabs((upper_bound - lower_bound) * lower_bound))); + tnorm = (lower_bound - log(1 - unif) / lower_bound); + } + else if (lower_bound < 0) { + cdfL = pnorm(lower_bound, 0., 1., 1, 0); + cdfU = pnorm(upper_bound, 0., 1., 1, 0); + unif = runif(0., 1.) * (cdfU - cdfL) + cdfL; + if (unif < 0.5) { + tnorm = qnorm(unif, 0., 1., 1, 0); + } + else { + tnorm = -qnorm(1-unif, 0., 1., 1, 0); + } + } + else { + cdfL = pnorm(-lower_bound, 0., 1., 1, 0); + cdfU = pnorm(-upper_bound, 0., 1., 1, 0); + unif = runif(0., 1.) * (cdfL - cdfU) + cdfU; + if (unif < 0.5) { + tnorm = -qnorm(unif, 0., 1., 1, 0); + } + else { + tnorm = qnorm(1-unif, 0., 1., 1, 0); + } + } + + /* Now update the state and U */ + + double delta = tnorm - value; + + for (istate = 0; istate < nstate; istate++) { + state[istate] += delta * direction[istate]; + } + for (iconstraint = 0; iconstraint < nconstraint; iconstraint++) { + U[iconstraint] += delta * alpha[iconstraint] ; + } + + /* End of gibbs_step */ + +} + +void sample_truncnorm_white(double *state, /* state has law N(0,I) constrained to polyhedral set \{y:Ay \leq b\}*/ + double *U, /* A %*% state - b */ + double *directions, /* possible steps for sampler to take */ + /* assumed to be stored as list of columns of dimension nstate */ + /* has shape (nstate, ndirection) */ + double *alphas, /* The matrix A %*% directions */ + /* has shape (nconstraint, ndirection) */ + double *output, /* array in which to store samples */ + /* assumed will stored as list of vectors of dimension nstate */ + /* has shape (nstate, ndraw) */ + int *pnconstraint, /* number of rows of A */ + int *pndirection, /* the possible number of directions to choose from */ + /* `directions` should have size nstate*ndirection */ + int *pnstate, /* dimension of state */ + int *pburnin, /* number of burnin steps */ + int *pndraw) /* total number of samples to return */ +{ + + int iter_count; + int which_direction; + + int nconstraint = *pnconstraint; + int ndirection = *pndirection; + int nstate = *pnstate; + int burnin = *pburnin; + int ndraw = *pndraw; + + double *direction, *alpha; + + for (iter_count = 0; iter_count < burnin + ndraw; iter_count++) { + + which_direction = (int) floor(runif(0., 1.) * ndirection); + direction = ((double *) directions) + nstate * which_direction; + alpha = ((double *) alphas) + nconstraint * which_direction; + + /* take a step, which implicitly updates `state` and `U` */ + + gibbs_step(state, + direction, + U, + alpha, + nconstraint, + nstate); + + /* Store result if after burnin */ + + int istate; + if (iter_count >= burnin) { + for (istate = 0; istate < nstate; istate++) { + *output = state[istate]; + output++; + } + } + } + +} + diff --git a/selectiveInference/R/funs.fixedLogit.R b/selectiveInference/R/funs.fixedLogit.R index 90bce1c..230110b 100644 --- a/selectiveInference/R/funs.fixedLogit.R +++ b/selectiveInference/R/funs.fixedLogit.R @@ -85,10 +85,12 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial","full"), hsigma <- 1/n*(t(Xordered)%*%w%*%Xordered) M <- matrix(InverseLinfty(hsigma,n,dim(xm)[2],verbose=F,max.try=10),ncol=p+1)[-1,] # remove intercept row + # I <- matrix(diag(dim(xm)[2]),nrow=dim(xm)[2]) I <- matrix(diag(dim(xm)[2])[-1,],nrow=dim(xm)[2]-1) if (is.null(dim(M))) Msubset <- M[-c(1,vars+1)] else Msubset <- M[,-c(1,vars+1)] - Msubset = matrix(Msubset,nrow=dim(xm)[2]-1) + # Msubset = matrix(Msubset,nrow=dim(xm)[2]) + # Msubset = matrix(Msubset,nrow=dim(xm)[2]-1) # print("*********") # print(Msubset) # print(I) @@ -98,81 +100,60 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial","full"), c <- matrix(c(gm,t(xnotm)%*%w%*%xm%*%(-dbeta)),ncol=1) d <- -dbeta[-1] # remove intercept + # d <- matrix(-dbeta,ncol=1) # remove intercept + # d <- matrix(-dbeta*lambda,ncol=1) # remove intercept + # print(dim(xnotm)) + # print(dim(w)) + # print(dim(xm)) + # c <- matrix(c(gm,t(xnotm)%*%w%*%xm%*%d),ncol=1) + # c <- matrix(c(gm[-1],t(xnotm)%*%w%*%xm%*%d),ncol=1) + # d <- d[-1,] + + # print(dim(c)) + # print(dim(M)) + # print(length(d)) + null_value = -(M%*%c/sqrt(n) - d) + # null_value = -(M[,-1]%*%c/sqrt(n) - d) + + # A0 = (t(xnotm)%*%w%*%xm)/lambda + # A0 = cbind(A0,matrix(0,nrow(A0),ncol(xnotm))) + # A0 = rbind(A0,-A0) + # + # b0 = matrix(t(xnotm)%*%w%*%(z/lambda+xm%*%MM%*%gm),ncol=1) + # + # + # print("------") + # print(dim(A0)) + # print(dim(b0)) + # print(length(b1)) + + + # b1 = rbind(1+b0,1-b0,matrix(b1,ncol=1)) A0 = matrix(0,ncol(xnotm),ncol(A1)) A0 = cbind(A0,diag(nrow(A0))) - fill = matrix(0,nrow(A1),nrow(A0)) + fill = matrix(0,nrow(A1),ncol(xnotm)) A1 = cbind(A1,fill) A1 = rbind(A1,A0,-A0) b1 = matrix(c(b1,rep(lambda,2*nrow(A0))),ncol=1) # full covariance - MMbr = t(xnotm)%*%w%*%xnotm - t(xnotm)%*%w%*%xm%*%MM%*%t(xm)%*%w%*%xnotm + MMbr = (t(xnotm)%*%w%*%xnotm - t(xnotm)%*%w%*%xm%*%MM%*%t(xm)%*%w%*%xnotm) # matrix(0,ncol(xnotm),ncol(xnotm)) MM = cbind(MM,matrix(0,nrow(MM),ncol(MMbr))) MMbr = cbind(matrix(0,nrow(MMbr),nrow(MM)),MMbr) + # print(dim(MM)) + # print(dim(MMbr)) MM = rbind(MM,MMbr) - - # # pairs bootstrap estimate of full covariance - # boot.vec <- function(data, indices, bbar) { - # sample=data[indices,-1] - # y=data[indices,1] - # xa=sample[,1:length(bbar)] - # xnota=sample[,-c(1:length(bbar))] - # # print(dim(xnota)) - # - # return(c(bbar,t(xnota)%*%(y-xa%*%bbar))) - # # return(c(t(xnota)%*%(y-xa%*%bbar))) - # } - # - # R=100 - # boot.obj=boot(cbind(y,Xordered),boot.vec,R,parallel="multicore",bbar=bbar) - # boot.est=boot.obj$t - # # print(dim(boot.est)) - # boot.mean=colMeans(boot.est) - # boot.diff=t(boot.est-boot.mean) - # temp=apply(boot.diff,2,function(vec){vec=matrix(vec,ncol=1);return(vec%*%t(vec))}) - # term=array(0,dim=c(p+1,p+1,R)) - # for(i in 1:R) { - # term[,,i]=matrix(temp[,i],p+1,p+1) - # } - # boot.cov = apply(term,1:2,function(x){Reduce("+",x)})/(R-1) - # boot.cov[1:dim(MM)[1],1:dim(MM)[2]]=MM - # MM=boot.cov - # print(dim(boot.cov)) - - # MM = cbind(MM,matrix(0,nrow(MM),ncol(boot.cov))) - # boot.cov = cbind(matrix(0,nrow(boot.cov),nrow(MM)),boot.cov) - # MM = rbind(MM,boot.cov) - - # # jacknife estimate of covariance - # jk.vec <- function(idx, Xordered, bbar) { - # sample=Xordered[-idx,] - # y=y[-idx] - # xa=sample[,1:length(bbar)] - # xnota=sample[,-c(1:length(bbar))] - # - # return(c(bbar,t(xnota)%*%(y-xa%*%bbar))) - # } - # - # jk.est = matrix(0,p+1,p+1) - # for(i in 1:n) { - # jk.est[i,]=jk.vec(i,Xordered,bbar) - # } - # jk.mean=colMeans(jk.est) - # jk.diff=t(jk.est-jk.mean) - # temp=apply(jk.diff,2,function(vec){vec=matrix(vec,ncol=1);return(vec%*%t(vec))}) - # term=array(0,dim=c(p+1,p+1,n)) - # for(i in 1:n) { - # term[,,i]=matrix(temp[,i],p+1,p+1) - # } - # jk.cov = (n-1)*apply(term,1:2,function(x){Reduce("+",x)})/n - # jk.cov[1:dim(MM)[1],1:dim(MM)[2]]=MM - # MM=jk.cov gnotm = g[-vars]*lambda bbar = matrix(c(bbar,gnotm),ncol=1) + + # print(dim(A1)) + # print(dim(b1)) + # print(dim(bbar)) + # print(dim(V)) } From bd12151730fde3769c44e63f6fbde1de2aafe33c Mon Sep 17 00:00:00 2001 From: kevinbfry Date: Tue, 3 Apr 2018 21:40:59 -0700 Subject: [PATCH 54/55] Removed comments --- selectiveInference/R/funs.fixed.R | 55 +------------------------------ 1 file changed, 1 insertion(+), 54 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 72cefdc..8ac3a8c 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -107,7 +107,7 @@ fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","co "glmnet with a lower setting of the", "'thresh' parameter, for a more accurate convergence.")) - MM = pinv(crossprod(xm))*sigma^2 + MM = pinv(crossprod(xm))/sigma^2 # gradient at LASSO solution, first entry is 0 because intercept is unpenalized # at exact LASSO solution it should be s2[-1] if (intercept == T) gm = c(0,-g[vars]*lambda) @@ -117,8 +117,6 @@ fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","co bbar = bhat - dbeta - # print(length(bhat)) - if (intercept == T) { A1 = -(mydiag(s2))[-1,] b1 = (s2*dbeta)[-1] @@ -165,57 +163,6 @@ fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","co MM = cbind(MM,matrix(0,nrow(MM),ncol(MMbr))) MMbr = cbind(matrix(0,nrow(MMbr),nrow(MM)),MMbr) MM = rbind(MM,MMbr) - - # # pairs bootstrap estimate of full covariance - # boot.vec <- function(data, indices, bbar) { - # sample=data[indices,-1] - # y=data[indices,1] - # xa=sample[,1:length(bbar)] - # xnota=sample[,-c(1:length(bbar))] - # - # return(c(bbar,t(xnota)%*%(y-xa%*%bbar))) - # } - # - # R=100 - # boot.obj=boot(cbind(y,Xordered),boot.vec,R,parallel="multicore",bbar=bbar) - # boot.est=boot.obj$t - # boot.mean=colMeans(boot.est) - # boot.diff=t(boot.est-boot.mean) - # temp=apply(boot.diff,2,function(vec){vec=matrix(vec,ncol=1);return(vec%*%t(vec))}) - # term=array(0,dim=c(p+intercept,p+intercept,R)) - # for(i in 1:R) { - # term[,,i]=matrix(temp[,i],p+intercept,p+intercept) - # } - # boot.cov = apply(term,1:2,function(x){Reduce("+",x)})/(R-1) - # boot.cov[1:dim(MM)[1],1:dim(MM)[2]]=MM - # MM=boot.cov - - # # jacknife estimate of covariance - # jk.vec <- function(idx, Xordered, bbar) { - # sample=Xordered[-idx,] - # y=y[-idx] - # xa=sample[,1:length(bbar)] - # xnota=sample[,-c(1:length(bbar))] - # - # return(c(bbar,t(xnota)%*%(y-xa%*%bbar))) - # } - # - # jk.est = matrix(0,p+intercept,p+intercept) - # for(i in 1:n) { - # jk.est[i,]=jk.vec(i,Xordered,bbar) - # } - # jk.mean=colMeans(jk.est) - # jk.diff=t(jk.est-jk.mean) - # temp=apply(jk.diff,2,function(vec){vec=matrix(vec,ncol=1);return(vec%*%t(vec))}) - # term=array(0,dim=c(p+intercept,p+intercept,n)) - # for(i in 1:n) { - # term[,,i]=matrix(temp[,i],p+intercept,p+intercept) - # } - # jk.cov = (n-1)*apply(term,1:2,function(x){Reduce("+",x)})/n - # jk.cov[1:dim(MM)[1],1:dim(MM)[2]]=MM - # MM=jk.cov - - gnotm = g[-vars]*lambda bbar = matrix(c(bbar,gnotm),ncol=1) From 9112cc285662613f57a1187973a510cddb0f292e Mon Sep 17 00:00:00 2001 From: kevinbfry Date: Wed, 4 Apr 2018 08:40:49 -0700 Subject: [PATCH 55/55] Small cleanup for funs.fixed.R --- selectiveInference/R/funs.fixed.R | 46 ++++++++++--------------------- 1 file changed, 14 insertions(+), 32 deletions(-) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 8ac3a8c..d0435c8 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -54,24 +54,16 @@ fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","co n = nrow(x) p = ncol(x) beta = as.numeric(beta) - if (intercept == F & length(beta) != p) stop("Since intercept=FALSE, beta must have length equal to ncol(x)") - if (intercept == T & length(beta) != p+1) stop("Since intercept=TRUE, beta must have length equal to ncol(x)+1") + # if (intercept == F & length(beta) != p) stop("Since intercept=FALSE, beta must have length equal to ncol(x)") + # if (intercept == T & length(beta) != p+1) stop("Since intercept=TRUE, beta must have length equal to ncol(x)+1") - - if (intercept == T) { - bbeta = beta[-1] - # m=beta[-1]!=0 #active set - vars = which(abs(bbeta) > tol.beta * sqrt(n / colSums(x^2))) - xm=cbind(1,x[,vars]) - bhat=c(beta[1],bbeta[vars]) # intcpt plus active vars - } else { - bbeta = beta - # m=beta!=0 #active set - vars = which(abs(bbeta) > tol.beta * sqrt(n / colSums(x^2))) - bhat=bbeta[vars] # active vars - xm=x[,vars] - } + bbeta = beta + # m=beta!=0 #active set + vars = which(abs(bbeta) > tol.beta * sqrt(n / colSums(x^2))) + bhat=bbeta[vars] # active vars + xm=matrix(x[,vars],ncol=length(vars)) + xnotm=x[,-vars] # vars = which(abs(bbeta) > tol.beta / sqrt(colSums(x^2))) @@ -96,7 +88,6 @@ fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","co #check KKT g = t(x)%*%(y-xm%*%bhat)/lambda # negative gradient scaled by lambda - # print(g[which(abs(g)>1)]) if (any(abs(g) > 1+tol.kkt) ) warning(paste("Solution beta does not satisfy the KKT conditions", "(to within specified tolerances)")) @@ -110,27 +101,19 @@ fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","co MM = pinv(crossprod(xm))/sigma^2 # gradient at LASSO solution, first entry is 0 because intercept is unpenalized # at exact LASSO solution it should be s2[-1] - if (intercept == T) gm = c(0,-g[vars]*lambda) - else gm = -g[vars]*lambda + gm = -g[vars]*lambda dbeta = MM%*%gm bbar = bhat - dbeta - if (intercept == T) { - A1 = -(mydiag(s2))[-1,] - b1 = (s2*dbeta)[-1] - V = diag(length(bbar))[-1,] - } else { - A1 = -(mydiag(s2)) - b1 = (s2*dbeta) - V = diag(length(bbar)) - } + A1 = -(mydiag(s2)) + b1 = (s2*dbeta) + V = diag(length(bbar)) null_value = rep(0,nvar) - # if (p > n) { - if (type=="full") { + if (p > n && type=="full") { # Reorder so that active set is first @@ -166,8 +149,7 @@ fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","co gnotm = g[-vars]*lambda bbar = matrix(c(bbar,gnotm),ncol=1) - } - # } + } # Check polyhedral region tol.poly = 0.01