diff --git a/.travis.yml b/.travis.yml index aa61233..21270e0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,8 +7,10 @@ r: - devel addons: apt: - packages: libmpfr-dev + packages: libmpfr-dev warnings_are_errors: true before_install: - tlmgr install index # for texlive and vignette? + - R -e 'install.packages("Rcpp", repos="http://cloud.r-project.org")' + - make Rcpp - cd selectiveInference diff --git a/C-software/README.md b/C-software/README.md new file mode 100755 index 0000000..f8644cc --- /dev/null +++ b/C-software/README.md @@ -0,0 +1,3 @@ +This repo contains C code relevant for selective inference: http://github.com/selective-inference + +This code is meant to be used by R, python, etc. for the basic operations needed for selective inference. \ No newline at end of file diff --git a/C-software/src/debias.h b/C-software/src/debias.h new file mode 100755 index 0000000..da27467 --- /dev/null +++ b/C-software/src/debias.h @@ -0,0 +1,79 @@ +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + +void multiply_by_2(double *X, int nval); + +int solve_qp(double *nndef_ptr, /* A non-negative definite matrix */ + double *linear_func_ptr, /* Linear term in objective */ + double *nndef_diag_ptr, /* Diagonal of nndef */ + double *gradient_ptr, /* nndef times theta */ + int *ever_active_ptr, /* Ever active set: 1-based */ + int *nactive_ptr, /* Size of ever active set */ + int nfeature, /* How many features in nndef */ + double bound, /* feasibility parameter */ + double *theta, /* current value */ + double *theta_old, /* previous value */ + int maxiter, /* max number of iterations */ + double kkt_tol, /* precision for checking KKT conditions */ + double objective_tol, /* precision for checking relative decrease in objective value */ + double parameter_tol, /* precision for checking relative convergence of parameter */ + int max_active, /* Upper limit for size of active set -- otherwise break */ + int kkt_stop, /* Break based on KKT? */ + int objective_stop, /* Break based on convergence of objective value? */ + int param_stop); /* Break based on parameter convergence? */ + +int check_KKT_qp(double *theta, /* current theta */ + double *gradient_ptr, /* nndef times theta + linear_func */ + int nrow, /* how many rows in nndef */ + double bound, /* Lagrange multipler for \ell_1 */ + double tol); /* precision for checking KKT conditions */ + +int solve_wide(double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX/ncase = nndef */ + double *X_theta_ptr, /* Fitted values */ + double *linear_func_ptr, /* Linear term in objective */ + double *nndef_diag_ptr, /* Diagonal entries of non-neg def matrix */ + double *gradient_ptr, /* X^TX/ncase times theta + linear_func*/ + int *need_update_ptr, /* Keeps track of updated gradient coords */ + int *ever_active_ptr, /* Ever active set: 1-based */ + int *nactive_ptr, /* Size of ever active set */ + int ncase, /* How many rows in X */ + int nfeature, /* How many columns in X */ + double *bound_ptr, /* Lagrange multipliers */ + double ridge_term, /* Ridge / ENet term */ + double *theta_ptr, /* current value */ + double *theta_old_ptr, /* previous value */ + int maxiter, /* max number of iterations */ + double kkt_tol, /* precision for checking KKT conditions */ + double objective_tol, /* precision for checking relative decrease in objective value */ + double parameter_tol, /* precision for checking relative convergence of parameter */ + int max_active, /* Upper limit for size of active set -- otherwise break */ + int kkt_stop, /* Break based on KKT? 
*/ + int objective_stop, /* Break based on convergence of objective value? */ + int param_stop); /* Break based on parameter convergence? */ + +int check_KKT_wide(double *theta_ptr, /* current theta */ + double *gradient_ptr, /* X^TX/ncase times theta + linear_func*/ + double *X_theta_ptr, /* Current fitted values */ + double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX/ncase = nndef */ + double *linear_func_ptr, /* Linear term in objective */ + int *need_update_ptr, /* Which coordinates need to be updated? */ + int nfeature, /* how many columns in X */ + int ncase, /* how many rows in X */ + double *bound_ptr, /* Lagrange multiplers for \ell_1 */ + double ridge_term, /* Ridge / ENet term */ + double tol); /* precision for checking KKT conditions */ + +void update_gradient_wide(double *gradient_ptr, /* X^TX/ncase times theta + linear_func */ + double *X_theta_ptr, /* Current fitted values */ + double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX/ncase = nndef */ + double *linear_func_ptr, /* Linear term in objective */ + int *need_update_ptr, /* Which coordinates need to be updated? */ + int nfeature, /* how many columns in X */ + int ncase); /* how many rows in X */ + + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ diff --git a/C-software/src/matrixcomps.c b/C-software/src/matrixcomps.c new file mode 100755 index 0000000..bec3506 --- /dev/null +++ b/C-software/src/matrixcomps.c @@ -0,0 +1,261 @@ +#include +#include + +// Matrices are stored as vectors, in column-major order + +// Givens rotation of a and b, stored in c and s +void givens(double a, double b, double *c, double *s) { + if (b==0) { + *c = 1; + *s = 0; + } + else { + if (fabs(b)>fabs(a)) { + double t = -a/b; + *s = 1/sqrt(1+t*t); + *c = (*s)*t; + } + else { + double t = -b/a; + *c = 1/sqrt(1+t*t); + *s = (*c)*t; + } + } +} + +// Givens rotation applied to rows i1 and i2 of the m x n +// matrix A, on the subset of columns j1 through j2 +void rowrot(double *A, int i1, int i2, int m, int n, int j1, int j2, double c, double s) { + int j; + double t1,t2; + for (j=j1; j<=j2; j++) { + t1 = A[i1+j*m]; + t2 = A[i2+j*m]; + A[i1+j*m] = c*t1-s*t2; + A[i2+j*m] = s*t1+c*t2; + } +} + +// Givens rotation applied to columns j1 and j2 of the m x n +// matrix A, on the subset of rows i1 through i2 +void colrot(double *A, int j1, int j2, int m, int n, int i1, int i2, double c, double s) { + int i; + double t1,t2; + for (i=i1; i<=i2; i++) { + t1 = A[i+j1*m]; + t2 = A[i+j2*m]; + A[i+j1*m] = c*t1-s*t2; + A[i+j2*m] = s*t1+c*t2; + } +} + +// Downdate the QR factorization after deleting column j0, +// where Q1 is m x n and R is n x n. 
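An aside on the rotation routines above: `givens` picks c and s so that the rotation zeroes the second coordinate of (a, b), dividing by the larger-magnitude entry to avoid overflow. A minimal R transcription of `givens` and `rowrot`, useful for sanity-checking the identity (names mirror the C functions):

```
givens <- function(a, b) {
  # rotation (cs, sn) chosen so the second coordinate of (a, b) is zeroed
  if (b == 0) return(c(cs = 1, sn = 0))
  if (abs(b) > abs(a)) {
    t <- -a / b; sn <- 1 / sqrt(1 + t^2); c(cs = sn * t, sn = sn)
  } else {
    t <- -b / a; cs <- 1 / sqrt(1 + t^2); c(cs = cs, sn = cs * t)
  }
}

rowrot <- function(A, i1, i2, g) {
  # apply the rotation to rows i1 and i2 of A (1-based, all columns)
  t1 <- A[i1, ]; t2 <- A[i2, ]
  A[i1, ] <- g["cs"] * t1 - g["sn"] * t2
  A[i2, ] <- g["sn"] * t1 + g["cs"] * t2
  A
}

g <- givens(3, 4)
rowrot(matrix(c(3, 4), nrow = 2), 1, 2, g)  # second row becomes 0
```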
The other part of +// the Q matrix, Q2 m x (m-n), isn't needed so it isn't +// passed for efficiency +void downdate1(double *Q1, double *R, int j0, int m, int n) { + int j; + + double c,s; + for (j=j0+1; j=1; j--) { + // Compute the appropriate c and s + givens(w[j-1],w[j],&c,&s); + + // Pre-multiply w + rowrot(w,j-1,j,k,1,0,0,c,s); + + // Post-multiply Q2 + colrot(Q2,j-1,j,m,k,0,m-1,c,s); + } +} + +// Downdate the QR factorization after deleting the first row, +// where Q is m x m and R is m x n +void downdate2(double *Q, double *R, int *mp, int *np) { + int m,n,i; + m = *mp; + n = *np; + + double c,s; + for (i=m-1; i>=1; i--) { + // Compute the appropriate c and s + givens(Q[(i-1)*m],Q[i*m],&c,&s); + + // Post-mutiply Q + colrot(Q,i-1,i,m,m,0,m-1,c,s); + + // Pre-multiply R + if (i<=n) rowrot(R,i-1,i,m,n,i-1,n-1,c,s); + } +} + +// Update the QR factorization after adding the last row, +// where Q is m x m and R is m x n. For efficiency, Q is not +// passed, and only the first row of R is passed. Not counting +// its first row, the first q columns of R are zero +void update2(double *y, double *D, double *r, int *mp, int *np, int *qp) { + int m,n,q,j; + m = *mp; + n = *np; + q = *qp; + + double c,s; + for (j=0; j=0; i--) { + for (j=i; j=0; i--) { + for (j=i; j=0; i--) { + // Compute the appropriate c and s + givens(R[i+(i+q+1)*m2],R[i+(i+q)*m2],&c,&s); + + // Post-multiply R + colrot(R,i+q+1,i+q,m2,n,0,i,c,s); + + // Post-multiply D + colrot(A,i+q+1,i+q,m1,n,0,m1-1,c,s); + + // Pre-multiply y + rowrot(y,i+q+1,i+q,n,1,0,0,c,s); + } +} + +// Make the R factor upper triangular, by Givens rotating +// its columns and rows, appropriately. Here A is m1 x n, +// Q is m2 x m2, and R is m2 x n with rank(R) = n-q-1. The +// first q columns of R are zero. The kth row of R is the +// last row with a zero element on the diagonal +void maketri4(double *y, double *A, double *Q, double *R, int *m1p, int *m2p, int *np, int *qp, int *kp) { + int m1,m2,n,q,k,i,j; + m1 = *m1p; + m2 = *m2p; + n = *np; + q = *qp; + k = *kp; + + double c,s; + + // First rotate the columns + for (i=k-1; i>=0; i--) { + // Compute the appropriate c and s + givens(R[i+(i+q+1)*m2],R[i+(i+q)*m2],&c,&s); + + // Post-multiply R + colrot(R,i+q+1,i+q,m2,n,0,i,c,s); + + // Post-multiply D + colrot(A,i+q+1,i+q,m1,n,0,m1-1,c,s); + + // Pre-multiply y + rowrot(y,i+q+1,i+q,n,1,0,0,c,s); + } + + // Next rotate the rows + for (j=k+q+1; j // for fabs + +// Find an approximate row of \hat{nndef}^{-1} + +// Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf + +// Dual problem: \text{min}_{\theta} 1/2 \theta^T \Sigma \theta - l^T\theta + \mu \|\theta\|_1 +// where l is `linear_func` below + +// This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf +// Therefore we don't have to negate the answer to get theta. 
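A note on the sign conventions above: the R wrapper passes linear_func = -e_i and maintains the gradient as Sigma %*% theta + linear_func, so the minus sign written in the comment is absorbed into `linear_func`. A hedged R transcription of the objective that `objective_qp` below should agree with:

```
# 1/2 theta' Sigma theta + linear_func' theta + bound * ||theta||_1,
# with linear_func = -e_i when targeting row i of the inverse
objective_qp_R <- function(Sigma, linear_func, bound, theta) {
  0.5 * sum(theta * (Sigma %*% theta)) + sum(linear_func * theta) +
    bound * sum(abs(theta))
}
```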
+// Update one coordinate + +double objective_qp(double *nndef_ptr, /* A non-negative definite matrix */ + double *linear_func_ptr, /* Linear term in objective */ + int *ever_active_ptr, /* Ever active set: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + int nrow, /* how many rows in nndef */ + double bound, /* Lagrange multipler for \ell_1 */ + double *theta_ptr) /* current value */ +{ + int irow, icol; + double value = 0; + double *nndef_ptr_tmp = nndef_ptr; + double *linear_func_ptr_tmp = linear_func_ptr; + double *theta_row_ptr, *theta_col_ptr; + int *active_row_ptr, *active_col_ptr; + int active_row, active_col; + int nactive = *nactive_ptr; + + theta_row_ptr = theta_ptr; + theta_col_ptr = theta_ptr; + + for (irow=0; irow 0) && (fabs(gradient + bound) > tol * bound)) { + return(0); + } + else if ((*theta_ptr_tmp < 0) && (fabs(gradient - bound) > tol * bound)) { + return(0); + } + } + else { + if (fabs(gradient) > (1. + tol) * bound) { + return(0); + } + } + } + + return(1); +} + +int check_KKT_qp_active(int *ever_active_ptr, /* Ever active set: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + double *theta_ptr, /* current theta */ + double *gradient_ptr, /* nndef times theta + linear_func */ + int nfeature, /* how many features in nndef */ + double bound, /* Lagrange multipler for \ell_1 */ + double tol) /* precision for checking KKT conditions */ +{ + // First check inactive + + int iactive; + double *theta_ptr_tmp; + double gradient; + double *gradient_ptr_tmp; + int nactive = *nactive_ptr; + int active_feature; + int *active_feature_ptr; + + for (iactive=0; iactive 0) && (fabs(gradient + bound) > tol * bound)) { + return(0); + } + else if ((*theta_ptr_tmp < 0) && (fabs(gradient - bound) > tol * bound)) { + return(0); + } + + } + else { + if (fabs(gradient) > (1. + tol) * bound) { + return(0); + } + } + } + + return(1); +} + + +double update_one_coord_qp(double *nndef_ptr, /* A non-negative definite matrix */ + double *linear_func_ptr, /* Linear term in objective */ + double *nndef_diag_ptr, /* Diagonal of nndef */ + double *gradient_ptr, /* nndef times theta + linear_func */ + int *ever_active_ptr, /* Ever active set: 1-based */ + int *nactive_ptr, /* Size of ever active set */ + int nfeature, /* How many features in nndef */ + double bound, /* feasibility parameter */ + double *theta_ptr, /* current value */ + int coord, /* which coordinate to update: 0-based */ + int is_active) /* Is this coord in ever_active */ +{ + + double delta; + double linear_term = 0; + double value = 0; + double old_value; + double *nndef_ptr_tmp; + double *gradient_ptr_tmp; + double *theta_ptr_tmp; + int icol = 0; + + double *quadratic_ptr = ((double *) nndef_diag_ptr + coord); + double quadratic_term = *quadratic_ptr; + + gradient_ptr_tmp = ((double *) gradient_ptr + coord); + linear_term = *gradient_ptr_tmp; + + theta_ptr_tmp = ((double *) theta_ptr + coord); + old_value = *theta_ptr_tmp; + + // The coord entry of gradient_ptr term has a diagonal term in it: + // nndef[coord, coord] * theta[coord] + // This removes it. 
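The closed-form coordinate minimizer applied just below is a scaled soft-threshold; the same arithmetic as a small R sketch:

```
# minimizer of t -> q/2 * t^2 + l * t + bound * |t|
# (q = quadratic_term, l = linear_term with the diagonal removed)
soft_threshold_update <- function(l, q, bound) {
  if (l < -bound) (-l - bound) / q
  else if (l > bound) -(l - bound) / q
  else 0
}
```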
+ + linear_term -= quadratic_term * old_value; + + // Now soft-threshold the coord entry of theta + + // Objective is t \mapsto q/2 * t^2 + l * t + bound |t| + // with q=quadratic_term and l=linear_term + + // With a negative linear term, solution should be + // positive + + if (linear_term < -bound) { + value = (-linear_term - bound) / quadratic_term; + } + else if (linear_term > bound) { + value = -(linear_term - bound) / quadratic_term; + } + + // Add to active set if necessary + + if ((is_active == 0) && (value != 0)) { + update_ever_active_qp(coord, ever_active_ptr, nactive_ptr); + } + + // Update the linear term + + if (fabs(old_value - value) > 1.e-6 * (fabs(value) + fabs(old_value))) { + + delta = value - old_value; + nndef_ptr_tmp = ((double *) nndef_ptr + coord * nfeature); + gradient_ptr_tmp = ((double *) gradient_ptr); + + for (icol=0; icol 0)) { + break; + } + old_value = new_value; + } + + } + + // Check size of active set + + if (*nactive_ptr >= max_active) { + break; + } + + } + return(iter); +} + diff --git a/C-software/src/quadratic_program_wide.c b/C-software/src/quadratic_program_wide.c new file mode 100755 index 0000000..ce7284b --- /dev/null +++ b/C-software/src/quadratic_program_wide.c @@ -0,0 +1,597 @@ +#include // for fabs + +// Find an approximate row of \hat{nndef}^{-1} + +// Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf + +// Dual problem: \text{min}_{\theta} 1/2 \|X\theta\|^2/n + l^T\theta + \mu \|\theta\|_1 + \frac{\epsilon}{2} \|\theta\|^2_2 +// where l is `linear_func` below + +// This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf +// Therefore we don't have to negate the answer to get theta. +// Update one coordinate + +// Throughout X is a design matrix + +double objective_wide(double *X_theta_ptr, /* Fitted values */ + double *linear_func_ptr, /* Linear term in objective */ + int *ever_active_ptr, /* Ever active set: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + int ncase, /* how many rows in X */ + int nfeature, /* how many columns in X */ + double *bound_ptr, /* Lagrange multiplers for \ell_1 */ + double ridge_term, /* Ridge / ENet term */ + double *theta_ptr) /* current value */ +{ + int icase, iactive; + double value = 0; + double *bound_ptr_tmp; + double *X_theta_ptr_tmp = X_theta_ptr; + double *linear_func_ptr_tmp = linear_func_ptr; + double *theta_ptr_tmp; + int *active_feature_ptr; + int active_feature; + int nactive = *nactive_ptr; + + // The term \|X\theta\|^2_2/n, with n=ncase + + for (icase=0; icase 0) && (fabs(gradient + ridge_term * (*theta_ptr_tmp) + bound) > tol * bound)) { + return(0); + } + else if ((*theta_ptr_tmp < 0) && (fabs(gradient + ridge_term * (*theta_ptr_tmp) - bound) > tol * bound)) { + return(0); + } + + } + else if (bound != 0) { + if (fabs(gradient) > (1. + tol) * bound) { + return(0); + } + } + } + + return(1); +} + +int check_KKT_wide_active(int *ever_active_ptr, /* Ever active set: 0-based */ + int *nactive_ptr, /* Size of ever active set */ + double *theta_ptr, /* current theta */ + double *gradient_ptr, /* X^TX/ncase times theta + linear_func*/ + double *X_theta_ptr, /* Current fitted values */ + double *X_ptr, /* Sqrt of non-neg def matrix -- X^TX/ncase = nndef */ + double *linear_func_ptr, /* Linear term in objective */ + int *need_update_ptr, /* Which coordinates need to be updated? 
*/ + int ncase, /* how many rows in X */ + int nfeature, /* how many columns in X */ + double *bound_ptr, /* Lagrange multipliers for \ell_1 */ + double ridge_term, /* Ridge / ENet term */ + double tol) /* precision for checking KKT conditions */ +{ + // First check inactive + + int iactive; + double *theta_ptr_tmp; + double *bound_ptr_tmp; + double bound; + double gradient; + int nactive = *nactive_ptr; + int active_feature; + int *active_feature_ptr; + + for (iactive=0; iactive 0) && (fabs(gradient + ridge_term * (*theta_ptr_tmp) + bound) > tol * bound)) { + return(0); + } + else if ((*theta_ptr_tmp < 0) && (fabs(gradient + ridge_term * (*theta_ptr_tmp) - bound) > tol * bound)) { + return(0); + } + + } + else if (bound != 0) { + if (fabs(gradient) > (1. + tol) * bound) { + return(0); + } + } + } + + return(1); +} + +double update_one_coord_wide(double *X_ptr, /* A design matrix*/ + double *linear_func_ptr, /* Linear term in objective */ + double *nndef_diag_ptr, /* Diagonal of nndef */ + double *gradient_ptr, /* X^TX/ncase times theta + linear_func*/ + int *ever_active_ptr, /* Ever active set: 1-based */ + int *nactive_ptr, /* Size of ever active set */ + double *X_theta_ptr, /* X\theta -- fitted values */ + int *need_update_ptr, /* Whether a gradient coordinate needs update or not */ + int ncase, /* How many rows in X */ + int nfeature, /* How many rows in X */ + double *bound_ptr, /* Lagrange multipliers */ + double ridge_term, /* Ridge / ENet term */ + double *theta_ptr, /* current value */ + int coord, /* which coordinate to update: 0-based */ + int is_active) /* Is this coord in ever_active */ +{ + + double delta; + double linear_term = 0; + double value = 0; + double old_value; + double *X_ptr_tmp; + double *X_theta_ptr_tmp; + int *need_update_ptr_tmp; + double *theta_ptr_tmp; + double *bound_ptr_tmp; + double bound; + int ifeature, icase; + + double *diagonal_ptr = ((double *) nndef_diag_ptr + coord); + double diagonal_entry = *diagonal_ptr; + + linear_term = compute_gradient_coord(gradient_ptr, X_theta_ptr, X_ptr, linear_func_ptr, need_update_ptr, coord, ncase, nfeature); + + theta_ptr_tmp = ((double *) theta_ptr + coord); + old_value = *theta_ptr_tmp; + + bound_ptr_tmp = ((double *) bound_ptr + coord); + bound = *bound_ptr_tmp; + + // The coord entry of gradient_ptr term has a diagonal term in it: + // (X^TX)[coord, coord] * theta[coord] / ncase + // This removes it. 
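Two things differ from the narrow solver at this step: the gradient coordinate is formed lazily from the maintained fitted values, and the ridge term adds curvature, so the soft-threshold below divides by diagonal_entry + ridge_term. The lazy gradient coordinate, sketched in R:

```
# gradient coordinate j of 1/2 ||X theta||^2 / n + linear_func' theta,
# formed on demand from the maintained fitted values X %*% theta
gradient_coord <- function(X, X_theta, linear_func, j) {
  sum(X[, j] * X_theta) / nrow(X) + linear_func[j]
}
```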
+ + linear_term -= diagonal_entry * old_value; + + // Now soft-threshold the coord entry of theta + + // Objective is t \mapsto (q+eps)/2 * t^2 + l * t + bound |t| + + // with q=diagonal_entry and l=linear_term and eps=ridge_Term + + // With a negative linear term, solution should be + // positive + + if (linear_term < -bound) { + value = (-linear_term - bound) / (diagonal_entry + ridge_term); + } + else if (linear_term > bound) { + value = -(linear_term - bound) / (diagonal_entry + ridge_term); + } + + // Add to active set if necessary + + if ((is_active == 0) && (value != 0)) { + update_ever_active_wide(coord, ever_active_ptr, nactive_ptr); + } + + // Update X\theta if changed + + if (fabs(old_value - value) > 1.e-6 * (fabs(value) + fabs(old_value))) { + + // Set the update_gradient_ptr to 1 + + need_update_ptr_tmp = need_update_ptr; + for (ifeature=0; ifeature 0)) { + break; + } + old_value = new_value; + } + } + + // Check size of active set + + if (*nactive_ptr >= max_active) { + break; + } + + } + return(iter); +} + diff --git a/C-software/src/randomized_lasso.c b/C-software/src/randomized_lasso.c new file mode 100755 index 0000000..1f396b1 --- /dev/null +++ b/C-software/src/randomized_lasso.c @@ -0,0 +1,190 @@ +#include // for fabs + +// Augmented density for randomized LASSO after +// Gaussian randomization + +// Described in https://arxiv.org/abs/1609.05609 + +// Gaussian is product of IID N(0, noise_scale^2) density +// Evaluated at A_D D + A_O O + h + +// Laplace is product of IID Laplace with scale noise_scale +// Also evaluated at A_D D + A_O O + h + +// Matrices are assumed in column major order! + +double log_density_gaussian(double noise_scale, // Scale of randomization + int ndim, // Number of features -- "p" + int ninternal, // Dimension of internal data representation often 1 + int noptimization, // Dimension of optimization variables -- "p" + double *internal_linear, // A_D -- linear part for data + double *internal_state, // D -- data state + double *optimization_linear, // A_O -- linear part for optimization variables + double *optimization_state, // O -- optimization state + double *offset) // h -- offset in affine transform -- "p" dimensional +{ + int irow, icol; + double denom = 2 * noise_scale * noise_scale; + double value = 0; + double reconstruction = 0; + double *offset_ptr; + double *internal_linear_ptr; + double *internal_state_ptr; + double *optimization_linear_ptr; + double *optimization_state_ptr; + + for (irow=0; irow +#include + +// Take a Gibbs hit and run step along a given direction + +// Assumes the covariance is identity + +void gibbs_step(double *state, /* state has law N(0,I) constrained to polyhedral set \{y:Ay \leq b\}*/ + double *direction, /* direction we will take Gibbs step */ + double *U, /* A %*% state - b */ + double *alpha, /* A %*% direction */ + int nconstraint, /* number of rows of A */ + int nstate) /* dimension of state */ +{ + + int istate; + double value = 0; + + /* Compute V=\eta^Ty */ + + for (istate = 0; istate < nstate; istate++) { + value += direction[istate] * state[istate]; + } + + /* Compute upper and lower bounds */ + + double lower_bound = -1e12; + double upper_bound = 1e12; + double bound_val = 0; + double tol=1.e-7; + int iconstraint; + + for (iconstraint = 0; iconstraint < nconstraint; iconstraint++) { + + bound_val = -U[iconstraint] / alpha[iconstraint] + value; + + if ((alpha[iconstraint] > tol) && + (bound_val < upper_bound)) { + upper_bound = bound_val; + } + else if ((alpha[iconstraint] < -tol) && + (bound_val > 
lower_bound)) { + lower_bound = bound_val; + } + + } + + /* Ensure constraints are satisfied */ + + if (lower_bound > value) { + lower_bound = value - tol; + } + else if (upper_bound < value) { + upper_bound = value + tol; + } + + /* Check to see if constraints are satisfied */ + + /* if (lower_bound > upper_bound) { + + }*/ + + /* Now, take a step */ + + double tnorm; /* the 1D gaussian variable */ + double cdfU, cdfL, unif; /* temp variables */ + + if (upper_bound < -10) { + + /* use Exp approximation */ + /* the approximation is that */ + /* Z | lower_bound < Z < upper_bound */ + /* is fabs(upper_bound) * (upper_bound - Z) = E approx Exp(1) */ + /* so Z = upper_bound - E / fabs(upper_bound) */ + /* and the truncation of the exponential is */ + /* E < fabs(upper_bound - lower_bound) * fabs(upper_bound) = D */ + + /* this has distribution function (1 - exp(-x)) / (1 - exp(-D)) */ + /* so to draw from this distribution */ + /* we set E = - log(1 - U * (1 - exp(-D))) where U is Unif(0,1) */ + /* and Z (= tnorm below) is as stated */ + + unif = runif(0., 1.) * (1 - exp(-fabs((lower_bound - upper_bound) * upper_bound))); + tnorm = (upper_bound + log(1 - unif) / fabs(upper_bound)); + } + else if (lower_bound > 10) { + + /* here Z = lower_bound + E / fabs(lower_bound) (though lower_bound is positive) */ + /* and D = fabs((upper_bound - lower_bound) * lower_bound) */ + + unif = runif(0., 1.) * (1 - exp(-fabs((upper_bound - lower_bound) * lower_bound))); + tnorm = (lower_bound - log(1 - unif) / lower_bound); + } + else if (lower_bound < 0) { + cdfL = pnorm(lower_bound, 0., 1., 1, 0); + cdfU = pnorm(upper_bound, 0., 1., 1, 0); + unif = runif(0., 1.) * (cdfU - cdfL) + cdfL; + if (unif < 0.5) { + tnorm = qnorm(unif, 0., 1., 1, 0); + } + else { + tnorm = -qnorm(1-unif, 0., 1., 1, 0); + } + } + else { + cdfL = pnorm(-lower_bound, 0., 1., 1, 0); + cdfU = pnorm(-upper_bound, 0., 1., 1, 0); + unif = runif(0., 1.) 
* (cdfL - cdfU) + cdfU; + if (unif < 0.5) { + tnorm = -qnorm(unif, 0., 1., 1, 0); + } + else { + tnorm = qnorm(1-unif, 0., 1., 1, 0); + } + } + + /* Now update the state and U */ + + double delta = tnorm - value; + + for (istate = 0; istate < nstate; istate++) { + state[istate] += delta * direction[istate]; + } + for (iconstraint = 0; iconstraint < nconstraint; iconstraint++) { + U[iconstraint] += delta * alpha[iconstraint] ; + } + + /* End of gibbs_step */ + +} + +void sample_truncnorm_white(double *state, /* state has law N(0,I) constrained to polyhedral set \{y:Ay \leq b\}*/ + double *U, /* A %*% state - b */ + double *directions, /* possible steps for sampler to take */ + /* assumed to be stored as list of columns of dimension nstate */ + /* has shape (nstate, ndirection) */ + double *alphas, /* The matrix A %*% directions */ + /* has shape (nconstraint, ndirection) */ + double *output, /* array in which to store samples */ + /* assumed will stored as list of vectors of dimension nstate */ + /* has shape (nstate, ndraw) */ + int *pnconstraint, /* number of rows of A */ + int *pndirection, /* the possible number of directions to choose from */ + /* `directions` should have size nstate*ndirection */ + int *pnstate, /* dimension of state */ + int *pburnin, /* number of burnin steps */ + int *pndraw) /* total number of samples to return */ +{ + + int iter_count; + int which_direction; + + int nconstraint = *pnconstraint; + int ndirection = *pndirection; + int nstate = *pnstate; + int burnin = *pburnin; + int ndraw = *pndraw; + + double *direction, *alpha; + + for (iter_count = 0; iter_count < burnin + ndraw; iter_count++) { + + which_direction = (int) floor(runif(0., 1.) * ndirection); + direction = ((double *) directions) + nstate * which_direction; + alpha = ((double *) alphas) + nconstraint * which_direction; + + /* take a step, which implicitly updates `state` and `U` */ + + gibbs_step(state, + direction, + U, + alpha, + nconstraint, + nstate); + + /* Store result if after burnin */ + + int istate; + if (iter_count >= burnin) { + for (istate = 0; istate < nstate; istate++) { + *output = state[istate]; + output++; + } + } + } + +} + diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..91a89a4 --- /dev/null +++ b/Makefile @@ -0,0 +1,14 @@ +Rcpp: + - rm -f selectiveInference/src/RcppExports.cpp + - rm -f selectiveInference/R/RcppExports.R + Rscript -e "library(Rcpp); Rcpp::compileAttributes('selectiveInference')" + +install: Rcpp + R CMD install selectiveInference + +build: + R CMD build selectiveInference + +check: Rcpp build + R CMD build selectiveInference + R CMD check selectiveInference_1.2.2.tar.gz # fix this to be a script variable \ No newline at end of file diff --git a/README.md b/README.md index 962151e..0a60855 100644 --- a/README.md +++ b/README.md @@ -21,3 +21,11 @@ The latest release of the package can be installed through CRAN: install.packages("selectiveInference") ``` Code in repo is under development and may be unstable. + +## For development + +You will have to run + +``` +make Rcpp +``` \ No newline at end of file diff --git a/selectiveInference/DESCRIPTION b/selectiveInference/DESCRIPTION index d0740ec..d902622 100644 --- a/selectiveInference/DESCRIPTION +++ b/selectiveInference/DESCRIPTION @@ -18,3 +18,5 @@ Description: New tools for post-selection inference, for use with forward models. 
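An aside on `gibbs_step` above: away from the tails, the one-dimensional draw inverts the normal CDF between the computed bounds, while deep in a tail it switches to the truncated-exponential approximation to avoid pnorm/qnorm underflow. The central case, as a hedged R sketch:

```
# Z ~ N(0, 1) conditioned on lower < Z < upper (central case only;
# the C code uses an Exp(1)-based tail approximation when a bound
# exceeds 10 in absolute value)
rtruncnorm_central <- function(lower, upper) {
  qnorm(runif(1, pnorm(lower), pnorm(upper)))
}
```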
License: GPL-2 RoxygenNote: 5.0.1 +LinkingTo: Rcpp +Imports: Rcpp diff --git a/selectiveInference/NAMESPACE b/selectiveInference/NAMESPACE index 099fdc5..ab2a111 100644 --- a/selectiveInference/NAMESPACE +++ b/selectiveInference/NAMESPACE @@ -44,5 +44,5 @@ importFrom("stats", dnorm, lsfit, pexp, pnorm, predict, qnorm, rnorm, sd, uniroot, dchisq, model.matrix, pchisq) importFrom("stats", "coef", "df", "lm", "pf") importFrom("stats", "glm", "residuals", "vcov") - +importFrom("Rcpp", "sourceCpp") diff --git a/selectiveInference/R/RcppExports.R b/selectiveInference/R/RcppExports.R new file mode 100644 index 0000000..a65dc0b --- /dev/null +++ b/selectiveInference/R/RcppExports.R @@ -0,0 +1,19 @@ +# Generated by using Rcpp::compileAttributes() -> do not edit by hand +# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 + +solve_QP <- function(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol) { + .Call('_selectiveInference_solve_QP', PACKAGE = 'selectiveInference', Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol) +} + +find_one_row_debiasingM <- function(Sigma, row, bound, maxiter, theta, gradient, ever_active, nactive, kkt_tol, objective_tol) { + .Call('_selectiveInference_find_one_row_debiasingM', PACKAGE = 'selectiveInference', Sigma, row, bound, maxiter, theta, gradient, ever_active, nactive, kkt_tol, objective_tol) +} + +update1_ <- function(Q2, w, m, k) { + .Call('_selectiveInference_update1_', PACKAGE = 'selectiveInference', Q2, w, m, k) +} + +downdate1_ <- function(Q1, R, j0, m, n) { + .Call('_selectiveInference_downdate1_', PACKAGE = 'selectiveInference', Q1, R, j0, m, n) +} + diff --git a/selectiveInference/R/funs.common.R b/selectiveInference/R/funs.common.R index 042e074..678d736 100644 --- a/selectiveInference/R/funs.common.R +++ b/selectiveInference/R/funs.common.R @@ -152,13 +152,7 @@ updateQR <- function(Q1,Q2,R,col) { n = ncol(Q1) k = ncol(Q2) - a = .C("update1", - Q2=as.double(Q2), - w=as.double(t(Q2)%*%col), - m=as.integer(m), - k=as.integer(k), - dup=FALSE, - package="selectiveInference") + a = update1_(as.matrix(Q2), t(Q2)%*%col, m, k) # Rcpp call Q2 = matrix(a$Q2,nrow=m) w = c(t(Q1)%*%col,a$w) diff --git a/selectiveInference/R/funs.fixed.R b/selectiveInference/R/funs.fixed.R index 7073e2e..d0435c8 100644 --- a/selectiveInference/R/funs.fixed.R +++ b/selectiveInference/R/funs.fixed.R @@ -2,18 +2,18 @@ # for the solution of # min 1/2 || y - \beta_0 - X \beta ||_2^2 + \lambda || \beta ||_1 -fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","cox"),intercept=TRUE, add.targets=NULL, status=NULL, -sigma=NULL, alpha=0.1, - type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, - gridrange=c(-100,100), bits=NULL, verbose=FALSE) { - +fixedLassoInf <- function(x, y, beta, lambda, family=c("gaussian","binomial","cox"),intercept=TRUE, status=NULL, + sigma=NULL, alpha=0.1, + type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, + gridrange=c(-100,100), bits=NULL, verbose=FALSE) { + family = match.arg(family) this.call = match.call() type = match.arg(type) if(family=="binomial") { - if(type!="partial") stop("Only type= partial allowed with binomial family") - out=fixedLogitLassoInf(x,y,beta,lambda,alpha=alpha, type="partial", tol.beta=tol.beta, tol.kkt=tol.kkt, + # if(type!="partial") stop("Only type= partial allowed with binomial family") + out=fixedLogitLassoInf(x,y,beta,lambda,alpha=alpha, type=type, tol.beta=tol.beta, tol.kkt=tol.kkt, gridrange=gridrange, 
bits=bits, verbose=verbose,this.call=this.call) return(out) } @@ -31,28 +31,6 @@ sigma=NULL, alpha=0.1, checkargs.xy(x,y) if (missing(beta) || is.null(beta)) stop("Must supply the solution beta") if (missing(lambda) || is.null(lambda)) stop("Must supply the tuning parameter value lambda") - - n = nrow(x) - p = ncol(x) - beta = as.numeric(beta) - if (type == "full") { - if (p > n) { - # need intercept (if there is one) for debiased lasso - hbeta = beta - if (intercept == T) { - if (length(beta) != p + 1) { - stop("Since type='full', p > n, and intercept=TRUE, beta must have length equal to ncol(x)+1") - } - # remove intercept if included - beta = beta[-1] - } else if (length(beta) != p) { - stop("Since family='gaussian', type='full' and intercept=FALSE, beta must have length equal to ncol(x)") - } - } - } else if (length(beta) != p) { - stop("Since family='gaussian' and type='partial', beta must have length equal to ncol(x)") - } - checkargs.misc(beta=beta,lambda=lambda,sigma=sigma,alpha=alpha, gridrange=gridrange,tol.beta=tol.beta,tol.kkt=tol.kkt) if (!is.null(bits) && !requireNamespace("Rmpfr",quietly=TRUE)) { @@ -60,12 +38,45 @@ sigma=NULL, alpha=0.1, bits = NULL } - if (!is.null(add.targets) && (!is.vector(add.targets) - || !all(is.numeric(add.targets)) || !all(add.targets==floor(add.targets)) - || !all(add.targets >= 1 && add.targets <= p))) { - stop("'add.targets' must be a vector of integers between 1 and p") + # Estimate sigma + if (is.null(sigma)) { + if (n >= 2*p) { + oo = intercept + sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo)) + } + else { + sigma = sd(y) + warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma), + "you may want to use the estimateSigma function")) + } } + n = nrow(x) + p = ncol(x) + beta = as.numeric(beta) + # if (intercept == F & length(beta) != p) stop("Since intercept=FALSE, beta must have length equal to ncol(x)") + # if (intercept == T & length(beta) != p+1) stop("Since intercept=TRUE, beta must have length equal to ncol(x)+1") + + + bbeta = beta + # m=beta!=0 #active set + vars = which(abs(bbeta) > tol.beta * sqrt(n / colSums(x^2))) + bhat=bbeta[vars] # active vars + xm=matrix(x[,vars],ncol=length(vars)) + + xnotm=x[,-vars] + + # vars = which(abs(bbeta) > tol.beta / sqrt(colSums(x^2))) + nvar = length(vars) + if(nvar==0){ + cat("Empty model",fill=T) + return() + } + + pv=vlo=vup=sd=rep(NA, nvar) + vmat = matrix(0,nvar,n) + ci=tailarea=matrix(NA,nvar,2) + # If glmnet was run with an intercept term, center x and y if (intercept==TRUE) { obj = standardize(x,y,TRUE,FALSE) @@ -73,36 +84,76 @@ sigma=NULL, alpha=0.1, y = obj$y } - # Check the KKT conditions - g = t(x)%*%(y-x%*%beta) / lambda - if (any(abs(g) > 1+tol.kkt * sqrt(sum(y^2)))) + s2=sign(bhat) + + #check KKT + g = t(x)%*%(y-xm%*%bhat)/lambda # negative gradient scaled by lambda + if (any(abs(g) > 1+tol.kkt) ) warning(paste("Solution beta does not satisfy the KKT conditions", "(to within specified tolerances)")) - tol.coef = tol.beta * sqrt(n^2 / colSums(x^2)) - # print(tol.coef) - vars = which(abs(beta) > tol.coef) - # print(beta) - # print(vars) - if(length(vars)==0){ - cat("Empty model",fill=T) - return() - } - if (any(sign(g[vars]) != sign(beta[vars]))) + if (any(sign(g[vars]) != sign(bbeta[vars]))) warning(paste("Solution beta does not satisfy the KKT conditions", "(to within specified tolerances). 
You might try rerunning", "glmnet with a lower setting of the", "'thresh' parameter, for a more accurate convergence.")) - # Get lasso polyhedral region, of form Gy >= u - if (type == 'full' & p > n) out = fixedLasso.poly(x,y,beta,lambda,vars,inactive=TRUE) - else out = fixedLasso.poly(x,y,beta,lambda,vars) - G = out$G - u = out$u + MM = pinv(crossprod(xm))/sigma^2 + # gradient at LASSO solution, first entry is 0 because intercept is unpenalized + # at exact LASSO solution it should be s2[-1] + gm = -g[vars]*lambda + + dbeta = MM%*%gm + + bbar = bhat - dbeta + + A1 = -(mydiag(s2)) + b1 = (s2*dbeta) + V = diag(length(bbar)) + + null_value = rep(0,nvar) + + if (p > n && type=="full") { + + + # Reorder so that active set is first + Xordered = cbind(xm,xnotm) + + hsigma <- 1/n*(t(Xordered)%*%Xordered) + + M <- InverseLinfty(hsigma,n,dim(xm)[2],verbose=F)[-1,] # remove intercept row + I <- diag(dim(xm)[2])[-1,] + if (intercept) Msubset <- M[,-c(1,vars+1)] + else Msubset <- M[,-vars] + B <- cbind(I,Msubset/sqrt(n)) + V = B + + c <- matrix(c(gm,t(xnotm)%*%xm%*%(-dbeta)),ncol=1) + d <- -dbeta[-1] # remove intercept + + null_value = -(M%*%c/sqrt(n) - d) + + A0 = matrix(0,ncol(xnotm),ncol(A1)) + A0 = cbind(A0,diag(nrow(A0))) + fill = matrix(0,nrow(A1),nrow(A0)) + A1 = cbind(A1,fill) + A1 = rbind(A1,A0,-A0) + + b1 = matrix(c(b1,rep(lambda,2*nrow(A0))),ncol=1) + + # full covariance + MMbr = (crossprod(xnotm) - t(xnotm)%*%xm%*%pinv(crossprod(xm))%*%t(xm)%*%xnotm)*sigma^2 + MM = cbind(MM,matrix(0,nrow(MM),ncol(MMbr))) + MMbr = cbind(matrix(0,nrow(MMbr),nrow(MM)),MMbr) + MM = rbind(MM,MMbr) + + gnotm = g[-vars]*lambda + bbar = matrix(c(bbar,gnotm),ncol=1) + } # Check polyhedral region tol.poly = 0.01 - if (min(G %*% y - u) < -tol.poly * sqrt(sum(y^2))) + if (max((A1 %*% bbar) - b1) > tol.poly) stop(paste("Polyhedral constraints not satisfied; you must recompute beta", "more accurately. 
With glmnet, make sure to use exact=TRUE in coef(),", "and check whether the specified value of lambda is too small", @@ -110,114 +161,45 @@ sigma=NULL, alpha=0.1, "You might also try rerunning glmnet with a lower setting of the", "'thresh' parameter, for a more accurate convergence.")) - # Estimate sigma - if (is.null(sigma)) { - if (n >= 2*p) { - oo = intercept - sigma = sqrt(sum(lsfit(x,y,intercept=oo)$res^2)/(n-p-oo)) - } - else { - sigma = sd(y) - warning(paste(sprintf("p > n/2, and sd(y) = %0.3f used as an estimate of sigma;",sigma), - "you may want to use the estimateSigma function")) - } - } + sign = numeric(nvar) + coef0 = numeric(nvar) - # add additional targets for inference if provided - if (!is.null(add.targets)) vars = sort(unique(c(vars,add.targets,recursive=T))) - - k = length(vars) - pv = vlo = vup = numeric(k) - vmat = matrix(0,k,n) - ci = tailarea = matrix(0,k,2) - sign = numeric(k) - - if (type=="full" & p > n) { - if (intercept == T) { - pp=p+1 - Xint <- cbind(rep(1,n),x) - # indices of selected predictors - S = c(1,vars + 1) - } else { - pp=p - Xint <- x - # indices of selected predictors - S = vars - # notS = which(abs(beta) <= tol.coef) - } + for (j in 1:nvar) { + if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) - notS = setdiff(1:pp,S) + vj = V[j,] + # mj = sqrt(sum(vj^2)) + # vj = vj/mj + coef0[j] = sum(vj * bbar) + sign[j] = sign(coef0[j]) + vj = vj * sign[j] - XS = Xint[,S] - hbetaS = hbeta[S] + limits.info = TG.limits(bbar, A1, b1, vj, Sigma=MM) + if(is.null(limits.info)) return(list(pv=NULL,MM=MM,eta=vj)) + a = TG.pvalue.base(limits.info, null_value=null_value[j], bits=bits) + pv[j] = a$pv + vlo[j] = a$vlo # * mj # Unstandardize (mult by norm of vj) + vup[j] = a$vup # * mj # Unstandardize (mult by norm of vj) + sd[j] = a$sd # * mj # Unstandardize (mult by norm of vj) - # Reorder so that active set S is first - Xordered = Xint[,c(S,notS,recursive=T)] - - hsigma <- 1/n*(t(Xordered)%*%Xordered) - hsigmaS <- 1/n*(t(XS)%*%XS) # hsigma[S,S] - hsigmaSinv <- solve(hsigmaS) # pinv(hsigmaS) - - # Approximate inverse covariance matrix for when (n < p) from lasso_Inference.R - htheta <- InverseLinfty(hsigma, n, length(S), verbose=FALSE) - # htheta <- InverseLinfty(hsigma, n, verbose=FALSE) - - FS = rbind(diag(length(S)),matrix(0,pp-length(S),length(S))) - GS = cbind(diag(length(S)),matrix(0,length(S),pp-length(S))) - ithetasigma = (GS-(htheta%*%hsigma)) - # ithetasigma = (diag(pp) - (htheta%*%hsigma)) - - M <- (((htheta%*%t(Xordered))+ithetasigma%*%FS%*%hsigmaSinv%*%t(XS))/n) - # vector which is offset for testing debiased beta's - null_value <- (((ithetasigma%*%FS%*%hsigmaSinv)%*%sign(hbetaS))*lambda/n) - if (intercept == T) { - M = M[-1,] # remove intercept row - null_value = null_value[-1] # remove intercept element - } - } else if (type=="partial" || p > n) { - xa = x[,vars,drop=F] - M = pinv(crossprod(xa)) %*% t(xa) - null_value = rep(0,k) - } else { - M = pinv(crossprod(x)) %*% t(x) - M = M[vars,,drop=F] - null_value = rep(0,k) + a = TG.interval.base(limits.info, + alpha=alpha, + gridrange=gridrange, + flip=(sign[j]==-1), + bits=bits) + ci[j,] = (a$int-null_value[j]) # * mj # Unstandardize (mult by norm of vj) + tailarea[j,] = a$tailarea } - - for (j in 1:k) { - if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) - - vj = M[j,] - mj = sqrt(sum(vj^2)) - vj = vj / mj # Standardize (divide by norm of vj) - sign[j] = sign(sum(vj*y)) - vj = sign[j] * vj - - limits.info = TG.limits(y, -G, -u, vj, Sigma=diag(rep(sigma^2, n))) - a = 
TG.pvalue.base(limits.info, null_value=null_value[j], bits=bits) - pv[j] = a$pv - vlo[j] = a$vlo * mj # Unstandardize (mult by norm of vj) - vup[j] = a$vup * mj # Unstandardize (mult by norm of vj) - vmat[j,] = vj * mj * sign[j] # Unstandardize (mult by norm of vj) - - a = TG.interval.base(limits.info, - alpha=alpha, - gridrange=gridrange, - flip=(sign[j]==-1), - bits=bits) - ci[j,] = (a$int-null_value[j]) * mj # Unstandardize (mult by norm of vj) - tailarea[j,] = a$tailarea + + out = list(type=type,lambda=lambda,pv=pv,ci=ci, + tailarea=tailarea,vlo=vlo,vup=vup,y=y, + vars=vars,sign=sign,sigma=sigma,alpha=alpha, + sd=sd, + coef0=coef0, + call=this.call) + class(out) = "fixedLassoInf" + return(out) } - - out = list(type=type,lambda=lambda,pv=pv,ci=ci, - tailarea=tailarea,vlo=vlo,vup=vup,vmat=vmat,y=y, - vars=vars,sign=sign,sigma=sigma,alpha=alpha, - sd=sigma*sqrt(rowSums(vmat^2)), - coef0=vmat%*%y, - call=this.call) - class(out) = "fixedLassoInf" - return(out) -} } ############################# @@ -269,8 +251,7 @@ fixedLasso.poly= ### Functions borrowed and slightly modified from lasso_inference.R ## Approximates inverse covariance matrix theta -InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE) { - # InverseLinfty <- function(sigma, n, resol=1.5, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE) { +InverseLinfty <- function(sigma, n, e, resol=1.2, mu=NULL, maxiter=50, threshold=1e-2, verbose = TRUE, max.try=10) { isgiven <- 1; if (is.null(mu)){ isgiven <- 0; @@ -292,12 +273,15 @@ InverseLinfty <- function(sigma, n, e, resol=1.5, mu=NULL, maxiter=50, threshold mu.stop <- 0; try.no <- 1; incr <- 0; - while ((mu.stop != 1)&&(try.no<10)){ + + output = NULL + + while ((mu.stop != 1) && (try.no= mu0){ - beta[i] <- (1-mu0)/sigma[i,i]; - returnlist <- list("optsol" = beta, "iter" = 0); - return(returnlist); +InverseLinftyOneRow <- function (Sigma, i, mu, maxiter=50, soln_result=NULL, kkt_tol=1.e-6, objective_tol=1.e-6, + use_QP=TRUE) { + + # If soln_result is not Null, it is used as a warm start. + # It should be a list + # with entries "soln", "gradient", "ever_active", "nactive" + + p = nrow(Sigma) + + if (is.null(soln_result)) { + soln = rep(0, p) + ever_active = rep(0, p) + ever_active[1] = i # 1-based + ever_active = as.integer(ever_active) + nactive = as.integer(1) + if (use_QP) { + linear_func = rep(0, p) + linear_func[i] = -1 + linear_func = as.numeric(linear_func) + gradient = 1. 
* linear_func + } else { + gradient = rep(0, p) + } } - - diff.norm2 <- 1; - last.norm2 <- 1; - iter <- 1; - iter.old <- 1; - beta[i] <- (1-mu0)/sigma[i,i]; - beta.old <- beta; - sigma.tilde <- sigma; - diag(sigma.tilde) <- 0; - vs <- -sigma.tilde%*%beta; - - while ((iter <= maxiter) && (diff.norm2 >= threshold*last.norm2)){ - - for (j in 1:p){ - oldval <- beta[j]; - v <- vs[j]; - if (j==i) - v <- v+1; - beta[j] <- SoftThreshold(v,mu)/sigma[j,j]; - if (oldval != beta[j]){ - vs <- vs + (oldval-beta[j])*sigma.tilde[,j]; - } - } - - iter <- iter + 1; - if (iter==2*iter.old){ - d <- beta - beta.old; - diff.norm2 <- sqrt(sum(d*d)); - last.norm2 <-sqrt(sum(beta*beta)); - iter.old <- iter; - beta.old <- beta; - if (iter>10) - vs <- -sigma.tilde%*%beta; - } + else { + soln = soln_result$soln + gradient = soln_result$gradient + ever_active = as.integer(soln_result$ever_active) + nactive = as.integer(soln_result$nactive) + if (use_QP) { + linear_func = soln_result$linear_func + } } - - returnlist <- list("optsol" = beta, "iter" = iter) - return(returnlist) -} -SoftThreshold <- function( x, lambda ) { - # - # Standard soft thresholding - # - if (x>lambda){ - return (x-lambda);} - else { - if (x< (-lambda)){ - return (x+lambda);} - else { - return (0); } + if (use_QP) { + result = solve_QP(Sigma, mu, maxiter, soln, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol) + } else { + result = find_one_row_debiasingM(Sigma, i, mu, maxiter, soln, gradient, ever_active, nactive, kkt_tol, objective_tol) # C function uses 1-based indexing for active set } + + # Check feasibility + + if (!result$kkt_check) { + warning("Solution for row of M does not seem to be feasible") + } + + return(result) + } ############################## diff --git a/selectiveInference/R/funs.fixedLogit.R b/selectiveInference/R/funs.fixedLogit.R index 19936b0..230110b 100644 --- a/selectiveInference/R/funs.fixedLogit.R +++ b/selectiveInference/R/funs.fixedLogit.R @@ -1,8 +1,8 @@ -fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.beta=1e-5, tol.kkt=0.1, - gridrange=c(-100,100), bits=NULL, verbose=FALSE,this.call=NULL){ - - +fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, + gridrange=c(-100,100), bits=NULL, verbose=FALSE,this.call=NULL){ + + type = match.arg(type) checkargs.xy(x,y) if (missing(beta) || is.null(beta)) stop("Must supply the solution beta") @@ -13,68 +13,152 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet warning("Package Rmpfr is not installed, reverting to standard precision") bits = NULL } - - - n=length(y) - p=ncol(x) - # I assume that intcpt was used - if(length(beta)!=p+1) stop("Since family='binomial', beta must be of length ncol(x)+1, that is, it should include an intercept") - nvar=sum(beta[-1]!=0) - pv=vlo=vup=sd=rep(NA, nvar) - ci=tailarea=matrix(NA,nvar,2) - -#do we need to worry about standardization? - -# obj = standardize(x,y,TRUE,FALSE) + + + n=length(y) + p=ncol(x) + # I assume that intcpt was used + if(length(beta)!=p+1) stop("Since family='binomial', beta must be of length ncol(x)+1, that is, it should include an intercept") + vars = which(abs(beta[-1]) > tol.beta / sqrt(colSums(x^2))) + nvar=length(vars) + pv=vlo=vup=sd=rep(NA, nvar) + ci=tailarea=matrix(NA,nvar,2) + + #do we need to worry about standardization? 
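A usage note on `InverseLinftyOneRow` above: its `soln_result` argument lets a previous solution warm-start the next call, for example when sweeping `mu` downward. A hedged sketch, assuming (as the code reads them) the returned list carries soln, gradient, ever_active, nactive, and linear_func:

```
# cold start for row i, then a warm-started call at a smaller mu
res1 <- InverseLinftyOneRow(Sigma, i = 1, mu = 0.5)
res2 <- InverseLinftyOneRow(Sigma, i = 1, mu = 0.25, soln_result = res1)
```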
+ + # obj = standardize(x,y,TRUE,FALSE) # x = obj$x # y = obj$y - m=beta[-1]!=0 #active set - - bhat=c(beta[1],beta[-1][beta[-1]!=0]) # intcpt plus active vars - s2=sign(bhat) - lam2m=diag(c(0,rep(lambda,sum(m)))) - - - xxm=cbind(1,x[,m]) - - etahat = xxm %*% bhat - prhat = as.vector(exp(etahat) / (1 + exp(etahat))) - ww=prhat*(1-prhat) - # w=diag(ww) - -#check KKT - z=etahat+(y-prhat)/ww + # m=beta[-1]!=0 #active set + + bhat=c(beta[1],beta[-1][vars]) # intcpt plus active vars + s2=sign(bhat) + lam2m=diag(c(0,rep(lambda,nvar))) + + + xm=cbind(1,x[,vars]) + xnotm=x[,-vars] + + etahat = xm %*% bhat + prhat = as.vector(exp(etahat) / (1 + exp(etahat))) + ww=prhat*(1-prhat) + w=diag(ww) + + #check KKT + z=etahat+(y-prhat)/ww # g= t(x)%*%w%*%(z-etahat)/lambda # negative gradient scaled by lambda g=scale(t(x),FALSE,1/ww)%*%(z-etahat)/lambda # negative gradient scaled by lambda - if (any(abs(g) > 1+tol.kkt) ) + if (any(abs(g) > 1+tol.kkt) ) warning(paste("Solution beta does not satisfy the KKT conditions", "(to within specified tolerances)")) - - vars = which(abs(beta[-1]) > tol.beta / sqrt(colSums(x^2))) + if(length(vars)==0){ - cat("Empty model",fill=T) - return() + cat("Empty model",fill=T) + return() } if (any(sign(g[vars]) != sign(beta[-1][vars]))) warning(paste("Solution beta does not satisfy the KKT conditions", "(to within specified tolerances). You might try rerunning", "glmnet with a lower setting of the", "'thresh' parameter, for a more accurate convergence.")) + # warnings(paste(g[vars],beta[-1][vars])) - #constraints for active variables - # MM=solve(t(xxm)%*%w%*%xxm) - MM=solve(scale(t(xxm),F,1/ww)%*%xxm) + #constraints for active variables + # MM=solve(t(xxm)%*%w%*%xxm) + MM=solve(scale(t(xm),F,1/ww)%*%xm) gm = c(0,-g[vars]*lambda) # gradient at LASSO solution, first entry is 0 because intercept is unpenalized - # at exact LASSO solution it should be s2[-1] + # at exact LASSO solution it should be s2[-1] dbeta = MM %*% gm - + # bbar=(bhat+lam2m%*%MM%*%s2) # JT: this is wrong, shouldn't use sign of intercept anywhere... 
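For intuition, the polyhedron assembled just below encodes sign consistency of the active coefficients: with A1 built from -diag(s2) (intercept row dropped) and b1 = (s2 * dbeta)[-1], the event A1 %*% bbar <= b1 says s2 * (bbar + dbeta) >= 0, i.e. bhat keeps its observed signs. A tiny numeric check with hypothetical values, ignoring the intercept handling:

```
s2    <- c(1, -1)        # observed signs of the active bhat
dbeta <- c(0.2, -0.1)    # one-step correction MM %*% gm
A1    <- -diag(s2)
b1    <- s2 * dbeta
bbar  <- c(0.8, -0.5)    # debiased estimate bhat - dbeta
all(A1 %*% bbar <= b1)   # TRUE: sign(bbar + dbeta) agrees with s2
```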
bbar = bhat - dbeta - - A1=-(mydiag(s2))[-1,] + + A1= matrix(-(mydiag(s2))[-1,],nrow=length(s2)-1) b1= (s2 * dbeta)[-1] - + V = diag(length(bbar))[-1,] + null_value = rep(0,nvar) + + if (type=='full') { + Xordered = cbind(xm,xnotm) + + hsigma <- 1/n*(t(Xordered)%*%w%*%Xordered) + + M <- matrix(InverseLinfty(hsigma,n,dim(xm)[2],verbose=F,max.try=10),ncol=p+1)[-1,] # remove intercept row + # I <- matrix(diag(dim(xm)[2]),nrow=dim(xm)[2]) + I <- matrix(diag(dim(xm)[2])[-1,],nrow=dim(xm)[2]-1) + if (is.null(dim(M))) Msubset <- M[-c(1,vars+1)] + else Msubset <- M[,-c(1,vars+1)] + # Msubset = matrix(Msubset,nrow=dim(xm)[2]) + # Msubset = matrix(Msubset,nrow=dim(xm)[2]-1) + # print("*********") + # print(Msubset) + # print(I) + # print("*********") + V <- cbind(I,Msubset/sqrt(n)) # matrix(cbind(I,Msubset/sqrt(n)),nrow=dim(xm)[2]-1) + + c <- matrix(c(gm,t(xnotm)%*%w%*%xm%*%(-dbeta)),ncol=1) + d <- -dbeta[-1] # remove intercept + + # d <- matrix(-dbeta,ncol=1) # remove intercept + # d <- matrix(-dbeta*lambda,ncol=1) # remove intercept + # print(dim(xnotm)) + # print(dim(w)) + # print(dim(xm)) + # c <- matrix(c(gm,t(xnotm)%*%w%*%xm%*%d),ncol=1) + # c <- matrix(c(gm[-1],t(xnotm)%*%w%*%xm%*%d),ncol=1) + # d <- d[-1,] + + # print(dim(c)) + # print(dim(M)) + # print(length(d)) + + null_value = -(M%*%c/sqrt(n) - d) + # null_value = -(M[,-1]%*%c/sqrt(n) - d) + + # A0 = (t(xnotm)%*%w%*%xm)/lambda + # A0 = cbind(A0,matrix(0,nrow(A0),ncol(xnotm))) + # A0 = rbind(A0,-A0) + # + # b0 = matrix(t(xnotm)%*%w%*%(z/lambda+xm%*%MM%*%gm),ncol=1) + # + # + # print("------") + # print(dim(A0)) + # print(dim(b0)) + # print(length(b1)) + + + # b1 = rbind(1+b0,1-b0,matrix(b1,ncol=1)) + + A0 = matrix(0,ncol(xnotm),ncol(A1)) + A0 = cbind(A0,diag(nrow(A0))) + fill = matrix(0,nrow(A1),ncol(xnotm)) + A1 = cbind(A1,fill) + A1 = rbind(A1,A0,-A0) + + b1 = matrix(c(b1,rep(lambda,2*nrow(A0))),ncol=1) + + # full covariance + MMbr = (t(xnotm)%*%w%*%xnotm - t(xnotm)%*%w%*%xm%*%MM%*%t(xm)%*%w%*%xnotm) # matrix(0,ncol(xnotm),ncol(xnotm)) + MM = cbind(MM,matrix(0,nrow(MM),ncol(MMbr))) + MMbr = cbind(matrix(0,nrow(MMbr),nrow(MM)),MMbr) + # print(dim(MM)) + # print(dim(MMbr)) + MM = rbind(MM,MMbr) + + gnotm = g[-vars]*lambda + bbar = matrix(c(bbar,gnotm),ncol=1) + + # print(dim(A1)) + # print(dim(b1)) + # print(dim(bbar)) + # print(dim(V)) + } + + + if (is.null(dim(V))) V=matrix(V,nrow=1) + tol.poly = 0.01 if (max((A1 %*% bbar) - b1) > tol.poly) stop(paste("Polyhedral constraints not satisfied; you must recompute beta", @@ -83,57 +167,70 @@ fixedLogitLassoInf=function(x,y,beta,lambda,alpha=.1, type=c("partial"), tol.bet "(beyond the grid of values visited by glmnet).", "You might also try rerunning glmnet with a lower setting of the", "'thresh' parameter, for a more accurate convergence.")) - - - for(jj in 1:sum(m)){ - vj=c(rep(0,sum(m)+1));vj[jj+1]=s2[jj+1] - # compute p-values - junk=TG.pvalue(bbar, A1, b1, vj, MM) - pv[jj] = junk$pv - - vlo[jj]=junk$vlo - vup[jj]=junk$vup - sd[jj]=junk$sd - - junk2=TG.interval(bbar, A1, b1, vj, MM,alpha=alpha) - - ci[jj,]=junk2$int - tailarea[jj,] = junk2$tailarea - } - - # JT: these are not the one step estimators but they are close - fit0=glm(y~x[,m],family="binomial") - sfit0=summary(fit0) - coef0=bbar[-1] #fit0$coef[-1] - se0=sqrt(diag(MM)[-1]) # sfit0$cov.scaled)[-1]) - zscore0=coef0/se0 - + + sign=numeric(nvar) + coef0=numeric(nvar) + + + for(j in 1:nvar){ + if (verbose) cat(sprintf("Inference for variable %i ...\n",vars[j])) + + if (is.null(dim(V))) vj = V + else vj = matrix(V[j,],nrow=1) + 
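+ # vj is the contrast for variable j: coef0[j] = vj %*% bbar is the
+ # (possibly debiased) coefficient whose truncated-Gaussian law the
+ # TG.limits / TG.pvalue.base calls below evaluate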
coef0[j] = vj%*%bbar # sum(vj * bbar) + sign[j] = sign(coef0[j]) + vj = vj * sign[j] + # print(dim(MM)) + # print(dim(vj)) + # print(nvar) + + # compute p-values + limits.info = TG.limits(bbar, A1, b1, vj, Sigma=MM) + # if(is.null(limits.info)) return(list(pv=NULL,MM=MM,eta=vj)) + a = TG.pvalue.base(limits.info, null_value=null_value[j], bits=bits) + pv[j] = a$pv + vlo[j] = a$vlo # * mj # Unstandardize (mult by norm of vj) + vup[j] = a$vup # * mj # Unstandardize (mult by norm of vj) + sd[j] = a$sd # * mj # Unstandardize (mult by norm of vj) + + a = TG.interval.base(limits.info, + alpha=alpha, + gridrange=gridrange, + flip=(sign[j]==-1), + bits=bits) + ci[j,] = (a$int-null_value[j]) # * mj # Unstandardize (mult by norm of vj) + tailarea[j,] = a$tailarea + } + + se0 = sqrt(diag(V%*%MM%*%t(V))) + zscore0 = (coef0+null_value)/se0 + out = list(type=type,lambda=lambda,pv=pv,ci=ci, - tailarea=tailarea,vlo=vlo,vup=vup,sd=sd, - vars=vars,alpha=alpha,coef0=coef0,zscore0=zscore0, - call=this.call, - info.matrix=MM) # info.matrix is output just for debugging purposes at the moment + tailarea=tailarea,vlo=vlo,vup=vup,sd=sd, + vars=vars,alpha=alpha,coef0=coef0,zscore0=zscore0, + call=this.call, + info.matrix=MM) # info.matrix is output just for debugging purposes at the moment class(out) = "fixedLogitLassoInf" return(out) - - } + +} print.fixedLogitLassoInf <- function(x, tailarea=TRUE, ...) { cat("\nCall:\n") dput(x$call) - + cat(sprintf("\nStandard deviation of noise (specified or estimated) sigma = %0.3f\n", x$sigma)) cat(sprintf("\nTesting results at lambda = %0.3f, with alpha = %0.3f\n",x$lambda,x$alpha)) cat("",fill=T) tab = cbind(x$vars, - round(x$coef0,3), - round(x$zscore0,3), - round(x$pv,3),round(x$ci,3)) + round(x$coef0,3), + round(x$zscore0,3), + round(x$pv,3),round(x$ci,3)) colnames(tab) = c("Var", "Coef", "Z-score", "P-value", "LowConfPt", "UpConfPt") if (tailarea) { tab = cbind(tab,round(x$tailarea,3)) @@ -141,7 +238,7 @@ print.fixedLogitLassoInf <- function(x, tailarea=TRUE, ...) { } rownames(tab) = rep("",nrow(tab)) print(tab) - + cat(sprintf("\nNote: coefficients shown are %s regression coefficients\n", ifelse(x$type=="partial","partial","full"))) invisible() diff --git a/selectiveInference/R/funs.lar.R b/selectiveInference/R/funs.lar.R index eb31b4e..a01b2d3 100644 --- a/selectiveInference/R/funs.lar.R +++ b/selectiveInference/R/funs.lar.R @@ -254,14 +254,7 @@ downdateQR <- function(Q1,Q2,R,col) { m = nrow(Q1) n = ncol(Q1) - a = .C("downdate1", - Q1=as.double(Q1), - R=as.double(R), - col=as.integer(col-1), - m=as.integer(m), - n=as.integer(n), - dup=FALSE, - package="selectiveInference") + a = downdate1_(as.matrix(Q1), R, col, m, n) # Rcpp call Q1 = matrix(a$Q1,nrow=m) R = matrix(a$R,nrow=n) diff --git a/selectiveInference/man/fixedLassoInf.Rd b/selectiveInference/man/fixedLassoInf.Rd index d675c3f..bd740f8 100644 --- a/selectiveInference/man/fixedLassoInf.Rd +++ b/selectiveInference/man/fixedLassoInf.Rd @@ -10,9 +10,9 @@ fixed value of the tuning parameter lambda } \usage{ fixedLassoInf(x, y, beta, lambda, family = c("gaussian", "binomial", - "cox"),intercept=TRUE, add.targets=NULL, status=NULL, sigma=NULL, alpha=0.1, + "cox"),intercept=TRUE, add.targets=NULL, status=NULL, sigma=NULL, alpha=0.1, type=c("partial","full"), tol.beta=1e-5, tol.kkt=0.1, - gridrange=c(-100,100), bits=NULL, verbose=FALSE) + gridrange=c(-100,100), bits=NULL, verbose=FALSE, linesearch.try=10) } \arguments{ \item{x}{ @@ -85,7 +85,11 @@ helpful (though computationally more costly). 
In particular, extra precision mig if the values in the output columns of \code{tailarea} differ noticeably from alpha/2. } \item{verbose}{ -Print out progress along the way? Default is FALSE} +Print out progress along the way? Default is FALSE +} +\item{linesearch.try}{ +When running type="full" (i.e. debiased LASSO) how many attempts in the line search? +} } \details{ @@ -145,7 +149,7 @@ p = 10 sigma = 1 x = matrix(rnorm(n*p),n,p) -x=scale(x,TRUE,TRUE) +x = scale(x,TRUE,TRUE) beta = c(3,2,rep(0,p-2)) y = x\%*\%beta + sigma*rnorm(n) @@ -165,10 +169,10 @@ out ## as above, but use lar function instead to get initial ## lasso fit (should get same results) - lfit = lar(x,y,normalize=FALSE) - beta = coef(lfit,s=lambda,mode="lambda") - out2 = fixedLassoInf(x,y,beta,lambda,sigma=sigma) - out2 +lfit = lar(x,y,normalize=FALSE) +beta = coef(lfit,s=lambda,mode="lambda") +out2 = fixedLassoInf(x,y,beta,lambda,sigma=sigma) +out2 ## mimic different penalty factors by first scaling x set.seed(43) @@ -249,5 +253,30 @@ status=sample(c(0,1),size=n,replace=TRUE) # compute fixed lambda p-values and selection intervals out = fixedLassoInf(x,tim,beta_hat,lambda,status=status,family="cox") out + +# Debiased lasso or "full" + +n = 50 +p = 100 +sigma = 1 + +x = matrix(rnorm(n*p),n,p) +x = scale(x,TRUE,TRUE) + +beta = c(3,2,rep(0,p-2)) +y = x\%*\%beta + sigma*rnorm(n) + +# first run glmnet +gfit = glmnet(x, y, standardize=FALSE, intercept=FALSE) + +# extract coef for a given lambda; note the 1/n factor! +# (and we don't save the intercept term) +lambda = 2.8 +beta = coef(gfit, s=lambda/n, exact=TRUE)[-1] + +# compute fixed lambda p-values and selection intervals +out = fixedLassoInf(x, y, beta, lambda, sigma=sigma, type='full', intercept=FALSE) +out + } \ No newline at end of file diff --git a/selectiveInference/src/Makevars b/selectiveInference/src/Makevars new file mode 100644 index 0000000..cde36c9 --- /dev/null +++ b/selectiveInference/src/Makevars @@ -0,0 +1,11 @@ +PKG_CFLAGS= -I. +PKG_CPPFLAGS= -I. +PKG_LIBS=-L. + +$(SHLIB): Rcpp Rcpp-matrixcomps.o Rcpp-debias.o RcppExports.o debias.o + +clean: + rm -f *o + +Rcpp: + Rscript -e "library(Rcpp); Rcpp::compileAttributes('..')" \ No newline at end of file diff --git a/selectiveInference/src/Rcpp-debias.cpp b/selectiveInference/src/Rcpp-debias.cpp new file mode 100644 index 0000000..85338b4 --- /dev/null +++ b/selectiveInference/src/Rcpp-debias.cpp @@ -0,0 +1,125 @@ +#include // need to include the main Rcpp header file +#include // where find_one_row_void is defined + +// Below, the gradient should be equal to Sigma * theta + linear_func!! +// No check is done on this. 
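A hedged R-side sketch of calling the exported wrapper defined just below, mirroring how InverseLinftyOneRow sets it up (all values hypothetical; theta, gradient, and ever_active are modified in place):

```
p <- 5
Sigma <- diag(p)                           # stand-in covariance
theta <- rep(0, p)                         # start at zero
linear_func <- replace(rep(0, p), 1, -1)   # targets row i = 1
gradient <- Sigma %*% theta + linear_func  # the invariant stated above
ever_active <- as.integer(replace(rep(0, p), 1, 1))  # 1-based
res <- solve_QP(Sigma, bound = 0.1, maxiter = 50, theta,
                linear_func, as.numeric(gradient), ever_active,
                nactive = 1L, kkt_tol = 1e-6, objective_tol = 1e-6)
res$kkt_check  # TRUE when the KKT conditions were met
```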
+
+// [[Rcpp::export]]
+Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma,
+                    double bound,
+                    int maxiter,
+                    Rcpp::NumericVector theta,
+                    Rcpp::NumericVector linear_func,
+                    Rcpp::NumericVector gradient,
+                    Rcpp::IntegerVector ever_active,
+                    Rcpp::IntegerVector nactive,
+                    double kkt_tol,
+                    double objective_tol
+                    ) {
+
+  int nrow = Sigma.nrow(); // number of features
+
+  // Active set
+
+  int irow;
+
+  // Extract the diagonal
+  Rcpp::NumericVector Sigma_diag(nrow);
+  double *sigma_diag_p = Sigma_diag.begin();
+
+  for (irow=0; irow<nrow; irow++) {
diff --git a/selectiveInference/src/Rcpp-matrixcomps.cpp b/selectiveInference/src/Rcpp-matrixcomps.cpp
new file mode 100644
--- /dev/null
+++ b/selectiveInference/src/Rcpp-matrixcomps.cpp
+#include <Rcpp.h>        // need to include the main Rcpp header file
+#include "matrixcomps.h" // where update1, downdate1 are defined
+
+// [[Rcpp::export]]
+Rcpp::List update1_(Rcpp::NumericMatrix Q2,
+                    Rcpp::NumericVector w,
+                    int m,
+                    int k) {
+
+  update1(Q2.begin(),
+          w.begin(),
+          m,
+          k);
+
+  return(Rcpp::List::create(Rcpp::Named("Q2") = Q2,
+                            Rcpp::Named("w") = w));
+}
+
+// [[Rcpp::export]]
+Rcpp::List downdate1_(Rcpp::NumericMatrix Q1,
+                      Rcpp::NumericMatrix R,
+                      int j0,
+                      int m,
+                      int n) {
+
+  downdate1(Q1.begin(),
+            R.begin(),
+            j0,
+            m,
+            n);
+
+  return(Rcpp::List::create(Rcpp::Named("Q1") = Q1,
+                            Rcpp::Named("R") = R));
+}
diff --git a/selectiveInference/src/RcppExports.cpp b/selectiveInference/src/RcppExports.cpp
new file mode 100644
index 0000000..c05f594
--- /dev/null
+++ b/selectiveInference/src/RcppExports.cpp
@@ -0,0 +1,89 @@
+// Generated by using Rcpp::compileAttributes() -> do not edit by hand
+// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+#include <Rcpp.h>
+
+using namespace Rcpp;
+
+// solve_QP
+Rcpp::List solve_QP(Rcpp::NumericMatrix Sigma, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector linear_func, Rcpp::NumericVector gradient, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol);
+RcppExport SEXP _selectiveInference_solve_QP(SEXP SigmaSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP linear_funcSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Sigma(SigmaSEXP);
+    Rcpp::traits::input_parameter< double >::type bound(boundSEXP);
+    Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP);
+    Rcpp::traits::input_parameter< Rcpp::NumericVector >::type theta(thetaSEXP);
+    Rcpp::traits::input_parameter< Rcpp::NumericVector >::type linear_func(linear_funcSEXP);
+    Rcpp::traits::input_parameter< Rcpp::NumericVector >::type gradient(gradientSEXP);
+    Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type ever_active(ever_activeSEXP);
+    Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type nactive(nactiveSEXP);
+    Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP);
+    Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP);
+    rcpp_result_gen = Rcpp::wrap(solve_QP(Sigma, bound, maxiter, theta, linear_func, gradient, ever_active, nactive, kkt_tol, objective_tol));
+    return rcpp_result_gen;
+END_RCPP
+}
+// find_one_row_debiasingM
+Rcpp::List find_one_row_debiasingM(Rcpp::NumericMatrix Sigma, int row, double bound, int maxiter, Rcpp::NumericVector theta, Rcpp::NumericVector gradient, Rcpp::IntegerVector ever_active, Rcpp::IntegerVector nactive, double kkt_tol, double objective_tol);
+RcppExport SEXP _selectiveInference_find_one_row_debiasingM(SEXP SigmaSEXP, SEXP rowSEXP, SEXP boundSEXP, SEXP maxiterSEXP, SEXP thetaSEXP, SEXP gradientSEXP, SEXP ever_activeSEXP, SEXP nactiveSEXP, SEXP kkt_tolSEXP, SEXP objective_tolSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Sigma(SigmaSEXP);
+    Rcpp::traits::input_parameter< int >::type row(rowSEXP);
+    Rcpp::traits::input_parameter< double >::type bound(boundSEXP);
+    Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP);
+    Rcpp::traits::input_parameter< Rcpp::NumericVector >::type theta(thetaSEXP);
+    Rcpp::traits::input_parameter< Rcpp::NumericVector >::type gradient(gradientSEXP);
+    Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type ever_active(ever_activeSEXP);
+    Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type nactive(nactiveSEXP);
+    Rcpp::traits::input_parameter< double >::type kkt_tol(kkt_tolSEXP);
+    Rcpp::traits::input_parameter< double >::type objective_tol(objective_tolSEXP);
+    rcpp_result_gen = Rcpp::wrap(find_one_row_debiasingM(Sigma, row, bound, maxiter, theta, gradient, ever_active, nactive, kkt_tol, objective_tol));
+    return rcpp_result_gen;
+END_RCPP
+}
+// update1_
+Rcpp::List update1_(Rcpp::NumericMatrix Q2, Rcpp::NumericVector w, int m, int k);
+RcppExport SEXP _selectiveInference_update1_(SEXP Q2SEXP, SEXP wSEXP, SEXP mSEXP, SEXP kSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q2(Q2SEXP);
+    Rcpp::traits::input_parameter< Rcpp::NumericVector >::type w(wSEXP);
+    Rcpp::traits::input_parameter< int >::type m(mSEXP);
+    Rcpp::traits::input_parameter< int >::type k(kSEXP);
+    rcpp_result_gen = Rcpp::wrap(update1_(Q2, w, m, k));
+    return rcpp_result_gen;
+END_RCPP
+}
+// downdate1_
+Rcpp::List downdate1_(Rcpp::NumericMatrix Q1, Rcpp::NumericMatrix R, int j0, int m, int n);
+RcppExport SEXP _selectiveInference_downdate1_(SEXP Q1SEXP, SEXP RSEXP, SEXP j0SEXP, SEXP mSEXP, SEXP nSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type Q1(Q1SEXP);
+    Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type R(RSEXP);
+    Rcpp::traits::input_parameter< int >::type j0(j0SEXP);
+    Rcpp::traits::input_parameter< int >::type m(mSEXP);
+    Rcpp::traits::input_parameter< int >::type n(nSEXP);
+    rcpp_result_gen = Rcpp::wrap(downdate1_(Q1, R, j0, m, n));
+    return rcpp_result_gen;
+END_RCPP
+}
+
+static const R_CallMethodDef CallEntries[] = {
+    {"_selectiveInference_solve_QP", (DL_FUNC) &_selectiveInference_solve_QP, 10},
+    {"_selectiveInference_find_one_row_debiasingM", (DL_FUNC) &_selectiveInference_find_one_row_debiasingM, 10},
+    {"_selectiveInference_update1_", (DL_FUNC) &_selectiveInference_update1_, 4},
+    {"_selectiveInference_downdate1_", (DL_FUNC) &_selectiveInference_downdate1_, 5},
+    {NULL, NULL, 0}
+};
+
+RcppExport void R_init_selectiveInference(DllInfo *dll) {
+    R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
+    R_useDynamicSymbols(dll, FALSE);
+}
diff --git a/selectiveInference/src/debias.c b/selectiveInference/src/debias.c
new file mode 100644
index 0000000..3e3efb5
--- /dev/null
+++ b/selectiveInference/src/debias.c
@@ -0,0 +1,332 @@
+#include <math.h> // for fabs
+
+// Find an approximate row of \hat{Sigma}^{-1}
+
+// Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf
+
+// Dual problem: \text{min}_{\theta} 1/2 \theta^T \Sigma \theta - e_i^T\theta + \mu \|\theta\|_1
+
+// This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf
+// Therefore we don't have to negate the answer to get theta.
+// Update one coordinate
+
+double objective(double *Sigma_ptr,    /* A covariance matrix: X^TX/n */
+                 int *ever_active_ptr, /* Ever active set: 1-based */
+                 int *nactive_ptr,     /* Size of ever active set */
+                 int nrow,             /* how many rows in Sigma */
+                 int row,              /* which row: 1-based */
+                 double bound,         /* Lagrange multiplier for \ell_1 */
+                 double *theta)        /* current value */
+{
+  int irow, icol;
+  double value = 0;
+  double *Sigma_ptr_tmp = Sigma_ptr;
+  double *theta_row_ptr, *theta_col_ptr;
+  int *active_row_ptr, *active_col_ptr;
+  int active_row, active_col;
+  int nactive = *nactive_ptr;
+
+  theta_row_ptr = theta;
+  theta_col_ptr = theta;
+
+  for (irow=0; irow<nactive; irow++) {
+      if ((*theta_ptr > 0) && (fabs(gradient + bound) > tol * bound)) {
+	return(0);
+      }
+      else if ((*theta_ptr < 0) && (fabs(gradient - bound) > tol * bound)) {
+	return(0);
+      }
+    }
+    else {
+      if (fabs(gradient) > (1. + tol) * bound) {
+	return(0);
+      }
+    }
+
+  }
+
+  return(1);
+}
+
+double update_one_coord(double *Sigma_ptr,      /* A covariance matrix: X^TX/n */
+                        double *Sigma_diag_ptr, /* Diagonal entries of Sigma */
+                        double *gradient_ptr,   /* Sigma times theta */
+                        int *ever_active_ptr,   /* Ever active set: 0-based */
+                        int *nactive_ptr,       /* Size of ever active set */
+                        int nrow,               /* How many rows in Sigma */
+                        double bound,           /* feasibility parameter */
+                        double *theta,          /* current value */
+                        int row,                /* which row: 1-based */
+                        int coord,              /* which coordinate to update: 0-based */
+                        int is_active)          /* Is this part of ever_active */
+{
+
+  double delta;
+  double linear_term = 0;
+  double value = 0;
+  double old_value;
+  double *Sigma_ptr_tmp;
+  double *gradient_ptr_tmp;
+  double *theta_ptr;
+  int icol = 0;
+
+  double *quadratic_ptr = ((double *) Sigma_diag_ptr + coord);
+  double quadratic_term = *quadratic_ptr;
+
+  gradient_ptr_tmp = ((double *) gradient_ptr + coord);
+  linear_term = *gradient_ptr_tmp;
+
+  theta_ptr = ((double *) theta + coord);
+  old_value = *theta_ptr;
+
+  // The coord entry of gradient_ptr term has a diagonal term in it:
+  // Sigma[coord, coord] * theta[coord]
+  // This removes it.
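  /* A worked instance of the soft-thresholding step below (illustrative
     editorial note, not part of the original patch): with the diagonal
     term removed, the one-dimensional problem in this coordinate is

         minimize_t  (q/2) t^2 + l t + bound * |t|,

     with q = quadratic_term = Sigma[coord, coord] > 0 and l = linear_term.
     Its solution is the soft-threshold

         t* = -sign(l) * max(|l| - bound, 0) / q.

     For example q = 2, l = -5, bound = 1 gives t* = (5 - 1)/2 = 2: the
     linear term is below -bound, so the first branch below returns a
     strictly positive value, as its comment promises. */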
+
+  linear_term -= quadratic_term * old_value;
+
+  if (row - 1 == coord) { // Row is 1-based
+    linear_term -= 1;
+  }
+
+  // Now soft-threshold the coord entry of theta
+
+  // Objective is t \mapsto q/2 * t^2 + l * t + bound |t|
+  // with q=quadratic_term and l=linear_term
+
+  // With a negative linear term, solution should be
+  // positive
+
+  if (linear_term < -bound) {
+    value = (-linear_term - bound) / quadratic_term;
+  }
+  else if (linear_term > bound) {
+    value = -(linear_term - bound) / quadratic_term;
+  }
+
+  // Add to active set if necessary
+
+  if ((is_active == 0) && (value != 0)) {
+    update_ever_active(coord, ever_active_ptr, nactive_ptr);
+  }
+
+  // Update the linear term
+
+  if (fabs(old_value - value) > 1.e-6 * (fabs(value) + fabs(old_value))) {
+
+    delta = value - old_value;
+    Sigma_ptr_tmp = ((double *) Sigma_ptr + coord * nrow);
+    gradient_ptr_tmp = ((double *) gradient_ptr);
+
+    for (icol=0; icol<nrow; icol++) {
+    if (((old_value - new_value) < objective_tol * fabs(new_value)) && (iter > 0)) {
+      break;
+    }
+
+    old_value = new_value;
+    }
+  }
+  return(iter);
+}
+
diff --git a/selectiveInference/src/debias.h b/selectiveInference/src/debias.h
new file mode 100644
index 0000000..5510adf
--- /dev/null
+++ b/selectiveInference/src/debias.h
@@ -0,0 +1,48 @@
+#ifdef __cplusplus
+extern "C"
+{
+#endif /* __cplusplus */
+
+int solve_qp(double *Sigma_ptr,       /* A covariance matrix: X^TX/n */
+             double *linear_func_ptr, /* Linear term in objective */
+             double *Sigma_diag_ptr,  /* Diagonal entry of covariance matrix */
+             double *gradient_ptr,    /* Current gradient of quadratic loss */
+             int *ever_active_ptr,    /* Ever active set: 0-based */
+             int *nactive_ptr,        /* Size of ever active set */
+             int nrow,                /* How many rows in Sigma */
+             double bound,            /* feasibility parameter */
+             double *theta,           /* current value */
+             int maxiter,             /* how many iterations */
+             double kkt_tol,          /* precision for checking KKT conditions */
+             double objective_tol);   /* precision for checking relative decrease in objective value */
+
+
+int check_KKT_qp(double *theta,        /* current theta */
+                 double *gradient_ptr, /* Current gradient of quadratic loss */
+                 int nrow,             /* how many rows in Sigma */
+                 double bound,         /* Lagrange multiplier for \ell_1 */
+                 double tol);          /* precision for checking KKT conditions */
+
+int find_one_row_(double *Sigma_ptr,      /* A covariance matrix: X^TX/n */
+                  double *Sigma_diag_ptr, /* Diagonal entry of covariance matrix */
+                  double *gradient_ptr,   /* Current gradient of quadratic loss */
+                  int *ever_active_ptr,   /* Ever active set: 0-based */
+                  int *nactive_ptr,       /* Size of ever active set */
+                  int nrow,               /* How many rows in Sigma */
+                  double bound,           /* feasibility parameter */
+                  double *theta,          /* current value */
+                  int maxiter,            /* how many iterations */
+                  int row,                /* which coordinate to update: 0-based */
+                  double kkt_tol,         /* precision for checking KKT conditions */
+                  double objective_tol);  /* precision for checking relative decrease in objective value */
+
+int check_KKT(double *theta,        /* current theta */
+              double *gradient_ptr, /* Current gradient of quadratic loss */
+              int nrow,             /* how many rows in Sigma */
+              int row,              /* which row: 0-based */
+              double bound,         /* Lagrange multiplier for \ell_1 */
+              double kkt_tol);      /* precision for checking KKT conditions */
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
diff --git a/selectiveInference/src/matrixcomps.c b/selectiveInference/src/matrixcomps.c
index 4a516a1..bec3506 100644
--- a/selectiveInference/src/matrixcomps.c
+++ b/selectiveInference/src/matrixcomps.c
@@ -53,11 +53,8 @@ void colrot(double *A, int j1, int j2, int m, int n, int i1, int i2, double c, d
 // where Q1 is m x n and R is n x n. The other part of
 // the Q matrix, Q2 m x (m-n), isn't needed so it isn't
 // passed for efficiency
-void downdate1(double *Q1, double *R, int *j0p, int *mp, int *np) {
-  int j0,m,n,j;
-  j0 = *j0p;
-  m = *mp;
-  n = *np;
+void downdate1(double *Q1, double *R, int j0, int m, int n) {
+  int j;
   double c,s;
   for (j=j0+1; j<n; j++) {
 =1; j--) {
diff --git a/selectiveInference/src/matrixcomps.h b/selectiveInference/src/matrixcomps.h
new file mode 100644
index 0000000..8632e3e
--- /dev/null
+++ b/selectiveInference/src/matrixcomps.h
@@ -0,0 +1,12 @@
+#ifdef __cplusplus
+extern "C"
+{
+#endif /* __cplusplus */
+
+void update1(double *Q2, double *w, int m, int k);
+
+void downdate1(double *Q1, double *R, int j0, int m, int n);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
diff --git a/selectiveInference/src/quadratic_program.c b/selectiveInference/src/quadratic_program.c
new file mode 100644
index 0000000..ba14d02
--- /dev/null
+++ b/selectiveInference/src/quadratic_program.c
@@ -0,0 +1,323 @@
+#include <math.h> // for fabs
+
+// Find an approximate row of \hat{Sigma}^{-1}
+
+// Solves a dual version of problem (4) of https://arxiv.org/pdf/1306.3171.pdf
+
+// Dual problem: \text{min}_{\theta} 1/2 \theta^T \Sigma \theta - l^T\theta + \mu \|\theta\|_1
+// where l is `linear_func` below
+
+// This is the "negative" of the problem as in https://gist.github.com/jonathan-taylor/07774d209173f8bc4e42aa37712339bf
+// Therefore we don't have to negate the answer to get theta.
+// Update one coordinate
+
+double objective_qp(double *Sigma_ptr,       /* A covariance matrix: X^TX/n */
+                    double *linear_func_ptr, /* Linear term in objective */
+                    int *ever_active_ptr,    /* Ever active set: 0-based */
+                    int *nactive_ptr,        /* Size of ever active set */
+                    int nrow,                /* how many rows in Sigma */
+                    double bound,            /* Lagrange multiplier for \ell_1 */
+                    double *theta)           /* current value */
+{
+  int irow, icol;
+  double value = 0;
+  double *Sigma_ptr_tmp = Sigma_ptr;
+  double *linear_func_ptr_tmp = linear_func_ptr;
+  double *theta_row_ptr, *theta_col_ptr;
+  int *active_row_ptr, *active_col_ptr;
+  int active_row, active_col;
+  int nactive = *nactive_ptr;
+
+  theta_row_ptr = theta;
+  theta_col_ptr = theta;
+
+  for (irow=0; irow<nactive; irow++) {
+      if ((*theta_ptr > 0) && (fabs(gradient + bound) > tol * bound)) {
+	return(0);
+      }
+      else if ((*theta_ptr < 0) && (fabs(gradient - bound) > tol * bound)) {
+	return(0);
+      }
+    }
+    else {
+      if (fabs(gradient) > (1. + tol) * bound) {
+	return(0);
+      }
+    }
+
+  return(1);
+}
+
+double update_one_coord_qp(double *Sigma_ptr,       /* A covariance matrix: X^TX/n */
+                           double *linear_func_ptr, /* Linear term in objective */
+                           double *Sigma_diag_ptr,  /* Diagonal entries of Sigma */
+                           double *gradient_ptr,    /* Sigma times theta */
+                           int *ever_active_ptr,    /* Ever active set: 1-based */
+                           int *nactive_ptr,        /* Size of ever active set */
+                           int nrow,                /* How many rows in Sigma */
+                           double bound,            /* feasibility parameter */
+                           double *theta,           /* current value */
+                           int coord,               /* which coordinate to update: 0-based */
+                           int is_active)           /* Is this coord in ever_active */
+{
+
+  double delta;
+  double linear_term = 0;
+  double value = 0;
+  double old_value;
+  double *Sigma_ptr_tmp;
+  double *gradient_ptr_tmp;
+  double *theta_ptr;
+  int icol = 0;
+
+  double *quadratic_ptr = ((double *) Sigma_diag_ptr + coord);
+  double quadratic_term = *quadratic_ptr;
+
+  gradient_ptr_tmp = ((double *) gradient_ptr + coord);
+  linear_term = *gradient_ptr_tmp;
+
+  theta_ptr = ((double *) theta + coord);
+  old_value = *theta_ptr;
+
+  // The coord entry of gradient_ptr term has a diagonal term in it:
+  // Sigma[coord, coord] * theta[coord]
+  // This removes it.
+
+  linear_term -= quadratic_term * old_value;
+
+  // Now soft-threshold the coord entry of theta
+
+  // Objective is t \mapsto q/2 * t^2 + l * t + bound |t|
+  // with q=quadratic_term and l=linear_term
+
+  // With a negative linear term, solution should be
+  // positive
+
+  if (linear_term < -bound) {
+    value = (-linear_term - bound) / quadratic_term;
+  }
+  else if (linear_term > bound) {
+    value = -(linear_term - bound) / quadratic_term;
+  }
+
+  // Add to active set if necessary
+
+  if ((is_active == 0) && (value != 0)) {
+    update_ever_active_qp(coord, ever_active_ptr, nactive_ptr);
+  }
+
+  // Update the linear term
+
+  if (fabs(old_value - value) > 1.e-6 * (fabs(value) + fabs(old_value))) {
+
+    delta = value - old_value;
+    Sigma_ptr_tmp = ((double *) Sigma_ptr + coord * nrow);
+    gradient_ptr_tmp = ((double *) gradient_ptr);
+
+    for (icol=0; icol<nrow; icol++) {
+    if (((old_value - new_value) < objective_tol * fabs(new_value)) && (iter > 0)) {
+      break;
+    }
+    old_value = new_value;
+    }
+  }
+  return(iter);
+}
+
diff --git a/selectiveInference/src/symbols.rds b/selectiveInference/src/symbols.rds
deleted file mode 100644
index 06b0e85..0000000
Binary files a/selectiveInference/src/symbols.rds and /dev/null differ
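To see how the pieces of this patch fit together, here is a minimal standalone driver for the coordinate-descent solver, written against the solve_qp prototype declared in selectiveInference/src/debias.h above. The data values, the bound, and the tolerances are made up, and the empty ever_active seeding is an assumption (the 0- versus 1-based convention varies between comments in this patch), so treat it as a sketch rather than the package's own usage:

#include <stdio.h>
#include "debias.h" /* solve_qp prototype from this patch */

int main(void) {
  /* Small dual problem: minimize 1/2 theta' Sigma theta + l' theta
     + bound * ||theta||_1, with Sigma a 2x2 covariance (column-major)
     and l = -e_1, i.e. approximating row 1 of Sigma^{-1}. */
  double Sigma[4]       = {1.0, 0.2, 0.2, 1.0};
  double Sigma_diag[2]  = {1.0, 1.0};
  double linear_func[2] = {-1.0, 0.0};
  double gradient[2]    = {-1.0, 0.0}; /* Sigma*theta + linear_func at theta = 0 */
  double theta[2]       = {0.0, 0.0};
  int ever_active[2]    = {0, 0};      /* assumed empty ever-active set */
  int nactive           = 0;

  int iter = solve_qp(Sigma, linear_func, Sigma_diag, gradient,
                      ever_active, &nactive,
                      2,      /* nrow */
                      0.1,    /* bound: feasibility parameter */
                      theta,
                      100,    /* maxiter */
                      1.e-8,  /* kkt_tol */
                      1.e-8); /* objective_tol */

  printf("iterations used: %d, theta = (%g, %g), nactive = %d\n",
         iter, theta[0], theta[1], nactive);
  return 0;
}

In the package itself these entry points are reached through the Rcpp wrappers (solve_QP and friends) registered in RcppExports.cpp, rather than from a C main.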