Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 22 additions & 7 deletions data/epactsMulti.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
args <- commandArgs(trailingOnly=TRUE)
bindir <- args[2] ## Directory containing the R scripts
phenof <- args[3] ## phenotype file
##covf <- args[4] ## covariate file
covf <- args[4] ## covariate file or null model index file
indf <- args[5] ## individual IDs (in order from VCF)
vcf <- args[6] ## VCF file - must be bgzipped and tabixed
region <- args[7] ## Genomic region to load
Expand All @@ -29,9 +29,20 @@ dyn.load(paste(bindir,"/lib/epactsR/libs/epactsR",.Platform$dynlib.ext, sep=""))
pnames <- scan(phenof,what=character(),nlines=1)[-1]
phenos <- as.matrix(read.table(phenof)[,-1])

if(test=="multi.b.sna2")
{
nullf<-substr(covf,1,nchar(covf)-4) #remove ".cov" to get outf
} else if ( covf == "NULL" ) {
cov <- NULL
} else {
cov <- as.matrix(read.table(covf)[,-1])
}


ind <- as.integer(read.table(indf)[,2])-1

G <- .Call("readVcf",vcf,region,field,passOnly,ind,NULL)

m <- nrow(G)
if ( m > 0 ) {
n <- ncol(G)
Expand Down Expand Up @@ -61,19 +72,23 @@ if ( m > 0 ) {

print("bar")

out <- matrix(NA,m,4+2*ncol(phenos))
out[vids,3+(1:ncol(phenos))*2] <- r$p
out[vids,4+(1:ncol(phenos))*2] <- r$b
out <- matrix(NA,m,4+ncol(r$p)+ncol(r$add))
out[vids,4+(1:ncol(r$p))] <- r$p
out[vids,4+ncol(r$p)+(1:ncol(r$add))] <- r$add
out[,1] <- NS
out[,2] <- AC
out[,3] <- CR
out[,4] <- MAF
colnames(out) <- c("NS","AC","CR","MAF",rep(NA,2*ncol(phenos)))
colnames(out)[3+(1:ncol(phenos))*2] <- paste(pnames,"P",sep=".")
colnames(out)[4+(1:ncol(phenos))*2] <- paste(pnames,"B",sep=".")
colnames(out) <- c("NS","AC","CR","MAF",rep(NA,ncol(r$p)),rep(NA,ncol(r$add)))
colnames(out)[4+(1:ncol(r$p))] <- paste(pnames,"P",sep=".")
colnames(out)[4+ncol(r$p)+(1:ncol(r$add))] <- r$cname
rownames(out) <- rownames(G)

print(warnings())
.Call("writeMatrix",out,outf)
} else {
write.table(NULL,outf,row.names=F,col.names=F)
}
rm(list=ls())
gc()

194 changes: 194 additions & 0 deletions data/epactsMultiNull.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
args <- commandArgs(trailingOnly=TRUE)
nullf <- args[2] ## output name
phenof <- args[3] ## phenotype file
covf <- args[4] ## covariate file or null model index file

pnames <- scan(phenof,what=character(),nlines=1)[-1]
phenos <- as.matrix(read.table(phenof)[,-1])

if ( covf == "NULL" ) {
cov <- NULL;
} else {
cov <- as.matrix(read.table(covf)[,-1])
}


fast.logistf.fit <- function (x, y, weight = NULL, offset = NULL, firth = TRUE, col.fit = NULL,
init = NULL, control) {
n <- nrow(x)
k <- ncol(x)
if (is.null(init))
init = rep(0, k)
if (is.null(col.fit))
col.fit = 1:k
if (is.null(offset))
offset = rep(0, n)
if (is.null(weight))
weight = rep(1, n)
if (col.fit[1] == 0)
maxit <- 0
if (missing(control))
control <- fast.logistf.control()
maxit <- control$maxit
maxstep <- control$maxstep
maxhs <- control$maxhs
lconv <- control$lconv
gconv <- control$gconv
xconv <- control$xconv
beta <- init
iter <- 0
pi <- as.vector(1/(1 + exp(-x %*% beta - offset)))
evals <- 1
repeat {
beta.old <- beta
XW2 <- t(x * (weight * pi * (1-pi))^0.5)
myQR <- qr(t(XW2))
Q <- qr.Q(myQR)
h <- (Q*Q) %*% rep(1, ncol(Q))
if (firth)
U.star <- crossprod(x, weight * (y - pi) + h * (0.5 - pi))
else U.star <- crossprod(x, weight * (y - pi))
XX.covs <- matrix(0, k, k)
if (col.fit[1] != 0) {
XX.XW2 <- t(x[, col.fit, drop=FALSE] * (weight * pi * (1-pi))^0.5)
XX.Fisher <- crossprod(t(XX.XW2))
XX.covs[col.fit, col.fit] <- fast.invFisher(XX.Fisher) ###### HERE IS THE PROBLEM!!!
}
if(all(is.na(XX.covs)) == T) {
break
}
delta <- as.vector(XX.covs %*% U.star)
delta[is.na(delta)] <- 0
mx <- max(abs(delta))/maxstep
if (mx > 1)
delta <- delta/mx
evals <- evals + 1
if (maxit > 0) {
iter <- iter + 1
beta <- beta + delta
pi <- as.vector(1/(1 + exp(-x %*% beta - offset)))


}
if (iter == maxit | ((max(abs(delta)) <= xconv) & (all(abs(U.star[col.fit]) <
gconv))))
break
}
# Error catching (if chol(x) not positive definite)
if(all(is.na(XX.covs))==T) {
var <- XX.covs
list(beta = NA, var = var, pi = NA, hat.diag = NA,
iter = NA, evals = NA, conv = c(NA,
NA, NA))
} else {
var <- XX.covs
list(beta = beta, var = var, pi = pi, hat.diag = h,
iter = iter, evals = evals, conv = c(max(abs(U.star)),
max(abs(delta))))
}
}


fast.logistf.control <- function (maxit = 50, maxhs = 15, maxstep = 15, lconv = 1e-05,
gconv = 1e-05, xconv = 1e-05)
{
list(maxit = maxit, maxhs = maxhs, maxstep = maxstep, lconv = lconv,
gconv = gconv, xconv = xconv)
}

fast.logDet <- function (x) {
my.chol <- tryCatch(chol(x),error=function(e) {NA})
if (all(is.na(my.chol))==T) {
return(NA)
} else {
return (2 * sum(log(diag(my.chol))))
}
}

fast.invFisher <- function(x) {
my.chol <- tryCatch(chol(x),error=function(e) {NA})
if (all(is.na(my.chol))==T) {
return(NA)
} else {
return (chol2inv(my.chol))
}
#ifelse(is.na(my.chol), NA, chol2inv(my.chol))
}


ScoreTest_wSaddleApprox_Get_X1 = function(X1)
{
q1<-ncol(X1)
if(q1>=2)
{
if(sum(abs(X1[,1]-X1[,2]))==0)
{
X1=X1[,-2]
q1<-q1-1
}
}
qr1<-qr(X1)
if(qr1$rank < q1){

X1.svd<-svd(X1)
X1 = X1.svd$u[,1:qr1$rank]
}

return(X1)
}


ScoreTest_wSaddleApprox_NULL_Model <- function(formula, data=NULL)
{
X1<-model.matrix(formula,data=data)
X1<-ScoreTest_wSaddleApprox_Get_X1(X1)

glmfit= glm(formula, data=data, family = "binomial")
convflag=0
if(glmfit$converged)
{
mu = glmfit$fitted.values
if(mean(mu)/mean(glmfit$y)>0.001 & (1-mean(mu))/(1-mean(glmfit$y))>0.001) convflag<-1 #Check that the null model converged properly with glm
}
if(convflag==0)
{
firthfit=fast.logistf.fit(x=X1,y=glmfit$y)
eta<-X1%*%firthfit$beta
mu = as.vector(exp(eta)/(1+exp(eta)))
}

V = mu*(1-mu)
res = glmfit$y- mu
n1<-length(res)

XV = t(X1 * V)
XVX_inv= solve(t(X1)%*%(X1 * V))
XXVX_inv= X1 %*% XVX_inv

re<-list(y=glmfit$y, mu=mu, res=res, V=V, X1=X1, XV=XV, XXVX_inv =XXVX_inv)
class(re)<-"SA_NULL"
return(re)
}

##################################################################
## MAIN FUNCTION: null.b.sna2
##################################################################

phenos <- phenos - min(phenos,na.rm=T)

k <- ncol(cov)

g<-ncol(phenos)

for(gind in 1:g)
{
nm<-which(is.na(phenos[,gind])==FALSE)
pheno<-phenos[nm,gind]
cov1<-cov[nm,]

obj.null<-ScoreTest_wSaddleApprox_NULL_Model(pheno ~as.matrix(cov1))
save.image(paste(nullf,".",pnames[gind],".null.RData",sep=""))

}


35 changes: 15 additions & 20 deletions data/epactsSingle.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,14 @@ if ( !require(epactsR,lib.loc=paste(bindir,"/lib/",sep="") ) ) {
}
## the DLL file has to be loaded explictly for some machine - don't know why
dyn.load(paste(bindir,"/lib/epactsR/libs/epactsR",.Platform$dynlib.ext, sep=""))

pheno <- as.double(read.table(phenof)[,2])
pheno <- pheno - min(pheno,na.rm=T)
if ( covf == "NULL" ) {
cov <- NULL;
if(test=="single.b.sna2")
{
nullf<-substr(covf,1,nchar(covf)-4) #remove ".cov" to get outf
} else if ( covf == "NULL" ) {
cov <- NULL
} else {
cov <- as.matrix(read.table(covf)[,-1])
}
Expand All @@ -54,8 +57,7 @@ binaryFlag <- is.binary(pheno)
ind <- as.integer(read.table(indf)[,2])-1

G <- .Call("readVcf",vcf,region,field,passOnly,ind,NULL)
rnames <- rownames(G)
rownames(G) <- seq_along(rnames)

m <- nrow(G)
if ( m > 0 ) {
n <- ncol(G)
Expand All @@ -72,7 +74,8 @@ if ( m > 0 ) {
rsq[rsq>1] <- 1
if ( minRSQ > 0 ) {
vids <- which((varAC > 0) & (MAF >= minMAF) & (MAF <= maxMAF ) & ( MAC >= minMAC ) & ( MAC <= maxMAC) & (CR >= minCallRate) & (rsq >= minRSQ))
} else {
}
else {
vids <- which((varAC > 0) & (MAF >= minMAF) & (MAF <= maxMAF) & ( MAC >= minMAC ) & ( MAC <= maxMAC) & (CR >= minCallRate))
}
genos <- G[vids,,drop=FALSE]
Expand All @@ -88,7 +91,6 @@ if ( m > 0 ) {

func <- match.fun(test)
r <- func()


if ( binaryFlag ) {
binary.cname <- c("NS.CASE","NS.CTRL","AF.CASE","AF.CTRL") #,"CNT.CASE","CNT.CTRL");
Expand All @@ -109,7 +111,8 @@ if ( m > 0 ) {
#binary.add[,4] <- rowSums((matrix(pheno,nrow(G),ncol(G),byrow=T) == 0) * G * (1-ina), na.rm=T)/binary.add[,2]
#binary.add[binary.add[,1] == 0,3] <- NA
#binary.add[binary.add[,2] == 0,4] <- NA
} else {
}
else {
binary.cname <- NULL
binary.ncols <- 0
}
Expand All @@ -123,25 +126,17 @@ if ( m > 0 ) {
if ( minRSQ > 0 ) {
colnames(out) <- c("NS","AC","CALLRATE","MAF","PVALUE",r$cname,"RSQ",binary.cname);
out[,6+ncol(r$add)] <- rsq
} else {
}
else {
colnames(out) <- c("NS","AC","CALLRATE","MAF","PVALUE",r$cname,binary.cname);
}
out[vids,5] <- r$p
out[vids,5+1:ncol(r$add)] <- r$add
if ( binaryFlag ) {
out[,5+ncol(r$add)+1:4] <- binary.add
}
rownames(out) <- rnames

if (class(try(rnames[seq_along(rnames)], silent = TRUE)) == "try-error") {
tmp <- rownames(out)
goodnames <- sapply(seq_along(tmp), function (iname) {
class(try(tmp[iname], silent = TRUE)) != "try-error"
})
out <- out[which(goodnames), ]
cat(paste(sum(!goodnames), 'variants have been excluded due to error: "Value of SET_STRING_ELT() must be a \'CHARSXP\' not a \'integer\'"'), file = paste0(outf, ".log"))
} else {}

rownames(out) <- rownames(G)

print(warnings())
.Call("writeMatrix",out,outf)
} else {
Expand Down
Loading