Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions simulationScripts/commands.sh
Original file line number Diff line number Diff line change
Expand Up @@ -226,12 +226,25 @@ cd .. #
# Run Sailfish #
mkdir sailfish #
cd sailfish #
sailfish quant -i ../../transcriptome_data/indDir -p 4 -l "T=PE" -1 ../A1/out1.fq -2 ../A1/out2.fq -o A1 #
sailfish quant -i ../../transcriptome_data/indDir -p 4 -l "T=PE" -1 ../A2/out1.fq -2 ../A2/out2.fq -o A2 #
sailfish quant -i ../../transcriptome_data/indDir -p 4 -l "T=PE" -1 ../A3/out1.fq -2 ../A3/out2.fq -o A3 #
sailfish quant -i ../../transcriptome_data/indDir -p 4 -l "T=PE" -1 ../A4/out1.fq -2 ../A4/out2.fq -o A4 #
sailfish quant -i ../../transcriptome_data/indDir -p 4 -l "T=PE" -1 ../A5/out1.fq -2 ../A5/out2.fq -o A5 #
sailfish quant -i ../../transcriptome_data/indDir -p 4 -l IU -1 ../A1/out1.fq -2 ../A1/out2.fq -o A1 #
sailfish quant -i ../../transcriptome_data/indDir -p 4 -l IU -1 ../A2/out1.fq -2 ../A2/out2.fq -o A2 #
sailfish quant -i ../../transcriptome_data/indDir -p 4 -l IU -1 ../A3/out1.fq -2 ../A3/out2.fq -o A3 #
sailfish quant -i ../../transcriptome_data/indDir -p 4 -l IU -1 ../A4/out1.fq -2 ../A4/out2.fq -o A4 #
sailfish quant -i ../../transcriptome_data/indDir -p 4 -l IU -1 ../A5/out1.fq -2 ../A5/out2.fq -o A5 #
# Process output #
# Fix the target names
echo "fixing output names"
python ../../processSailfishNames.py --input A1/quant.sf --output A1/quant.fixed.sf
python ../../processSailfishNames.py --input A2/quant.sf --output A2/quant.fixed.sf
python ../../processSailfishNames.py --input A3/quant.sf --output A3/quant.fixed.sf
python ../../processSailfishNames.py --input A4/quant.sf --output A4/quant.fixed.sf
python ../../processSailfishNames.py --input A5/quant.sf --output A5/quant.fixed.sf
mv A1/quant.fixed.sf A1/quant.sf
mv A2/quant.fixed.sf A2/quant.sf
mv A3/quant.fixed.sf A3/quant.sf
mv A4/quant.fixed.sf A4/quant.sf
mv A5/quant.fixed.sf A5/quant.sf
echo "done"
R CMD BATCH ../../sailfishNames.R #
R CMD BATCH ../../withinGeneSailfish.R #
cd .. #
Expand Down
2 changes: 1 addition & 1 deletion simulationScripts/generate-known-fa-files.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ rm -r A1.tophat.out
cd transcriptome_data
rsem-prepare-reference --bowtie2 known.fa ref #this is for rsem
sed -e 's/\,.*//' known.fa > output.fasta #this is for making tigar names work
sailfish index -t known.fa -o indDir -k 20 -p 4 #this is for sailfish
sailfish index -t known.fa -o indDir -p 4 #this is for sailfish



33 changes: 33 additions & 0 deletions simulationScripts/processSailfishNames.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from __future__ import print_function
import argparse
import os
import sys
import re

def main(args):
ifile = args.input
if (not os.path.isfile(ifile)):
print('Could not find input file {}'.format(ifile), file=sys.stderr)
sys.exit(1)

lines = open(ifile).readlines()
headers = [l for l in lines if l.startswith('#')]
data = [l for l in lines if not l.startswith('#')]

with open(args.output, 'w') as ofile:
for l in headers:
ofile.write(l)
for l in data:
toks = l.rstrip('\n').split('\t')
name = re.split('\s+', toks[0])[0]
newline = '\t'.join([name] + toks[1:])
ofile.write("{}\n".format(newline))

print("successfully modified file", file=sys.stderr)

if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Process crazy transcript names')
parser.add_argument('--input', type=str, required=True, help='the quant.sf file')
parser.add_argument('--output', type=str, required=True, help='where the processed file should be placed')
args = parser.parse_args()
main(args)
20 changes: 10 additions & 10 deletions simulationScripts/sailfishNames.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,47 +9,47 @@ while (length(oneLine <- readLines(myFile, n = 1, warn = FALSE)) > 0){
}
close(myFile)
par(mfrow = c(2,3))
A1 <- read.table("A1/quant_bias_corrected.sf")
A1 <- read.table("A1/quant.sf", comment.char="#")
ind <- A1[,2]
groundTruth <- read.table("../A1/transcriptNames.txt")
sailA1 <- cbind(groundTruth,numeric(dim(groundTruth)[1]))
colnames(sailA1) <- c("id","true","est")
for (i in 1:dim(groundTruth)[1]){
j <- which(sailNames[,2] == as.character(sailA1[i,1]))
k <- which(A1[,1] == sailNames[j,1])
sailA1[i,3] <- A1[k,7]
sailA1[i,3] <- A1[k,4]
if(i%%1000 == 0){print(i)}
}
sailA1[,2] <- log(sailA1[,2]/sum(sailA1[,2]))
sailA1[,3] <- log(sailA1[,3]/sum(sailA1[,3]))
#smoothScatter(sailA1[,-1]);points(c(-30,10),c(-30,10),type = "l",col = 2)
write.table(sailA1,file = "sailA1.txt")

A1 <- read.table("A2/quant_bias_corrected.sf")
A1 <- read.table("A2/quant.sf", comment.char="#")
ind <- A1[,2]
groundTruth <- read.table("../A2/transcriptNames.txt")
sailA1 <- cbind(groundTruth,numeric(dim(groundTruth)[1]))
colnames(sailA1) <- c("id","true","est")
for (i in 1:dim(groundTruth)[1]){
j <- which(sailNames[,2] == as.character(sailA1[i,1]))
k <- which(A1[,1] == sailNames[j,1])
sailA1[i,3] <- A1[k,7]
sailA1[i,3] <- A1[k,4]
if(i%%1000 == 0){print(i)}
}
sailA1[,2] <- log(sailA1[,2]/sum(sailA1[,2]))
sailA1[,3] <- log(sailA1[,3]/sum(sailA1[,3]))
#smoothScatter(sailA1[,-1]);points(c(-30,10),c(-30,10),type = "l",col = 2)
write.table(sailA1,file = "sailA2.txt")

A1 <- read.table("A3/quant_bias_corrected.sf")
A1 <- read.table("A3/quant.sf", comment.char="#")
ind <- A1[,2]
groundTruth <- read.table("../A3/transcriptNames.txt")
sailA1 <- cbind(groundTruth,numeric(dim(groundTruth)[1]))
colnames(sailA1) <- c("id","true","est")
for (i in 1:dim(groundTruth)[1]){
j <- which(sailNames[,2] == as.character(sailA1[i,1]))
k <- which(A1[,1] == sailNames[j,1])
sailA1[i,3] <- A1[k,7]
sailA1[i,3] <- A1[k,4]
if(i%%1000 == 0){print(i)}
}
sailA1[,2] <- log(sailA1[,2]/sum(sailA1[,2]))
Expand All @@ -58,31 +58,31 @@ sailA1[,3] <- log(sailA1[,3]/sum(sailA1[,3]))
write.table(sailA1,file = "sailA3.txt")


A1 <- read.table("A4/quant_bias_corrected.sf")
A1 <- read.table("A4/quant.sf", comment.char="#")
ind <- A1[,2]
groundTruth <- read.table("../A4/transcriptNames.txt")
sailA1 <- cbind(groundTruth,numeric(dim(groundTruth)[1]))
colnames(sailA1) <- c("id","true","est")
for (i in 1:dim(groundTruth)[1]){
j <- which(sailNames[,2] == as.character(sailA1[i,1]))
k <- which(A1[,1] == sailNames[j,1])
sailA1[i,3] <- A1[k,7]
sailA1[i,3] <- A1[k,4]
if(i%%1000 == 0){print(i)}
}
sailA1[,2] <- log(sailA1[,2]/sum(sailA1[,2]))
sailA1[,3] <- log(sailA1[,3]/sum(sailA1[,3]))
#smoothScatter(sailA1[,-1]);points(c(-30,10),c(-30,10),type = "l",col = 2)
write.table(sailA1,file = "sailA4.txt")

A1 <- read.table("A5/quant_bias_corrected.sf")
A1 <- read.table("A5/quant.sf", comment.char="#")
ind <- A1[,2]
groundTruth <- read.table("../A5/transcriptNames.txt")
sailA1 <- cbind(groundTruth,numeric(dim(groundTruth)[1]))
colnames(sailA1) <- c("id","true","est")
for (i in 1:dim(groundTruth)[1]){
j <- which(sailNames[,2] == as.character(sailA1[i,1]))
k <- which(A1[,1] == sailNames[j,1])
sailA1[i,3] <- A1[k,7]
sailA1[i,3] <- A1[k,4]
if(i%%1000 == 0){print(i)}
}
sailA1[,2] <- log(sailA1[,2]/sum(sailA1[,2]))
Expand Down
4 changes: 2 additions & 2 deletions simulationScripts/set-path-variable.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ anaconda/1.9.1
rsem-1.2.15/
express/express-1.5.1-linux_x86_64
tigar2
export LD_LIBRARY_PATH=YOUR_SAILFISH_PATH_HERE/sailfish/Sailfish-0.6.3-Linux_x86-64/lib:$LD_LIBRARY_PATH
export PATH=YOUR_SAILFISH_PATH_HERE/sailfish/Sailfish-0.6.3-Linux_x86-64/bin:$PATH
export LD_LIBRARY_PATH=YOUR_SAILFISH_PATH_HERE/sailfish/Sailfish-0.7.3-Linux_x86-64/lib:$LD_LIBRARY_PATH
export PATH=YOUR_SAILFISH_PATH_HERE/sailfish/Sailfish-0.7.3-Linux_x86-64/bin:$PATH
compilers/gcc/4.8.2